From 2bc275e9b04f23708c495a8bfe92a5f4b65345c8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 5 Feb 2009 13:08:11 +0200 Subject: UBIFS: fix dbg_chk_lpt_sz() The debugging function dbg_chk_lpt_sz() was not working correctly for small min_io_unit size e.g. NOR flash. Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy --- fs/ubifs/lpt_commit.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 3216a1f277f8..27c97a1873d5 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -229,7 +229,7 @@ static int layout_cnodes(struct ubifs_info *c) while (offs + len > c->leb_size) { alen = ALIGN(offs, c->min_io_size); upd_ltab(c, lnum, c->leb_size - alen, alen - offs); - dbg_chk_lpt_sz(c, 2, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = alloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -272,7 +272,7 @@ static int layout_cnodes(struct ubifs_info *c) if (offs + c->lsave_sz > c->leb_size) { alen = ALIGN(offs, c->min_io_size); upd_ltab(c, lnum, c->leb_size - alen, alen - offs); - dbg_chk_lpt_sz(c, 2, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = alloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -292,7 +292,7 @@ static int layout_cnodes(struct ubifs_info *c) if (offs + c->ltab_sz > c->leb_size) { alen = ALIGN(offs, c->min_io_size); upd_ltab(c, lnum, c->leb_size - alen, alen - offs); - dbg_chk_lpt_sz(c, 2, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = alloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -416,9 +416,8 @@ static int write_cnodes(struct ubifs_info *c) alen, UBI_SHORTTERM); if (err) return err; - dbg_chk_lpt_sz(c, 4, alen - wlen); } - dbg_chk_lpt_sz(c, 2, 0); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = realloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -477,7 +476,7 @@ static int write_cnodes(struct ubifs_info *c) UBI_SHORTTERM); if (err) return err; - dbg_chk_lpt_sz(c, 2, alen - wlen); + 
dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = realloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -504,7 +503,7 @@ static int write_cnodes(struct ubifs_info *c) UBI_SHORTTERM); if (err) return err; - dbg_chk_lpt_sz(c, 2, alen - wlen); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = realloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -1756,10 +1755,16 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) /** * dbg_chk_lpt_sz - check LPT does not write more than LPT size. * @c: the UBIFS file-system description object - * @action: action + * @action: what to do * @len: length written * * This function returns %0 on success and a negative error code on failure. + * The @action argument may be one of: + * o %0 - LPT debugging checking starts, initialize debugging variables; + * o %1 - wrote an LPT node, increase LPT size by @len bytes; + * o %2 - switched to a different LEB and wasted @len bytes; + * o %3 - check that we've written the right number of bytes. + * o %4 - wasted @len bytes; */ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) { -- cgit v1.2.3 From ec32816f94a0baf90f5e73033dcdbc8679c7f91d Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Fri, 13 Feb 2009 09:13:11 +0100 Subject: UBIFS: list usage cleanup Trivial cleanup, list_del(); list_add{,_tail}() is equivalent to list_move{,_tail}(). 
Semantic patch for coccinelle can be found at www.cccmz.de/~snakebyte/list_move_tail.spatch Signed-off-by: Eric Sesterhenn Signed-off-by: Artem Bityutskiy --- fs/ubifs/log.c | 3 +-- fs/ubifs/shrinker.c | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 3e0aa7367556..1004261dc864 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -367,7 +367,6 @@ static void remove_buds(struct ubifs_info *c) bud->jhead, c->leb_size - bud->start, c->cmt_bud_bytes); rb_erase(p1, &c->buds); - list_del(&bud->list); /* * If the commit does not finish, the recovery will need * to replay the journal, in which case the old buds @@ -375,7 +374,7 @@ static void remove_buds(struct ubifs_info *c) * commit i.e. do not allow them to be garbage * collected. */ - list_add(&bud->list, &c->old_buds); + list_move(&bud->list, &c->old_buds); } } spin_unlock(&c->buds_lock); diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index e7bab52a1410..02feb59cefca 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int age, int *contention) * Move this one to the end of the list to provide some * fairness. */ - list_del(&c->infos_list); - list_add_tail(&c->infos_list, &ubifs_infos); + list_move_tail(&c->infos_list, &ubifs_infos); mutex_unlock(&c->umount_mutex); if (freed >= nr) break; @@ -263,8 +262,7 @@ static int kick_a_thread(void) } if (i == 1) { - list_del(&c->infos_list); - list_add_tail(&c->infos_list, &ubifs_infos); + list_move_tail(&c->infos_list, &ubifs_infos); spin_unlock(&ubifs_infos_lock); ubifs_request_bg_commit(c); -- cgit v1.2.3 From 3edaae7c5bda085b7dc704fe379f35b85e6f493e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 3 Mar 2009 19:22:53 +0200 Subject: UBIFS: improve find function interface Make 'ubifs_find_free_space()' return offset where free space starts, rather than the amount of free space. This is just more appropriat for its caller. 
Signed-off-by: Artem Bityutskiy --- fs/ubifs/find.c | 12 ++++++------ fs/ubifs/journal.c | 5 ++--- fs/ubifs/ubifs.h | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 717d79c97c5e..1d54383d1269 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, * ubifs_find_free_space - find a data LEB with free space. * @c: the UBIFS file-system description object * @min_space: minimum amount of required free space - * @free: contains amount of free space in the LEB on exit + * @offs: contains offset of where free space starts on exit * @squeeze: whether to try to find space in a non-empty LEB first * * This function looks for an LEB with at least @min_space bytes of free space. @@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, * failed to find a LEB with @min_space bytes of free space and other a negative * error codes in case of failure. */ -int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, int squeeze) { const struct ubifs_lprops *lprops; @@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, spin_unlock(&c->space_lock); } - *free = lprops->free; + *offs = c->leb_size - lprops->free; ubifs_release_lprops(c); - if (*free == c->leb_size) { + if (*offs == 0) { /* * Ensure that empty LEBs have been unmapped. They may not have * been, for example, because of an unclean unmount. 
Also @@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, return err; } - dbg_find("found LEB %d, free %d", lnum, *free); - ubifs_assert(*free >= min_space); + dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs); + ubifs_assert(*offs <= c->leb_size - min_space); return lnum; out: diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index a11ca0958a23..a2d334eccbca 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun) */ static int reserve_space(struct ubifs_info *c, int jhead, int len) { - int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze; + int err = 0, err1, retries = 0, avail, lnum, offs, squeeze; struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; /* @@ -139,10 +139,9 @@ again: * Write buffer wasn't seek'ed or there is no enough space - look for an * LEB with some empty space. */ - lnum = ubifs_find_free_space(c, len, &free, squeeze); + lnum = ubifs_find_free_space(c, len, &offs, squeeze); if (lnum >= 0) { /* Found an LEB, add it to the journal head */ - offs = c->leb_size - free; err = ubifs_add_bud_to_log(c, jhead, lnum, offs); if (err) goto out_return; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 039a68bee29a..2da1193a381f 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1500,7 +1500,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free); long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); /* find.c */ -int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, int squeeze); int ubifs_find_free_leb_for_idx(struct ubifs_info *c); int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, -- cgit v1.2.3 From cb4f952db3a01a2d56eb17e0eb00ce99ae5f0f50 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 7 Mar 2009 20:53:41 +0200 
Subject: UBIFS: amend key_hash return value ... which should be uint32_t, not int. Signed-off-by: Artem Bityutskiy --- fs/ubifs/key.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index efb3430a2581..5fa27ea031ba 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -381,8 +381,8 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) * @c: UBIFS file-system description object * @key: the key to get hash from */ -static inline int key_hash(const struct ubifs_info *c, - const union ubifs_key *key) +static inline uint32_t key_hash(const struct ubifs_info *c, + const union ubifs_key *key) { return key->u32[1] & UBIFS_S_KEY_HASH_MASK; } @@ -392,7 +392,7 @@ static inline int key_hash(const struct ubifs_info *c, * @c: UBIFS file-system description object * @k: the key to get hash from */ -static inline int key_hash_flash(const struct ubifs_info *c, const void *k) +static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k) { const union ubifs_key *key = k; -- cgit v1.2.3 From f55aa59106b66cd547c8f296e0b3430ad76554c5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 23 Feb 2009 12:47:25 +0200 Subject: UBIFS: fix bug where page is marked uptodate when out of space UBIFS fast path in write_begin may mark a page up to date and then discover that there may not be enough space to do the write, and so fall back to a slow path. The slow path tries harder, but may still find no space - leaving the page marked up to date, when it is not. This patch ensures that the page is marked not up to date in that case. The bug that this patch fixes becomes evident when the write is into a hole (sparse file) or is at the end of the file and a subsequent read is off the end of the file. In both cases, the file system should return zeros but was instead returning the page that had not been written because the file system was out of space. 
Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy --- fs/ubifs/file.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 93b6de51f261..4e7f0aca9ebc 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -430,6 +430,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, struct ubifs_inode *ui = ubifs_inode(inode); pgoff_t index = pos >> PAGE_CACHE_SHIFT; int uninitialized_var(err), appending = !!(pos + len > inode->i_size); + int skipped_read = 0; struct page *page; ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); @@ -444,7 +445,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, if (!PageUptodate(page)) { /* The page is not loaded from the flash */ - if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { /* * We change whole page so no need to load it. But we * have to set the @PG_checked flag to make the further @@ -453,7 +454,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * the media. */ SetPageChecked(page); - else { + skipped_read = 1; + } else { err = do_readpage(page); if (err) { unlock_page(page); @@ -469,6 +471,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, err = allocate_budget(c, page, ui, appending); if (unlikely(err)) { ubifs_assert(err == -ENOSPC); + /* + * If we skipped reading the page because we were going to + * write all of it, then it is not up to date. 
+ */ + if (skipped_read) { + ClearPageChecked(page); + ClearPageUptodate(page); + } /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the -- cgit v1.2.3 From b221337ae4ef9baff84d6d5ecb806e79a5597329 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 15 Mar 2009 17:20:22 +0200 Subject: UBIFS: fix bogus assertion Empty journal head LEBs are accounted as taken empty as well, so the GC LEB does not have to be the only taken empty LEB when nounting/remounting. Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1182b66a5491..03cd9ac4dcb2 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1318,11 +1318,15 @@ static int mount_ubifs(struct ubifs_info *c) else { c->need_recovery = 0; ubifs_msg("recovery completed"); - /* GC LEB has to be empty and taken at this point */ - ubifs_assert(c->lst.taken_empty_lebs == 1); + /* + * GC LEB has to be empty and taken at this point. But + * the journal head LEBs may also be accounted as + * "empty taken" if they are empty. + */ + ubifs_assert(c->lst.taken_empty_lebs > 0); } } else - ubifs_assert(c->lst.taken_empty_lebs == 1); + ubifs_assert(c->lst.taken_empty_lebs > 0); err = dbg_check_filesystem(c); if (err) @@ -1775,7 +1779,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) c->bu.buf = NULL; } - ubifs_assert(c->lst.taken_empty_lebs == 1); + ubifs_assert(c->lst.taken_empty_lebs > 0); return 0; } -- cgit v1.2.3 From 0a6fb8d9c435c612171b453449f98da28e9969a5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 14 Mar 2009 16:35:27 +0200 Subject: UBIFS: fix lprops committing bug When writing lprop nodes, do not forget to set @from to 0 when switching the LEB. 
This fixes the following bug: UBIFS error (pid 27768): ubifs_leb_write: writing -15456 bytes at 16:15880, error -22 UBIFS error (pid 27768): do_commit: commit failed, error -22 UBIFS warning (pid 27768): ubifs_ro_mode: switched to read-only mode, error -22 Pid: 27768, comm: freespace Not tainted 2.6.29-rc4-ubifs-2.6 #43 Call Trace: [] ubifs_ro_mode+0x54/0x56 [ubifs] [] do_commit+0x4f5/0x50a [ubifs] [] ubifs_run_commit+0xbc/0xdb [ubifs] [] ubifs_budget_space+0x742/0x9ed [ubifs] [] ? __mutex_lock_common+0x361/0x3ae [] ? ubifs_write_begin+0x18d/0x44c [ubifs] [] ubifs_write_begin+0x321/0x44c [ubifs] [] ? trace_hardirqs_on_caller+0x1f/0x14d [] generic_file_buffered_write+0x12f/0x2d9 [] __generic_file_aio_write_nolock+0x261/0x295 [] generic_file_aio_write+0x69/0xc5 [] ubifs_aio_write+0x14c/0x19e [ubifs] [] do_sync_write+0xe7/0x12d [] ? autoremove_wake_function+0x0/0x38 [] ? security_file_permission+0x11/0x13 [] vfs_write+0xab/0x105 [] sys_write+0x47/0x6f [] system_call_fastpath+0x16/0x1b Signed-off-by: Artem Bityutskiy --- fs/ubifs/lpt_commit.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 27c97a1873d5..1bead5a6d80a 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -421,8 +421,7 @@ static int write_cnodes(struct ubifs_info *c) err = realloc_lpt_leb(c, &lnum); if (err) goto no_space; - offs = 0; - from = 0; + offs = from = 0; ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); err = ubifs_leb_unmap(c, lnum); @@ -480,7 +479,7 @@ static int write_cnodes(struct ubifs_info *c) err = realloc_lpt_leb(c, &lnum); if (err) goto no_space; - offs = 0; + offs = from = 0; ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); err = ubifs_leb_unmap(c, lnum); @@ -507,7 +506,7 @@ static int write_cnodes(struct ubifs_info *c) err = realloc_lpt_leb(c, &lnum); if (err) goto no_space; - offs = 0; + offs = from = 0; ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); err = 
ubifs_leb_unmap(c, lnum); -- cgit v1.2.3 From c9927c3ee2d3d14893efd793a2a9ea772ddb4289 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 16 Mar 2009 09:42:03 +0200 Subject: UBIFS: use KERN_CONT Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.c | 4 ++-- fs/ubifs/lpt_commit.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index e975bd82f38b..93f6532eff00 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -479,9 +479,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) "bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) - printk("%c", dent->name[i]); + printk(KERN_CONT "%c", dent->name[i]); } - printk("\n"); + printk(KERN_CONT "\n"); break; } diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 1bead5a6d80a..9d77f68b2f8e 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -1,4 +1,4 @@ -/* + /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. @@ -1921,12 +1921,12 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) lnum, offs); err = ubifs_unpack_nnode(c, buf, &nnode); for (i = 0; i < UBIFS_LPT_FANOUT; i++) { - printk("%d:%d", nnode.nbranch[i].lnum, + printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, nnode.nbranch[i].offs); if (i != UBIFS_LPT_FANOUT - 1) - printk(", "); + printk(KERN_CONT ", "); } - printk("\n"); + printk(KERN_CONT "\n"); break; } case UBIFS_LPT_LTAB: -- cgit v1.2.3 From fb1cd01a33ecb8a49d590c034ba146dff80c5597 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 16 Mar 2009 09:56:57 +0200 Subject: UBIFS: introduce a helpful variable This patch introduces a helpful @c->idx_leb_size variable. The patch also fixes some spelling issues and makes comments use "LEB" instead of "eraseblock", which is more correct. 
Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 33 +++++++++++++++------------------ fs/ubifs/sb.c | 1 - fs/ubifs/super.c | 3 +++ fs/ubifs/ubifs.h | 7 +++++-- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index f393620890ee..8cd425b628ee 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -194,29 +194,26 @@ static int make_free_space(struct ubifs_info *c) } /** - * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. + * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index. * @c: UBIFS file-system description object * - * This function calculates and returns the number of eraseblocks which should - * be kept for index usage. + * This function calculates and returns the number of LEBs which should be kept + * for index usage. */ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) { - int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; + int idx_lebs; long long idx_size; idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; - /* And make sure we have thrice the index size of space reserved */ - idx_size = idx_size + (idx_size << 1); - + idx_size += idx_size << 1; /* * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' * pair, nor similarly the two variables for the new index size, so we * have to do this costly 64-bit division on fast-path. */ - idx_size += eff_leb_size - 1; - idx_lebs = div_u64(idx_size, eff_leb_size); + idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size); /* * The index head is not available for the in-the-gaps method, so add an * extra LEB to compensate. @@ -310,15 +307,15 @@ static int can_use_rp(struct ubifs_info *c) * do_budget_space - reserve flash space for index and data growth. * @c: UBIFS file-system description object * - * This function makes sure UBIFS has enough free eraseblocks for index growth - * and data. 
+ * This function makes sure UBIFS has enough free LEBs for index growth and + * data. * * When budgeting index space, UBIFS reserves thrice as many LEBs as the index * would take if it was consolidated and written to the flash. This guarantees * that the "in-the-gaps" commit method always succeeds and UBIFS will always * be able to commit dirty index. So this function basically adds amount of * budgeted index space to the size of the current index, multiplies this by 3, - * and makes sure this does not exceed the amount of free eraseblocks. + * and makes sure this does not exceed the amount of free LEBs. * * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might @@ -695,12 +692,12 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) * This function calculates amount of free space to report to user-space. * * Because UBIFS may introduce substantial overhead (the index, node headers, - * alignment, wastage at the end of eraseblocks, etc), it cannot report real - * amount of free flash space it has (well, because not all dirty space is - * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, - * it would bread user expectations about what free space is. Users seem to - * accustomed to assume that if the file-system reports N bytes of free space, - * they would be able to fit a file of N bytes to the FS. This almost works for + * alignment, wastage at the end of LEBs, etc), it cannot report real amount of + * free flash space it has (well, because not all dirty space is reclaimable, + * UBIFS does not actually know the real amount). If UBIFS did so, it would + * bread user expectations about what free space is. Users seem to accustomed + * to assume that if the file-system reports N bytes of free space, they would + * be able to fit a file of N bytes to the FS. 
This almost works for * traditional file-systems, because they have way less overhead than UBIFS. * So, to keep users happy, UBIFS tries to take the overhead into account. */ diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index e070c643d1bb..0dec47c87c6d 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -623,7 +623,6 @@ int ubifs_read_superblock(struct ubifs_info *c) c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; c->main_first = c->leb_cnt - c->main_lebs; - c->report_rp_size = ubifs_reported_space(c, c->rp_size); err = validate_sb(c, sup); out: diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 03cd9ac4dcb2..7bdd248ec770 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -700,6 +700,8 @@ static int init_constants_sb(struct ubifs_info *c) if (err) return err; + /* Initialize effective LEB size used in budgeting calculations */ + c->idx_leb_size = c->leb_size - c->max_idx_node_sz; return 0; } @@ -716,6 +718,7 @@ static void init_constants_master(struct ubifs_info *c) long long tmp64; c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->report_rp_size = ubifs_reported_space(c, c->rp_size); /* * Calculate total amount of FS blocks. 
This number is not used diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 2da1193a381f..a53b9a6df2be 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1015,6 +1015,8 @@ struct ubifs_debug_info; * @min_io_shift: number of bits in @min_io_size minus one * @leb_size: logical eraseblock size in bytes * @half_leb_size: half LEB size + * @idx_leb_size: how many bytes of an LEB are effectively available when it is + * used to store indexing nodes (@leb_size - @max_idx_node_sz) * @leb_cnt: count of logical eraseblocks * @max_leb_cnt: maximum count of logical eraseblocks * @old_leb_cnt: count of logical eraseblocks before re-size @@ -1132,8 +1134,8 @@ struct ubifs_debug_info; * previous commit start * @uncat_list: list of un-categorized LEBs * @empty_list: list of empty LEBs - * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) - * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) + * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) + * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) * @freeable_cnt: number of freeable LEBs in @freeable_list * * @ltab_lnum: LEB number of LPT's own lprops table @@ -1253,6 +1255,7 @@ struct ubifs_info { int min_io_shift; int leb_size; int half_leb_size; + int idx_leb_size; int leb_cnt; int max_leb_cnt; int old_leb_cnt; -- cgit v1.2.3 From 7d4e9ccb435e51e013e63abd340b4f496428139c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 20 Mar 2009 19:11:12 +0200 Subject: UBIFS: fix commentaries Signed-off-by: Artem Bityutskiy --- fs/ubifs/budget.c | 4 ++-- fs/ubifs/debug.c | 2 +- fs/ubifs/file.c | 2 +- fs/ubifs/journal.c | 2 +- fs/ubifs/log.c | 2 +- fs/ubifs/lpt_commit.c | 2 +- fs/ubifs/replay.c | 2 +- fs/ubifs/tnc.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 8cd425b628ee..af1914462f02 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -322,8 +322,8 @@ static 
int can_use_rp(struct ubifs_info *c) * be large, because UBIFS does not do any index consolidation as long as * there is free space. IOW, the index may take a lot of LEBs, but the LEBs * will contain a lot of dirt. - * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be - * consolidated to take up to @c->min_idx_lebs LEBs. + * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, + * the index may be consolidated to take up to @c->min_idx_lebs LEBs. * * This function returns zero in case of success, and %-ENOSPC in case of * failure. diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 93f6532eff00..ce2cd8343618 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -1214,7 +1214,7 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) /* * Make sure the last key in our znode is less or - * equivalent than the the key in zbranch which goes + * equivalent than the key in the zbranch which goes * after our pointing zbranch. */ cmp = keys_cmp(c, max, diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4e7f0aca9ebc..4e256b8f56b2 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -959,7 +959,7 @@ static int do_writepage(struct page *page, int len) * whole index and correct all inode sizes, which is long an unacceptable. * * To prevent situations like this, UBIFS writes pages back only if they are - * within last synchronized inode size, i.e. the the size which has been + * within the last synchronized inode size, i.e. the size which has been * written to the flash media last time. Otherwise, UBIFS forces inode * write-back, thus making sure the on-flash inode contains current inode size, * and then keeps writing pages back. 
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index a2d334eccbca..64b5f3a309f5 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1365,7 +1365,7 @@ out_ro: * @host: host inode * * This function writes the updated version of an extended attribute inode and - * the host inode tho the journal (to the base head). The host inode is written + * the host inode to the journal (to the base head). The host inode is written * after the extended attribute inode in order to guarantee that the extended * attribute will be flushed when the inode is synchronized by 'fsync()' and * consequently, the write-buffer is synchronized. This function returns zero diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 1004261dc864..56e33772a1ee 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -239,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) } /* - * Make sure the the amount of space in buds will not exceed + * Make sure the amount of space in buds will not exceed the * 'c->max_bud_bytes' limit, because we want to guarantee mount time * limits. * diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 9d77f68b2f8e..8cbfb8248025 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -1,4 +1,4 @@ - /* +/* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index ce42a7b0ca5a..11cc80125a49 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -143,7 +143,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) dirty -= c->leb_size - lp->free; /* * If the replay order was perfect the dirty space would now be - * zero. The order is not perfect because the the journal heads + * zero. The order is not perfect because the journal heads * race with each other. This is not a problem but is does mean * that the dirty space may temporarily exceed c->leb_size * during the replay. 
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index fa28a84c6a1b..f249f7b0d656 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -1252,7 +1252,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, * splitting in the middle of the colliding sequence. Also, when * removing the leftmost key, we would have to correct the key of the * parent node, which would introduce additional complications. Namely, - * if we changed the the leftmost key of the parent znode, the garbage + * if we changed the leftmost key of the parent znode, the garbage * collector would be unable to find it (GC is doing this when GC'ing * indexing LEBs). Although we already have an additional RB-tree where * we save such changed znodes (see 'ins_clr_old_idx_znode()') until -- cgit v1.2.3 From f10770f5e56b4297701fd7c3e551b206f98d7ac2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 8 Mar 2009 15:13:00 +0200 Subject: UBIFS: fully sort GCed nodes The 'joinup()' function cannot deal with situations when nodes go in reverse order - it just leaves them in this order. This patch implement full nodes sorting using n*log(n) algorithm. It sorts data nodes for bulk-read, and direntry nodes for readdir(). Signed-off-by: Artem Bityutskiy --- fs/ubifs/gc.c | 428 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 296 insertions(+), 132 deletions(-) diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index a711d33b3d3e..f0f5f15d384e 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -47,7 +47,7 @@ * have to waste large pieces of free space at the end of LEB B, because nodes * from LEB A would not fit. And the worst situation is when all nodes are of * maximum size. So dark watermark is the amount of free + dirty space in LEB - * which are guaranteed to be reclaimable. If LEB has less space, the GC migh + * which are guaranteed to be reclaimable. If LEB has less space, the GC might * be unable to reclaim it. 
So, LEBs with free + dirty greater than dark * watermark are "good" LEBs from GC's point of few. The other LEBs are not so * good, and GC takes extra care when moving them. @@ -56,14 +56,6 @@ #include #include "ubifs.h" -/* - * GC tries to optimize the way it fit nodes to available space, and it sorts - * nodes a little. The below constants are watermarks which define "large", - * "medium", and "small" nodes. - */ -#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4) -#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ - /* * GC may need to move more than one LEB to make progress. The below constants * define "soft" and "hard" limits on the number of LEBs the garbage collector @@ -116,83 +108,222 @@ static int switch_gc_head(struct ubifs_info *c) } /** - * joinup - bring data nodes for an inode together. - * @c: UBIFS file-system description object - * @sleb: describes scanned LEB - * @inum: inode number - * @blk: block number - * @data: list to which to add data nodes + * list_sort - sort a list. + * @priv: private data, passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function * - * This function looks at the first few nodes in the scanned LEB @sleb and adds - * them to @data if they are data nodes from @inum and have a larger block - * number than @blk. This function returns %0 on success and a negative error - * code on failure. + * This function has been implemented by Mark J Roberts . It + * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted + * in ascending order. + * + * The comparison function @cmp is supposed to return a negative value if @a is + * less than @b, and a positive value if @a is greater than @b. If @a and @b are + * equivalent, then it does not matter what this function returns. 
*/ -static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum, - unsigned int blk, struct list_head *data) +static void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)) { - int err, cnt = 6, lnum = sleb->lnum, offs; - struct ubifs_scan_node *snod, *tmp; - union ubifs_key *key; + struct list_head *p, *q, *e, *list, *tail, *oldhead; + int insize, nmerges, psize, qsize, i; + + if (list_empty(head)) + return; + + list = head->next; + list_del(head); + insize = 1; + for (;;) { + p = oldhead = list; + list = tail = NULL; + nmerges = 0; + + while (p) { + nmerges++; + q = p; + psize = 0; + for (i = 0; i < insize; i++) { + psize++; + q = q->next == oldhead ? NULL : q->next; + if (!q) + break; + } - list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { - key = &snod->key; - if (key_inum(c, key) == inum && - key_type(c, key) == UBIFS_DATA_KEY && - key_block(c, key) > blk) { - offs = snod->offs; - err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0); - if (err < 0) - return err; - list_del(&snod->list); - if (err) { - list_add_tail(&snod->list, data); - blk = key_block(c, key); - } else - kfree(snod); - cnt = 6; - } else if (--cnt == 0) + qsize = insize; + while (psize > 0 || (qsize > 0 && q)) { + if (!psize) { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } else if (!qsize || !q) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else if (cmp(priv, p, q) <= 0) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } + if (tail) + tail->next = e; + else + list = e; + e->prev = tail; + tail = e; + } + p = q; + } + + tail->next = list; + list->prev = tail; + + if (nmerges <= 1) break; + + insize *= 2; } - return 0; + + head->next = list; + head->prev = list->prev; + list->prev->next = head; + list->prev = head; } /** - * move_nodes - move nodes. 
+ * data_nodes_cmp - compare 2 data nodes. + * @priv: UBIFS file-system description object + * @a: first data node + * @b: second data node + * + * This function compares data nodes @a and @b. Returns %1 if @a has greater + * inode or block number, and %-1 otherwise. + */ +int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + ino_t inuma, inumb; + struct ubifs_info *c = priv; + struct ubifs_scan_node *sa, *sb; + + cond_resched(); + sa = list_entry(a, struct ubifs_scan_node, list); + sb = list_entry(b, struct ubifs_scan_node, list); + ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); + ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); + + inuma = key_inum(c, &sa->key); + inumb = key_inum(c, &sb->key); + + if (inuma == inumb) { + unsigned int blka = key_block(c, &sa->key); + unsigned int blkb = key_block(c, &sb->key); + + if (blka <= blkb) + return -1; + } else if (inuma <= inumb) + return -1; + + return 1; +} + +/* + * nondata_nodes_cmp - compare 2 non-data nodes. + * @priv: UBIFS file-system description object + * @a: first node + * @b: second node + * + * This function compares nodes @a and @b. It makes sure that inode nodes go + * first and are sorted by length in descending order. Directory entry nodes go + * after inode nodes and are sorted in ascending hash value order. 
+ */ +int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + int typea, typeb; + ino_t inuma, inumb; + struct ubifs_info *c = priv; + struct ubifs_scan_node *sa, *sb; + + cond_resched(); + sa = list_entry(a, struct ubifs_scan_node, list); + sb = list_entry(b, struct ubifs_scan_node, list); + typea = key_type(c, &sa->key); + typeb = key_type(c, &sb->key); + ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY); + + /* Inodes go before directory entries */ + if (typea == UBIFS_INO_KEY) { + if (typeb == UBIFS_INO_KEY) + return sb->len - sa->len; + return -1; + } + if (typeb == UBIFS_INO_KEY) + return 1; + + ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY); + inuma = key_inum(c, &sa->key); + inumb = key_inum(c, &sb->key); + + if (inuma == inumb) { + uint32_t hasha = key_hash(c, &sa->key); + uint32_t hashb = key_hash(c, &sb->key); + + if (hasha <= hashb) + return -1; + } else if (inuma <= inumb) + return -1; + + return 1; +} + +/** + * sort_nodes - sort nodes for GC. * @c: UBIFS file-system description object - * @sleb: describes nodes to move + * @sleb: describes nodes to sort and contains the result on exit + * @nondata: contains non-data nodes on exit + * @min: minimum node size is returned here * - * This function moves valid nodes from data LEB described by @sleb to the GC - * journal head. The obsolete nodes are dropped. + * This function sorts the list of inodes to garbage collect. First of all, it + * kills obsolete nodes and separates data and non-data nodes to the + * @sleb->nodes and @nondata lists correspondingly. 
+ * + * Data nodes are then sorted in block number order - this is important for + * bulk-read; data nodes with lower inode number go before data nodes with + * higher inode number, and data nodes with lower block number go before data + * nodes with higher block number; * - * When moving nodes we have to deal with classical bin-packing problem: the - * space in the current GC journal head LEB and in @c->gc_lnum are the "bins", - * where the nodes in the @sleb->nodes list are the elements which should be - * fit optimally to the bins. This function uses the "first fit decreasing" - * strategy, although it does not really sort the nodes but just split them on - * 3 classes - large, medium, and small, so they are roughly sorted. + * Non-data nodes are sorted as follows. + * o First go inode nodes - they are sorted in descending length order. + * o Then go directory entry nodes - they are sorted in hash order, which + * should supposedly optimize 'readdir()'. Direntry nodes with lower parent + * inode number go before direntry nodes with higher parent inode number, + * and direntry nodes with lower name hash values go before direntry nodes + * with higher name hash values. * - * This function returns zero in case of success, %-EAGAIN if commit is - * required, and other negative error codes in case of other failures. + * This function returns zero in case of success and a negative error code in + * case of failure. 
*/ -static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) +static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + struct list_head *nondata, int *min) { struct ubifs_scan_node *snod, *tmp; - struct list_head data, large, medium, small; - struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; - int avail, err, min = INT_MAX; - unsigned int blk = 0; - ino_t inum = 0; - INIT_LIST_HEAD(&data); - INIT_LIST_HEAD(&large); - INIT_LIST_HEAD(&medium); - INIT_LIST_HEAD(&small); + *min = INT_MAX; - while (!list_empty(&sleb->nodes)) { - struct list_head *lst = sleb->nodes.next; - - snod = list_entry(lst, struct ubifs_scan_node, list); + /* Separate data nodes and non-data nodes */ + list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { + int err; ubifs_assert(snod->type != UBIFS_IDX_NODE); ubifs_assert(snod->type != UBIFS_REF_NODE); @@ -201,53 +332,72 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, snod->offs, 0); if (err < 0) - goto out; + return err; - list_del(lst); if (!err) { /* The node is obsolete, remove it from the list */ + list_del(&snod->list); kfree(snod); continue; } - /* - * Sort the list of nodes so that data nodes go first, large - * nodes go second, and small nodes go last. - */ - if (key_type(c, &snod->key) == UBIFS_DATA_KEY) { - if (inum != key_inum(c, &snod->key)) { - if (inum) { - /* - * Try to move data nodes from the same - * inode together. 
- */ - err = joinup(c, sleb, inum, blk, &data); - if (err) - goto out; - } - inum = key_inum(c, &snod->key); - blk = key_block(c, &snod->key); - } - list_add_tail(lst, &data); - } else if (snod->len > MEDIUM_NODE_WM) - list_add_tail(lst, &large); - else if (snod->len > SMALL_NODE_WM) - list_add_tail(lst, &medium); - else - list_add_tail(lst, &small); - - /* And find the smallest node */ - if (snod->len < min) - min = snod->len; + if (snod->len < *min) + *min = snod->len; + + if (key_type(c, &snod->key) != UBIFS_DATA_KEY) + list_move_tail(&snod->list, nondata); } - /* - * Join the tree lists so that we'd have one roughly sorted list - * ('large' will be the head of the joined list). - */ - list_splice(&data, &large); - list_splice(&medium, large.prev); - list_splice(&small, large.prev); + /* Sort data and non-data nodes */ + list_sort(c, &sleb->nodes, &data_nodes_cmp); + list_sort(c, nondata, &nondata_nodes_cmp); + return 0; +} + +/** + * move_node - move a node. + * @c: UBIFS file-system description object + * @sleb: describes the LEB to move nodes from + * @snod: the node to move + * @wbuf: write-buffer to move node to + * + * This function moves node @snod to @wbuf, changes TNC correspondingly, and + * destroys @snod. Returns zero in case of success and a negative error code in + * case of failure. + */ +static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf) +{ + int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used; + + cond_resched(); + err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len); + if (err) + return err; + + err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, + snod->offs, new_lnum, new_offs, + snod->len); + list_del(&snod->list); + kfree(snod); + return err; +} + +/** + * move_nodes - move nodes. 
+ * @c: UBIFS file-system description object + * @sleb: describes the LEB to move nodes from + * + * This function moves valid nodes from data LEB described by @sleb to the GC + * journal head. This function returns zero in case of success, %-EAGAIN if + * commit is required, and other negative error codes in case of other + * failures. + */ +static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) +{ + int err, min; + LIST_HEAD(nondata); + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; if (wbuf->lnum == -1) { /* @@ -256,42 +406,59 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) */ err = switch_gc_head(c); if (err) - goto out; + return err; } + err = sort_nodes(c, sleb, &nondata, &min); + if (err) + goto out; + /* Write nodes to their new location. Use the first-fit strategy */ while (1) { - avail = c->leb_size - wbuf->offs - wbuf->used; - list_for_each_entry_safe(snod, tmp, &large, list) { - int new_lnum, new_offs; + int avail; + struct ubifs_scan_node *snod, *tmp; + + /* Move data nodes */ + list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { + avail = c->leb_size - wbuf->offs - wbuf->used; + if (snod->len > avail) + /* + * Do not skip data nodes in order to optimize + * bulk-read. + */ + break; + + err = move_node(c, sleb, snod, wbuf); + if (err) + goto out; + } + /* Move non-data nodes */ + list_for_each_entry_safe(snod, tmp, &nondata, list) { + avail = c->leb_size - wbuf->offs - wbuf->used; if (avail < min) break; - if (snod->len > avail) - /* This node does not fit */ + if (snod->len > avail) { + /* + * Keep going only if this is an inode with + * some data. Otherwise stop and switch the GC + * head. IOW, we assume that data-less inode + * nodes and direntry nodes are roughly of the + * same size. 
+ */ + if (key_type(c, &snod->key) == UBIFS_DENT_KEY || + snod->len == UBIFS_INO_NODE_SZ) + break; continue; + } - cond_resched(); - - new_lnum = wbuf->lnum; - new_offs = wbuf->offs + wbuf->used; - err = ubifs_wbuf_write_nolock(wbuf, snod->node, - snod->len); + err = move_node(c, sleb, snod, wbuf); if (err) goto out; - err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, - snod->offs, new_lnum, new_offs, - snod->len); - if (err) - goto out; - - avail = c->leb_size - wbuf->offs - wbuf->used; - list_del(&snod->list); - kfree(snod); } - if (list_empty(&large)) + if (list_empty(&sleb->nodes) && list_empty(&nondata)) break; /* @@ -306,10 +473,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) return 0; out: - list_for_each_entry_safe(snod, tmp, &large, list) { - list_del(&snod->list); - kfree(snod); - } + list_splice_tail(&nondata, &sleb->nodes); return err; } -- cgit v1.2.3 From fcabb3479e2b15abfd2d2ef5363295f16e98b2d7 Mon Sep 17 00:00:00 2001 From: Hunter Adrian Date: Wed, 18 Mar 2009 12:29:39 +0100 Subject: UBIFS: fix compiler warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fs/ubifs/super.c: In function ‘ubifs_show_options’: fs/ubifs/super.c:425: warning: format not a string literal and no format arguments fs/ubifs/super.c: In function ‘mount_ubifs’: fs/ubifs/super.c:1204: warning: format not a string literal and no format arguments fs/ubifs/super.c: In function ‘ubifs_remount_rw’: fs/ubifs/super.c:1557: warning: format not a string literal and no format arguments Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy --- fs/ubifs/super.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 7bdd248ec770..372c7fb66531 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -421,8 +421,8 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) seq_printf(s, ",no_chk_data_crc"); if 
(c->mount_opts.override_compr) { - seq_printf(s, ",compr="); - seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type)); + seq_printf(s, ",compr=%s", + ubifs_compr_name(c->mount_opts.compr_type)); } return 0; @@ -1204,7 +1204,7 @@ static int mount_ubifs(struct ubifs_info *c) goto out_cbuf; /* Create background thread */ - c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); if (IS_ERR(c->bgt)) { err = PTR_ERR(c->bgt); c->bgt = NULL; @@ -1561,7 +1561,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) ubifs_create_buds_lists(c); /* Create background thread */ - c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); if (IS_ERR(c->bgt)) { err = PTR_ERR(c->bgt); c->bgt = NULL; -- cgit v1.2.3 From 963f0cf6d116d83c558a8efe9045c1c5ad7aed34 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 26 Mar 2009 12:51:21 +0200 Subject: UBIFS: add R/O compatibility Now UBIFS is supported by u-boot. If we ever decide to change the media format, then people will have to upgrade their u-boots to mount new format images. However, very often it is possible to preserve R/O forward-compatibility, even though the write forward-compatibility is not preserved. This patch introduces a new super-block field which stores the R/O compatibility version. 
Signed-off-by: Artem Bityutskiy Acked-by: Adrian Hunter --- fs/ubifs/sb.c | 35 +++++++++++++++++++++++++++++------ fs/ubifs/super.c | 14 ++++++++++++-- fs/ubifs/ubifs-media.h | 30 +++++++++++++++++++++++++++--- fs/ubifs/ubifs.h | 4 ++++ 4 files changed, 72 insertions(+), 11 deletions(-) diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 0dec47c87c6d..57085e43320f 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -193,6 +193,7 @@ static int create_default_filesystem(struct ubifs_info *c) if (tmp64 > DEFAULT_MAX_RP_SIZE) tmp64 = DEFAULT_MAX_RP_SIZE; sup->rp_size = cpu_to_le64(tmp64); + sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION); err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); kfree(sup); @@ -532,17 +533,39 @@ int ubifs_read_superblock(struct ubifs_info *c) if (IS_ERR(sup)) return PTR_ERR(sup); + c->fmt_version = le32_to_cpu(sup->fmt_version); + c->ro_compat_version = le32_to_cpu(sup->ro_compat_version); + /* * The software supports all previous versions but not future versions, * due to the unavailability of time-travelling equipment. 
*/ - c->fmt_version = le32_to_cpu(sup->fmt_version); if (c->fmt_version > UBIFS_FORMAT_VERSION) { - ubifs_err("on-flash format version is %d, but software only " - "supports up to version %d", c->fmt_version, - UBIFS_FORMAT_VERSION); - err = -EINVAL; - goto out; + struct super_block *sb = c->vfs_sb; + int mounting_ro = sb->s_flags & MS_RDONLY; + + ubifs_assert(!c->ro_media || mounting_ro); + if (!mounting_ro || + c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { + ubifs_err("on-flash format version is w%d/r%d, but " + "software only supports up to version " + "w%d/r%d", c->fmt_version, + c->ro_compat_version, UBIFS_FORMAT_VERSION, + UBIFS_RO_COMPAT_VERSION); + if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { + ubifs_msg("only R/O mounting is possible"); + err = -EROFS; + } else + err = -EINVAL; + goto out; + } + + /* + * The FS is mounted R/O, and the media format is + * R/O-compatible with the UBIFS implementation, so we can + * mount. + */ + c->rw_incompat = 1; } if (c->fmt_version < 3) { diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 372c7fb66531..302a2056422e 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1351,8 +1351,9 @@ static int mount_ubifs(struct ubifs_info *c) x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); - ubifs_msg("media format: %d (latest is %d)", - c->fmt_version, UBIFS_FORMAT_VERSION); + ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", + c->fmt_version, c->ro_compat_version, + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); ubifs_msg("reserved for root: %llu bytes (%llu KiB)", c->report_rp_size, c->report_rp_size >> 10); @@ -1492,6 +1493,15 @@ static int ubifs_remount_rw(struct ubifs_info *c) { int err, lnum; + if (c->rw_incompat) { + ubifs_err("the file-system is not R/W-compatible"); + ubifs_msg("on-flash 
format version is w%d/r%d, but software " + "only supports up to version w%d/r%d", c->fmt_version, + c->ro_compat_version, UBIFS_FORMAT_VERSION, + UBIFS_RO_COMPAT_VERSION); + return -EROFS; + } + mutex_lock(&c->umount_mutex); dbg_save_space_info(c); c->remounting_rw = 1; diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index b25fc36cf72f..3eee07e0c495 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -36,9 +36,31 @@ /* UBIFS node magic number (must not have the padding byte first or last) */ #define UBIFS_NODE_MAGIC 0x06101831 -/* UBIFS on-flash format version */ +/* + * UBIFS on-flash format version. This version is increased when the on-flash + * format is changing. If this happens, UBIFS will support older versions as + * well. But older UBIFS code will not support newer formats. Format changes + * will be rare and only when absolutely necessary, e.g. to fix a bug or to add + * a new feature. + * + * UBIFS went into mainline kernel with format version 4. The older formats + * were development formats. + */ #define UBIFS_FORMAT_VERSION 4 +/* + * Read-only compatibility version. If the UBIFS format is changed, older UBIFS + * implementations will not be able to mount newer formats in read-write mode. + * However, depending on the change, it may be possible to mount newer formats + * in R/O mode. This is indicated by the R/O compatibility version which is + * stored in the super-block. + * + * This is needed to support boot-loaders which only need R/O mounting. With + * this flag it is possible to do UBIFS format changes without a need to update + * boot-loaders. 
+ */ +#define UBIFS_RO_COMPAT_VERSION 0 + /* Minimum logical eraseblock size in bytes */ #define UBIFS_MIN_LEB_SZ (15*1024) @@ -53,7 +75,7 @@ /* * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes - * shorter than uncompressed data length, UBIFS preferes to leave this data + * shorter than uncompressed data length, UBIFS prefers to leave this data * node uncompress, because it'll be read faster. */ #define UBIFS_MIN_COMPRESS_DIFF 64 @@ -586,6 +608,7 @@ struct ubifs_pad_node { * @padding2: reserved for future, zeroes * @time_gran: time granularity in nanoseconds * @uuid: UUID generated when the file system image was created + * @ro_compat_version: UBIFS R/O compatibility version */ struct ubifs_sb_node { struct ubifs_ch ch; @@ -612,7 +635,8 @@ struct ubifs_sb_node { __le64 rp_size; __le32 time_gran; __u8 uuid[16]; - __u8 padding2[3972]; + __le32 ro_compat_version; + __u8 padding2[3968]; } __attribute__ ((packed)); /** diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index a53b9a6df2be..0a8341e14088 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -934,6 +934,7 @@ struct ubifs_debug_info; * by @commit_sem * @cnt_lock: protects @highest_inum and @max_sqnum counters * @fmt_version: UBIFS on-flash format version + * @ro_compat_version: R/O compatibility version * @uuid: UUID from super block * * @lhead_lnum: log head logical eraseblock number @@ -966,6 +967,7 @@ struct ubifs_debug_info; * recovery) * @bulk_read: enable bulk-reads * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) + * @rw_incompat: the media is not R/W compatible * * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and * @calc_idx_sz @@ -1179,6 +1181,7 @@ struct ubifs_info { unsigned long long cmt_no; spinlock_t cnt_lock; int fmt_version; + int ro_compat_version; unsigned char uuid[16]; int lhead_lnum; @@ -1207,6 +1210,7 @@ struct ubifs_info { unsigned int no_chk_data_crc:1; unsigned int bulk_read:1; unsigned int default_compr:2; + 
unsigned int rw_incompat:1; struct mutex tnc_mutex; struct ubifs_zbranch zroot; -- cgit v1.2.3 From de0975781a1a8bc92e07eb7681d10ef9bb5e6df9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Mar 2009 11:09:04 +0100 Subject: UBIFS: fix recovery bug UBIFS did not recover in a situation in which it could have. The relevant function assumed there could not be more nodes in an eraseblock after a corrupted node, but in fact the last (NAND) page written might contain anything. The correct approach is to check for empty space (0xFF bytes) from then on. Signed-off-by: Adrian Hunter --- fs/ubifs/recovery.c | 70 ++++++++++++++++++----------------------------------- 1 file changed, 23 insertions(+), 47 deletions(-) diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 90acac603e63..10662975d2ef 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -425,59 +425,35 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, * @lnum: LEB number of the LEB from which @buf was read * @offs: offset from which @buf was read * - * This function scans @buf for more nodes and returns %0 is a node is found and - * %1 if no more nodes are found. + * This function ensures that the corrupted node at @offs is the last thing + * written to a LEB. This function returns %1 if more data is not found and + * %0 if more data is found. 
*/ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, int lnum, int offs) { - int skip, next_offs = 0; + struct ubifs_ch *ch = buf; + int skip, dlen = le32_to_cpu(ch->len); - if (len > UBIFS_DATA_NODE_SZ) { - struct ubifs_ch *ch = buf; - int dlen = le32_to_cpu(ch->len); - - if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ && - dlen <= UBIFS_MAX_DATA_NODE_SZ) - /* The corrupt node looks like a data node */ - next_offs = ALIGN(offs + dlen, 8); - } - - if (c->min_io_size == 1) - skip = 8; - else - skip = ALIGN(offs + 1, c->min_io_size) - offs; - - offs += skip; - buf += skip; - len -= skip; - while (len > 8) { - struct ubifs_ch *ch = buf; - uint32_t magic = le32_to_cpu(ch->magic); - int ret; - - if (magic == UBIFS_NODE_MAGIC) { - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); - if (ret == SCANNED_A_NODE || ret > 0) { - /* - * There is a small chance this is just data in - * a data node, so check that possibility. e.g. - * this is part of a file that itself contains - * a UBIFS image. - */ - if (next_offs && offs + le32_to_cpu(ch->len) <= - next_offs) - continue; - dbg_rcvry("unexpected node at %d:%d", lnum, - offs); - return 0; - } - } - offs += 8; - buf += 8; - len -= 8; + /* Check for empty space after the corrupt node's common header */ + skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; + if (is_empty(buf + skip, len - skip)) + return 1; + /* + * The area after the common header size is not empty, so the common + * header must be intact. Check it. + */ + if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) { + dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs); + return 0; } - return 1; + /* Now we know the corrupt node's length we can skip over it */ + skip = ALIGN(offs + dlen, c->min_io_size) - offs; + /* After which there should be empty space */ + if (is_empty(buf + skip, len - skip)) + return 1; + dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip); + return 0; } /** -- cgit v1.2.3