Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/acl.c | 33
-rw-r--r-- | fs/ocfs2/aops.c | 110
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 17
-rw-r--r-- | fs/ocfs2/dlm/dlmdebug.c | 6
-rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 3
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 31
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 24
-rw-r--r-- | fs/ocfs2/dlm/dlmthread.c | 114
-rw-r--r-- | fs/ocfs2/dlmfs/dlmfs.c | 18
-rw-r--r-- | fs/ocfs2/dlmglue.c | 4
-rw-r--r-- | fs/ocfs2/file.c | 331
-rw-r--r-- | fs/ocfs2/file.h | 6
-rw-r--r-- | fs/ocfs2/inode.c | 29
-rw-r--r-- | fs/ocfs2/inode.h | 5
-rw-r--r-- | fs/ocfs2/journal.c | 34
-rw-r--r-- | fs/ocfs2/localalloc.c | 7
-rw-r--r-- | fs/ocfs2/quota_global.c | 2
-rw-r--r-- | fs/ocfs2/quota_local.c | 4
-rw-r--r-- | fs/ocfs2/refcounttree.c | 32
-rw-r--r-- | fs/ocfs2/reservations.c | 1
-rw-r--r-- | fs/ocfs2/suballoc.c | 2
-rw-r--r-- | fs/ocfs2/super.c | 5
-rw-r--r-- | fs/ocfs2/xattr.c | 200
23 files changed, 671 insertions, 347 deletions
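
Most of the churn below is in fs/ocfs2/file.c, where the old block-at-a-time ocfs2_zero_extend() is replaced by a range walk: ocfs2_zero_extend_get_range() locates the next allocated, written byte range (skipping unwritten extents and CoWing refcounted ones), and ocfs2_zero_extend_range() zeroes that range page by page. As a rough userspace sketch of that walk — assuming a toy byte-based extent map; lookup() and zero_range() are hypothetical stand-ins, not ocfs2 APIs:

    /* gcc -std=c99 sketch.c — illustrative only, not the kernel code. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define EXT_UNWRITTEN 0x1       /* stand-in for OCFS2_EXT_UNWRITTEN */

    struct extent { uint64_t start, len; unsigned flags; };

    /* Hypothetical extent lookup: returns the extent covering pos, or
     * NULL for a hole; *next is the first byte past the current extent
     * or hole.  The map is assumed sorted and non-overlapping. */
    static const struct extent *lookup(const struct extent *map, size_t n,
                                       uint64_t pos, uint64_t *next)
    {
        for (size_t i = 0; i < n; i++) {
            if (pos < map[i].start) { *next = map[i].start; return NULL; }
            if (pos < map[i].start + map[i].len) {
                *next = map[i].start + map[i].len;
                return &map[i];
            }
        }
        *next = UINT64_MAX;
        return NULL;
    }

    /* Stand-in for ocfs2_write_zero_page(): zero [from, to) of buf. */
    static void zero_range(unsigned char *buf, uint64_t from, uint64_t to)
    {
        memset(buf + from, 0, to - from);
        printf("zeroed [%llu, %llu)\n", (unsigned long long)from,
               (unsigned long long)to);
    }

    /* The range walk: zero only written, allocated ranges inside
     * [zero_start, zero_to).  Holes and unwritten extents already read
     * back as zero, so they are skipped. */
    static void zero_extend(unsigned char *buf, const struct extent *map,
                            size_t n, uint64_t zero_start, uint64_t zero_to)
    {
        uint64_t pos = zero_start, next;

        while (pos < zero_to) {
            const struct extent *ext = lookup(map, n, pos, &next);

            if (next > zero_to)
                next = zero_to;
            if (ext && !(ext->flags & EXT_UNWRITTEN))
                zero_range(buf, pos, next);
            pos = next;
        }
    }

    int main(void)
    {
        unsigned char buf[64];
        /* one written extent, one unwritten extent, a hole, one more
         * written extent */
        const struct extent map[] = {
            { 0, 16, 0 }, { 16, 16, EXT_UNWRITTEN }, { 40, 8, 0 },
        };

        memset(buf, 0xff, sizeof(buf));
        zero_extend(buf, map, 3, 8, 48);  /* expect: [8,16) and [40,48) */
        return 0;
    }

Skipping holes and unwritten extents is what lets very large extends avoid dirtying pages that already read back as zero; the kernel code additionally CoWs refcounted ranges before zeroing and orders the zeroed pages in the journal, both of which this sketch omits.
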
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index da702294d7e7..a76e0aa5cd3f 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -290,12 +290,30 @@ static int ocfs2_set_acl(handle_t *handle, int ocfs2_check_acl(struct inode *inode, int mask) { - struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct buffer_head *di_bh = NULL; + struct posix_acl *acl; + int ret = -EAGAIN; - if (IS_ERR(acl)) + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return ret; + + ret = ocfs2_read_inode_block(inode, &di_bh); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, di_bh); + + brelse(di_bh); + + if (IS_ERR(acl)) { + mlog_errno(PTR_ERR(acl)); return PTR_ERR(acl); + } if (acl) { - int ret = posix_acl_permission(inode, acl, mask); + ret = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); return ret; } @@ -344,7 +362,7 @@ int ocfs2_init_acl(handle_t *handle, { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl = NULL; - int ret = 0; + int ret = 0, ret2; mode_t mode; if (!S_ISLNK(inode->i_mode)) { @@ -381,7 +399,12 @@ int ocfs2_init_acl(handle_t *handle, mode = inode->i_mode; ret = posix_acl_create_masq(clone, &mode); if (ret >= 0) { - ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret2) { + mlog_errno(ret2); + ret = ret2; + goto cleanup; + } if (ret > 0) { ret = ocfs2_set_acl(handle, inode, di_bh, ACL_TYPE_ACCESS, diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 3623ca20cc18..0de69c9a08be 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock, dump_stack(); goto bail; } - - past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); - mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, - (unsigned long long)past_eof); - - if (create && (iblock >= past_eof)) - set_buffer_new(bh_result); } + past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); + mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, + (unsigned long long)past_eof); + if (create && (iblock >= past_eof)) + set_buffer_new(bh_result); + bail: if (err < 0) err = -EIO; @@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle, return ret; } -handle_t *ocfs2_start_walk_page_trans(struct inode *inode, - struct page *page, - unsigned from, - unsigned to) -{ - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - handle_t *handle; - int ret = 0; - - handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = -ENOMEM; - mlog_errno(ret); - goto out; - } - - if (ocfs2_should_order_data(inode)) { - ret = ocfs2_jbd2_file_inode(handle, inode); - if (ret < 0) - mlog_errno(ret); - } -out: - if (ret) { - if (!IS_ERR(handle)) - ocfs2_commit_trans(osb, handle); - handle = ERR_PTR(ret); - } - return handle; -} - static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) { sector_t status; @@ -609,7 +578,9 @@ bail: static void ocfs2_dio_end_io(struct kiocb *iocb, loff_t offset, ssize_t bytes, - void *private) + void *private, + int ret, + bool is_async) { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; int level; @@ -623,6 +594,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, if (!level) up_read(&inode->i_alloc_sem); ocfs2_rw_unlock(inode, level); + + if (is_async) + aio_complete(iocb, ret, 0); } /* @@ -669,11 +643,10 @@ static ssize_t ocfs2_direct_IO(int rw, if 
(i_size_read(inode) <= offset) return 0; - ret = blockdev_direct_IO_no_locking(rw, iocb, inode, - inode->i_sb->s_bdev, iov, offset, - nr_segs, - ocfs2_direct_IO_get_blocks, - ocfs2_dio_end_io); + ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, + iov, offset, nr_segs, + ocfs2_direct_IO_get_blocks, + ocfs2_dio_end_io, NULL, 0); mlog_exit(ret); return ret; @@ -1131,23 +1104,37 @@ out: */ static int ocfs2_grab_pages_for_write(struct address_space *mapping, struct ocfs2_write_ctxt *wc, - u32 cpos, loff_t user_pos, int new, + u32 cpos, loff_t user_pos, + unsigned user_len, int new, struct page *mmap_page) { int ret = 0, i; - unsigned long start, target_index, index; + unsigned long start, target_index, end_index, index; struct inode *inode = mapping->host; + loff_t last_byte; target_index = user_pos >> PAGE_CACHE_SHIFT; /* * Figure out how many pages we'll be manipulating here. For * non allocating write, we just change the one - * page. Otherwise, we'll need a whole clusters worth. + * page. Otherwise, we'll need a whole clusters worth. If we're + * writing past i_size, we only need enough pages to cover the + * last page of the write. */ if (new) { wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); + /* + * We need the index *past* the last page we could possibly + * touch. This is the page past the end of the write or + * i_size, whichever is greater. + */ + last_byte = max(user_pos + user_len, i_size_read(inode)); + BUG_ON(last_byte < 1); + end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1; + if ((start + wc->w_num_pages) > end_index) + wc->w_num_pages = end_index - start; } else { wc->w_num_pages = 1; start = target_index; @@ -1620,21 +1607,20 @@ out: * write path can treat it as an non-allocating write, which has no * special case code for sparse/nonsparse files. */ -static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, - unsigned len, +static int ocfs2_expand_nonsparse_inode(struct inode *inode, + struct buffer_head *di_bh, + loff_t pos, unsigned len, struct ocfs2_write_ctxt *wc) { int ret; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); loff_t newsize = pos + len; - if (ocfs2_sparse_alloc(osb)) - return 0; + BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))); if (newsize <= i_size_read(inode)) return 0; - ret = ocfs2_extend_no_holes(inode, newsize, pos); + ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos); if (ret) mlog_errno(ret); @@ -1644,6 +1630,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, return ret; } +static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, + loff_t pos) +{ + int ret = 0; + + BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))); + if (pos > i_size_read(inode)) + ret = ocfs2_zero_extend(inode, di_bh, pos); + + return ret; +} + int ocfs2_write_begin_nolock(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, @@ -1679,7 +1677,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, } } - ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc); + if (ocfs2_sparse_alloc(osb)) + ret = ocfs2_zero_tail(inode, di_bh, pos); + else + ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len, + wc); if (ret) { mlog_errno(ret); goto out; @@ -1789,7 +1791,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, * that we can zero and flush if we error after adding the * extent. 
*/ - ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, + ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, cluster_of_pages, mmap_page); if (ret) { mlog_errno(ret); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aa75ca3f78da..1361997cf205 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1759,6 +1759,7 @@ static int o2net_accept_one(struct socket *sock) struct sockaddr_in sin; struct socket *new_sock = NULL; struct o2nm_node *node = NULL; + struct o2nm_node *local_node = NULL; struct o2net_sock_container *sc = NULL; struct o2net_node *nn; @@ -1796,11 +1797,15 @@ static int o2net_accept_one(struct socket *sock) goto out; } - if (o2nm_this_node() > node->nd_num) { - mlog(ML_NOTICE, "unexpected connect attempted from a lower " - "numbered node '%s' at " "%pI4:%d with num %u\n", - node->nd_name, &sin.sin_addr.s_addr, - ntohs(sin.sin_port), node->nd_num); + if (o2nm_this_node() >= node->nd_num) { + local_node = o2nm_get_node_by_num(o2nm_this_node()); + mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' (" + "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n", + local_node->nd_name, local_node->nd_num, + &(local_node->nd_ipv4_address), + ntohs(local_node->nd_ipv4_port), + node->nd_name, node->nd_num, &sin.sin_addr.s_addr, + ntohs(sin.sin_port)); ret = -EINVAL; goto out; } @@ -1857,6 +1862,8 @@ out: sock_release(new_sock); if (node) o2nm_node_put(node); + if (local_node) + o2nm_node_put(local_node); if (sc) sc_put(sc); return ret; diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 0cd24cf54396..5efdd37dfe48 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -419,7 +419,7 @@ static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence) static int debug_buffer_release(struct inode *inode, struct file *file) { - struct debug_buffer *db = (struct debug_buffer *)file->private_data; + struct debug_buffer *db = file->private_data; if (db) kfree(db->buf); @@ -715,7 +715,7 @@ static int debug_lockres_open(struct inode *inode, struct file *file) goto bail; } - seq = (struct seq_file *) file->private_data; + seq = file->private_data; seq->private = dl; dlm_grab(dlm); @@ -731,7 +731,7 @@ bail: static int debug_lockres_release(struct inode *inode, struct file *file) { - struct seq_file *seq = (struct seq_file *)file->private_data; + struct seq_file *seq = file->private_data; struct debug_lockres *dl = (struct debug_lockres *)seq->private; if (dl->dl_res) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6b5a492e1749..153abb5abef0 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain, struct dlm_ctxt *dlm = NULL; struct dlm_ctxt *new_ctxt = NULL; - if (strlen(domain) > O2NM_MAX_NAME_LEN) { + if (strlen(domain) >= O2NM_MAX_NAME_LEN) { ret = -ENAMETOOLONG; mlog(ML_ERROR, "domain name length too long\n"); goto leave; @@ -1709,6 +1709,7 @@ retry: } if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { + spin_unlock(&dlm_domain_lock); mlog(ML_ERROR, "Requested locking protocol version is not " "compatible with already registered domain " diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 4a7506a4e314..ffb4c68dafa4 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -511,8 +511,6 @@ static void dlm_lockres_release(struct kref *kref) atomic_dec(&dlm->res_cur_count); - dlm_put(dlm); - if (!hlist_unhashed(&res->hash_node) || 
!list_empty(&res->granted) || !list_empty(&res->converting) || @@ -585,8 +583,6 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, res->migration_pending = 0; res->inflight_locks = 0; - /* put in dlm_lockres_release */ - dlm_grab(dlm); res->dlm = dlm; kref_init(&res->refs); @@ -2808,14 +2804,8 @@ again: mlog(0, "trying again...\n"); goto again; } - /* now that we are sure the MIGRATING state is there, drop - * the unneded state which blocked threads trying to DIRTY */ - spin_lock(&res->spinlock); - BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); - BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); - res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; - spin_unlock(&res->spinlock); + ret = 0; /* did the target go down or die? */ spin_lock(&dlm->spinlock); if (!test_bit(target, dlm->domain_map)) { @@ -2826,9 +2816,21 @@ again: spin_unlock(&dlm->spinlock); /* + * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for + * another try; otherwise, we are sure the MIGRATING state is there, + * drop the unneded state which blocked threads trying to DIRTY + */ + spin_lock(&res->spinlock); + BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); + res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; + if (!ret) + BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); + spin_unlock(&res->spinlock); + + /* * at this point: * - * o the DLM_LOCK_RES_MIGRATING flag is set + * o the DLM_LOCK_RES_MIGRATING flag is set if target not down * o there are no pending asts on this lockres * o all processes trying to reserve an ast on this * lockres must wait for the MIGRATING flag to clear @@ -3044,8 +3046,6 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, /* check for pre-existing lock */ spin_lock(&dlm->spinlock); res = __dlm_lookup_lockres(dlm, name, namelen, hash); - spin_lock(&dlm->master_lock); - if (res) { spin_lock(&res->spinlock); if (res->state & DLM_LOCK_RES_RECOVERING) { @@ -3063,14 +3063,15 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, spin_unlock(&res->spinlock); } + spin_lock(&dlm->master_lock); /* ignore status. only nonzero status would BUG. */ ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, namelen, migrate->new_master, migrate->master); -unlock: spin_unlock(&dlm->master_lock); +unlock: spin_unlock(&dlm->spinlock); if (oldmle) { diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f8b75ce4be70..aaaffbcbe916 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { int bit; - bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0); + bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0); if (bit >= O2NM_MAX_NODES || bit < 0) dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); else @@ -1997,6 +1997,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, struct list_head *queue; struct dlm_lock *lock, *next; + assert_spin_locked(&dlm->spinlock); + assert_spin_locked(&res->spinlock); res->state |= DLM_LOCK_RES_RECOVERING; if (!list_empty(&res->recovering)) { mlog(0, @@ -2326,19 +2328,15 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) /* zero the lvb if necessary */ dlm_revalidate_lvb(dlm, res, dead_node); if (res->owner == dead_node) { - if (res->state & DLM_LOCK_RES_DROPPING_REF) - mlog(0, "%s:%.*s: owned by " - "dead node %u, this node was " - "dropping its ref when it died. 
" - "continue, dropping the flag.\n", - dlm->name, res->lockname.len, - res->lockname.name, dead_node); - - /* the wake_up for this will happen when the - * RECOVERING flag is dropped later */ - res->state &= ~DLM_LOCK_RES_DROPPING_REF; + if (res->state & DLM_LOCK_RES_DROPPING_REF) { + mlog(ML_NOTICE, "Ignore %.*s for " + "recovery as it is being freed\n", + res->lockname.len, + res->lockname.name); + } else + dlm_move_lockres_to_recovery_list(dlm, + res); - dlm_move_lockres_to_recovery_list(dlm, res); } else if (res->owner == dlm->node_num) { dlm_free_dead_locks(dlm, res, dead_node); __dlm_lockres_calc_usage(dlm, res); diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca68fe5..2211acf33d9b 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -92,19 +92,27 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res) * truly ready to be freed. */ int __dlm_lockres_unused(struct dlm_lock_resource *res) { - if (!__dlm_lockres_has_locks(res) && - (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) { - /* try not to scan the bitmap unless the first two - * conditions are already true */ - int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); - if (bit >= O2NM_MAX_NODES) { - /* since the bit for dlm->node_num is not - * set, inflight_locks better be zero */ - BUG_ON(res->inflight_locks != 0); - return 1; - } - } - return 0; + int bit; + + if (__dlm_lockres_has_locks(res)) + return 0; + + if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) + return 0; + + if (res->state & DLM_LOCK_RES_RECOVERING) + return 0; + + bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + if (bit < O2NM_MAX_NODES) + return 0; + + /* + * since the bit for dlm->node_num is not set, inflight_locks better + * be zero + */ + BUG_ON(res->inflight_locks != 0); + return 1; } @@ -152,45 +160,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, spin_unlock(&dlm->spinlock); } -static int dlm_purge_lockres(struct dlm_ctxt *dlm, +static void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { int master; int ret = 0; - spin_lock(&res->spinlock); - if (!__dlm_lockres_unused(res)) { - mlog(0, "%s:%.*s: tried to purge but not unused\n", - dlm->name, res->lockname.len, res->lockname.name); - __dlm_print_one_lock_resource(res); - spin_unlock(&res->spinlock); - BUG(); - } - - if (res->state & DLM_LOCK_RES_MIGRATING) { - mlog(0, "%s:%.*s: Delay dropref as this lockres is " - "being remastered\n", dlm->name, res->lockname.len, - res->lockname.name); - /* Re-add the lockres to the end of the purge list */ - if (!list_empty(&res->purge)) { - list_del_init(&res->purge); - list_add_tail(&res->purge, &dlm->purge_list); - } - spin_unlock(&res->spinlock); - return 0; - } + assert_spin_locked(&dlm->spinlock); + assert_spin_locked(&res->spinlock); master = (res->owner == dlm->node_num); - if (!master) - res->state |= DLM_LOCK_RES_DROPPING_REF; - spin_unlock(&res->spinlock); mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, res->lockname.name, master); if (!master) { + res->state |= DLM_LOCK_RES_DROPPING_REF; /* drop spinlock... 
retake below */ + spin_unlock(&res->spinlock); spin_unlock(&dlm->spinlock); spin_lock(&res->spinlock); @@ -208,31 +196,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", dlm->name, res->lockname.len, res->lockname.name, ret); spin_lock(&dlm->spinlock); + spin_lock(&res->spinlock); } - spin_lock(&res->spinlock); if (!list_empty(&res->purge)) { mlog(0, "removing lockres %.*s:%p from purgelist, " "master = %d\n", res->lockname.len, res->lockname.name, res, master); list_del_init(&res->purge); - spin_unlock(&res->spinlock); dlm_lockres_put(res); dlm->purge_count--; - } else - spin_unlock(&res->spinlock); + } + + if (!__dlm_lockres_unused(res)) { + mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n", + dlm->name, res->lockname.len, res->lockname.name); + __dlm_print_one_lock_resource(res); + BUG(); + } __dlm_unhash_lockres(res); /* lockres is not in the hash now. drop the flag and wake up * any processes waiting in dlm_get_lock_resource. */ if (!master) { - spin_lock(&res->spinlock); res->state &= ~DLM_LOCK_RES_DROPPING_REF; spin_unlock(&res->spinlock); wake_up(&res->wq); - } - return 0; + } else + spin_unlock(&res->spinlock); } static void dlm_run_purge_list(struct dlm_ctxt *dlm, @@ -251,17 +243,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, lockres = list_entry(dlm->purge_list.next, struct dlm_lock_resource, purge); - /* Status of the lockres *might* change so double - * check. If the lockres is unused, holding the dlm - * spinlock will prevent people from getting and more - * refs on it -- there's no need to keep the lockres - * spinlock. */ spin_lock(&lockres->spinlock); - unused = __dlm_lockres_unused(lockres); - spin_unlock(&lockres->spinlock); - - if (!unused) - continue; purge_jiffies = lockres->last_used + msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); @@ -273,15 +255,29 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, * in tail order, we can stop at the first * unpurgable resource -- anyone added after * him will have a greater last_used value */ + spin_unlock(&lockres->spinlock); break; } + /* Status of the lockres *might* change so double + * check. If the lockres is unused, holding the dlm + * spinlock will prevent people from getting and more + * refs on it. */ + unused = __dlm_lockres_unused(lockres); + if (!unused || + (lockres->state & DLM_LOCK_RES_MIGRATING)) { + mlog(0, "lockres %s:%.*s: is in use or " + "being remastered, used %d, state %d\n", + dlm->name, lockres->lockname.len, + lockres->lockname.name, !unused, lockres->state); + list_move_tail(&dlm->purge_list, &lockres->purge); + spin_unlock(&lockres->spinlock); + continue; + } + dlm_lockres_get(lockres); - /* This may drop and reacquire the dlm spinlock if it - * has to do migration. 
*/ - if (dlm_purge_lockres(dlm, lockres)) - BUG(); + dlm_purge_lockres(dlm, lockres); dlm_lockres_put(lockres); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index b83d6107a1f5..c2903b84bb7a 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -182,8 +182,7 @@ static int dlmfs_file_release(struct inode *inode, { int level, status; struct dlmfs_inode_private *ip = DLMFS_I(inode); - struct dlmfs_filp_private *fp = - (struct dlmfs_filp_private *) file->private_data; + struct dlmfs_filp_private *fp = file->private_data; if (S_ISDIR(inode->i_mode)) BUG(); @@ -214,10 +213,12 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid &= ~ATTR_SIZE; error = inode_change_ok(inode, attr); - if (!error) - error = inode_setattr(inode, attr); + if (error) + return error; - return error; + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) @@ -355,13 +356,12 @@ static void dlmfs_destroy_inode(struct inode *inode) kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); } -static void dlmfs_clear_inode(struct inode *inode) +static void dlmfs_evict_inode(struct inode *inode) { int status; struct dlmfs_inode_private *ip; - if (!inode) - return; + end_writeback(inode); mlog(0, "inode %lu\n", inode->i_ino); @@ -631,7 +631,7 @@ static const struct super_operations dlmfs_ops = { .statfs = simple_statfs, .alloc_inode = dlmfs_alloc_inode, .destroy_inode = dlmfs_destroy_inode, - .clear_inode = dlmfs_clear_inode, + .evict_inode = dlmfs_evict_inode, .drop_inode = generic_delete_inode, }; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 39eb16ac5f98..5e02a893f46e 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2966,7 +2966,7 @@ static const struct seq_operations ocfs2_dlm_seq_ops = { static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) { - struct seq_file *seq = (struct seq_file *) file->private_data; + struct seq_file *seq = file->private_data; struct ocfs2_dlm_seq_priv *priv = seq->private; struct ocfs2_lock_res *res = &priv->p_iter_res; @@ -3000,7 +3000,7 @@ static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) goto out; } - seq = (struct seq_file *) file->private_data; + seq = file->private_data; seq->private = priv; ocfs2_add_lockres_tracking(&priv->p_iter_res, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6a13ea64c447..81296b4e3646 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -724,28 +724,55 @@ leave: return status; } +/* + * While a write will already be ordering the data, a truncate will not. + * Thus, we need to explicitly order the zeroed pages. + */ +static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + handle_t *handle = NULL; + int ret = 0; + + if (!ocfs2_should_order_data(inode)) + goto out; + + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_jbd2_file_inode(handle, inode); + if (ret < 0) + mlog_errno(ret); + +out: + if (ret) { + if (!IS_ERR(handle)) + ocfs2_commit_trans(osb, handle); + handle = ERR_PTR(ret); + } + return handle; +} + /* Some parts of this taken from generic_cont_expand, which turned out * to be too fragile to do exactly what we need without us having to * worry about recursive locking in ->write_begin() and ->write_end(). 
*/ -static int ocfs2_write_zero_page(struct inode *inode, - u64 size) +static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, + u64 abs_to) { struct address_space *mapping = inode->i_mapping; struct page *page; - unsigned long index; - unsigned int offset; + unsigned long index = abs_from >> PAGE_CACHE_SHIFT; handle_t *handle = NULL; - int ret; + int ret = 0; + unsigned zero_from, zero_to, block_start, block_end; - offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ - /* ugh. in prepare/commit_write, if from==to==start of block, we - ** skip the prepare. make sure we never send an offset for the start - ** of a block - */ - if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { - offset++; - } - index = size >> PAGE_CACHE_SHIFT; + BUG_ON(abs_from >= abs_to); + BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); + BUG_ON(abs_from & (inode->i_blkbits - 1)); page = grab_cache_page(mapping, index); if (!page) { @@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode, goto out; } - ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); - if (ret < 0) { - mlog_errno(ret); - goto out_unlock; - } + /* Get the offsets within the page that we want to zero */ + zero_from = abs_from & (PAGE_CACHE_SIZE - 1); + zero_to = abs_to & (PAGE_CACHE_SIZE - 1); + if (!zero_to) + zero_to = PAGE_CACHE_SIZE; - if (ocfs2_should_order_data(inode)) { - handle = ocfs2_start_walk_page_trans(inode, page, offset, - offset); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; + mlog(0, + "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n", + (unsigned long long)abs_from, (unsigned long long)abs_to, + index, zero_from, zero_to); + + /* We know that zero_from is block aligned */ + for (block_start = zero_from; block_start < zero_to; + block_start = block_end) { + block_end = block_start + (1 << inode->i_blkbits); + + /* + * block_start is block-aligned. Bump it by one to + * force ocfs2_{prepare,commit}_write() to zero the + * whole block. + */ + ret = ocfs2_prepare_write_nolock(inode, page, + block_start + 1, + block_start + 1); + if (ret < 0) { + mlog_errno(ret); goto out_unlock; } - } - /* must not update i_size! */ - ret = block_commit_write(page, offset, offset); - if (ret < 0) - mlog_errno(ret); - else - ret = 0; + if (!handle) { + handle = ocfs2_zero_start_ordered_transaction(inode); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + handle = NULL; + break; + } + } + + /* must not update i_size! */ + ret = block_commit_write(page, block_start + 1, + block_start + 1); + if (ret < 0) + mlog_errno(ret); + else + ret = 0; + } if (handle) ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); + out_unlock: unlock_page(page); page_cache_release(page); @@ -786,22 +838,114 @@ out: return ret; } -static int ocfs2_zero_extend(struct inode *inode, - u64 zero_to_size) +/* + * Find the next range to zero. We do this in terms of bytes because + * that's what ocfs2_zero_extend() wants, and it is dealing with the + * pagecache. We may return multiple extents. + * + * zero_start and zero_end are ocfs2_zero_extend()s current idea of what + * needs to be zeroed. range_start and range_end return the next zeroing + * range. A subsequent call should pass the previous range_end as its + * zero_start. If range_end is 0, there's nothing to do. + * + * Unwritten extents are skipped over. Refcounted extents are CoWd. 
+ */ +static int ocfs2_zero_extend_get_range(struct inode *inode, + struct buffer_head *di_bh, + u64 zero_start, u64 zero_end, + u64 *range_start, u64 *range_end) { - int ret = 0; - u64 start_off; - struct super_block *sb = inode->i_sb; + int rc = 0, needs_cow = 0; + u32 p_cpos, zero_clusters = 0; + u32 zero_cpos = + zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; + u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end); + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; - start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); - while (start_off < zero_to_size) { - ret = ocfs2_write_zero_page(inode, start_off); - if (ret < 0) { - mlog_errno(ret); + while (zero_cpos < last_cpos) { + rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos, + &num_clusters, &ext_flags); + if (rc) { + mlog_errno(rc); + goto out; + } + + if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { + zero_clusters = num_clusters; + if (ext_flags & OCFS2_EXT_REFCOUNTED) + needs_cow = 1; + break; + } + + zero_cpos += num_clusters; + } + if (!zero_clusters) { + *range_end = 0; + goto out; + } + + while ((zero_cpos + zero_clusters) < last_cpos) { + rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters, + &p_cpos, &num_clusters, + &ext_flags); + if (rc) { + mlog_errno(rc); + goto out; + } + + if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)) + break; + if (ext_flags & OCFS2_EXT_REFCOUNTED) + needs_cow = 1; + zero_clusters += num_clusters; + } + if ((zero_cpos + zero_clusters) > last_cpos) + zero_clusters = last_cpos - zero_cpos; + + if (needs_cow) { + rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, + UINT_MAX); + if (rc) { + mlog_errno(rc); goto out; } + } + + *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos); + *range_end = ocfs2_clusters_to_bytes(inode->i_sb, + zero_cpos + zero_clusters); - start_off += sb->s_blocksize; +out: + return rc; +} + +/* + * Zero one range returned from ocfs2_zero_extend_get_range(). The caller + * has made sure that the entire range needs zeroing. 
+ */ +static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start, + u64 range_end) +{ + int rc = 0; + u64 next_pos; + u64 zero_pos = range_start; + + mlog(0, "range_start = %llu, range_end = %llu\n", + (unsigned long long)range_start, + (unsigned long long)range_end); + BUG_ON(range_start >= range_end); + + while (zero_pos < range_end) { + next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE; + if (next_pos > range_end) + next_pos = range_end; + rc = ocfs2_write_zero_page(inode, zero_pos, next_pos); + if (rc < 0) { + mlog_errno(rc); + break; + } + zero_pos = next_pos; /* * Very large extends have the potential to lock up @@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode, cond_resched(); } -out: + return rc; +} + +int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, + loff_t zero_to_size) +{ + int ret = 0; + u64 zero_start, range_start = 0, range_end = 0; + struct super_block *sb = inode->i_sb; + + zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); + mlog(0, "zero_start %llu for i_size %llu\n", + (unsigned long long)zero_start, + (unsigned long long)i_size_read(inode)); + while (zero_start < zero_to_size) { + ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start, + zero_to_size, + &range_start, + &range_end); + if (ret) { + mlog_errno(ret); + break; + } + if (!range_end) + break; + /* Trim the ends */ + if (range_start < zero_start) + range_start = zero_start; + if (range_end > zero_to_size) + range_end = zero_to_size; + + ret = ocfs2_zero_extend_range(inode, range_start, + range_end); + if (ret) { + mlog_errno(ret); + break; + } + zero_start = range_end; + } + return ret; } -int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) +int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, + u64 new_i_size, u64 zero_to) { int ret; u32 clusters_to_add; struct ocfs2_inode_info *oi = OCFS2_I(inode); + /* + * Only quota files call this without a bh, and they can't be + * refcounted. + */ + BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE)); + clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); if (clusters_to_add < oi->ip_clusters) clusters_to_add = 0; @@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) * still need to zero the area between the old i_size and the * new i_size. */ - ret = ocfs2_zero_extend(inode, zero_to); + ret = ocfs2_zero_extend(inode, di_bh, zero_to); if (ret < 0) mlog_errno(ret); @@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode, goto out; if (i_size_read(inode) == new_i_size) - goto out; + goto out; BUG_ON(new_i_size < i_size_read(inode)); /* - * Fall through for converting inline data, even if the fs - * supports sparse files. - * - * The check for inline data here is legal - nobody can add - * the feature since we have i_mutex. We must check it again - * after acquiring ip_alloc_sem though, as paths like mmap - * might have raced us to converting the inode to extents. - */ - if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) - && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) - goto out_update_size; - - /* * The alloc sem blocks people in read/write from reading our * allocation until we're done changing it. We depend on * i_mutex to block other extend/truncate calls while we're - * here. + * here. We even have to hold it for sparse files because there + * might be some tail zeroing. 
*/ down_write(&oi->ip_alloc_sem); @@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode, ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); if (ret) { up_write(&oi->ip_alloc_sem); - mlog_errno(ret); goto out; } } - if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) - ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); + if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) + ret = ocfs2_zero_extend(inode, di_bh, new_i_size); + else + ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size, + new_i_size); up_write(&oi->ip_alloc_sem); @@ -1052,18 +1233,26 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } /* - * This will intentionally not wind up calling simple_setsize(), + * This will intentionally not wind up calling truncate_setsize(), * since all the work for a size change has been done above. * Otherwise, we could get into problems with truncate as * ip_alloc_sem is used there to protect against i_size * changes. + * + * XXX: this means the conditional below can probably be removed. */ - status = inode_setattr(inode, attr); - if (status < 0) { - mlog_errno(status); - goto bail_commit; + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + status = vmtruncate(inode, attr->ia_size); + if (status) { + mlog_errno(status); + goto bail_commit; + } } + setattr_copy(inode, attr); + mark_inode_dirty(inode); + status = ocfs2_mark_inode_dirty(handle, inode, bh); if (status < 0) mlog_errno(status); @@ -2119,12 +2308,12 @@ relock: * blocks outside i_size. Trim these off again. * Don't need i_size_read because we hold i_mutex. * - * XXX(hch): this looks buggy because ocfs2 did not + * XXX(truncate): this looks buggy because ocfs2 did not * actually implement ->truncate. Take a look at * the new truncate sequence and update this accordingly */ if (*ppos + count > inode->i_size) - simple_setsize(inode, inode->i_size); + truncate_setsize(inode, inode->i_size); ret = written; goto out_dio; } diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index d66cf4f7c70e..97bf761c9e7c 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb, int ocfs2_simple_size_update(struct inode *inode, struct buffer_head *di_bh, u64 new_i_size); -int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, - u64 zero_to); +int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, + u64 new_i_size, u64 zero_to); +int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, + loff_t zero_to); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index abb0a95cc717..0492464916b1 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -969,7 +969,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode, truncate_inode_pages(&inode->i_data, 0); } -void ocfs2_delete_inode(struct inode *inode) +static void ocfs2_delete_inode(struct inode *inode) { int wipe, status; sigset_t oldset; @@ -1075,20 +1075,17 @@ bail_unlock_nfs_sync: bail_unblock: ocfs2_unblock_signals(&oldset); bail: - clear_inode(inode); mlog_exit_void(); } -void ocfs2_clear_inode(struct inode *inode) +static void ocfs2_clear_inode(struct inode *inode) { int status; struct ocfs2_inode_info *oi = OCFS2_I(inode); mlog_entry_void(); - if (!inode) - goto bail; - + end_writeback(inode); mlog(0, "Clearing inode: %llu, nlink = %u\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, 
inode->i_nlink); @@ -1180,16 +1177,27 @@ void ocfs2_clear_inode(struct inode *inode) jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, &oi->ip_jinode); -bail: mlog_exit_void(); } +void ocfs2_evict_inode(struct inode *inode) +{ + if (!inode->i_nlink || + (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) { + ocfs2_delete_inode(inode); + } else { + truncate_inode_pages(&inode->i_data, 0); + } + ocfs2_clear_inode(inode); +} + /* Called under inode_lock, with no more references on the * struct inode, so it's safe here to check the flags field * and to manipulate i_nlink without any other locks. */ -void ocfs2_drop_inode(struct inode *inode) +int ocfs2_drop_inode(struct inode *inode) { struct ocfs2_inode_info *oi = OCFS2_I(inode); + int res; mlog_entry_void(); @@ -1197,11 +1205,12 @@ void ocfs2_drop_inode(struct inode *inode) (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) - generic_delete_inode(inode); + res = 1; else - generic_drop_inode(inode); + res = generic_drop_inode(inode); mlog_exit_void(); + return res; } /* diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 9f5f5fcadc45..6de5a869db30 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -123,9 +123,8 @@ static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode) return &OCFS2_I(inode)->ip_metadata_cache; } -void ocfs2_clear_inode(struct inode *inode); -void ocfs2_delete_inode(struct inode *inode); -void ocfs2_drop_inode(struct inode *inode); +void ocfs2_evict_inode(struct inode *inode); +int ocfs2_drop_inode(struct inode *inode); /* Flags for ocfs2_iget() */ #define OCFS2_FI_FLAG_SYSFILE 0x1 diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 47878cf16418..9b57c0350ff9 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger return container_of(triggers, struct ocfs2_triggers, ot_triggers); } -static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, * Quota blocks have their own trigger because the struct ocfs2_block_check * offset depends on the blocksize. */ -static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, * Directory blocks also have their own trigger because the * struct ocfs2_block_check offset depends on the blocksize. 
*/ -static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, static struct ocfs2_triggers di_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dinode, i_check), @@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = { static struct ocfs2_triggers eb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_extent_block, h_check), @@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = { static struct ocfs2_triggers rb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), @@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = { static struct ocfs2_triggers gd_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), @@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = { static struct ocfs2_triggers db_triggers = { .ot_triggers = { - .t_commit = ocfs2_db_commit_trigger, + .t_frozen = ocfs2_db_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, }; static struct ocfs2_triggers xb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), @@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = { static struct ocfs2_triggers dq_triggers = { .ot_triggers = { - .t_commit = ocfs2_dq_commit_trigger, + .t_frozen = ocfs2_dq_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, }; static struct ocfs2_triggers dr_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), @@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = { static struct ocfs2_triggers dl_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), @@ -760,13 +760,13 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb) if (osb->osb_commit_interval) commit_interval = osb->osb_commit_interval; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); journal->j_commit_interval = commit_interval; if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) journal->j_flags |= JBD2_BARRIER; else journal->j_flags &= ~JBD2_BARRIER; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) @@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work) mutex_lock(&os->os_lock); ocfs2_queue_orphan_scan(osb); if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) - schedule_delayed_work(&os->os_orphan_scan_work, + queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work, ocfs2_orphan_scan_timeout()); mutex_unlock(&os->os_lock); } @@ -1976,8 
+1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb) atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); else { atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); - schedule_delayed_work(&os->os_orphan_scan_work, - ocfs2_orphan_scan_timeout()); + queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); } } diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 3d7419682dc0..ec6adbf8f551 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) { unsigned int la_mb; unsigned int gd_mb; + unsigned int la_max_mb; unsigned int megs_per_slot; struct super_block *sb = osb->sb; @@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) if (megs_per_slot < la_mb) la_mb = megs_per_slot; + /* We can't store more bits than we can in a block. */ + la_max_mb = ocfs2_clusters_to_megabytes(osb->sb, + ocfs2_local_alloc_size(sb) * 8); + if (la_mb > la_max_mb) + la_mb = la_max_mb; + return la_mb; } diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 2bb35fe00511..4607923eb24c 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) * locking allocators ranks above a transaction start */ WARN_ON(journal_current_handle()); - status = ocfs2_extend_no_holes(gqinode, + status = ocfs2_extend_no_holes(gqinode, NULL, gqinode->i_size + (need_alloc << sb->s_blocksize_bits), gqinode->i_size); if (status < 0) diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 8bd70d4d184d..dc78764ccc4c 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( u64 p_blkno; /* We are protected by dqio_sem so no locking needed */ - status = ocfs2_extend_no_holes(lqinode, + status = ocfs2_extend_no_holes(lqinode, NULL, lqinode->i_size + 2 * sb->s_blocksize, lqinode->i_size); if (status < 0) { @@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( return ocfs2_local_quota_add_chunk(sb, type, offset); /* We are protected by dqio_sem so no locking needed */ - status = ocfs2_extend_no_holes(lqinode, + status = ocfs2_extend_no_holes(lqinode, NULL, lqinode->i_size + sb->s_blocksize, lqinode->i_size); if (status < 0) { diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 4793f36f6518..73a11ccfd4c2 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2436,16 +2436,26 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + le32_to_cpu(rec.r_clusters)) - cpos; /* - * If the refcount rec already exist, cool. We just need - * to check whether there is a split. Otherwise we just need - * to increase the refcount. - * If we will insert one, increases recs_add. - * * We record all the records which will be inserted to the * same refcount block, so that we can tell exactly whether * we need a new refcount block or not. + * + * If we will insert a new one, this is easy and only happens + * during adding refcounted flag to the extent, so we don't + * have a chance of spliting. We just need one record. + * + * If the refcount rec already exists, that would be a little + * complicated. we may have to: + * 1) split at the beginning if the start pos isn't aligned. + * we need 1 more record in this case. + * 2) split int the end if the end pos isn't aligned. 
+ * we need 1 more record in this case. + * 3) split in the middle because of file system fragmentation. + * we need 2 more records in this case(we can't detect this + * beforehand, so always think of the worst case). */ if (rec.r_refcount) { + recs_add += 2; /* Check whether we need a split at the beginning. */ if (cpos == start_cpos && cpos != le64_to_cpu(rec.r_cpos)) @@ -2931,6 +2941,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); + /* + * We only duplicate pages until we reach the page contains i_size - 1. + * So trim 'end' to i_size. + */ + if (end > i_size_read(context->inode)) + end = i_size_read(context->inode); while (offset < end) { page_index = offset >> PAGE_CACHE_SHIFT; @@ -4166,6 +4182,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry, struct inode *inode = old_dentry->d_inode; struct buffer_head *new_bh = NULL; + if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) { + ret = -EINVAL; + mlog_errno(ret); + goto out; + } + ret = filemap_fdatawrite(inode->i_mapping); if (ret) { mlog_errno(ret); diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index 40650021fc24..d8b6e4259b80 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -26,7 +26,6 @@ #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <linux/bitops.h> #include <linux/list.h> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f4c2a9eb8c4d..a8e6a95a353f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, le16_to_cpu(bg->bg_free_bits_count)); le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); - cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno); + cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno; if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) le16_add_cpu(&cl->cl_next_free_rec, 1); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 0eaa929a4dbf..fa1be1b304d1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -145,8 +145,7 @@ static const struct super_operations ocfs2_sops = { .alloc_inode = ocfs2_alloc_inode, .destroy_inode = ocfs2_destroy_inode, .drop_inode = ocfs2_drop_inode, - .clear_inode = ocfs2_clear_inode, - .delete_inode = ocfs2_delete_inode, + .evict_inode = ocfs2_evict_inode, .sync_fs = ocfs2_sync_fs, .put_super = ocfs2_put_super, .remount_fs = ocfs2_remount, @@ -2472,7 +2471,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) kfree(osb->slot_recovery_generations); /* FIXME * This belongs in journal shutdown, but because we have to - * allocate osb->journal at the start of ocfs2_initalize_osb(), + * allocate osb->journal at the start of ocfs2_initialize_osb(), * we free it here. 
*/ kfree(osb->journal); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index e97b34842cfe..d03469f61801 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, struct ocfs2_xattr_value_buf *vb, struct ocfs2_xattr_set_ctxt *ctxt) { - int status = 0; + int status = 0, credits; handle_t *handle = ctxt->handle; enum ocfs2_alloc_restarted why; u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); @@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); - status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { - mlog_errno(status); - goto leave; - } + while (clusters_to_add) { + status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + break; + } - prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); - status = ocfs2_add_clusters_in_btree(handle, - &et, - &logical_start, - clusters_to_add, - 0, - ctxt->data_ac, - ctxt->meta_ac, - &why); - if (status < 0) { - mlog_errno(status); - goto leave; - } + prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); + status = ocfs2_add_clusters_in_btree(handle, + &et, + &logical_start, + clusters_to_add, + 0, + ctxt->data_ac, + ctxt->meta_ac, + &why); + if ((status < 0) && (status != -EAGAIN)) { + if (status != -ENOSPC) + mlog_errno(status); + break; + } - ocfs2_journal_dirty(handle, vb->vb_bh); + ocfs2_journal_dirty(handle, vb->vb_bh); - clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; + clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - + prev_clusters; - /* - * We should have already allocated enough space before the transaction, - * so no need to restart. - */ - BUG_ON(why != RESTART_NONE || clusters_to_add); - -leave: + if (why != RESTART_NONE && clusters_to_add) { + /* + * We can only fail in case the alloc file doesn't give + * up enough clusters. + */ + BUG_ON(why == RESTART_META); + + mlog(0, "restarting xattr value extension for %u" + " clusters,.\n", clusters_to_add); + credits = ocfs2_calc_extend_credits(inode->i_sb, + &vb->vb_xv->xr_list, + clusters_to_add); + status = ocfs2_extend_trans(handle, credits); + if (status < 0) { + status = -ENOMEM; + mlog_errno(status); + break; + } + } + } return status; } @@ -6788,16 +6804,15 @@ out: return ret; } -static int ocfs2_reflink_xattr_buckets(handle_t *handle, +static int ocfs2_reflink_xattr_bucket(handle_t *handle, u64 blkno, u64 new_blkno, u32 clusters, + u32 *cpos, int num_buckets, struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac, struct ocfs2_reflink_xattr_tree_args *args) { int i, j, ret = 0; struct super_block *sb = args->reflink->old_inode->i_sb; - u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); - u32 num_buckets = clusters * bpc; int bpb = args->old_bucket->bu_blocks; struct ocfs2_xattr_value_buf vb = { .vb_access = ocfs2_journal_access, @@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, break; } - /* - * The real bucket num in this series of blocks is stored - * in the 1st bucket. 
- */ - if (i == 0) - num_buckets = le16_to_cpu( - bucket_xh(args->old_bucket)->xh_num_buckets); - ret = ocfs2_xattr_bucket_journal_access(handle, args->new_bucket, OCFS2_JOURNAL_ACCESS_CREATE); @@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, bucket_block(args->old_bucket, j), sb->s_blocksize); + /* + * Record the start cpos so that we can use it to initialize + * our xattr tree we also set the xh_num_bucket for the new + * bucket. + */ + if (i == 0) { + *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> + xh_entries[0].xe_name_hash); + bucket_xh(args->new_bucket)->xh_num_buckets = + cpu_to_le16(num_buckets); + } + ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); ret = ocfs2_reflink_xattr_header(handle, args->reflink, @@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, } ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); + ocfs2_xattr_bucket_relse(args->old_bucket); ocfs2_xattr_bucket_relse(args->new_bucket); } @@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, ocfs2_xattr_bucket_relse(args->new_bucket); return ret; } + +static int ocfs2_reflink_xattr_buckets(handle_t *handle, + struct inode *inode, + struct ocfs2_reflink_xattr_tree_args *args, + struct ocfs2_extent_tree *et, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac, + u64 blkno, u32 cpos, u32 len) +{ + int ret, first_inserted = 0; + u32 p_cluster, num_clusters, reflink_cpos = 0; + u64 new_blkno; + unsigned int num_buckets, reflink_buckets; + unsigned int bpc = + ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); + + ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); + if (ret) { + mlog_errno(ret); + goto out; + } + num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); + ocfs2_xattr_bucket_relse(args->old_bucket); + + while (len && num_buckets) { + ret = ocfs2_claim_clusters(handle, data_ac, + 1, &p_cluster, &num_clusters); + if (ret) { + mlog_errno(ret); + goto out; + } + + new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); + reflink_buckets = min(num_buckets, bpc * num_clusters); + + ret = ocfs2_reflink_xattr_bucket(handle, blkno, + new_blkno, num_clusters, + &reflink_cpos, reflink_buckets, + meta_ac, data_ac, args); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* + * For the 1st allocated cluster, we make it use the same cpos + * so that the xattr tree looks the same as the original one + * in the most case. + */ + if (!first_inserted) { + reflink_cpos = cpos; + first_inserted = 1; + } + ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, + num_clusters, 0, meta_ac); + if (ret) + mlog_errno(ret); + + mlog(0, "insert new xattr extent rec start %llu len %u to %u\n", + (unsigned long long)new_blkno, num_clusters, reflink_cpos); + + len -= num_clusters; + blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); + num_buckets -= reflink_buckets; + } +out: + return ret; +} + /* * Create the same xattr extent record in the new inode's xattr tree. 
*/ @@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode, void *para) { int ret, credits = 0; - u32 p_cluster, num_clusters; - u64 new_blkno; handle_t *handle; struct ocfs2_reflink_xattr_tree_args *args = (struct ocfs2_reflink_xattr_tree_args *)para; @@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode, struct ocfs2_alloc_context *data_ac = NULL; struct ocfs2_extent_tree et; + mlog(0, "reflink xattr buckets %llu len %u\n", + (unsigned long long)blkno, len); + ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(args->reflink->new_inode), args->new_blk_bh); @@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode, goto out; } - ret = ocfs2_claim_clusters(handle, data_ac, - len, &p_cluster, &num_clusters); - if (ret) { - mlog_errno(ret); - goto out_commit; - } - - new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster); - - mlog(0, "reflink xattr buckets %llu to %llu, len %u\n", - (unsigned long long)blkno, (unsigned long long)new_blkno, len); - ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len, - meta_ac, data_ac, args); - if (ret) { - mlog_errno(ret); - goto out_commit; - } - - mlog(0, "insert new xattr extent rec start %llu len %u to %u\n", - (unsigned long long)new_blkno, len, cpos); - ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno, - len, 0, meta_ac); + ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, + meta_ac, data_ac, + blkno, cpos, len); if (ret) mlog_errno(ret); -out_commit: ocfs2_commit_trans(osb, handle); out: |