Diffstat (limited to 'fs/smb/client/file.c')
-rw-r--r--	fs/smb/client/file.c	506
1 file changed, 320 insertions(+), 186 deletions(-)
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index f391c9b803d8..9be37d0fe724 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
 	 */
 }
 
-static inline int cifs_convert_flags(unsigned int flags)
+static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
 {
 	if ((flags & O_ACCMODE) == O_RDONLY)
 		return GENERIC_READ;
 	else if ((flags & O_ACCMODE) == O_WRONLY)
-		return GENERIC_WRITE;
+		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
 	else if ((flags & O_ACCMODE) == O_RDWR) {
 		/* GENERIC_ALL is too much permission to request
 		   can cause unnecessary access denied on create */
@@ -329,7 +329,7 @@ int cifs_posix_open(const char *full_path, struct inode **pinode,
 		}
 	} else {
 		cifs_revalidate_mapping(*pinode);
-		rc = cifs_fattr_to_inode(*pinode, &fattr);
+		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
 	}
 
 posix_open_ret:
@@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
 	int create_options = CREATE_NOT_DIR;
 	struct TCP_Server_Info *server = tcon->ses->server;
 	struct cifs_open_parms oparms;
+	int rdwr_for_fscache = 0;
 
 	if (!server->ops->open)
 		return -ENOSYS;
 
-	desired_access = cifs_convert_flags(f_flags);
+	/* If we're caching, we need to be able to fill in around partial writes. */
+	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
+		rdwr_for_fscache = 1;
+
+	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
 
 /*********************************************************************
  *  open flag mapping table:
@@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
 	if (f_flags & O_DIRECT)
 		create_options |= CREATE_NO_BUFFER;
 
+retry_open:
 	oparms = (struct cifs_open_parms) {
 		.tcon = tcon,
 		.cifs_sb = cifs_sb,
@@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
 	};
 
 	rc = server->ops->open(xid, &oparms, oplock, buf);
-	if (rc)
+	if (rc) {
+		if (rc == -EACCES && rdwr_for_fscache == 1) {
+			desired_access = cifs_convert_flags(f_flags, 0);
+			rdwr_for_fscache = 2;
+			goto retry_open;
+		}
 		return rc;
+	}
+	if (rdwr_for_fscache == 2)
+		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
 
 	/* TODO: Add support for calling posix query info but with passing in fid */
 	if (tcon->unix_ext)
@@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem)
 }
 
 static void cifsFileInfo_put_work(struct work_struct *work);
+void serverclose_work(struct work_struct *work);
 
 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 				       struct tcon_link *tlink, __u32 oplock,
@@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	cfile->tlink = cifs_get_tlink(tlink);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
+	INIT_WORK(&cfile->serverclose, serverclose_work);
 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
 	mutex_init(&cfile->fh_mutex);
 	spin_lock_init(&cfile->file_info_lock);
@@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work)
 	cifsFileInfo_put_final(cifs_file);
 }
 
+void serverclose_work(struct work_struct *work)
+{
+	struct cifsFileInfo *cifs_file = container_of(work,
+			struct cifsFileInfo, serverclose);
+
+	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
+
+	struct TCP_Server_Info *server = tcon->ses->server;
+	int rc = 0;
+	int retries = 0;
+	int MAX_RETRIES = 4;
+
+	do {
+		if (server->ops->close_getattr)
+			rc = server->ops->close_getattr(0, tcon, cifs_file);
+		else if (server->ops->close)
+			rc = server->ops->close(0, tcon, &cifs_file->fid);
+
+		if (rc == -EBUSY || rc == -EAGAIN) {
+			retries++;
+			msleep(250);
+		}
+	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
+			);
+
+	if (retries == MAX_RETRIES)
+		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
+
+	if (cifs_file->offload)
+		queue_work(fileinfo_put_wq, &cifs_file->put);
+	else
+		cifsFileInfo_put_final(cifs_file);
+}
+
 /**
  * cifsFileInfo_put - release a reference of file priv data
  *
@@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
 	struct cifs_fid fid = {};
 	struct cifs_pending_open open;
 	bool oplock_break_cancelled;
+	bool serverclose_offloaded = false;
 
 	spin_lock(&tcon->open_file_lock);
 	spin_lock(&cifsi->open_file_lock);
 	spin_lock(&cifs_file->file_info_lock);
+
+	cifs_file->offload = offload;
 	if (--cifs_file->count > 0) {
 		spin_unlock(&cifs_file->file_info_lock);
 		spin_unlock(&cifsi->open_file_lock);
@@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
 		struct TCP_Server_Info *server = tcon->ses->server;
 		unsigned int xid;
+		int rc = 0;
 
 		xid = get_xid();
 		if (server->ops->close_getattr)
-			server->ops->close_getattr(xid, tcon, cifs_file);
+			rc = server->ops->close_getattr(xid, tcon, cifs_file);
 		else if (server->ops->close)
-			server->ops->close(xid, tcon, &cifs_file->fid);
+			rc = server->ops->close(xid, tcon, &cifs_file->fid);
 		_free_xid(xid);
+
+		if (rc == -EBUSY || rc == -EAGAIN) {
+			// Server close failed, hence offloading it as an async op
+			queue_work(serverclose_wq, &cifs_file->serverclose);
+			serverclose_offloaded = true;
+		}
 	}
 
 	if (oplock_break_cancelled)
@@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
 
 	cifs_del_pending_open(&open);
 
-	if (offload)
-		queue_work(fileinfo_put_wq, &cifs_file->put);
-	else
-		cifsFileInfo_put_final(cifs_file);
+	// if serverclose has been offloaded to wq (on failure), it will
+	// handle offloading put as well. If serverclose not offloaded,
+	// we need to handle offloading put here.
+	if (!serverclose_offloaded) {
+		if (offload)
+			queue_work(fileinfo_put_wq, &cifs_file->put);
+		else
+			cifsFileInfo_put_final(cifs_file);
+	}
 }
 
 int cifs_open(struct inode *inode, struct file *file)
@@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file)
 use_cache:
 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
 			   file->f_mode & FMODE_WRITE);
-	if (file->f_flags & O_DIRECT &&
-	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
-	     file->f_flags & O_APPEND))
-		cifs_invalidate_cache(file_inode(file),
-				      FSCACHE_INVAL_DIO_WRITE);
+	if (!(file->f_flags & O_DIRECT))
+		goto out;
+	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
+		goto out;
+	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
 
 out:
 	free_dentry_path(page);
@@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 	int disposition = FILE_OPEN;
 	int create_options = CREATE_NOT_DIR;
 	struct cifs_open_parms oparms;
+	int rdwr_for_fscache = 0;
 
 	xid = get_xid();
 	mutex_lock(&cfile->fh_mutex);
@@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 	}
 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
 
-	desired_access = cifs_convert_flags(cfile->f_flags);
+	/* If we're caching, we need to be able to fill in around partial writes. */
+	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
+		rdwr_for_fscache = 1;
+
+	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
 
 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
 	if (cfile->f_flags & O_SYNC)
@@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 	if (server->ops->get_lease_key)
 		server->ops->get_lease_key(inode, &cfile->fid);
 
+retry_open:
 	oparms = (struct cifs_open_parms) {
 		.tcon = tcon,
 		.cifs_sb = cifs_sb,
@@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 		/* indicate that we need to relock the file */
 		oparms.reconnect = true;
 	}
+	if (rc == -EACCES && rdwr_for_fscache == 1) {
+		desired_access = cifs_convert_flags(cfile->f_flags, 0);
+		rdwr_for_fscache = 2;
+		goto retry_open;
+	}
 
 	if (rc) {
 		mutex_unlock(&cfile->fh_mutex);
@@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 		goto reopen_error_exit;
 	}
 
+	if (rdwr_for_fscache == 2)
+		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
+
 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 reopen_success:
 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
@@ -1072,6 +1151,19 @@ void smb2_deferred_work_close(struct work_struct *work)
 	_cifsFileInfo_put(cfile, true, false);
 }
 
+static bool
+smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+
+	return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
+			(cinode->oplock == CIFS_CACHE_RHW_FLG ||
+			 cinode->oplock == CIFS_CACHE_RH_FLG) &&
+			!test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
+
+}
+
 int cifs_close(struct inode *inode, struct file *file)
 {
 	struct cifsFileInfo *cfile;
@@ -1085,10 +1177,8 @@ int cifs_close(struct inode *inode, struct file *file)
 		cfile = file->private_data;
 		file->private_data = NULL;
 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
-		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
-		    && cinode->lease_granted &&
-		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
-		    dclose) {
+		if ((cfile->status_file_deleted == false) &&
+		    (smb2_can_defer_close(inode, dclose))) {
 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
 				inode_set_mtime_to_ts(inode,
 						      inode_set_ctime_current(inode));
@@ -1315,20 +1405,20 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 	down_read(&cinode->lock_sem);
 
 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
-					flock->fl_flags, &conf_lock,
+					flock->c.flc_flags, &conf_lock,
 					CIFS_LOCK_OP);
 	if (exist) {
 		flock->fl_start = conf_lock->offset;
 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
-		flock->fl_pid = conf_lock->pid;
+		flock->c.flc_pid = conf_lock->pid;
 		if (conf_lock->type & server->vals->shared_lock_type)
-			flock->fl_type = F_RDLCK;
+			flock->c.flc_type = F_RDLCK;
 		else
-			flock->fl_type = F_WRLCK;
+			flock->c.flc_type = F_WRLCK;
 	} else if (!cinode->can_cache_brlcks)
 		rc = 1;
 	else
-		flock->fl_type = F_UNLCK;
+		flock->c.flc_type = F_UNLCK;
 
 	up_read(&cinode->lock_sem);
 	return rc;
@@ -1404,16 +1494,16 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
 {
 	int rc = 0;
 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
-	unsigned char saved_type = flock->fl_type;
+	unsigned char saved_type = flock->c.flc_type;
 
-	if ((flock->fl_flags & FL_POSIX) == 0)
+	if ((flock->c.flc_flags & FL_POSIX) == 0)
 		return 1;
 
 	down_read(&cinode->lock_sem);
 	posix_test_lock(file, flock);
 
-	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
-		flock->fl_type = saved_type;
+	if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) {
+		flock->c.flc_type = saved_type;
 		rc = 1;
 	}
 
@@ -1434,7 +1524,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
 	int rc = FILE_LOCK_DEFERRED + 1;
 
-	if ((flock->fl_flags & FL_POSIX) == 0)
+	if ((flock->c.flc_flags & FL_POSIX) == 0)
 		return rc;
 
 	cifs_down_write(&cinode->lock_sem);
@@ -1584,7 +1674,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 
 	el = locks_to_send.next;
 	spin_lock(&flctx->flc_lock);
-	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
+	for_each_file_lock(flock, &flctx->flc_posix) {
+		unsigned char ftype = flock->c.flc_type;
+
 		if (el == &locks_to_send) {
 			/*
 			 * The list ended. We don't have enough allocated
@@ -1594,12 +1686,12 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 			break;
 		}
 		length = cifs_flock_len(flock);
-		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
+		if (ftype == F_RDLCK || ftype == F_SHLCK)
 			type = CIFS_RDLCK;
 		else
 			type = CIFS_WRLCK;
 		lck = list_entry(el, struct lock_to_push, llist);
-		lck->pid = hash_lockowner(flock->fl_owner);
+		lck->pid = hash_lockowner(flock->c.flc_owner);
 		lck->netfid = cfile->fid.netfid;
 		lck->length = length;
 		lck->type = type;
@@ -1666,42 +1758,43 @@ static void cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock,
 		int *unlock, bool *wait_flag, struct TCP_Server_Info *server)
 {
-	if (flock->fl_flags & FL_POSIX)
+	if (flock->c.flc_flags & FL_POSIX)
 		cifs_dbg(FYI, "Posix\n");
-	if (flock->fl_flags & FL_FLOCK)
+	if (flock->c.flc_flags & FL_FLOCK)
 		cifs_dbg(FYI, "Flock\n");
-	if (flock->fl_flags & FL_SLEEP) {
+	if (flock->c.flc_flags & FL_SLEEP) {
 		cifs_dbg(FYI, "Blocking lock\n");
 		*wait_flag = true;
 	}
-	if (flock->fl_flags & FL_ACCESS)
+	if (flock->c.flc_flags & FL_ACCESS)
 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
-	if (flock->fl_flags & FL_LEASE)
+	if (flock->c.flc_flags & FL_LEASE)
 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
-	if (flock->fl_flags &
+	if (flock->c.flc_flags &
 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
-		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
+		cifs_dbg(FYI, "Unknown lock flags 0x%x\n",
+			 flock->c.flc_flags);
 
 	*type = server->vals->large_lock_type;
-	if (flock->fl_type == F_WRLCK) {
+	if (lock_is_write(flock)) {
 		cifs_dbg(FYI, "F_WRLCK\n");
 		*type |= server->vals->exclusive_lock_type;
 		*lock = 1;
-	} else if (flock->fl_type == F_UNLCK) {
+	} else if (lock_is_unlock(flock)) {
 		cifs_dbg(FYI, "F_UNLCK\n");
 		*type |= server->vals->unlock_lock_type;
 		*unlock = 1;
 		/* Check if unlock includes more than one lock range */
-	} else if (flock->fl_type == F_RDLCK) {
+	} else if (lock_is_read(flock)) {
 		cifs_dbg(FYI, "F_RDLCK\n");
 		*type |= server->vals->shared_lock_type;
 		*lock = 1;
-	} else if (flock->fl_type == F_EXLCK) {
+	} else if (flock->c.flc_type == F_EXLCK) {
 		cifs_dbg(FYI, "F_EXLCK\n");
 		*type |= server->vals->exclusive_lock_type;
 		*lock = 1;
-	} else if (flock->fl_type == F_SHLCK) {
+	} else if (flock->c.flc_type == F_SHLCK) {
 		cifs_dbg(FYI, "F_SHLCK\n");
 		*type |= server->vals->shared_lock_type;
 		*lock = 1;
@@ -1733,7 +1826,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 		else
 			posix_lock_type = CIFS_WRLCK;
 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
-				      hash_lockowner(flock->fl_owner),
+				      hash_lockowner(flock->c.flc_owner),
 				      flock->fl_start, length, flock,
 				      posix_lock_type, wait_flag);
 		return rc;
@@ -1750,7 +1843,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 	if (rc == 0) {
 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
 					    type, 0, 1, false);
-		flock->fl_type = F_UNLCK;
+		flock->c.flc_type = F_UNLCK;
 		if (rc != 0)
 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
 				 rc);
@@ -1758,7 +1851,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 	}
 
 	if (type & server->vals->shared_lock_type) {
-		flock->fl_type = F_WRLCK;
+		flock->c.flc_type = F_WRLCK;
 		return 0;
 	}
 
@@ -1770,12 +1863,12 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 	if (rc == 0) {
 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
 					    type | server->vals->shared_lock_type,
 					    0, 1, false);
-		flock->fl_type = F_RDLCK;
+		flock->c.flc_type = F_RDLCK;
 		if (rc != 0)
 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
 				 rc);
 	} else
-		flock->fl_type = F_WRLCK;
+		flock->c.flc_type = F_WRLCK;
 
 	return 0;
 }
@@ -1943,7 +2036,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
 			posix_lock_type = CIFS_UNLCK;
 
 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
-				      hash_lockowner(flock->fl_owner),
+				      hash_lockowner(flock->c.flc_owner),
 				      flock->fl_start, length,
 				      NULL, posix_lock_type, wait_flag);
 		goto out;
@@ -1953,7 +2046,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
 		struct cifsLockInfo *lock;
 
 		lock = cifs_lock_init(flock->fl_start, length, type,
-				      flock->fl_flags);
+				      flock->c.flc_flags);
 		if (!lock)
 			return -ENOMEM;
 
@@ -1992,7 +2085,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
 
 out:
-	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
+	if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) {
 		/*
 		 * If this is a request to remove all locks because we
 		 * are closing the file, it doesn't matter if the
@@ -2001,7 +2094,7 @@ out:
 		 */
 		if (rc) {
 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
-			if (!(flock->fl_flags & FL_CLOSE))
+			if (!(flock->c.flc_flags & FL_CLOSE))
 				return rc;
 		}
 		rc = locks_lock_file_wait(file, flock);
@@ -2022,7 +2115,7 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
 
 	xid = get_xid();
 
-	if (!(fl->fl_flags & FL_FLOCK)) {
+	if (!(fl->c.flc_flags & FL_FLOCK)) {
 		rc = -ENOLCK;
 		free_xid(xid);
 		return rc;
@@ -2073,7 +2166,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
 	xid = get_xid();
 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n",
 		 __func__, file, cmd,
-		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
+		 flock->c.flc_flags, flock->c.flc_type,
+		 (long long)flock->fl_start,
 		 (long long)flock->fl_end);
 
 	cfile = (struct cifsFileInfo *)file->private_data;
@@ -2624,20 +2718,20 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
  * dirty pages if possible, but don't sleep while doing so.
  */
 static void cifs_extend_writeback(struct address_space *mapping,
+				  struct xa_state *xas,
 				  long *_count,
 				  loff_t start,
 				  int max_pages,
-				  size_t max_len,
-				  unsigned int *_len)
+				  loff_t max_len,
+				  size_t *_len)
 {
 	struct folio_batch batch;
 	struct folio *folio;
-	unsigned int psize, nr_pages;
-	size_t len = *_len;
-	pgoff_t index = (start + len) / PAGE_SIZE;
+	unsigned int nr_pages;
+	pgoff_t index = (start + *_len) / PAGE_SIZE;
+	size_t len;
 	bool stop = true;
 	unsigned int i;
-	XA_STATE(xas, &mapping->i_pages, index);
 
 	folio_batch_init(&batch);
@@ -2648,54 +2742,64 @@ static void cifs_extend_writeback(struct address_space *mapping,
 	 */
 	rcu_read_lock();
 
-	xas_for_each(&xas, folio, ULONG_MAX) {
+	xas_for_each(xas, folio, ULONG_MAX) {
 		stop = true;
-		if (xas_retry(&xas, folio))
+		if (xas_retry(xas, folio))
 			continue;
 		if (xa_is_value(folio))
 			break;
-		if (folio->index != index)
+		if (folio->index != index) {
+			xas_reset(xas);
 			break;
+		}
+
 		if (!folio_try_get_rcu(folio)) {
-			xas_reset(&xas);
+			xas_reset(xas);
 			continue;
 		}
 		nr_pages = folio_nr_pages(folio);
-		if (nr_pages > max_pages)
+		if (nr_pages > max_pages) {
+			xas_reset(xas);
 			break;
+		}
 
 		/* Has the page moved or been split? */
-		if (unlikely(folio != xas_reload(&xas))) {
+		if (unlikely(folio != xas_reload(xas))) {
 			folio_put(folio);
+			xas_reset(xas);
 			break;
 		}
 		if (!folio_trylock(folio)) {
 			folio_put(folio);
+			xas_reset(xas);
 			break;
 		}
-		if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
+		if (!folio_test_dirty(folio) ||
+		    folio_test_writeback(folio)) {
 			folio_unlock(folio);
 			folio_put(folio);
+			xas_reset(xas);
 			break;
 		}
 
 		max_pages -= nr_pages;
-		psize = folio_size(folio);
-		len += psize;
+		len = folio_size(folio);
 		stop = false;
-		if (max_pages <= 0 || len >= max_len || *_count <= 0)
-			stop = true;
 
 		index += nr_pages;
+		*_count -= nr_pages;
+		*_len += len;
+		if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
+			stop = true;
+
 		if (!folio_batch_add(&batch, folio))
 			break;
 		if (stop)
 			break;
 	}
 
-	if (!stop)
-		xas_pause(&xas);
+	xas_pause(xas);
 	rcu_read_unlock();
 
 	/* Now, if we obtained any pages, we can shift them to being
@@ -2712,16 +2816,12 @@ static void cifs_extend_writeback(struct address_space *mapping,
 			if (!folio_clear_dirty_for_io(folio))
 				WARN_ON(1);
 			folio_start_writeback(folio);
-
-			*_count -= folio_nr_pages(folio);
 			folio_unlock(folio);
 		}
 
 		folio_batch_release(&batch);
 		cond_resched();
 	} while (!stop);
-
-	*_len = len;
 }
 
 /*
@@ -2729,8 +2829,10 @@ static void cifs_extend_writeback(struct address_space *mapping,
  */
 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
 						 struct writeback_control *wbc,
+						 struct xa_state *xas,
 						 struct folio *folio,
-						 loff_t start, loff_t end)
+						 unsigned long long start,
+						 unsigned long long end)
 {
 	struct inode *inode = mapping->host;
 	struct TCP_Server_Info *server;
@@ -2739,17 +2841,18 @@ static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
 	struct cifs_credits credits_on_stack;
 	struct cifs_credits *credits = &credits_on_stack;
 	struct cifsFileInfo *cfile = NULL;
-	unsigned int xid, wsize, len;
-	loff_t i_size = i_size_read(inode);
-	size_t max_len;
+	unsigned long long i_size = i_size_read(inode), max_len;
+	unsigned int xid, wsize;
+	size_t len = folio_size(folio);
 	long count = wbc->nr_to_write;
 	int rc;
 
 	/* The folio should be locked, dirty and not undergoing writeback. */
+	if (!folio_clear_dirty_for_io(folio))
+		WARN_ON_ONCE(1);
 	folio_start_writeback(folio);
 	count -= folio_nr_pages(folio);
-	len = folio_size(folio);
 
 	xid = get_xid();
 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
@@ -2779,9 +2882,10 @@ static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
 	wdata->server = server;
 	cfile = NULL;
 
-	/* Find all consecutive lockable dirty pages, stopping when we find a
-	 * page that is not immediately lockable, is not dirty or is missing,
-	 * or we reach the end of the range.
+	/* Find all consecutive lockable dirty pages that have contiguous
+	 * written regions, stopping when we find a page that is not
+	 * immediately lockable, is not dirty or is missing, or we reach the
+	 * end of the range.
 	 */
 	if (start < i_size) {
 		/* Trim the write to the EOF; the extra data is ignored.  Also
@@ -2801,19 +2905,18 @@ static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
 			max_pages -= folio_nr_pages(folio);
 
 			if (max_pages > 0)
-				cifs_extend_writeback(mapping, &count, start,
+				cifs_extend_writeback(mapping, xas, &count, start,
 						      max_pages, max_len, &len);
 		}
-		len = min_t(loff_t, len, max_len);
 	}
-
-	wdata->bytes = len;
+	len = min_t(unsigned long long, len, i_size - start);
 
 	/* We now have a contiguous set of dirty pages, each with writeback
 	 * set; the first page is still locked at this point, but all the rest
 	 * have been unlocked.
 	 */
 	folio_unlock(folio);
+	wdata->bytes = len;
 
 	if (start < i_size) {
 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
@@ -2864,102 +2967,118 @@ err_xid:
 /*
  * write a region of pages back to the server
  */
-static int cifs_writepages_region(struct address_space *mapping,
-				  struct writeback_control *wbc,
-				  loff_t start, loff_t end, loff_t *_next)
+static ssize_t cifs_writepages_begin(struct address_space *mapping,
+				     struct writeback_control *wbc,
+				     struct xa_state *xas,
+				     unsigned long long *_start,
+				     unsigned long long end)
 {
-	struct folio_batch fbatch;
+	struct folio *folio;
+	unsigned long long start = *_start;
+	ssize_t ret;
 	int skips = 0;
 
-	folio_batch_init(&fbatch);
-	do {
-		int nr;
-		pgoff_t index = start / PAGE_SIZE;
+search_again:
+	/* Find the first dirty page. */
+	rcu_read_lock();
 
-		nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
-					    PAGECACHE_TAG_DIRTY, &fbatch);
-		if (!nr)
+	for (;;) {
+		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
+		if (xas_retry(xas, folio) || xa_is_value(folio))
+			continue;
+		if (!folio)
 			break;
 
-		for (int i = 0; i < nr; i++) {
-			ssize_t ret;
-			struct folio *folio = fbatch.folios[i];
+		if (!folio_try_get_rcu(folio)) {
+			xas_reset(xas);
+			continue;
+		}
 
-redo_folio:
-			start = folio_pos(folio); /* May regress with THPs */
+		if (unlikely(folio != xas_reload(xas))) {
+			folio_put(folio);
+			xas_reset(xas);
+			continue;
+		}
 
-			/* At this point we hold neither the i_pages lock nor the
-			 * page lock: the page may be truncated or invalidated
-			 * (changing page->mapping to NULL), or even swizzled
-			 * back from swapper_space to tmpfs file mapping
-			 */
-			if (wbc->sync_mode != WB_SYNC_NONE) {
-				ret = folio_lock_killable(folio);
-				if (ret < 0)
-					goto write_error;
-			} else {
-				if (!folio_trylock(folio))
-					goto skip_write;
-			}
+		xas_pause(xas);
+		break;
+	}
+	rcu_read_unlock();
+	if (!folio)
+		return 0;
 
-			if (folio->mapping != mapping ||
-			    !folio_test_dirty(folio)) {
-				start += folio_size(folio);
-				folio_unlock(folio);
-				continue;
-			}
+	start = folio_pos(folio); /* May regress with THPs */
 
-			if (folio_test_writeback(folio) ||
-			    folio_test_fscache(folio)) {
-				folio_unlock(folio);
-				if (wbc->sync_mode == WB_SYNC_NONE)
-					goto skip_write;
+	/* At this point we hold neither the i_pages lock nor the page lock:
+	 * the page may be truncated or invalidated (changing page->mapping to
	 * NULL), or even swizzled back from swapper_space to tmpfs file
+	 * mapping
+	 */
+lock_again:
+	if (wbc->sync_mode != WB_SYNC_NONE) {
+		ret = folio_lock_killable(folio);
+		if (ret < 0)
+			return ret;
+	} else {
+		if (!folio_trylock(folio))
+			goto search_again;
+	}
+
+	if (folio->mapping != mapping ||
+	    !folio_test_dirty(folio)) {
+		start += folio_size(folio);
+		folio_unlock(folio);
+		goto search_again;
+	}
 
-			folio_wait_writeback(folio);
+	if (folio_test_writeback(folio) ||
+	    folio_test_fscache(folio)) {
+		folio_unlock(folio);
+		if (wbc->sync_mode != WB_SYNC_NONE) {
+			folio_wait_writeback(folio);
 #ifdef CONFIG_CIFS_FSCACHE
-			folio_wait_fscache(folio);
+			folio_wait_fscache(folio);
 #endif
-				goto redo_folio;
-			}
-
-			if (!folio_clear_dirty_for_io(folio))
-				/* We hold the page lock - it should've been dirty. */
-				WARN_ON(1);
-
-			ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
-			if (ret < 0)
-				goto write_error;
-
-			start += ret;
-			continue;
-
-write_error:
-			folio_batch_release(&fbatch);
-			*_next = start;
-			return ret;
+			goto lock_again;
+		}
 
-skip_write:
-			/*
-			 * Too many skipped writes, or need to reschedule?
-			 * Treat it as a write error without an error code.
-			 */
+		start += folio_size(folio);
+		if (wbc->sync_mode == WB_SYNC_NONE) {
 			if (skips >= 5 || need_resched()) {
 				ret = 0;
-				goto write_error;
+				goto out;
 			}
-
-			/* Otherwise, just skip that folio and go on to the next */
 			skips++;
-			start += folio_size(folio);
-			continue;
 		}
+		goto search_again;
+	}
 
-		folio_batch_release(&fbatch);
-		cond_resched();
-	} while (wbc->nr_to_write > 0);
+	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
+out:
+	if (ret > 0)
+		*_start = start + ret;
+	return ret;
+}
 
-	*_next = start;
-	return 0;
+/*
+ * Write a region of pages back to the server
+ */
+static int cifs_writepages_region(struct address_space *mapping,
+				  struct writeback_control *wbc,
+				  unsigned long long *_start,
+				  unsigned long long end)
+{
+	ssize_t ret;
+
+	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
+
+	do {
+		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
+		if (ret > 0 && wbc->nr_to_write > 0)
+			cond_resched();
+	} while (ret > 0 && wbc->nr_to_write > 0);
+
+	return ret > 0 ? 0 : ret;
 }
 
 /*
@@ -2968,7 +3087,7 @@ skip_write:
 static int cifs_writepages(struct address_space *mapping,
 			   struct writeback_control *wbc)
 {
-	loff_t start, next;
+	loff_t start, end;
 	int ret;
 
 	/* We have to be careful as we can end up racing with setattr()
@@ -2976,28 +3095,34 @@ static int cifs_writepages(struct address_space *mapping,
 	 * to prevent it.
	 */
 
-	if (wbc->range_cyclic) {
+	if (wbc->range_cyclic && mapping->writeback_index) {
 		start = mapping->writeback_index * PAGE_SIZE;
-		ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
-		if (ret == 0) {
-			mapping->writeback_index = next / PAGE_SIZE;
-			if (start > 0 && wbc->nr_to_write > 0) {
-				ret = cifs_writepages_region(mapping, wbc, 0,
-							     start, &next);
-				if (ret == 0)
-					mapping->writeback_index =
-						next / PAGE_SIZE;
-			}
+		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
+		if (ret < 0)
+			goto out;
+
+		if (wbc->nr_to_write <= 0) {
+			mapping->writeback_index = start / PAGE_SIZE;
+			goto out;
 		}
+
+		start = 0;
+		end = mapping->writeback_index * PAGE_SIZE;
+		mapping->writeback_index = 0;
+		ret = cifs_writepages_region(mapping, wbc, &start, end);
+		if (ret == 0)
+			mapping->writeback_index = start / PAGE_SIZE;
 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
-		ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
+		start = 0;
+		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
 		if (wbc->nr_to_write > 0 && ret == 0)
-			mapping->writeback_index = next / PAGE_SIZE;
+			mapping->writeback_index = start / PAGE_SIZE;
 	} else {
-		ret = cifs_writepages_region(mapping, wbc,
-					     wbc->range_start, wbc->range_end, &next);
+		start = wbc->range_start;
+		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
 	}
 
+out:
 	return ret;
 }
@@ -3094,8 +3219,15 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
 	if (rc > 0) {
 		spin_lock(&inode->i_lock);
 		if (pos > inode->i_size) {
+			loff_t additional_blocks = (512 - 1 + copied) >> 9;
+
 			i_size_write(inode, pos);
-			inode->i_blocks = (512 - 1 + pos) >> 9;
+			/*
+			 * Estimate new allocation size based on the amount written.
+			 * This will be updated from server on close (and on queryinfo)
+			 */
+			inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
+						inode->i_blocks + additional_blocks);
 		}
 		spin_unlock(&inode->i_lock);
 	}
@@ -4738,12 +4870,14 @@ static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
    refreshing the inode only on increases in the file size
    but this is tricky to do without racing with writebehind
    page caching in the current Linux kernel design */
-bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
+bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
+			    bool from_readdir)
 {
 	if (!cifsInode)
 		return true;
 
-	if (is_inode_writable(cifsInode)) {
+	if (is_inode_writable(cifsInode) ||
+	    ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
 		/* This inode is open for write at least once */
 		struct cifs_sb_info *cifs_sb;