summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/coda/file.c23
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/fuse/dev.c63
-rw-r--r--fs/gfs2/file.c28
-rw-r--r--fs/nfs/file.c25
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/ocfs2/file.c34
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/pipe.c13
-rw-r--r--fs/splice.c683
-rw-r--r--fs/xfs/xfs_file.c41
-rw-r--r--fs/xfs/xfs_trace.h1
13 files changed, 230 insertions, 690 deletions
diff --git a/fs/coda/file.c b/fs/coda/file.c
index f47c7483863b..8415d4f8d1a1 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -38,27 +38,6 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
}
static ssize_t
-coda_file_splice_read(struct file *coda_file, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t count,
- unsigned int flags)
-{
- ssize_t (*splice_read)(struct file *, loff_t *,
- struct pipe_inode_info *, size_t, unsigned int);
- struct coda_file_info *cfi;
- struct file *host_file;
-
- cfi = CODA_FTOC(coda_file);
- BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
- host_file = cfi->cfi_container;
-
- splice_read = host_file->f_op->splice_read;
- if (!splice_read)
- splice_read = default_file_splice_read;
-
- return splice_read(host_file, ppos, pipe, count, flags);
-}
-
-static ssize_t
coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *coda_file = iocb->ki_filp;
@@ -225,6 +204,6 @@ const struct file_operations coda_file_operations = {
.open = coda_open,
.release = coda_release,
.fsync = coda_fsync,
- .splice_read = coda_file_splice_read,
+ .splice_read = generic_file_splice_read,
};
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7c3ce73cb617..fb9aa16a7727 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -246,6 +246,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
if ((dio->op == REQ_OP_READ) &&
((offset + transferred) > dio->i_size))
transferred = dio->i_size - offset;
+ /* ignore EFAULT if some IO has been done */
+ if (unlikely(ret == -EFAULT) && transferred)
+ ret = 0;
}
if (ret == 0)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c41bde26c338..70ea57c7b6bb 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
struct pipe_buffer *buf = cs->pipebufs;
if (!cs->write) {
- err = buf->ops->confirm(cs->pipe, buf);
+ err = pipe_buf_confirm(cs->pipe, buf);
if (err)
return err;
@@ -827,7 +827,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
fuse_copy_finish(cs);
- err = buf->ops->confirm(cs->pipe, buf);
+ err = pipe_buf_confirm(cs->pipe, buf);
if (err)
return err;
@@ -840,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (cs->len != PAGE_SIZE)
goto out_fallback;
- if (buf->ops->steal(cs->pipe, buf) != 0)
+ if (pipe_buf_steal(cs->pipe, buf) != 0)
goto out_fallback;
newpage = buf->page;
@@ -1341,9 +1341,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
{
- int ret;
+ int total, ret;
int page_nr = 0;
- int do_wakeup = 0;
struct pipe_buffer *bufs;
struct fuse_copy_state cs;
struct fuse_dev *fud = fuse_get_dev(in);
@@ -1362,52 +1361,23 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (ret < 0)
goto out;
- ret = 0;
- pipe_lock(pipe);
-
- if (!pipe->readers) {
- send_sig(SIGPIPE, current, 0);
- if (!ret)
- ret = -EPIPE;
- goto out_unlock;
- }
-
if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
ret = -EIO;
- goto out_unlock;
+ goto out;
}
- while (page_nr < cs.nr_segs) {
- int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
- struct pipe_buffer *buf = pipe->bufs + newbuf;
-
- buf->page = bufs[page_nr].page;
- buf->offset = bufs[page_nr].offset;
- buf->len = bufs[page_nr].len;
+ for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
/*
* Need to be careful about this. Having buf->ops in module
* code can Oops if the buffer persists after module unload.
*/
- buf->ops = &nosteal_pipe_buf_ops;
-
- pipe->nrbufs++;
- page_nr++;
- ret += buf->len;
-
- if (pipe->files)
- do_wakeup = 1;
- }
-
-out_unlock:
- pipe_unlock(pipe);
-
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ bufs[page_nr].ops = &nosteal_pipe_buf_ops;
+ ret = add_to_pipe(pipe, &bufs[page_nr++]);
+ if (unlikely(ret < 0))
+ break;
}
-
+ if (total)
+ ret = total;
out:
for (; page_nr < cs.nr_segs; page_nr++)
put_page(bufs[page_nr].page);
@@ -1992,7 +1962,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
- ibuf->ops->get(pipe, ibuf);
+ pipe_buf_get(pipe, ibuf);
*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
@@ -2014,10 +1984,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);
- for (idx = 0; idx < nbuf; idx++) {
- struct pipe_buffer *buf = &bufs[idx];
- buf->ops->release(pipe, buf);
- }
+ for (idx = 0; idx < nbuf; idx++)
+ pipe_buf_release(pipe, &bufs[idx]);
+
out:
kfree(bufs);
return ret;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 360188f162bd..e23ff70b3435 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -954,30 +954,6 @@ out_uninit:
return ret;
}
-static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
-{
- struct inode *inode = in->f_mapping->host;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- int ret;
-
- inode_lock(inode);
-
- ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
- if (ret) {
- inode_unlock(inode);
- return ret;
- }
-
- gfs2_glock_dq_uninit(&gh);
- inode_unlock(inode);
-
- return generic_file_splice_read(in, ppos, pipe, len, flags);
-}
-
-
static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
@@ -1140,7 +1116,7 @@ const struct file_operations gfs2_file_fops = {
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
- .splice_read = gfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
@@ -1168,7 +1144,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.open = gfs2_open,
.release = gfs2_release,
.fsync = gfs2_fsync,
- .splice_read = gfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ca699ddc11c1..2efbdde36c3e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -182,29 +182,6 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
}
EXPORT_SYMBOL_GPL(nfs_file_read);
-ssize_t
-nfs_file_splice_read(struct file *filp, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t count,
- unsigned int flags)
-{
- struct inode *inode = file_inode(filp);
- ssize_t res;
-
- dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
- filp, (unsigned long) count, (unsigned long long) *ppos);
-
- nfs_start_io_read(inode);
- res = nfs_revalidate_mapping(inode, filp->f_mapping);
- if (!res) {
- res = generic_file_splice_read(filp, ppos, pipe, count, flags);
- if (res > 0)
- nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
- }
- nfs_end_io_read(inode);
- return res;
-}
-EXPORT_SYMBOL_GPL(nfs_file_splice_read);
-
int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
@@ -871,7 +848,7 @@ const struct file_operations nfs_file_operations = {
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
- .splice_read = nfs_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 74935a19e4bf..d7b062bdc504 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -365,8 +365,6 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
loff_t nfs_file_llseek(struct file *, loff_t, int);
ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
-ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
- size_t, unsigned int);
int nfs_file_mmap(struct file *, struct vm_area_struct *);
ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
int nfs_file_release(struct inode *, struct file *);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index d085ad794884..89a77950e0b0 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -248,7 +248,7 @@ const struct file_operations nfs4_file_operations = {
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
- .splice_read = nfs_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0b055bfb8e86..8f91639f8364 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2321,36 +2321,6 @@ out_mutex:
return ret;
}
-static ssize_t ocfs2_file_splice_read(struct file *in,
- loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t len,
- unsigned int flags)
-{
- int ret = 0, lock_level = 0;
- struct inode *inode = file_inode(in);
-
- trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
- in->f_path.dentry->d_name.len,
- in->f_path.dentry->d_name.name, len);
-
- /*
- * See the comment in ocfs2_file_read_iter()
- */
- ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
- if (ret < 0) {
- mlog_errno(ret);
- goto bail;
- }
- ocfs2_inode_unlock(inode, lock_level);
-
- ret = generic_file_splice_read(in, ppos, pipe, len, flags);
-
-bail:
- return ret;
-}
-
static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
struct iov_iter *to)
{
@@ -2509,7 +2479,7 @@ const struct file_operations ocfs2_fops = {
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
- .splice_read = ocfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
};
@@ -2554,7 +2524,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
.compat_ioctl = ocfs2_compat_ioctl,
#endif
.flock = ocfs2_flock,
- .splice_read = ocfs2_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
};
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index f8f5fc5e6c05..0b58abcf1c6d 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -1314,8 +1314,6 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);
DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
-DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
-
DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);
DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
diff --git a/fs/pipe.c b/fs/pipe.c
index 4ebe6b2e5217..4fc422f0dea8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -267,7 +267,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (bufs) {
int curbuf = pipe->curbuf;
struct pipe_buffer *buf = pipe->bufs + curbuf;
- const struct pipe_buf_operations *ops = buf->ops;
size_t chars = buf->len;
size_t written;
int error;
@@ -275,7 +274,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (chars > total_len)
chars = total_len;
- error = ops->confirm(pipe, buf);
+ error = pipe_buf_confirm(pipe, buf);
if (error) {
if (!ret)
ret = error;
@@ -299,8 +298,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
}
if (!buf->len) {
- buf->ops = NULL;
- ops->release(pipe, buf);
+ pipe_buf_release(pipe, buf);
curbuf = (curbuf + 1) & (pipe->buffers - 1);
pipe->curbuf = curbuf;
pipe->nrbufs = --bufs;
@@ -383,11 +381,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
(pipe->buffers - 1);
struct pipe_buffer *buf = pipe->bufs + lastbuf;
- const struct pipe_buf_operations *ops = buf->ops;
int offset = buf->offset + buf->len;
- if (ops->can_merge && offset + chars <= PAGE_SIZE) {
- ret = ops->confirm(pipe, buf);
+ if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) {
+ ret = pipe_buf_confirm(pipe, buf);
if (ret)
goto out;
@@ -664,7 +661,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)
for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops)
- buf->ops->release(pipe, buf);
+ pipe_buf_release(pipe, buf);
}
if (pipe->tmp_page)
__free_page(pipe->tmp_page);
diff --git a/fs/splice.c b/fs/splice.c
index dd9bf7e410d2..aa38901a4f10 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -183,82 +183,39 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd)
{
unsigned int spd_pages = spd->nr_pages;
- int ret, do_wakeup, page_nr;
+ int ret = 0, page_nr = 0;
if (!spd_pages)
return 0;
- ret = 0;
- do_wakeup = 0;
- page_nr = 0;
-
- pipe_lock(pipe);
-
- for (;;) {
- if (!pipe->readers) {
- send_sig(SIGPIPE, current, 0);
- if (!ret)
- ret = -EPIPE;
- break;
- }
-
- if (pipe->nrbufs < pipe->buffers) {
- int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
- struct pipe_buffer *buf = pipe->bufs + newbuf;
-
- buf->page = spd->pages[page_nr];
- buf->offset = spd->partial[page_nr].offset;
- buf->len = spd->partial[page_nr].len;
- buf->private = spd->partial[page_nr].private;
- buf->ops = spd->ops;
- if (spd->flags & SPLICE_F_GIFT)
- buf->flags |= PIPE_BUF_FLAG_GIFT;
-
- pipe->nrbufs++;
- page_nr++;
- ret += buf->len;
-
- if (pipe->files)
- do_wakeup = 1;
+ if (unlikely(!pipe->readers)) {
+ send_sig(SIGPIPE, current, 0);
+ ret = -EPIPE;
+ goto out;
+ }
- if (!--spd->nr_pages)
- break;
- if (pipe->nrbufs < pipe->buffers)
- continue;
+ while (pipe->nrbufs < pipe->buffers) {
+ int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+ struct pipe_buffer *buf = pipe->bufs + newbuf;
- break;
- }
+ buf->page = spd->pages[page_nr];
+ buf->offset = spd->partial[page_nr].offset;
+ buf->len = spd->partial[page_nr].len;
+ buf->private = spd->partial[page_nr].private;
+ buf->ops = spd->ops;
- if (spd->flags & SPLICE_F_NONBLOCK) {
- if (!ret)
- ret = -EAGAIN;
- break;
- }
+ pipe->nrbufs++;
+ page_nr++;
+ ret += buf->len;
- if (signal_pending(current)) {
- if (!ret)
- ret = -ERESTARTSYS;
+ if (!--spd->nr_pages)
break;
- }
-
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- do_wakeup = 0;
- }
-
- pipe->waiting_writers++;
- pipe_wait(pipe);
- pipe->waiting_writers--;
}
- pipe_unlock(pipe);
-
- if (do_wakeup)
- wakeup_pipe_readers(pipe);
+ if (!ret)
+ ret = -EAGAIN;
+out:
while (page_nr < spd_pages)
spd->spd_release(spd, page_nr++);
@@ -266,6 +223,26 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
}
EXPORT_SYMBOL_GPL(splice_to_pipe);
+ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+{
+ int ret;
+
+ if (unlikely(!pipe->readers)) {
+ send_sig(SIGPIPE, current, 0);
+ ret = -EPIPE;
+ } else if (pipe->nrbufs == pipe->buffers) {
+ ret = -EAGAIN;
+ } else {
+ int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+ pipe->bufs[newbuf] = *buf;
+ pipe->nrbufs++;
+ return buf->len;
+ }
+ pipe_buf_release(pipe, buf);
+ return ret;
+}
+EXPORT_SYMBOL(add_to_pipe);
+
void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
put_page(spd->pages[i]);
@@ -303,207 +280,6 @@ void splice_shrink_spd(struct splice_pipe_desc *spd)
kfree(spd->partial);
}
-static int
-__generic_file_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
-{
- struct address_space *mapping = in->f_mapping;
- unsigned int loff, nr_pages, req_pages;
- struct page *pages[PIPE_DEF_BUFFERS];
- struct partial_page partial[PIPE_DEF_BUFFERS];
- struct page *page;
- pgoff_t index, end_index;
- loff_t isize;
- int error, page_nr;
- struct splice_pipe_desc spd = {
- .pages = pages,
- .partial = partial,
- .nr_pages_max = PIPE_DEF_BUFFERS,
- .flags = flags,
- .ops = &page_cache_pipe_buf_ops,
- .spd_release = spd_release_page,
- };
-
- if (splice_grow_spd(pipe, &spd))
- return -ENOMEM;
-
- index = *ppos >> PAGE_SHIFT;
- loff = *ppos & ~PAGE_MASK;
- req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
- nr_pages = min(req_pages, spd.nr_pages_max);
-
- /*
- * Lookup the (hopefully) full range of pages we need.
- */
- spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
- index += spd.nr_pages;
-
- /*
- * If find_get_pages_contig() returned fewer pages than we needed,
- * readahead/allocate the rest and fill in the holes.
- */
- if (spd.nr_pages < nr_pages)
- page_cache_sync_readahead(mapping, &in->f_ra, in,
- index, req_pages - spd.nr_pages);
-
- error = 0;
- while (spd.nr_pages < nr_pages) {
- /*
- * Page could be there, find_get_pages_contig() breaks on
- * the first hole.
- */
- page = find_get_page(mapping, index);
- if (!page) {
- /*
- * page didn't exist, allocate one.
- */
- page = page_cache_alloc_cold(mapping);
- if (!page)
- break;
-
- error = add_to_page_cache_lru(page, mapping, index,
- mapping_gfp_constraint(mapping, GFP_KERNEL));
- if (unlikely(error)) {
- put_page(page);
- if (error == -EEXIST)
- continue;
- break;
- }
- /*
- * add_to_page_cache() locks the page, unlock it
- * to avoid convoluting the logic below even more.
- */
- unlock_page(page);
- }
-
- spd.pages[spd.nr_pages++] = page;
- index++;
- }
-
- /*
- * Now loop over the map and see if we need to start IO on any
- * pages, fill in the partial map, etc.
- */
- index = *ppos >> PAGE_SHIFT;
- nr_pages = spd.nr_pages;
- spd.nr_pages = 0;
- for (page_nr = 0; page_nr < nr_pages; page_nr++) {
- unsigned int this_len;
-
- if (!len)
- break;
-
- /*
- * this_len is the max we'll use from this page
- */
- this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
- page = spd.pages[page_nr];
-
- if (PageReadahead(page))
- page_cache_async_readahead(mapping, &in->f_ra, in,
- page, index, req_pages - page_nr);
-
- /*
- * If the page isn't uptodate, we may need to start io on it
- */
- if (!PageUptodate(page)) {
- lock_page(page);
-
- /*
- * Page was truncated, or invalidated by the
- * filesystem. Redo the find/create, but this time the
- * page is kept locked, so there's no chance of another
- * race with truncate/invalidate.
- */
- if (!page->mapping) {
- unlock_page(page);
-retry_lookup:
- page = find_or_create_page(mapping, index,
- mapping_gfp_mask(mapping));
-
- if (!page) {
- error = -ENOMEM;
- break;
- }
- put_page(spd.pages[page_nr]);
- spd.pages[page_nr] = page;
- }
- /*
- * page was already under io and is now done, great
- */
- if (PageUptodate(page)) {
- unlock_page(page);
- goto fill_it;
- }
-
- /*
- * need to read in the page
- */
- error = mapping->a_ops->readpage(in, page);
- if (unlikely(error)) {
- /*
- * Re-lookup the page
- */
- if (error == AOP_TRUNCATED_PAGE)
- goto retry_lookup;
-
- break;
- }
- }
-fill_it:
- /*
- * i_size must be checked after PageUptodate.
- */
- isize = i_size_read(mapping->host);
- end_index = (isize - 1) >> PAGE_SHIFT;
- if (unlikely(!isize || index > end_index))
- break;
-
- /*
- * if this is the last page, see if we need to shrink
- * the length and stop
- */
- if (end_index == index) {
- unsigned int plen;
-
- /*
- * max good bytes in this page
- */
- plen = ((isize - 1) & ~PAGE_MASK) + 1;
- if (plen <= loff)
- break;
-
- /*
- * force quit after adding this page
- */
- this_len = min(this_len, plen - loff);
- len = this_len;
- }
-
- spd.partial[page_nr].offset = loff;
- spd.partial[page_nr].len = this_len;
- len -= this_len;
- loff = 0;
- spd.nr_pages++;
- index++;
- }
-
- /*
- * Release any pages at the end, if we quit early. 'page_nr' is how far
- * we got, 'nr_pages' is how many pages are in the map.
- */
- while (page_nr < nr_pages)
- put_page(spd.pages[page_nr++]);
- in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
-
- if (spd.nr_pages)
- error = splice_to_pipe(pipe, &spd);
-
- splice_shrink_spd(&spd);
- return error;
-}
-
/**
* generic_file_splice_read - splice data from file to a pipe
* @in: file to splice from
@@ -514,39 +290,53 @@ fill_it:
*
* Description:
* Will read pages from given file and fill them into a pipe. Can be
- * used as long as the address_space operations for the source implements
- * a readpage() hook.
+ * used as long as it has more or less sane ->read_iter().
*
*/
ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
- loff_t isize, left;
- int ret;
-
- if (IS_DAX(in->f_mapping->host))
- return default_file_splice_read(in, ppos, pipe, len, flags);
+ struct iov_iter to;
+ struct kiocb kiocb;
+ loff_t isize;
+ int idx, ret;
isize = i_size_read(in->f_mapping->host);
if (unlikely(*ppos >= isize))
return 0;
- left = isize - *ppos;
- if (unlikely(left < len))
- len = left;
-
- ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
+ iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
+ idx = to.idx;
+ init_sync_kiocb(&kiocb, in);
+ kiocb.ki_pos = *ppos;
+ ret = in->f_op->read_iter(&kiocb, &to);
if (ret > 0) {
- *ppos += ret;
+ *ppos = kiocb.ki_pos;
file_accessed(in);
+ } else if (ret < 0) {
+ if (WARN_ON(to.idx != idx || to.iov_offset)) {
+ /*
+ * a bogus ->read_iter() has copied something and still
+ * returned an error instead of a short read.
+ */
+ to.idx = idx;
+ to.iov_offset = 0;
+ iov_iter_advance(&to, 0); /* to free what was emitted */
+ }
+ /*
+ * callers of ->splice_read() expect -EAGAIN on
+ * "can't put anything in there", rather than -EFAULT.
+ */
+ if (ret == -EFAULT)
+ ret = -EAGAIN;
}
return ret;
}
EXPORT_SYMBOL(generic_file_splice_read);
-static const struct pipe_buf_operations default_pipe_buf_ops = {
+const struct pipe_buf_operations default_pipe_buf_ops = {
.can_merge = 0,
.confirm = generic_pipe_buf_confirm,
.release = generic_pipe_buf_release,
@@ -570,7 +360,7 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
};
EXPORT_SYMBOL(nosteal_pipe_buf_ops);
-static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
unsigned long vlen, loff_t offset)
{
mm_segment_t old_fs;
@@ -602,102 +392,70 @@ ssize_t kernel_write(struct file *file, const char *buf, size_t count,
}
EXPORT_SYMBOL(kernel_write);
-ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
+ struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
+ struct iov_iter to;
+ struct page **pages;
unsigned int nr_pages;
- unsigned int nr_freed;
- size_t offset;
- struct page *pages[PIPE_DEF_BUFFERS];
- struct partial_page partial[PIPE_DEF_BUFFERS];
- struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
+ size_t offset, dummy, copied = 0;
ssize_t res;
- size_t this_len;
- int error;
int i;
- struct splice_pipe_desc spd = {
- .pages = pages,
- .partial = partial,
- .nr_pages_max = PIPE_DEF_BUFFERS,
- .flags = flags,
- .ops = &default_pipe_buf_ops,
- .spd_release = spd_release_page,
- };
- if (splice_grow_spd(pipe, &spd))
+ if (pipe->nrbufs == pipe->buffers)
+ return -EAGAIN;
+
+ /*
+ * Try to keep page boundaries matching to source pagecache ones -
+ * it probably won't be much help, but...
+ */
+ offset = *ppos & ~PAGE_MASK;
+
+ iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
+
+ res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy);
+ if (res <= 0)
return -ENOMEM;
- res = -ENOMEM;
+ nr_pages = res / PAGE_SIZE;
+
vec = __vec;
- if (spd.nr_pages_max > PIPE_DEF_BUFFERS) {
- vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);
- if (!vec)
- goto shrink_ret;
+ if (nr_pages > PIPE_DEF_BUFFERS) {
+ vec = kmalloc(nr_pages * sizeof(struct kvec), GFP_KERNEL);
+ if (unlikely(!vec)) {
+ res = -ENOMEM;
+ goto out;
+ }
}
- offset = *ppos & ~PAGE_MASK;
- nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
- for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
- struct page *page;
+ pipe->bufs[to.idx].offset = offset;
+ pipe->bufs[to.idx].len -= offset;
- page = alloc_page(GFP_USER);
- error = -ENOMEM;
- if (!page)
- goto err;
-
- this_len = min_t(size_t, len, PAGE_SIZE - offset);
- vec[i].iov_base = (void __user *) page_address(page);
+ for (i = 0; i < nr_pages; i++) {
+ size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
+ vec[i].iov_base = page_address(pages[i]) + offset;
vec[i].iov_len = this_len;
- spd.pages[i] = page;
- spd.nr_pages++;
len -= this_len;
offset = 0;
}
- res = kernel_readv(in, vec, spd.nr_pages, *ppos);
- if (res < 0) {
- error = res;
- goto err;
- }
-
- error = 0;
- if (!res)
- goto err;
-
- nr_freed = 0;
- for (i = 0; i < spd.nr_pages; i++) {
- this_len = min_t(size_t, vec[i].iov_len, res);
- spd.partial[i].offset = 0;
- spd.partial[i].len = this_len;
- if (!this_len) {
- __free_page(spd.pages[i]);
- spd.pages[i] = NULL;
- nr_freed++;
- }
- res -= this_len;
- }
- spd.nr_pages -= nr_freed;
-
- res = splice_to_pipe(pipe, &spd);
- if (res > 0)
+ res = kernel_readv(in, vec, nr_pages, *ppos);
+ if (res > 0) {
+ copied = res;
*ppos += res;
+ }
-shrink_ret:
if (vec != __vec)
kfree(vec);
- splice_shrink_spd(&spd);
+out:
+ for (i = 0; i < nr_pages; i++)
+ put_page(pages[i]);
+ kvfree(pages);
+ iov_iter_advance(&to, copied); /* truncates and discards */
return res;
-
-err:
- for (i = 0; i < spd.nr_pages; i++)
- __free_page(spd.pages[i]);
-
- res = error;
- goto shrink_ret;
}
-EXPORT_SYMBOL(default_file_splice_read);
/*
* Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
@@ -757,13 +515,12 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
while (pipe->nrbufs) {
struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
- const struct pipe_buf_operations *ops = buf->ops;
sd->len = buf->len;
if (sd->len > sd->total_len)
sd->len = sd->total_len;
- ret = buf->ops->confirm(pipe, buf);
+ ret = pipe_buf_confirm(pipe, buf);
if (unlikely(ret)) {
if (ret == -ENODATA)
ret = 0;
@@ -783,8 +540,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
sd->total_len -= ret;
if (!buf->len) {
- buf->ops = NULL;
- ops->release(pipe, buf);
+ pipe_buf_release(pipe, buf);
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
if (pipe->files)
@@ -1003,7 +759,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
if (idx == pipe->buffers - 1)
idx = -1;
- ret = buf->ops->confirm(pipe, buf);
+ ret = pipe_buf_confirm(pipe, buf);
if (unlikely(ret)) {
if (ret == -ENODATA)
ret = 0;
@@ -1030,11 +786,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
while (ret) {
struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
if (ret >= buf->len) {
- const struct pipe_buf_operations *ops = buf->ops;
ret -= buf->len;
buf->len = 0;
- buf->ops = NULL;
- ops->release(pipe, buf);
+ pipe_buf_release(pipe, buf);
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
if (pipe->files)
@@ -1273,10 +1027,8 @@ out_release:
for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
- if (buf->ops) {
- buf->ops->release(pipe, buf);
- buf->ops = NULL;
- }
+ if (buf->ops)
+ pipe_buf_release(pipe, buf);
}
if (!bytes)
@@ -1342,6 +1094,20 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
}
EXPORT_SYMBOL(do_splice_direct);
+static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
+{
+ while (pipe->nrbufs == pipe->buffers) {
+ if (flags & SPLICE_F_NONBLOCK)
+ return -EAGAIN;
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+ pipe->waiting_writers++;
+ pipe_wait(pipe);
+ pipe->waiting_writers--;
+ }
+ return 0;
+}
+
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags);
@@ -1424,8 +1190,13 @@ static long do_splice(struct file *in, loff_t __user *off_in,
offset = in->f_pos;
}
- ret = do_splice_to(in, &offset, opipe, len, flags);
-
+ pipe_lock(opipe);
+ ret = wait_for_space(opipe, flags);
+ if (!ret)
+ ret = do_splice_to(in, &offset, opipe, len, flags);
+ pipe_unlock(opipe);
+ if (ret > 0)
+ wakeup_pipe_readers(opipe);
if (!off_in)
in->f_pos = offset;
else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
@@ -1437,106 +1208,50 @@ static long do_splice(struct file *in, loff_t __user *off_in,
return -EINVAL;
}
-/*
- * Map an iov into an array of pages and offset/length tupples. With the
- * partial_page structure, we can map several non-contiguous ranges into
- * our ones pages[] map instead of splitting that operation into pieces.
- * Could easily be exported as a generic helper for other users, in which
- * case one would probably want to add a 'max_nr_pages' parameter as well.
- */
-static int get_iovec_page_array(const struct iovec __user *iov,
- unsigned int nr_vecs, struct page **pages,
- struct partial_page *partial, bool aligned,
- unsigned int pipe_buffers)
+static int iter_to_pipe(struct iov_iter *from,
+ struct pipe_inode_info *pipe,
+ unsigned flags)
{
- int buffers = 0, error = 0;
-
- while (nr_vecs) {
- unsigned long off, npages;
- struct iovec entry;
- void __user *base;
- size_t len;
- int i;
-
- error = -EFAULT;
- if (copy_from_user(&entry, iov, sizeof(entry)))
- break;
-
- base = entry.iov_base;
- len = entry.iov_len;
-
- /*
- * Sanity check this iovec. 0 read succeeds.
- */
- error = 0;
- if (unlikely(!len))
- break;
- error = -EFAULT;
- if (!access_ok(VERIFY_READ, base, len))
- break;
-
- /*
- * Get this base offset and number of pages, then map
- * in the user pages.
- */
- off = (unsigned long) base & ~PAGE_MASK;
-
- /*
- * If asked for alignment, the offset must be zero and the
- * length a multiple of the PAGE_SIZE.
- */
- error = -EINVAL;
- if (aligned && (off || len & ~PAGE_MASK))
- break;
-
- npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (npages > pipe_buffers - buffers)
- npages = pipe_buffers - buffers;
-
- error = get_user_pages_fast((unsigned long)base, npages,
- 0, &pages[buffers]);
-
- if (unlikely(error <= 0))
+ struct pipe_buffer buf = {
+ .ops = &user_page_pipe_buf_ops,
+ .flags = flags
+ };
+ size_t total = 0;
+ int ret = 0;
+ bool failed = false;
+
+ while (iov_iter_count(from) && !failed) {
+ struct page *pages[16];
+ ssize_t copied;
+ size_t start;
+ int n;
+
+ copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start);
+ if (copied <= 0) {
+ ret = copied;
break;
-
- /*
- * Fill this contiguous range into the partial page map.
- */
- for (i = 0; i < error; i++) {
- const int plen = min_t(size_t, len, PAGE_SIZE - off);
-
- partial[buffers].offset = off;
- partial[buffers].len = plen;
-
- off = 0;
- len -= plen;
- buffers++;
}
- /*
- * We didn't complete this iov, stop here since it probably
- * means we have to move some of this into a pipe to
- * be able to continue.
- */
- if (len)
- break;
-
- /*
- * Don't continue if we mapped fewer pages than we asked for,
- * or if we mapped the max number of pages that we have
- * room for.
- */
- if (error < npages || buffers == pipe_buffers)
- break;
-
- nr_vecs--;
- iov++;
+ for (n = 0; copied; n++, start = 0) {
+ int size = min_t(int, copied, PAGE_SIZE - start);
+ if (!failed) {
+ buf.page = pages[n];
+ buf.offset = start;
+ buf.len = size;
+ ret = add_to_pipe(pipe, &buf);
+ if (unlikely(ret < 0)) {
+ failed = true;
+ } else {
+ iov_iter_advance(from, ret);
+ total += ret;
+ }
+ } else {
+ put_page(pages[n]);
+ }
+ copied -= size;
+ }
}
-
- if (buffers)
- return buffers;
-
- return error;
+ return total ? total : ret;
}
static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
@@ -1590,38 +1305,36 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
* as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally.
*/
-static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov,
unsigned long nr_segs, unsigned int flags)
{
struct pipe_inode_info *pipe;
- struct page *pages[PIPE_DEF_BUFFERS];
- struct partial_page partial[PIPE_DEF_BUFFERS];
- struct splice_pipe_desc spd = {
- .pages = pages,
- .partial = partial,
- .nr_pages_max = PIPE_DEF_BUFFERS,
- .flags = flags,
- .ops = &user_page_pipe_buf_ops,
- .spd_release = spd_release_page,
- };
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter from;
long ret;
+ unsigned buf_flag = 0;
+
+ if (flags & SPLICE_F_GIFT)
+ buf_flag = PIPE_BUF_FLAG_GIFT;
pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
- if (splice_grow_spd(pipe, &spd))
- return -ENOMEM;
-
- spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
- spd.partial, false,
- spd.nr_pages_max);
- if (spd.nr_pages <= 0)
- ret = spd.nr_pages;
- else
- ret = splice_to_pipe(pipe, &spd);
+ ret = import_iovec(WRITE, uiov, nr_segs,
+ ARRAY_SIZE(iovstack), &iov, &from);
+ if (ret < 0)
+ return ret;
- splice_shrink_spd(&spd);
+ pipe_lock(pipe);
+ ret = wait_for_space(pipe, flags);
+ if (!ret)
+ ret = iter_to_pipe(&from, pipe, buf_flag);
+ pipe_unlock(pipe);
+ if (ret > 0)
+ wakeup_pipe_readers(pipe);
+ kfree(iov);
return ret;
}
@@ -1876,7 +1589,7 @@ retry:
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- ibuf->ops->get(ipipe, ibuf);
+ pipe_buf_get(ipipe, ibuf);
*obuf = *ibuf;
/*
@@ -1948,7 +1661,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- ibuf->ops->get(ipipe, ibuf);
+ pipe_buf_get(ipipe, ibuf);
obuf = opipe->bufs + nbuf;
*obuf = *ibuf;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c68517b0f248..f46b2929c64d 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -393,45 +393,6 @@ xfs_file_read_iter(
return ret;
}
-STATIC ssize_t
-xfs_file_splice_read(
- struct file *infilp,
- loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t count,
- unsigned int flags)
-{
- struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
- ssize_t ret;
-
- XFS_STATS_INC(ip->i_mount, xs_read_calls);
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- trace_xfs_file_splice_read(ip, count, *ppos);
-
- /*
- * DAX inodes cannot ues the page cache for splice, so we have to push
- * them through the VFS IO path. This means it goes through
- * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
- * cannot lock the splice operation at this level for DAX inodes.
- */
- if (IS_DAX(VFS_I(ip))) {
- ret = default_file_splice_read(infilp, ppos, pipe, count,
- flags);
- goto out;
- }
-
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-out:
- if (ret > 0)
- XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
- return ret;
-}
-
/*
* Zero any on disk space between the current EOF and the new, larger EOF.
*
@@ -1608,7 +1569,7 @@ const struct file_operations xfs_file_operations = {
.llseek = xfs_file_llseek,
.read_iter = xfs_file_read_iter,
.write_iter = xfs_file_write_iter,
- .splice_read = xfs_file_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c6b2b1dcde75..16093c7dacde 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1170,7 +1170,6 @@ DEFINE_RW_EVENT(xfs_file_dax_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_dax_write);
-DEFINE_RW_EVENT(xfs_file_splice_read);
DECLARE_EVENT_CLASS(xfs_page_class,
TP_PROTO(struct inode *inode, struct page *page, unsigned long off,