summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/blk_types.h1
-rw-r--r--include/linux/blkdev.h16
-rw-r--r--include/linux/clk.h80
-rw-r--r--include/linux/compiler_attributes.h26
-rw-r--r--include/linux/compiler_types.h28
-rw-r--r--include/linux/completion.h1
-rw-r--r--include/linux/cpu.h4
-rw-r--r--include/linux/cpumask.h8
-rw-r--r--include/linux/dm-verity-loadpin.h2
-rw-r--r--include/linux/dnotify.h4
-rw-r--r--include/linux/filelock.h12
-rw-r--r--include/linux/fs.h239
-rw-r--r--include/linux/fs_context.h6
-rw-r--r--include/linux/fs_stack.h2
-rw-r--r--include/linux/ftrace.h4
-rw-r--r--include/linux/huge_mm.h3
-rw-r--r--include/linux/hyperv.h3
-rw-r--r--include/linux/intel_rapl.h14
-rw-r--r--include/linux/iomap.h3
-rw-r--r--include/linux/list.h89
-rw-r--r--include/linux/lsm_hook_defs.h1
-rw-r--r--include/linux/mm.h50
-rw-r--r--include/linux/mm_types.h37
-rw-r--r--include/linux/mmap_lock.h10
-rw-r--r--include/linux/nls.h2
-rw-r--r--include/linux/notifier.h11
-rw-r--r--include/linux/nsproxy.h7
-rw-r--r--include/linux/pagemap.h82
-rw-r--r--include/linux/pagewalk.h11
-rw-r--r--include/linux/pipe_fs_i.h4
-rw-r--r--include/linux/pm_wakeirq.h10
-rw-r--r--include/linux/prefetch.h7
-rw-r--r--include/linux/raid_class.h4
-rw-r--r--include/linux/rculist_nulls.h4
-rw-r--r--include/linux/rcupdate_trace.h1
-rw-r--r--include/linux/rcupdate_wait.h5
-rw-r--r--include/linux/security.h6
-rw-r--r--include/linux/seq_file.h7
-rw-r--r--include/linux/serial_core.h3
-rw-r--r--include/linux/shmem_fs.h31
-rw-r--r--include/linux/skmsg.h1
-rw-r--r--include/linux/spi/corgi_lcd.h2
-rw-r--r--include/linux/spi/spi-mem.h4
-rw-r--r--include/linux/srcutiny.h4
-rw-r--r--include/linux/swait.h2
-rw-r--r--include/linux/syscalls.h20
-rw-r--r--include/linux/thermal.h6
-rw-r--r--include/linux/torture.h7
-rw-r--r--include/linux/tpm.h1
-rw-r--r--include/linux/trace_events.h11
-rw-r--r--include/linux/uio.h9
-rw-r--r--include/linux/virtio_net.h4
-rw-r--r--include/linux/wait.h3
-rw-r--r--include/linux/writeback.h5
-rw-r--r--include/linux/xattr.h10
56 files changed, 710 insertions, 219 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index c4f5b5228105..11984ed29cb8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -791,7 +791,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
{
bio->bi_opf |= REQ_POLLED;
- if (!is_sync_kiocb(kiocb))
+ if (kiocb->ki_flags & IOCB_NOWAIT)
bio->bi_opf |= REQ_NOWAIT;
}
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0bad62cca3d0..d5c5e59ddbd2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -52,7 +52,6 @@ struct block_device {
atomic_t bd_openers;
spinlock_t bd_size_lock; /* for bd_inode->i_size updates */
struct inode * bd_inode; /* will die */
- struct super_block * bd_super;
void * bd_claiming;
void * bd_holder;
const struct blk_holder_ops *bd_holder_ops;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ed44a997f629..83ce87354e9a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -750,7 +750,8 @@ static inline int bdev_read_only(struct block_device *bdev)
}
bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
-bool disk_force_media_change(struct gendisk *disk, unsigned int events);
+void disk_force_media_change(struct gendisk *disk);
+void bdev_mark_dead(struct block_device *bdev, bool surprise);
void add_disk_randomness(struct gendisk *disk) __latent_entropy;
void rand_initialize_disk(struct gendisk *disk);
@@ -809,7 +810,6 @@ int __register_blkdev(unsigned int major, const char *name,
void unregister_blkdev(unsigned int major, const char *name);
bool disk_check_media_change(struct gendisk *disk);
-int __invalidate_device(struct block_device *bdev, bool kill_dirty);
void set_capacity(struct gendisk *disk, sector_t size);
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
@@ -969,7 +969,6 @@ struct blk_plug {
bool multiple_queues;
bool has_elevator;
- bool nowait;
struct list_head cb_list; /* md requires an unplug callback */
};
@@ -1461,9 +1460,16 @@ void blkdev_show(struct seq_file *seqf, off_t offset);
#endif
struct blk_holder_ops {
- void (*mark_dead)(struct block_device *bdev);
+ void (*mark_dead)(struct block_device *bdev, bool surprise);
+
+ /*
+ * Sync the file system mounted on the block device.
+ */
+ void (*sync)(struct block_device *bdev);
};
+extern const struct blk_holder_ops fs_holder_ops;
+
/*
* Return the correct open flags for blkdev_get_by_* for super block flags
* as stored in sb->s_flags.
@@ -1522,8 +1528,6 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
}
#endif /* CONFIG_BLOCK */
-int fsync_bdev(struct block_device *bdev);
-
int freeze_bdev(struct block_device *bdev);
int thaw_bdev(struct block_device *bdev);
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 1ef013324237..06f1b292f8a0 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -183,6 +183,39 @@ int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale);
*/
bool clk_is_match(const struct clk *p, const struct clk *q);
+/**
+ * clk_rate_exclusive_get - get exclusivity over the rate control of a
+ * producer
+ * @clk: clock source
+ *
+ * This function allows drivers to get exclusive control over the rate of a
+ * provider. It prevents any other consumer to execute, even indirectly,
+ * opereation which could alter the rate of the provider or cause glitches
+ *
+ * If exlusivity is claimed more than once on clock, even by the same driver,
+ * the rate effectively gets locked as exclusivity can't be preempted.
+ *
+ * Must not be called from within atomic context.
+ *
+ * Returns success (0) or negative errno.
+ */
+int clk_rate_exclusive_get(struct clk *clk);
+
+/**
+ * clk_rate_exclusive_put - release exclusivity over the rate control of a
+ * producer
+ * @clk: clock source
+ *
+ * This function allows drivers to release the exclusivity it previously got
+ * from clk_rate_exclusive_get()
+ *
+ * The caller must balance the number of clk_rate_exclusive_get() and
+ * clk_rate_exclusive_put() calls.
+ *
+ * Must not be called from within atomic context.
+ */
+void clk_rate_exclusive_put(struct clk *clk);
+
#else
static inline int clk_notifier_register(struct clk *clk,
@@ -236,6 +269,13 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q)
return p == q;
}
+static inline int clk_rate_exclusive_get(struct clk *clk)
+{
+ return 0;
+}
+
+static inline void clk_rate_exclusive_put(struct clk *clk) {}
+
#endif
#ifdef CONFIG_HAVE_CLK_PREPARE
@@ -583,38 +623,6 @@ struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id);
*/
struct clk *devm_get_clk_from_child(struct device *dev,
struct device_node *np, const char *con_id);
-/**
- * clk_rate_exclusive_get - get exclusivity over the rate control of a
- * producer
- * @clk: clock source
- *
- * This function allows drivers to get exclusive control over the rate of a
- * provider. It prevents any other consumer to execute, even indirectly,
- * opereation which could alter the rate of the provider or cause glitches
- *
- * If exlusivity is claimed more than once on clock, even by the same driver,
- * the rate effectively gets locked as exclusivity can't be preempted.
- *
- * Must not be called from within atomic context.
- *
- * Returns success (0) or negative errno.
- */
-int clk_rate_exclusive_get(struct clk *clk);
-
-/**
- * clk_rate_exclusive_put - release exclusivity over the rate control of a
- * producer
- * @clk: clock source
- *
- * This function allows drivers to release the exclusivity it previously got
- * from clk_rate_exclusive_get()
- *
- * The caller must balance the number of clk_rate_exclusive_get() and
- * clk_rate_exclusive_put() calls.
- *
- * Must not be called from within atomic context.
- */
-void clk_rate_exclusive_put(struct clk *clk);
/**
* clk_enable - inform the system when the clock source should be running.
@@ -974,14 +982,6 @@ static inline void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) {}
static inline void devm_clk_put(struct device *dev, struct clk *clk) {}
-
-static inline int clk_rate_exclusive_get(struct clk *clk)
-{
- return 0;
-}
-
-static inline void clk_rate_exclusive_put(struct clk *clk) {}
-
static inline int clk_enable(struct clk *clk)
{
return 0;
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 00efa35c350f..28566624f008 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -95,6 +95,19 @@
#endif
/*
+ * Optional: only supported since gcc >= 14
+ * Optional: only supported since clang >= 18
+ *
+ * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
+ * clang: https://reviews.llvm.org/D148381
+ */
+#if __has_attribute(__counted_by__)
+# define __counted_by(member) __attribute__((__counted_by__(member)))
+#else
+# define __counted_by(member)
+#endif
+
+/*
* Optional: not supported by gcc
* Optional: only supported since clang >= 14.0
*
@@ -130,19 +143,6 @@
#endif
/*
- * Optional: only supported since gcc >= 14
- * Optional: only supported since clang >= 17
- *
- * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
- * clang: https://reviews.llvm.org/D148381
- */
-#if __has_attribute(__element_count__)
-# define __counted_by(member) __attribute__((__element_count__(#member)))
-#else
-# define __counted_by(member)
-#endif
-
-/*
* Optional: only supported since clang >= 14.0
*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-error-function-attribute
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 547ea1ff806e..c523c6683789 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -106,6 +106,34 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
#define __cold
#endif
+/*
+ * On x86-64 and arm64 targets, __preserve_most changes the calling convention
+ * of a function to make the code in the caller as unintrusive as possible. This
+ * convention behaves identically to the C calling convention on how arguments
+ * and return values are passed, but uses a different set of caller- and callee-
+ * saved registers.
+ *
+ * The purpose is to alleviates the burden of saving and recovering a large
+ * register set before and after the call in the caller. This is beneficial for
+ * rarely taken slow paths, such as error-reporting functions that may be called
+ * from hot paths.
+ *
+ * Note: This may conflict with instrumentation inserted on function entry which
+ * does not use __preserve_most or equivalent convention (if in assembly). Since
+ * function tracing assumes the normal C calling convention, where the attribute
+ * is supported, __preserve_most implies notrace. It is recommended to restrict
+ * use of the attribute to functions that should or already disable tracing.
+ *
+ * Optional: not supported by gcc.
+ *
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#preserve-most
+ */
+#if __has_attribute(__preserve_most__) && (defined(CONFIG_X86_64) || defined(CONFIG_ARM64))
+# define __preserve_most notrace __attribute__((__preserve_most__))
+#else
+# define __preserve_most
+#endif
+
/* Builtins */
/*
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 62b32b19e0a8..fb2915676574 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *x);
extern bool completion_done(struct completion *x);
extern void complete(struct completion *);
+extern void complete_on_current_cpu(struct completion *x);
extern void complete_all(struct completion *);
#endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index ed56b2534500..0abd60a7987b 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -71,6 +71,10 @@ extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
char *buf);
extern ssize_t cpu_show_retbleed(struct device *dev,
struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_gds(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 0d2e2a38b92d..f10fb87d49db 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -175,8 +175,8 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
/**
* cpumask_first_and - return the first cpu from *srcp1 & *srcp2
- * @src1p: the first input
- * @src2p: the second input
+ * @srcp1: the first input
+ * @srcp2: the second input
*
* Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
*/
@@ -1197,6 +1197,10 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
/**
* cpumap_print_list_to_buf - copies the cpumask into the buffer as
* comma-separated list of cpus
+ * @buf: the buffer to copy into
+ * @mask: the cpumask to copy
+ * @off: in the string from which we are copying, we copy to @buf
+ * @count: the maximum number of bytes to print
*
* Everything is same with the above cpumap_print_bitmask_to_buf()
* except the print format.
diff --git a/include/linux/dm-verity-loadpin.h b/include/linux/dm-verity-loadpin.h
index 552b817ab102..3ac6dbaeaa37 100644
--- a/include/linux/dm-verity-loadpin.h
+++ b/include/linux/dm-verity-loadpin.h
@@ -12,7 +12,7 @@ extern struct list_head dm_verity_loadpin_trusted_root_digests;
struct dm_verity_loadpin_trusted_root_digest {
struct list_head node;
unsigned int len;
- u8 data[];
+ u8 data[] __counted_by(len);
};
#if IS_ENABLED(CONFIG_SECURITY_LOADPIN_VERITY)
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index b1d26f9f1c9f..9f183a679277 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -30,7 +30,7 @@ struct dnotify_struct {
FS_MOVED_FROM | FS_MOVED_TO)
extern void dnotify_flush(struct file *, fl_owner_t);
-extern int fcntl_dirnotify(int, struct file *, unsigned long);
+extern int fcntl_dirnotify(int, struct file *, unsigned int);
#else
@@ -38,7 +38,7 @@ static inline void dnotify_flush(struct file *filp, fl_owner_t id)
{
}
-static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
{
return -EINVAL;
}
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index efcdd1631d9b..95e868e09e29 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -144,7 +144,7 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int,
struct flock64 *);
#endif
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
+int fcntl_setlease(unsigned int fd, struct file *filp, int arg);
int fcntl_getlease(struct file *filp);
/* fs/locks.c */
@@ -167,8 +167,8 @@ bool vfs_inode_has_locks(struct inode *inode);
int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
void lease_get_mtime(struct inode *, struct timespec64 *time);
-int generic_setlease(struct file *, long, struct file_lock **, void **priv);
-int vfs_setlease(struct file *, long, struct file_lock **, void **);
+int generic_setlease(struct file *, int, struct file_lock **, void **priv);
+int vfs_setlease(struct file *, int, struct file_lock **, void **);
int lease_modify(struct file_lock *, int, struct list_head *);
struct notifier_block;
@@ -213,7 +213,7 @@ static inline int fcntl_setlk64(unsigned int fd, struct file *file,
return -EACCES;
}
#endif
-static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+static inline int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
{
return -EINVAL;
}
@@ -306,13 +306,13 @@ static inline void lease_get_mtime(struct inode *inode,
return;
}
-static inline int generic_setlease(struct file *filp, long arg,
+static inline int generic_setlease(struct file *filp, int arg,
struct file_lock **flp, void **priv)
{
return -EINVAL;
}
-static inline int vfs_setlease(struct file *filp, long arg,
+static inline int vfs_setlease(struct file *filp, int arg,
struct file_lock **lease, void **priv)
{
return -EINVAL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..dda08d973639 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -338,6 +338,20 @@ enum rw_hint {
#define IOCB_NOIO (1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE (1 << 21)
+/*
+ * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
+ * iocb completion can be passed back to the owner for execution from a safe
+ * context rather than needing to be punted through a workqueue. If this
+ * flag is set, the bio completion handling may set iocb->dio_complete to a
+ * handler function and iocb->private to context information for that handler.
+ * The issuer should call the handler with that context information from task
+ * context to complete the processing of the iocb. Note that while this
+ * provides a task context for the dio_complete() callback, it should only be
+ * used on the completion side for non-IO generating completions. It's fine to
+ * call blocking functions from this callback, but they should not wait for
+ * unrelated IO (like cache flushing, new IO generation, etc).
+ */
+#define IOCB_DIO_CALLER_COMP (1 << 22)
/* for use in trace events */
#define TRACE_IOCB_STRINGS \
@@ -351,7 +365,8 @@ enum rw_hint {
{ IOCB_WRITE, "WRITE" }, \
{ IOCB_WAITQ, "WAITQ" }, \
{ IOCB_NOIO, "NOIO" }, \
- { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }
+ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \
+ { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }
struct kiocb {
struct file *ki_filp;
@@ -360,7 +375,23 @@ struct kiocb {
void *private;
int ki_flags;
u16 ki_ioprio; /* See linux/ioprio.h */
- struct wait_page_queue *ki_waitq; /* for async buffered IO */
+ union {
+ /*
+ * Only used for async buffered reads, where it denotes the
+ * page waitqueue associated with completing the read. Valid
+ * IFF IOCB_WAITQ is set.
+ */
+ struct wait_page_queue *ki_waitq;
+ /*
+ * Can be used for O_DIRECT IO, where the completion handling
+ * is punted back to the issuer of the IO. May only be set
+ * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
+ * must then check for presence of this handler when ki_complete
+ * is invoked. The data passed in to this handler must be
+ * assigned to ->private when dio_complete is assigned.
+ */
+ ssize_t (*dio_complete)(void *data);
+ };
};
static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -642,7 +673,7 @@ struct inode {
loff_t i_size;
struct timespec64 i_atime;
struct timespec64 i_mtime;
- struct timespec64 i_ctime;
+ struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
u8 i_blkbits;
@@ -1069,7 +1100,7 @@ extern void fasync_free(struct fasync_struct *);
extern void kill_fasync(struct fasync_struct **, int, int);
extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
-extern int f_setown(struct file *filp, unsigned long arg, int force);
+extern int f_setown(struct file *filp, int who, int force);
extern void f_delown(struct file *filp);
extern pid_t f_getown(struct file *filp);
extern int send_sigurg(struct fown_struct *fown);
@@ -1095,6 +1126,8 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
+#define SB_DEAD BIT(21)
+#define SB_DYING BIT(24)
#define SB_SUBMOUNT BIT(26)
#define SB_FORCE BIT(27)
#define SB_NOSEC BIT(28)
@@ -1147,7 +1180,8 @@ enum {
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
struct sb_writers {
- int frozen; /* Is sb frozen? */
+ unsigned short frozen; /* Is sb frozen? */
+ unsigned short freeze_holders; /* Who froze fs? */
struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
@@ -1474,7 +1508,79 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
kgid_has_mapping(fs_userns, kgid);
}
-extern struct timespec64 current_time(struct inode *inode);
+struct timespec64 current_mgtime(struct inode *inode);
+struct timespec64 current_time(struct inode *inode);
+struct timespec64 inode_set_ctime_current(struct inode *inode);
+
+/*
+ * Multigrain timestamps
+ *
+ * Conditionally use fine-grained ctime and mtime timestamps when there
+ * are users actively observing them via getattr. The primary use-case
+ * for this is NFS clients that use the ctime to distinguish between
+ * different states of the file, and that are often fooled by multiple
+ * operations that occur in the same coarse-grained timer tick.
+ *
+ * The kernel always keeps normalized struct timespec64 values in the ctime,
+ * which means that only the first 30 bits of the value are used. Use the
+ * 31st bit of the ctime's tv_nsec field as a flag to indicate that the value
+ * has been queried since it was last updated.
+ */
+#define I_CTIME_QUERIED (1L<<30)
+
+/**
+ * inode_get_ctime - fetch the current ctime from the inode
+ * @inode: inode from which to fetch ctime
+ *
+ * Grab the current ctime tv_nsec field from the inode, mask off the
+ * I_CTIME_QUERIED flag and return it. This is mostly intended for use by
+ * internal consumers of the ctime that aren't concerned with ensuring a
+ * fine-grained update on the next change (e.g. when preparing to store
+ * the value in the backing store for later retrieval).
+ *
+ * This is safe to call regardless of whether the underlying filesystem
+ * is using multigrain timestamps.
+ */
+static inline struct timespec64 inode_get_ctime(const struct inode *inode)
+{
+ struct timespec64 ctime;
+
+ ctime.tv_sec = inode->__i_ctime.tv_sec;
+ ctime.tv_nsec = inode->__i_ctime.tv_nsec & ~I_CTIME_QUERIED;
+
+ return ctime;
+}
+
+/**
+ * inode_set_ctime_to_ts - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @ts: value to set in the ctime field
+ *
+ * Set the ctime in @inode to @ts
+ */
+static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
+ struct timespec64 ts)
+{
+ inode->__i_ctime = ts;
+ return ts;
+}
+
+/**
+ * inode_set_ctime - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @sec: tv_sec value to set
+ * @nsec: tv_nsec value to set
+ *
+ * Set the ctime in @inode to { @sec, @nsec }
+ */
+static inline struct timespec64 inode_set_ctime(struct inode *inode,
+ time64_t sec, long nsec)
+{
+ struct timespec64 ts = { .tv_sec = sec,
+ .tv_nsec = nsec };
+
+ return inode_set_ctime_to_ts(inode, ts);
+}
/*
* Snapshotting support.
@@ -1770,6 +1876,7 @@ struct dir_context {
struct iov_iter;
struct io_uring_cmd;
+struct offset_ctx;
struct file_operations {
struct module *owner;
@@ -1780,7 +1887,6 @@ struct file_operations {
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
unsigned int flags);
- int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -1799,7 +1905,7 @@ struct file_operations {
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
void (*splice_eof)(struct file *file);
- int (*setlease)(struct file *, long, struct file_lock **, void **);
+ int (*setlease)(struct file *, int, struct file_lock **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
void (*show_fdinfo)(struct seq_file *m, struct file *f);
@@ -1817,6 +1923,13 @@ struct file_operations {
unsigned int poll_flags);
} __randomize_layout;
+/* Wrap a directory iterator that needs exclusive inode access */
+int wrap_directory_iterator(struct file *, struct dir_context *,
+ int (*) (struct file *, struct dir_context *));
+#define WRAP_DIR_ITER(x) \
+ static int shared_##x(struct file *file , struct dir_context *ctx) \
+ { return wrap_directory_iterator(file, ctx, x); }
+
struct inode_operations {
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
@@ -1844,7 +1957,7 @@ struct inode_operations {
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
- int (*update_time)(struct inode *, struct timespec64 *, int);
+ int (*update_time)(struct inode *, int);
int (*atomic_open)(struct inode *, struct dentry *,
struct file *, unsigned open_flag,
umode_t create_mode);
@@ -1857,6 +1970,7 @@ struct inode_operations {
int (*fileattr_set)(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+ struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
} ____cacheline_aligned;
static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
@@ -1902,6 +2016,10 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
struct file *dst_file, loff_t dst_pos,
loff_t len, unsigned int remap_flags);
+enum freeze_holder {
+ FREEZE_HOLDER_KERNEL = (1U << 0),
+ FREEZE_HOLDER_USERSPACE = (1U << 1),
+};
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
@@ -1914,9 +2032,9 @@ struct super_operations {
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
- int (*freeze_super) (struct super_block *);
+ int (*freeze_super) (struct super_block *, enum freeze_holder who);
int (*freeze_fs) (struct super_block *);
- int (*thaw_super) (struct super_block *);
+ int (*thaw_super) (struct super_block *, enum freeze_holder who);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
@@ -2194,7 +2312,7 @@ enum file_time_flags {
extern bool atime_needs_update(const struct path *, struct inode *);
extern void touch_atime(const struct path *);
-int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
+int inode_update_time(struct inode *inode, int flags);
static inline void file_accessed(struct file *file)
{
@@ -2216,6 +2334,7 @@ struct file_system_type {
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
+#define FS_MGTIME 64 /* FS uses multigrain timestamps */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
@@ -2239,6 +2358,17 @@ struct file_system_type {
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
+/**
+ * is_mgtime: is this inode using multigrain timestamps
+ * @inode: inode to test for multigrain timestamps
+ *
+ * Return true if the inode uses multigrain timestamps, false otherwise.
+ */
+static inline bool is_mgtime(const struct inode *inode)
+{
+ return inode->i_sb->s_type->fs_flags & FS_MGTIME;
+}
+
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
@@ -2290,8 +2420,8 @@ extern int unregister_filesystem(struct file_system_type *);
extern int vfs_statfs(const struct path *, struct kstatfs *);
extern int user_statfs(const char __user *, struct kstatfs *);
extern int fd_statfs(int, struct kstatfs *);
-extern int freeze_super(struct super_block *super);
-extern int thaw_super(struct super_block *super);
+int freeze_super(struct super_block *super, enum freeze_holder who);
+int thaw_super(struct super_block *super, enum freeze_holder who);
extern __printf(2, 3)
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
extern int super_setup_bdi(struct super_block *sb);
@@ -2300,7 +2430,8 @@ extern int current_umask(void);
extern void ihold(struct inode * inode);
extern void iput(struct inode *);
-extern int generic_update_time(struct inode *, struct timespec64 *, int);
+int inode_update_timestamps(struct inode *inode, int flags);
+int generic_update_time(struct inode *, int);
/* /sys/fs */
extern struct kobject *fs_kobj;
@@ -2539,6 +2670,13 @@ static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
return (inode->i_mode ^ mode) & S_IFMT;
}
+/**
+ * file_start_write - get write access to a superblock for regular file io
+ * @file: the file we want to write to
+ *
+ * This is a variant of sb_start_write() which is a noop on non-regualr file.
+ * Should be matched with a call to file_end_write().
+ */
static inline void file_start_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
@@ -2553,11 +2691,53 @@ static inline bool file_start_write_trylock(struct file *file)
return sb_start_write_trylock(file_inode(file)->i_sb);
}
+/**
+ * file_end_write - drop write access to a superblock of a regular file
+ * @file: the file we wrote to
+ *
+ * Should be matched with a call to file_start_write().
+ */
static inline void file_end_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return;
- __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ sb_end_write(file_inode(file)->i_sb);
+}
+
+/**
+ * kiocb_start_write - get write access to a superblock for async file io
+ * @iocb: the io context we want to submit the write with
+ *
+ * This is a variant of sb_start_write() for async io submission.
+ * Should be matched with a call to kiocb_end_write().
+ */
+static inline void kiocb_start_write(struct kiocb *iocb)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ sb_start_write(inode->i_sb);
+ /*
+ * Fool lockdep by telling it the lock got released so that it
+ * doesn't complain about the held lock when we return to userspace.
+ */
+ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * kiocb_end_write - drop write access to a superblock after async file io
+ * @iocb: the io context we sumbitted the write with
+ *
+ * Should be matched with a call to kiocb_start_write().
+ */
+static inline void kiocb_end_write(struct kiocb *iocb)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ /*
+ * Tell lockdep we inherited freeze protection from submission thread.
+ */
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+ sb_end_write(inode->i_sb);
}
/*
@@ -2607,8 +2787,7 @@ static inline bool inode_is_open_for_write(const struct inode *inode)
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
static inline void i_readcount_dec(struct inode *inode)
{
- BUG_ON(!atomic_read(&inode->i_readcount));
- atomic_dec(&inode->i_readcount);
+ BUG_ON(atomic_dec_return(&inode->i_readcount) < 0);
}
static inline void i_readcount_inc(struct inode *inode)
{
@@ -2874,7 +3053,8 @@ extern void page_put_link(void *);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
-void generic_fillattr(struct mnt_idmap *, struct inode *, struct kstat *);
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode);
+void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
@@ -2913,7 +3093,6 @@ extern int vfs_readlink(struct dentry *, char __user *, int);
extern struct file_system_type *get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
-extern struct super_block *get_super(struct block_device *);
extern struct super_block *get_active_super(struct block_device *bdev);
extern void drop_super(struct super_block *sb);
extern void drop_super_exclusive(struct super_block *sb);
@@ -2934,6 +3113,8 @@ extern int simple_open(struct inode *inode, struct file *file);
extern int simple_link(struct dentry *, struct inode *, struct dentry *);
extern int simple_unlink(struct inode *, struct dentry *);
extern int simple_rmdir(struct inode *, struct dentry *);
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry);
extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
extern int simple_rename(struct mnt_idmap *, struct inode *,
@@ -2950,7 +3131,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
extern const struct address_space_operations ram_aops;
extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
-extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);
+extern int simple_nosetlease(struct file *, int, struct file_lock **, void **);
extern const struct dentry_operations simple_dentry_operations;
extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
@@ -2971,6 +3152,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
const void __user *from, size_t count);
+struct offset_ctx {
+ struct xarray xa;
+ u32 next_offset;
+};
+
+void simple_offset_init(struct offset_ctx *octx);
+int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
+void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
+int simple_offset_rename_exchange(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry);
+void simple_offset_destroy(struct offset_ctx *octx);
+
+extern const struct file_operations simple_offset_dir_operations;
+
extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index ff6341e09925..96332db693d5 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -109,6 +109,7 @@ struct fs_context {
bool need_free:1; /* Need to call ops->free() */
bool global:1; /* Goes into &init_user_ns */
bool oldapi:1; /* Coming from mount(2) */
+ bool exclusive:1; /* create new superblock, reject existing one */
};
struct fs_context_operations {
@@ -150,14 +151,13 @@ extern int get_tree_nodev(struct fs_context *fc,
extern int get_tree_single(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
-extern int get_tree_single_reconf(struct fs_context *fc,
- int (*fill_super)(struct super_block *sb,
- struct fs_context *fc));
extern int get_tree_keyed(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc),
void *key);
+int setup_bdev_super(struct super_block *sb, int sb_flags,
+ struct fs_context *fc);
extern int get_tree_bdev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index 54210a42c30d..010d39d0dc1c 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -24,7 +24,7 @@ static inline void fsstack_copy_attr_times(struct inode *dest,
{
dest->i_atime = src->i_atime;
dest->i_mtime = src->i_mtime;
- dest->i_ctime = src->i_ctime;
+ inode_set_ctime_to_ts(dest, inode_get_ctime(src));
}
#endif /* _LINUX_FS_STACK_H */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ce156c7704ee..aad9cf8876b5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -684,7 +684,6 @@ void __init
ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
/* defined in arch */
-extern int ftrace_ip_converted(unsigned long ip);
extern int ftrace_dyn_arch_init(void);
extern void ftrace_replace_code(int enable);
extern int ftrace_update_ftrace_func(ftrace_func_t func);
@@ -859,9 +858,6 @@ static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_a
}
#endif
-/* May be defined in arch */
-extern int ftrace_arch_read_dyn_info(char *buf, int size);
-
extern int skip_trace(unsigned long ip);
extern void ftrace_module_init(struct module *mod);
extern void ftrace_module_enable(struct module *mod);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 20284387b841..e718dbe928ba 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -25,9 +25,6 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
#endif
vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
-struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
- unsigned long addr, pmd_t *pmd,
- unsigned int flags);
bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr, unsigned long next);
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index bfbc37ce223b..3ac3974b3c78 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1239,9 +1239,6 @@ extern int vmbus_recvpacket_raw(struct vmbus_channel *channel,
u32 *buffer_actual_len,
u64 *requestid);
-
-extern void vmbus_ontimer(unsigned long data);
-
/* Base driver object */
struct hv_driver {
const char *name;
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index e6936cb25047..33f21bd85dbf 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -100,10 +100,16 @@ struct rapl_package;
#define RAPL_DOMAIN_NAME_LENGTH 16
+union rapl_reg {
+ void __iomem *mmio;
+ u32 msr;
+ u64 val;
+};
+
struct rapl_domain {
char name[RAPL_DOMAIN_NAME_LENGTH];
enum rapl_domain_type id;
- u64 regs[RAPL_DOMAIN_REG_MAX];
+ union rapl_reg regs[RAPL_DOMAIN_REG_MAX];
struct powercap_zone power_zone;
struct rapl_domain_data rdd;
struct rapl_power_limit rpl[NR_POWER_LIMITS];
@@ -116,7 +122,7 @@ struct rapl_domain {
};
struct reg_action {
- u64 reg;
+ union rapl_reg reg;
u64 mask;
u64 value;
int err;
@@ -143,8 +149,8 @@ struct rapl_if_priv {
enum rapl_if_type type;
struct powercap_control_type *control_type;
enum cpuhp_state pcap_rapl_online;
- u64 reg_unit;
- u64 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
+ union rapl_reg reg_unit;
+ union rapl_reg regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
int limits[RAPL_DOMAIN_MAX];
int (*read_raw)(int id, struct reg_action *ra);
int (*write_raw)(int id, struct reg_action *ra);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index e2b836c2e119..fdc6e64f49d6 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -261,9 +261,10 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos);
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
+bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
diff --git a/include/linux/list.h b/include/linux/list.h
index f10344dbad4d..164b4d0e9d2a 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -38,11 +38,92 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
WRITE_ONCE(list->prev, list);
}
+#ifdef CONFIG_LIST_HARDENED
+
#ifdef CONFIG_DEBUG_LIST
-extern bool __list_add_valid(struct list_head *new,
- struct list_head *prev,
- struct list_head *next);
-extern bool __list_del_entry_valid(struct list_head *entry);
+# define __list_valid_slowpath
+#else
+# define __list_valid_slowpath __cold __preserve_most
+#endif
+
+/*
+ * Performs the full set of list corruption checks before __list_add().
+ * On list corruption reports a warning, and returns false.
+ */
+extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next);
+
+/*
+ * Performs list corruption checks before __list_add(). Returns false if a
+ * corruption is detected, true otherwise.
+ *
+ * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking
+ * inline to catch non-faulting corruptions, and only if a corruption is
+ * detected calls the reporting function __list_add_valid_or_report().
+ */
+static __always_inline bool __list_add_valid(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ bool ret = true;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_LIST)) {
+ /*
+ * With the hardening version, elide checking if next and prev
+ * are NULL, since the immediate dereference of them below would
+ * result in a fault if NULL.
+ *
+ * With the reduced set of checks, we can afford to inline the
+ * checks, which also gives the compiler a chance to elide some
+ * of them completely if they can be proven at compile-time. If
+ * one of the pre-conditions does not hold, the slow-path will
+ * show a report which pre-condition failed.
+ */
+ if (likely(next->prev == prev && prev->next == next && new != prev && new != next))
+ return true;
+ ret = false;
+ }
+
+ ret &= __list_add_valid_or_report(new, prev, next);
+ return ret;
+}
+
+/*
+ * Performs the full set of list corruption checks before __list_del_entry().
+ * On list corruption reports a warning, and returns false.
+ */
+extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry);
+
+/*
+ * Performs list corruption checks before __list_del_entry(). Returns false if a
+ * corruption is detected, true otherwise.
+ *
+ * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking
+ * inline to catch non-faulting corruptions, and only if a corruption is
+ * detected calls the reporting function __list_del_entry_valid_or_report().
+ */
+static __always_inline bool __list_del_entry_valid(struct list_head *entry)
+{
+ bool ret = true;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_LIST)) {
+ struct list_head *prev = entry->prev;
+ struct list_head *next = entry->next;
+
+ /*
+ * With the hardening version, elide checking if next and prev
+ * are NULL, LIST_POISON1 or LIST_POISON2, since the immediate
+ * dereference of them below would result in a fault.
+ */
+ if (likely(prev->next == entry && next->prev == entry))
+ return true;
+ ret = false;
+ }
+
+ ret &= __list_del_entry_valid_or_report(entry);
+ return ret;
+}
#else
static inline bool __list_add_valid(struct list_head *new,
struct list_head *prev,
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 7308a1a7599b..af796986baee 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -54,6 +54,7 @@ LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *f
LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm)
LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm)
LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm)
+LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference)
LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc,
struct fs_context *src_sc)
LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2dd73e4f3d8e..34f9dba17c1a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -641,8 +641,14 @@ static inline void vma_numab_state_free(struct vm_area_struct *vma) {}
*/
static inline bool vma_start_read(struct vm_area_struct *vma)
{
- /* Check before locking. A race might cause false locked result. */
- if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
+ /*
+ * Check before locking. A race might cause false locked result.
+ * We can use READ_ONCE() for the mm_lock_seq here, and don't need
+ * ACQUIRE semantics, because this is just a lockless check whose result
+ * we don't rely on for anything - the mm_lock_seq read against which we
+ * need ordering is below.
+ */
+ if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
return false;
if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
@@ -653,8 +659,13 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
* False unlocked result is impossible because we modify and check
* vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq
* modification invalidates all existing locks.
+ *
+ * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
+ * racing with vma_end_write_all(), we only start reading from the VMA
+ * after it has been unlocked.
+ * This pairs with RELEASE semantics in vma_end_write_all().
*/
- if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
+ if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
up_read(&vma->vm_lock->lock);
return false;
}
@@ -676,7 +687,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
* current task is holding mmap_write_lock, both vma->vm_lock_seq and
* mm->mm_lock_seq can't be concurrently modified.
*/
- *mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq);
+ *mm_lock_seq = vma->vm_mm->mm_lock_seq;
return (vma->vm_lock_seq == *mm_lock_seq);
}
@@ -688,7 +699,13 @@ static inline void vma_start_write(struct vm_area_struct *vma)
return;
down_write(&vma->vm_lock->lock);
- vma->vm_lock_seq = mm_lock_seq;
+ /*
+ * We should use WRITE_ONCE() here because we can have concurrent reads
+ * from the early lockless pessimistic check in vma_start_read().
+ * We don't really care about the correctness of that early check, but
+ * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy.
+ */
+ WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
up_write(&vma->vm_lock->lock);
}
@@ -702,7 +719,7 @@ static inline bool vma_try_start_write(struct vm_area_struct *vma)
if (!down_write_trylock(&vma->vm_lock->lock))
return false;
- vma->vm_lock_seq = mm_lock_seq;
+ WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
up_write(&vma->vm_lock->lock);
return true;
}
@@ -3404,15 +3421,24 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
* Indicates whether GUP can follow a PROT_NONE mapped page, or whether
* a (NUMA hinting) fault is required.
*/
-static inline bool gup_can_follow_protnone(unsigned int flags)
+static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,
+ unsigned int flags)
{
/*
- * FOLL_FORCE has to be able to make progress even if the VMA is
- * inaccessible. Further, FOLL_FORCE access usually does not represent
- * application behaviour and we should avoid triggering NUMA hinting
- * faults.
+ * If callers don't want to honor NUMA hinting faults, no need to
+ * determine if we would actually have to trigger a NUMA hinting fault.
+ */
+ if (!(flags & FOLL_HONOR_NUMA_FAULT))
+ return true;
+
+ /*
+ * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs.
+ *
+ * Requiring a fault here even for inaccessible VMAs would mean that
+ * FOLL_FORCE cannot make any progress, because handle_mm_fault()
+ * refuses to process NUMA hinting faults in inaccessible VMAs.
*/
- return flags & FOLL_FORCE;
+ return !vma_is_accessible(vma);
}
typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index de10fc797c8e..7d30dc4ff0ff 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -514,6 +514,20 @@ struct vm_area_struct {
};
#ifdef CONFIG_PER_VMA_LOCK
+ /*
+ * Can only be written (using WRITE_ONCE()) while holding both:
+ * - mmap_lock (in write mode)
+ * - vm_lock->lock (in write mode)
+ * Can be read reliably while holding one of:
+ * - mmap_lock (in read or write mode)
+ * - vm_lock->lock (in read or write mode)
+ * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
+ * while holding nothing (except RCU to keep the VMA struct allocated).
+ *
+ * This sequence counter is explicitly allowed to overflow; sequence
+ * counter reuse can only lead to occasional unnecessary use of the
+ * slowpath.
+ */
int vm_lock_seq;
struct vma_lock *vm_lock;
@@ -679,6 +693,20 @@ struct mm_struct {
* by mmlist_lock
*/
#ifdef CONFIG_PER_VMA_LOCK
+ /*
+ * This field has lock-like semantics, meaning it is sometimes
+ * accessed with ACQUIRE/RELEASE semantics.
+ * Roughly speaking, incrementing the sequence number is
+ * equivalent to releasing locks on VMAs; reading the sequence
+ * number can be part of taking a read lock on a VMA.
+ *
+ * Can be modified under write mmap_lock using RELEASE
+ * semantics.
+ * Can be read with no other protection when holding write
+ * mmap_lock.
+ * Can be read with ACQUIRE semantics if not holding write
+ * mmap_lock.
+ */
int mm_lock_seq;
#endif
@@ -1258,6 +1286,15 @@ enum {
FOLL_PCI_P2PDMA = 1 << 10,
/* allow interrupts from generic signals */
FOLL_INTERRUPTIBLE = 1 << 11,
+ /*
+ * Always honor (trigger) NUMA hinting faults.
+ *
+ * FOLL_WRITE implicitly honors NUMA hinting faults because a
+ * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE
+ * apply). get_user_pages_fast_only() always implicitly honors NUMA
+ * hinting faults.
+ */
+ FOLL_HONOR_NUMA_FAULT = 1 << 12,
/* See also internal only FOLL flags in mm/internal.h */
};
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index aab8f1b28d26..e05e167dbd16 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -76,8 +76,14 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
static inline void vma_end_write_all(struct mm_struct *mm)
{
mmap_assert_write_locked(mm);
- /* No races during update due to exclusive mmap_lock being held */
- WRITE_ONCE(mm->mm_lock_seq, mm->mm_lock_seq + 1);
+ /*
+ * Nobody can concurrently modify mm->mm_lock_seq due to exclusive
+ * mmap_lock being held.
+ * We need RELEASE semantics here to ensure that preceding stores into
+ * the VMA take effect before we unlock it with this store.
+ * Pairs with ACQUIRE semantics in vma_start_read().
+ */
+ smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
}
#else
static inline void vma_end_write_all(struct mm_struct *mm) {}
diff --git a/include/linux/nls.h b/include/linux/nls.h
index 499e486b3722..e0bf8367b274 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -47,7 +47,7 @@ enum utf16_endian {
/* nls_base.c */
extern int __register_nls(struct nls_table *, struct module *);
extern int unregister_nls(struct nls_table *);
-extern struct nls_table *load_nls(char *);
+extern struct nls_table *load_nls(const char *charset);
extern void unload_nls(struct nls_table *);
extern struct nls_table *load_nls_default(void);
#define register_nls(nls) __register_nls((nls), THIS_MODULE)
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 86544707236a..45702bdcbceb 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -73,9 +73,7 @@ struct raw_notifier_head {
struct srcu_notifier_head {
struct mutex mutex;
-#ifdef CONFIG_TREE_SRCU
struct srcu_usage srcuu;
-#endif
struct srcu_struct srcu;
struct notifier_block __rcu *head;
};
@@ -106,7 +104,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
#define RAW_NOTIFIER_INIT(name) { \
.head = NULL }
-#ifdef CONFIG_TREE_SRCU
#define SRCU_NOTIFIER_INIT(name, pcpu) \
{ \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
@@ -114,14 +111,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
.srcuu = __SRCU_USAGE_INIT(name.srcuu), \
.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
}
-#else
-#define SRCU_NOTIFIER_INIT(name, pcpu) \
- { \
- .mutex = __MUTEX_INITIALIZER(name.mutex), \
- .head = NULL, \
- .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
- }
-#endif
#define ATOMIC_NOTIFIER_HEAD(name) \
struct atomic_notifier_head name = \
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index fee881cded01..771cb0285872 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -29,7 +29,7 @@ struct fs_struct;
* nsproxy is copied.
*/
struct nsproxy {
- atomic_t count;
+ refcount_t count;
struct uts_namespace *uts_ns;
struct ipc_namespace *ipc_ns;
struct mnt_namespace *mnt_ns;
@@ -102,14 +102,13 @@ int __init nsproxy_cache_init(void);
static inline void put_nsproxy(struct nsproxy *ns)
{
- if (atomic_dec_and_test(&ns->count)) {
+ if (refcount_dec_and_test(&ns->count))
free_nsproxy(ns);
- }
}
static inline void get_nsproxy(struct nsproxy *ns)
{
- atomic_inc(&ns->count);
+ refcount_inc(&ns->count);
}
#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 716953ee1ebd..d87840acbfb2 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -470,6 +470,19 @@ static inline void *detach_page_private(struct page *page)
return folio_detach_private(page_folio(page));
}
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER 8
+#endif
+
#ifdef CONFIG_NUMA
struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
#else
@@ -501,22 +514,69 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
pgoff_t page_cache_prev_miss(struct address_space *mapping,
pgoff_t index, unsigned long max_scan);
-#define FGP_ACCESSED 0x00000001
-#define FGP_LOCK 0x00000002
-#define FGP_CREAT 0x00000004
-#define FGP_WRITE 0x00000008
-#define FGP_NOFS 0x00000010
-#define FGP_NOWAIT 0x00000020
-#define FGP_FOR_MMAP 0x00000040
-#define FGP_STABLE 0x00000080
+/**
+ * typedef fgf_t - Flags for getting folios from the page cache.
+ *
+ * Most users of the page cache will not need to use these flags;
+ * there are convenience functions such as filemap_get_folio() and
+ * filemap_lock_folio(). For users which need more control over exactly
+ * what is done with the folios, these flags to __filemap_get_folio()
+ * are available.
+ *
+ * * %FGP_ACCESSED - The folio will be marked accessed.
+ * * %FGP_LOCK - The folio is returned locked.
+ * * %FGP_CREAT - If no folio is present then a new folio is allocated,
+ * added to the page cache and the VM's LRU list. The folio is
+ * returned locked.
+ * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
+ * folio is already in cache. If the folio was allocated, unlock it
+ * before returning so the caller can do the same dance.
+ * * %FGP_WRITE - The folio will be written to by the caller.
+ * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
+ * * %FGP_NOWAIT - Don't block on the folio lock.
+ * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
+ * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin()
+ * implementation.
+ */
+typedef unsigned int __bitwise fgf_t;
+
+#define FGP_ACCESSED ((__force fgf_t)0x00000001)
+#define FGP_LOCK ((__force fgf_t)0x00000002)
+#define FGP_CREAT ((__force fgf_t)0x00000004)
+#define FGP_WRITE ((__force fgf_t)0x00000008)
+#define FGP_NOFS ((__force fgf_t)0x00000010)
+#define FGP_NOWAIT ((__force fgf_t)0x00000020)
+#define FGP_FOR_MMAP ((__force fgf_t)0x00000040)
+#define FGP_STABLE ((__force fgf_t)0x00000080)
+#define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */
#define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
+/**
+ * fgf_set_order - Encode a length in the fgf_t flags.
+ * @size: The suggested size of the folio to create.
+ *
+ * The caller of __filemap_get_folio() can use this to suggest a preferred
+ * size for the folio that is created. If there is already a folio at
+ * the index, it will be returned, no matter what its size. If a folio
+ * is freshly created, it may be of a different size than requested
+ * due to alignment constraints, memory pressure, or the presence of
+ * other folios at nearby indices.
+ */
+static inline fgf_t fgf_set_order(size_t size)
+{
+ unsigned int shift = ilog2(size);
+
+ if (shift <= PAGE_SHIFT)
+ return 0;
+ return (__force fgf_t)((shift - PAGE_SHIFT) << 26);
+}
+
void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp);
+ fgf_t fgp_flags, gfp_t gfp);
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp);
+ fgf_t fgp_flags, gfp_t gfp);
/**
* filemap_get_folio - Find and get a folio.
@@ -590,7 +650,7 @@ static inline struct page *find_get_page(struct address_space *mapping,
}
static inline struct page *find_get_page_flags(struct address_space *mapping,
- pgoff_t offset, int fgp_flags)
+ pgoff_t offset, fgf_t fgp_flags)
{
return pagecache_get_page(mapping, offset, fgp_flags, 0);
}
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index 27a6df448ee5..27cd1e59ccf7 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -6,6 +6,16 @@
struct mm_walk;
+/* Locking requirement during a page walk. */
+enum page_walk_lock {
+ /* mmap_lock should be locked for read to stabilize the vma tree */
+ PGWALK_RDLOCK = 0,
+ /* vma will be write-locked during the walk */
+ PGWALK_WRLOCK = 1,
+ /* vma is expected to be already write-locked during the walk */
+ PGWALK_WRLOCK_VERIFY = 2,
+};
+
/**
* struct mm_walk_ops - callbacks for walk_page_range
* @pgd_entry: if set, called for each non-empty PGD (top-level) entry
@@ -66,6 +76,7 @@ struct mm_walk_ops {
int (*pre_vma)(unsigned long start, unsigned long end,
struct mm_walk *walk);
void (*post_vma)(struct mm_walk *walk);
+ enum page_walk_lock walk_lock;
};
/*
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 02e0086b10f6..608a9eb86bff 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -269,10 +269,10 @@ bool pipe_is_unprivileged_user(void);
/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
-long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
+long pipe_fcntl(struct file *, unsigned int, unsigned int arg);
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
int create_pipe_files(struct file **, int);
-unsigned int round_pipe_size(unsigned long size);
+unsigned int round_pipe_size(unsigned int size);
#endif
diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h
index dd42d16945d0..d9642c6cf852 100644
--- a/include/linux/pm_wakeirq.h
+++ b/include/linux/pm_wakeirq.h
@@ -10,8 +10,6 @@ extern int dev_pm_set_wake_irq(struct device *dev, int irq);
extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq);
extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq);
extern void dev_pm_clear_wake_irq(struct device *dev);
-extern void dev_pm_enable_wake_irq(struct device *dev);
-extern void dev_pm_disable_wake_irq(struct device *dev);
#else /* !CONFIG_PM */
@@ -34,13 +32,5 @@ static inline void dev_pm_clear_wake_irq(struct device *dev)
{
}
-static inline void dev_pm_enable_wake_irq(struct device *dev)
-{
-}
-
-static inline void dev_pm_disable_wake_irq(struct device *dev)
-{
-}
-
#endif /* CONFIG_PM */
#endif /* _LINUX_PM_WAKEIRQ_H */
diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index b83a3f944f28..b068e2e60939 100644
--- a/include/linux/prefetch.h
+++ b/include/linux/prefetch.h
@@ -25,11 +25,10 @@ struct page;
prefetch() should be defined by the architecture, if not, the
#define below provides a no-op define.
- There are 3 prefetch() macros:
+ There are 2 prefetch() macros:
prefetch(x) - prefetches the cacheline at "x" for read
prefetchw(x) - prefetches the cacheline at "x" for write
- spin_lock_prefetch(x) - prefetches the spinlock *x for taking
there is also PREFETCH_STRIDE which is the architecure-preferred
"lookahead" size for prefetching streamed operations.
@@ -44,10 +43,6 @@ struct page;
#define prefetchw(x) __builtin_prefetch(x,1)
#endif
-#ifndef ARCH_HAS_SPINLOCK_PREFETCH
-#define spin_lock_prefetch(x) prefetchw(x)
-#endif
-
#ifndef PREFETCH_STRIDE
#define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
#endif
diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h
index 6a9b177d5c41..e50416ba9cd9 100644
--- a/include/linux/raid_class.h
+++ b/include/linux/raid_class.h
@@ -77,7 +77,3 @@ DEFINE_RAID_ATTRIBUTE(enum raid_state, state)
struct raid_template *raid_class_attach(struct raid_function_template *);
void raid_class_release(struct raid_template *);
-
-int __must_check raid_component_add(struct raid_template *, struct device *,
- struct device *);
-
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index ba4c00dd8005..89186c499dd4 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -101,7 +101,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
{
struct hlist_nulls_node *first = h->first;
- n->next = first;
+ WRITE_ONCE(n->next, first);
WRITE_ONCE(n->pprev, &h->first);
rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
if (!is_a_nulls(first))
@@ -137,7 +137,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
last = i;
if (last) {
- n->next = last->next;
+ WRITE_ONCE(n->next, last->next);
n->pprev = &last->next;
rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
} else {
diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index 9bc8cbb33340..eda493200663 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -87,6 +87,7 @@ static inline void rcu_read_unlock_trace(void)
void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
void synchronize_rcu_tasks_trace(void);
void rcu_barrier_tasks_trace(void);
+struct task_struct *get_rcu_tasks_trace_gp_kthread(void);
#else
/*
* The BPF JIT forms these addresses even when it doesn't call these
diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index 699b938358bf..5e0f74f2f8ca 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -42,6 +42,11 @@ do { \
* call_srcu() function, with this wrapper supplying the pointer to the
* corresponding srcu_struct.
*
+ * Note that call_rcu_hurry() should be used instead of call_rcu()
+ * because in kernels built with CONFIG_RCU_LAZY=y the delay between the
+ * invocation of call_rcu() and that of the corresponding RCU callback
+ * can be multiple seconds.
+ *
* The first argument tells Tiny RCU's _wait_rcu_gp() not to
* bother waiting for RCU. The reason for this is because anywhere
* synchronize_rcu_mult() can be called is automatically already a full
diff --git a/include/linux/security.h b/include/linux/security.h
index 32828502f09e..bac98ea18f78 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -293,6 +293,7 @@ int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file);
int security_bprm_check(struct linux_binprm *bprm);
void security_bprm_committing_creds(struct linux_binprm *bprm);
void security_bprm_committed_creds(struct linux_binprm *bprm);
+int security_fs_context_submount(struct fs_context *fc, struct super_block *reference);
int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc);
int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param);
int security_sb_alloc(struct super_block *sb);
@@ -629,6 +630,11 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm)
{
}
+static inline int security_fs_context_submount(struct fs_context *fc,
+ struct super_block *reference)
+{
+ return 0;
+}
static inline int security_fs_context_dup(struct fs_context *fc,
struct fs_context *src_fc)
{
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index bd023dd38ae6..386ab580b839 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -249,18 +249,19 @@ static inline void seq_show_option(struct seq_file *m, const char *name,
/**
* seq_show_option_n - display mount options with appropriate escapes
- * where @value must be a specific length.
+ * where @value must be a specific length (i.e.
+ * not NUL-terminated).
* @m: the seq_file handle
* @name: the mount option name
* @value: the mount option name's value, cannot be NULL
- * @length: the length of @value to display
+ * @length: the exact length of @value to display, must be constant expression
*
* This is a macro since this uses "length" to define the size of the
* stack buffer.
*/
#define seq_show_option_n(m, name, value, length) { \
char val_buf[length + 1]; \
- strncpy(val_buf, value, length); \
+ memcpy(val_buf, value, length); \
val_buf[length] = '\0'; \
seq_show_option(m, name, val_buf); \
}
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 6d58c57acdaa..a156d2ed8d9e 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -459,7 +459,8 @@ struct uart_port {
struct serial_rs485 *rs485);
int (*iso7816_config)(struct uart_port *,
struct serial_iso7816 *iso7816);
- int ctrl_id; /* optional serial core controller id */
+ unsigned int ctrl_id; /* optional serial core controller id */
+ unsigned int port_id; /* optional serial core port id */
unsigned int irq; /* irq number */
unsigned long irqflags; /* irq flags */
unsigned int uartclk; /* base uart clock */
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 9029abd29b1c..6b0c626620f5 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -13,6 +13,10 @@
/* inode in-kernel data */
+#ifdef CONFIG_TMPFS_QUOTA
+#define SHMEM_MAXQUOTAS 2
+#endif
+
struct shmem_inode_info {
spinlock_t lock;
unsigned int seals; /* shmem seals */
@@ -27,6 +31,10 @@ struct shmem_inode_info {
atomic_t stop_eviction; /* hold when working on inode */
struct timespec64 i_crtime; /* file creation time */
unsigned int fsflags; /* flags for FS_IOC_[SG]ETFLAGS */
+#ifdef CONFIG_TMPFS_QUOTA
+ struct dquot *i_dquot[MAXQUOTAS];
+#endif
+ struct offset_ctx dir_offsets; /* stable entry offsets */
struct inode vfs_inode;
};
@@ -35,11 +43,18 @@ struct shmem_inode_info {
(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL)
+struct shmem_quota_limits {
+ qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
+ qsize_t usrquota_ihardlimit; /* Default user quota inode hard limit */
+ qsize_t grpquota_bhardlimit; /* Default group quota block hard limit */
+ qsize_t grpquota_ihardlimit; /* Default group quota inode hard limit */
+};
+
struct shmem_sb_info {
unsigned long max_blocks; /* How many blocks are allowed */
struct percpu_counter used_blocks; /* How many are allocated */
unsigned long max_inodes; /* How many inodes are allowed */
- unsigned long free_inodes; /* How many are left for allocation */
+ unsigned long free_ispace; /* How much ispace left for allocation */
raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
umode_t mode; /* Mount mode for root directory */
unsigned char huge; /* Whether to try for hugepages */
@@ -53,6 +68,7 @@ struct shmem_sb_info {
spinlock_t shrinklist_lock; /* Protects shrinklist */
struct list_head shrinklist; /* List of shinkable inodes */
unsigned long shrinklist_len; /* Length of shrinklist */
+ struct shmem_quota_limits qlimits; /* Default quota limits */
};
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
@@ -172,4 +188,17 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
#endif /* CONFIG_SHMEM */
#endif /* CONFIG_USERFAULTFD */
+/*
+ * Used space is stored as unsigned 64-bit value in bytes but
+ * quota core supports only signed 64-bit values so use that
+ * as a limit
+ */
+#define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
+#define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL
+
+#ifdef CONFIG_TMPFS_QUOTA
+extern const struct dquot_operations shmem_quota_operations;
+extern struct quota_format_type shmem_quota_format;
+#endif /* CONFIG_TMPFS_QUOTA */
+
#endif
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 054d7911bfc9..c1637515a8a4 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -62,6 +62,7 @@ struct sk_psock_progs {
enum sk_psock_state_bits {
SK_PSOCK_TX_ENABLED,
+ SK_PSOCK_RX_STRP_ENABLED,
};
struct sk_psock_link {
diff --git a/include/linux/spi/corgi_lcd.h b/include/linux/spi/corgi_lcd.h
index 0b857616919c..fc6c1515dc54 100644
--- a/include/linux/spi/corgi_lcd.h
+++ b/include/linux/spi/corgi_lcd.h
@@ -15,4 +15,6 @@ struct corgi_lcd_platform_data {
void (*kick_battery)(void);
};
+void corgi_lcd_limit_intensity(int limit);
+
#endif /* __LINUX_SPI_CORGI_LCD_H */
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 8e984d75f5b6..6b0a7dc48a4b 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -101,6 +101,7 @@ struct spi_mem_op {
u8 nbytes;
u8 buswidth;
u8 dtr : 1;
+ u8 __pad : 7;
u16 opcode;
} cmd;
@@ -108,6 +109,7 @@ struct spi_mem_op {
u8 nbytes;
u8 buswidth;
u8 dtr : 1;
+ u8 __pad : 7;
u64 val;
} addr;
@@ -115,12 +117,14 @@ struct spi_mem_op {
u8 nbytes;
u8 buswidth;
u8 dtr : 1;
+ u8 __pad : 7;
} dummy;
struct {
u8 buswidth;
u8 dtr : 1;
u8 ecc : 1;
+ u8 __pad : 6;
enum spi_mem_data_dir dir;
unsigned int nbytes;
union {
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index ebd72491af99..447133171d95 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -48,6 +48,10 @@ void srcu_drive_gp(struct work_struct *wp);
#define DEFINE_STATIC_SRCU(name) \
static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name)
+// Dummy structure for srcu_notifier_head.
+struct srcu_usage { };
+#define __SRCU_USAGE_INIT(name) { }
+
void synchronize_srcu(struct srcu_struct *ssp);
/*
diff --git a/include/linux/swait.h b/include/linux/swait.h
index 6a8c22b8c2a5..d324419482a0 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -146,7 +146,7 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
extern void swake_up_one(struct swait_queue_head *q);
extern void swake_up_all(struct swait_queue_head *q);
-extern void swake_up_locked(struct swait_queue_head *q);
+extern void swake_up_locked(struct swait_queue_head *q, int wake_flags);
extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 03e3d0121d5e..c0cb22cd607d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -284,22 +284,6 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
#endif
/*
- * Called before coming back to user-mode. Returning to user-mode with an
- * address limit different than USER_DS can allow to overwrite kernel memory.
- */
-static inline void addr_limit_user_check(void)
-{
-#ifdef TIF_FSCHECK
- if (!test_thread_flag(TIF_FSCHECK))
- return;
-#endif
-
-#ifdef TIF_FSCHECK
- clear_thread_flag(TIF_FSCHECK);
-#endif
-}
-
-/*
* These syscall function prototypes are kept in the same order as
* include/uapi/asm-generic/unistd.h. Architecture specific entries go below,
* followed by deprecated or obsolete system calls.
@@ -438,8 +422,10 @@ asmlinkage long sys_chdir(const char __user *filename);
asmlinkage long sys_fchdir(unsigned int fd);
asmlinkage long sys_chroot(const char __user *filename);
asmlinkage long sys_fchmod(unsigned int fd, umode_t mode);
-asmlinkage long sys_fchmodat(int dfd, const char __user * filename,
+asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
umode_t mode);
+asmlinkage long sys_fchmodat2(int dfd, const char __user *filename,
+ umode_t mode, unsigned int flags);
asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
gid_t group, int flag);
asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 87837094d549..dee66ade89a0 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -301,14 +301,14 @@ int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp);
#ifdef CONFIG_THERMAL
struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
void *, struct thermal_zone_device_ops *,
- struct thermal_zone_params *, int, int);
+ const struct thermal_zone_params *, int, int);
void thermal_zone_device_unregister(struct thermal_zone_device *);
struct thermal_zone_device *
thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int,
void *, struct thermal_zone_device_ops *,
- struct thermal_zone_params *, int, int);
+ const struct thermal_zone_params *, int, int);
void *thermal_zone_device_priv(struct thermal_zone_device *tzd);
const char *thermal_zone_device_type(struct thermal_zone_device *tzd);
@@ -348,7 +348,7 @@ void thermal_zone_device_critical(struct thermal_zone_device *tz);
static inline struct thermal_zone_device *thermal_zone_device_register(
const char *type, int trips, int mask, void *devdata,
struct thermal_zone_device_ops *ops,
- struct thermal_zone_params *tzp,
+ const struct thermal_zone_params *tzp,
int passive_delay, int polling_delay)
{ return ERR_PTR(-ENODEV); }
static inline void thermal_zone_device_unregister(
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 7038104463e4..bb466eec01e4 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -108,12 +108,15 @@ bool torture_must_stop(void);
bool torture_must_stop_irq(void);
void torture_kthread_stopping(char *title);
int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
- char *f, struct task_struct **tp);
+ char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp));
void _torture_stop_kthread(char *m, struct task_struct **tp);
#define torture_create_kthread(n, arg, tp) \
_torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
- "Failed to create " #n, &(tp))
+ "Failed to create " #n, &(tp), NULL)
+#define torture_create_kthread_cb(n, arg, tp, cbf) \
+ _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
+ "Failed to create " #n, &(tp), cbf)
#define torture_stop_kthread(n, tp) \
_torture_stop_kthread("Stopping " #n " task", &(tp))
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 6a1e8f157255..4ee9d13749ad 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -283,6 +283,7 @@ enum tpm_chip_flags {
TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED = BIT(6),
TPM_CHIP_FLAG_FIRMWARE_UPGRADE = BIT(7),
TPM_CHIP_FLAG_SUSPENDED = BIT(8),
+ TPM_CHIP_FLAG_HWRNG_DISABLED = BIT(9),
};
#define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 3930e676436c..1e8bbdb8da90 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -59,6 +59,17 @@ int trace_raw_output_prep(struct trace_iterator *iter,
extern __printf(2, 3)
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...);
+/* Used to find the offset and length of dynamic fields in trace events */
+struct trace_dynamic_info {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ u16 offset;
+ u16 len;
+#else
+ u16 len;
+ u16 offset;
+#endif
+};
+
/*
* The trace entry - the most basic unit of tracing. This is what
* is printed in the end as a single line in the trace output, such as:
diff --git a/include/linux/uio.h b/include/linux/uio.h
index ff81e5ccaef2..42bce38a8e87 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -163,7 +163,7 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
return ret;
}
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
size_t bytes, struct iov_iter *i);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
void iov_iter_revert(struct iov_iter *i, size_t bytes);
@@ -184,6 +184,13 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset,
{
return copy_page_to_iter(&folio->page, offset, bytes, i);
}
+
+static inline size_t copy_folio_from_iter_atomic(struct folio *folio,
+ size_t offset, size_t bytes, struct iov_iter *i)
+{
+ return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
+}
+
size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
size_t bytes, struct iov_iter *i);
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index bdf8de2cdd93..7b4dd69555e4 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -155,6 +155,10 @@ retry:
if (gso_type & SKB_GSO_UDP)
nh_off -= thlen;
+ /* Kernel has a special handling for GSO_BY_FRAGS. */
+ if (gso_size == GSO_BY_FRAGS)
+ return -EINVAL;
+
/* Too small packets are not really GSO ones. */
if (skb->len - nh_off > gso_size) {
shinfo->gso_size = gso_size;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a0307b516b09..5ec7739400f4 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -210,6 +210,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
}
int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
unsigned int mode, void *key, wait_queue_entry_t *bookmark);
@@ -237,6 +238,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
#define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m))
#define wake_up_poll(x, m) \
__wake_up(x, TASK_NORMAL, 1, poll_to_key(m))
+#define wake_up_poll_on_current_cpu(x, m) \
+ __wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m))
#define wake_up_locked_poll(x, m) \
__wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m))
#define wake_up_interruptible_poll(x, m) \
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fba937999fbf..083387c00f0c 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -375,11 +375,6 @@ void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end);
bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
-void folio_account_redirty(struct folio *folio);
-static inline void account_page_redirty(struct page *page)
-{
- folio_account_redirty(page_folio(page));
-}
bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
bool redirty_page_for_writepage(struct writeback_control *, struct page *);
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d591ef59aa98..d20051865800 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -114,13 +114,15 @@ struct simple_xattr {
};
void simple_xattrs_init(struct simple_xattrs *xattrs);
-void simple_xattrs_free(struct simple_xattrs *xattrs);
+void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
+size_t simple_xattr_space(const char *name, size_t size);
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
+void simple_xattr_free(struct simple_xattr *xattr);
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags,
- ssize_t *removed_size);
+struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+ const char *name, const void *value,
+ size_t size, int flags);
ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size);
void simple_xattr_add(struct simple_xattrs *xattrs,