summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-01-29 08:50:42 +1100
committerLinus Torvalds <torvalds@linux-foundation.org>2008-01-29 08:50:42 +1100
commitd4928196fe9ec07d18139ba2735876b0c8aa738f (patch)
treea999ee3f18b0d724b2adb15ec72854e7ade164fa /include
parentbb04af0e2e5bcd8d1a5d7f7d5c704f7eb328f241 (diff)
parentfebffd61816ef6d4f84189468c1d47f1df55921e (diff)
Merge branch 'cfq-ioc-share' of git://git.kernel.dk/linux-2.6-block
* 'cfq-ioc-share' of git://git.kernel.dk/linux-2.6-block: cfq-iosched: kill some big inlines cfq-iosched: relax IOPRIO_CLASS_IDLE restrictions kernel: add CLONE_IO to specifically request sharing of IO contexts io_context sharing - anticipatory changes block: cfq: make the io contect sharing lockless io_context sharing - cfq changes io context sharing: preliminary support ioprio: move io priority from task_struct to io_context
Diffstat (limited to 'include')
-rw-r--r--include/linux/blkdev.h83
-rw-r--r--include/linux/init_task.h1
-rw-r--r--include/linux/iocontext.h95
-rw-r--r--include/linux/ioprio.h13
-rw-r--r--include/linux/sched.h2
5 files changed, 111 insertions, 83 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 49b7a4c31a6d..2483a05231c7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -34,83 +34,10 @@ struct sg_io_hdr;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
-/*
- * This is the per-process anticipatory I/O scheduler state.
- */
-struct as_io_context {
- spinlock_t lock;
-
- void (*dtor)(struct as_io_context *aic); /* destructor */
- void (*exit)(struct as_io_context *aic); /* called on task exit */
-
- unsigned long state;
- atomic_t nr_queued; /* queued reads & sync writes */
- atomic_t nr_dispatched; /* number of requests gone to the drivers */
-
- /* IO History tracking */
- /* Thinktime */
- unsigned long last_end_request;
- unsigned long ttime_total;
- unsigned long ttime_samples;
- unsigned long ttime_mean;
- /* Layout pattern */
- unsigned int seek_samples;
- sector_t last_request_pos;
- u64 seek_total;
- sector_t seek_mean;
-};
-
-struct cfq_queue;
-struct cfq_io_context {
- struct rb_node rb_node;
- void *key;
-
- struct cfq_queue *cfqq[2];
-
- struct io_context *ioc;
-
- unsigned long last_end_request;
- sector_t last_request_pos;
-
- unsigned long ttime_total;
- unsigned long ttime_samples;
- unsigned long ttime_mean;
-
- unsigned int seek_samples;
- u64 seek_total;
- sector_t seek_mean;
-
- struct list_head queue_list;
-
- void (*dtor)(struct io_context *); /* destructor */
- void (*exit)(struct io_context *); /* called on task exit */
-};
-
-/*
- * This is the per-process I/O subsystem state. It is refcounted and
- * kmalloc'ed. Currently all fields are modified in process io context
- * (apart from the atomic refcount), so require no locking.
- */
-struct io_context {
- atomic_t refcount;
- struct task_struct *task;
-
- unsigned int ioprio_changed;
-
- /*
- * For request batching
- */
- unsigned long last_waited; /* Time last woken after wait for request */
- int nr_batch_requests; /* Number of requests left in the batch */
-
- struct as_io_context *aic;
- struct rb_root cic_root;
- void *ioc_data;
-};
-
-void put_io_context(struct io_context *ioc);
+int put_io_context(struct io_context *ioc);
void exit_io_context(void);
struct io_context *get_io_context(gfp_t gfp_flags, int node);
+struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
@@ -894,6 +821,12 @@ static inline void exit_io_context(void)
{
}
+static inline int put_io_context(struct io_context *ioc)
+{
+ return 1;
+}
+
+
#endif /* CONFIG_BLOCK */
#endif
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 796019b22b6f..e6b3f7080679 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -137,7 +137,6 @@ extern struct group_info init_groups;
.time_slice = HZ, \
.nr_cpus_allowed = NR_CPUS, \
}, \
- .ioprio = 0, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \
.ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
new file mode 100644
index 000000000000..593b222d9dcc
--- /dev/null
+++ b/include/linux/iocontext.h
@@ -0,0 +1,95 @@
+#ifndef IOCONTEXT_H
+#define IOCONTEXT_H
+
+#include <linux/radix-tree.h>
+
+/*
+ * This is the per-process anticipatory I/O scheduler state.
+ */
+struct as_io_context {
+ spinlock_t lock;
+
+ void (*dtor)(struct as_io_context *aic); /* destructor */
+ void (*exit)(struct as_io_context *aic); /* called on task exit */
+
+ unsigned long state;
+ atomic_t nr_queued; /* queued reads & sync writes */
+ atomic_t nr_dispatched; /* number of requests gone to the drivers */
+
+ /* IO History tracking */
+ /* Thinktime */
+ unsigned long last_end_request;
+ unsigned long ttime_total;
+ unsigned long ttime_samples;
+ unsigned long ttime_mean;
+ /* Layout pattern */
+ unsigned int seek_samples;
+ sector_t last_request_pos;
+ u64 seek_total;
+ sector_t seek_mean;
+};
+
+struct cfq_queue;
+struct cfq_io_context {
+ void *key;
+ unsigned long dead_key;
+
+ struct cfq_queue *cfqq[2];
+
+ struct io_context *ioc;
+
+ unsigned long last_end_request;
+ sector_t last_request_pos;
+
+ unsigned long ttime_total;
+ unsigned long ttime_samples;
+ unsigned long ttime_mean;
+
+ unsigned int seek_samples;
+ u64 seek_total;
+ sector_t seek_mean;
+
+ struct list_head queue_list;
+
+ void (*dtor)(struct io_context *); /* destructor */
+ void (*exit)(struct io_context *); /* called on task exit */
+};
+
+/*
+ * I/O subsystem state of the associated processes. It is refcounted
+ * and kmalloc'ed. These could be shared between processes.
+ */
+struct io_context {
+ atomic_t refcount;
+ atomic_t nr_tasks;
+
+ /* all the fields below are protected by this lock */
+ spinlock_t lock;
+
+ unsigned short ioprio;
+ unsigned short ioprio_changed;
+
+ /*
+ * For request batching
+ */
+ unsigned long last_waited; /* Time last woken after wait for request */
+ int nr_batch_requests; /* Number of requests left in the batch */
+
+ struct as_io_context *aic;
+ struct radix_tree_root radix_root;
+ void *ioc_data;
+};
+
+static inline struct io_context *ioc_task_link(struct io_context *ioc)
+{
+ /*
+ * if ref count is zero, don't allow sharing (ioc is going away, it's
+ * a race).
+ */
+ if (ioc && atomic_inc_not_zero(&ioc->refcount))
+ return ioc;
+
+ return NULL;
+}
+
+#endif
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index baf29387cab4..2a3bb1bb7433 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -2,6 +2,7 @@
#define IOPRIO_H
#include <linux/sched.h>
+#include <linux/iocontext.h>
/*
* Gives us 8 prio classes with 13-bits of data for each class
@@ -45,18 +46,18 @@ enum {
* the cpu scheduler nice value to an io priority
*/
#define IOPRIO_NORM (4)
-static inline int task_ioprio(struct task_struct *task)
+static inline int task_ioprio(struct io_context *ioc)
{
- if (ioprio_valid(task->ioprio))
- return IOPRIO_PRIO_DATA(task->ioprio);
+ if (ioprio_valid(ioc->ioprio))
+ return IOPRIO_PRIO_DATA(ioc->ioprio);
return IOPRIO_NORM;
}
-static inline int task_ioprio_class(struct task_struct *task)
+static inline int task_ioprio_class(struct io_context *ioc)
{
- if (ioprio_valid(task->ioprio))
- return IOPRIO_PRIO_CLASS(task->ioprio);
+ if (ioprio_valid(ioc->ioprio))
+ return IOPRIO_PRIO_CLASS(ioc->ioprio);
return IOPRIO_CLASS_BE;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index df5b24ee80b3..2d0546e884ea 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -27,6 +27,7 @@
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
#define CLONE_NEWPID 0x20000000 /* New pid namespace */
#define CLONE_NEWNET 0x40000000 /* New network namespace */
+#define CLONE_IO 0x80000000 /* Clone io context */
/*
* Scheduling policies
@@ -975,7 +976,6 @@ struct task_struct {
struct hlist_head preempt_notifiers;
#endif
- unsigned short ioprio;
/*
* fpu_counter contains the number of consecutive context switches
* that the FPU is used. If this is over a threshold, the lazy fpu