summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPauli Nieminen <suokkos@gmail.com>2009-08-26 14:44:16 +0300
committerPauli Nieminen <suokkos@gmail.com>2009-08-29 04:54:29 +0300
commitf390f19de36bb7f68edbc12f62ec64d532aaf552 (patch)
tree0c33a2e70e89c6d8faddb116e3e5e3f5e1e724a1
parent68be4caecd4815484891f784266452797122b17b (diff)
libdrm_radeon: Inline section start and make section a stack variable.
Section start is called a lot from Mesa (9M times in 180 frames of xmoto on r300). Virtual function overhead is huge at that number of calls, so it is cheaper to let the compiler inline section begin. Another change made at the same time is to move the section data to the stack. This makes it possible to do parallel emit in the future with minimal changes.
-rw-r--r--libdrm/radeon/radeon_cs.h128
-rw-r--r--libdrm/radeon/radeon_cs_gem.c64
2 files changed, 108 insertions, 84 deletions
diff --git a/libdrm/radeon/radeon_cs.h b/libdrm/radeon/radeon_cs.h
index 1117a850..0f02e8d0 100644
--- a/libdrm/radeon/radeon_cs.h
+++ b/libdrm/radeon/radeon_cs.h
@@ -61,6 +61,15 @@ struct radeon_cs_space_check {
struct radeon_cs_manager;
+struct radeon_cs_section {
+ unsigned ndw;
+ unsigned cdw;
+ unsigned cs_sdw;
+ const char *file;
+ const char *func;
+ int line;
+};
+
struct radeon_cs {
struct radeon_cs_manager *csm;
void *relocs;
@@ -69,12 +78,6 @@ struct radeon_cs {
unsigned relocs_total_size;
unsigned cdw;
unsigned ndw;
- int section;
- unsigned section_ndw;
- unsigned section_cdw;
- const char *section_file;
- const char *section_func;
- int section_line;
struct radeon_cs_space_check bos[MAX_SPACE_BOS];
int bo_count;
void (*space_flush_fn)(void *);
@@ -86,19 +89,14 @@ struct radeon_cs_funcs {
struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm,
uint32_t ndw);
int (*cs_write_reloc)(struct radeon_cs *cs,
+ struct radeon_cs_section *section,
struct radeon_bo *bo,
uint32_t read_domain,
uint32_t write_domain,
uint32_t flags);
- int (*cs_begin)(struct radeon_cs *cs,
- uint32_t ndw,
- const char *file,
- const char *func,
- int line);
- int (*cs_end)(struct radeon_cs *cs,
- const char *file,
- const char *func,
- int line);
+ int (*cs_realloc)(struct radeon_cs *cs,
+ uint32_t begin,
+ uint32_t ndw);
int (*cs_emit)(struct radeon_cs *cs);
int (*cs_destroy)(struct radeon_cs *cs);
int (*cs_erase)(struct radeon_cs *cs);
@@ -121,33 +119,97 @@ static inline struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
}
static inline int radeon_cs_write_reloc(struct radeon_cs *cs,
+ struct radeon_cs_section *section,
struct radeon_bo *bo,
uint32_t read_domain,
uint32_t write_domain,
uint32_t flags)
{
return cs->csm->funcs->cs_write_reloc(cs,
+ section,
bo,
read_domain,
write_domain,
flags);
}
+static inline uint32_t __radeon_cs_alloc_section(struct radeon_cs *cs,
+ uint32_t ndw)
+{
+ /* If we ever want to do parellel driver we need to change
+ this to atomic fetch and add operation. For now simple
+ read and add is enough. */
+ uint32_t begin = cs->cdw;
+ cs->cdw += ndw;
+ /* Handle cs overflow */
+ if (begin + ndw > cs->ndw) {
+ /* Just for backward compability!
+ We can't do realloc in parallel mode so this probably should be remoed. */
+ int rv = cs->csm->funcs->cs_realloc(cs, begin, ndw);
+ if (rv != 0)
+ return (uint32_t)rv;
+ }
+ return begin;
+}
+
+
+#define RADEON_CS_BATCH_LOCAL(name) struct radeon_cs_section name = {0}
+
static inline int radeon_cs_begin(struct radeon_cs *cs,
+ struct radeon_cs_section *section,
uint32_t ndw,
const char *file,
const char *func,
int line)
{
- return cs->csm->funcs->cs_begin(cs, ndw, file, func, line);
+ uint32_t begin;
+ if (section->ndw) {
+ fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
+ section->file, section->func, section->line);
+ fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
+ file, func, line);
+ return -EPIPE;
+ }
+
+ section->ndw = ndw;
+ section->cdw = 0;
+ section->file = file;
+ section->func = func;
+ section->line = line;
+ begin = __radeon_cs_alloc_section(cs, ndw);
+ if (begin > cs->ndw) {
+ return (int)begin;
+ }
+ section->cs_sdw = begin;
+ return 0;
}
static inline int radeon_cs_end(struct radeon_cs *cs,
+ struct radeon_cs_section *section,
const char *file,
const char *func,
int line)
{
- return cs->csm->funcs->cs_end(cs, file, func, line);
+ int rv = 0;
+ if (!section->ndw) {
+ fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
+ file, func, line);
+ rv = -EPIPE;
+ goto out;
+ }
+
+ if (section->cdw != section->ndw) {
+ fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
+ section->file, section->func, section->line, section->ndw, section->cdw);
+ fprintf(stderr, "CS section end at (%s,%s,%d)\n",
+ file, func, line);
+
+ rv = -EPIPE;
+ goto out;
+ }
+out:
+ section->ndw = 0;
+ return rv;
}
static inline int radeon_cs_emit(struct radeon_cs *cs)
@@ -184,30 +246,28 @@ static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, ui
cs->csm->gart_limit = limit;
}
-static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword)
+static inline void radeon_cs_write_dword(struct radeon_cs *cs,
+ struct radeon_cs_section *section,
+ uint32_t dword)
{
- cs->packets[cs->cdw++] = dword;
- if (cs->section) {
- cs->section_cdw++;
- }
+ cs->packets[section->cs_sdw + section->cdw] = dword;
+ section->cdw++;
}
-static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword)
+static inline void radeon_cs_write_qword(struct radeon_cs *cs,
+ struct radeon_cs_section *section,
+ uint64_t qword)
{
- memcpy(cs->packets + cs->cdw, &qword, sizeof(uint64_t));
- cs->cdw += 2;
- if (cs->section) {
- cs->section_cdw += 2;
- }
+ memcpy(cs->packets + section->cs_sdw + section->cdw, &qword, sizeof(uint64_t));
+ section->cdw += 2;
}
-static inline void radeon_cs_write_table(struct radeon_cs *cs, void *data, uint32_t size)
+static inline void radeon_cs_write_table(struct radeon_cs *cs,
+ struct radeon_cs_section *section,
+ void *data, uint32_t size)
{
- memcpy(cs->packets + cs->cdw, data, size * 4);
- cs->cdw += size;
- if (cs->section) {
- cs->section_cdw += size;
- }
+ memcpy(cs->packets + section->cs_sdw + section->cdw, data, size * 4);
+ section->cdw += size;
}
static inline void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data)
diff --git a/libdrm/radeon/radeon_cs_gem.c b/libdrm/radeon/radeon_cs_gem.c
index 62bda507..8af3005f 100644
--- a/libdrm/radeon/radeon_cs_gem.c
+++ b/libdrm/radeon/radeon_cs_gem.c
@@ -111,6 +111,7 @@ static struct radeon_cs *cs_gem_create(struct radeon_cs_manager *csm,
}
static int cs_gem_write_reloc(struct radeon_cs *cs,
+ struct radeon_cs_section *section,
struct radeon_bo *bo,
uint32_t read_domain,
uint32_t write_domain,
@@ -165,8 +166,8 @@ static int cs_gem_write_reloc(struct radeon_cs *cs,
/* update flags */
reloc->flags |= (flags & reloc->flags);
/* write relocation packet */
- radeon_cs_write_dword(cs, 0xc0001000);
- radeon_cs_write_dword(cs, idx);
+ radeon_cs_write_dword(cs, section, 0xc0001000);
+ radeon_cs_write_dword(cs, section, idx);
return 0;
}
}
@@ -199,40 +200,26 @@ static int cs_gem_write_reloc(struct radeon_cs *cs,
csg->chunks[1].length_dw += RELOC_SIZE;
radeon_bo_ref(bo);
cs->relocs_total_size += bo->size;
- radeon_cs_write_dword(cs, 0xc0001000);
- radeon_cs_write_dword(cs, idx);
+ radeon_cs_write_dword(cs, section, 0xc0001000);
+ radeon_cs_write_dword(cs, section, idx);
return 0;
}
-static int cs_gem_begin(struct radeon_cs *cs,
- uint32_t ndw,
- const char *file,
- const char *func,
- int line)
+static int cs_gem_realloc(struct radeon_cs *cs,
+ uint32_t begin,
+ uint32_t ndw)
{
+ /* Untill there is no multithreaded access
+ to cs we can omit locking. */
- if (cs->section) {
- fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
- cs->section_file, cs->section_func, cs->section_line);
- fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
- file, func, line);
- return -EPIPE;
- }
- cs->section = 1;
- cs->section_ndw = ndw;
- cs->section_cdw = 0;
- cs->section_file = file;
- cs->section_func = func;
- cs->section_line = line;
-
-
- if (cs->cdw + ndw > cs->ndw) {
+ if (begin + ndw > cs->ndw) {
uint32_t tmp, *ptr;
tmp = (cs->ndw + 1 + CS_ALIGNMENT) & (~CS_ALIGNMENT);
- if (tmp > CS_MAX_SIZE)
+ if (tmp > CS_MAX_SIZE) {
return -ENOMEM;
+ }
ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
if (ptr == NULL) {
@@ -243,28 +230,7 @@ static int cs_gem_begin(struct radeon_cs *cs,
}
return 0;
-}
-
-static int cs_gem_end(struct radeon_cs *cs,
- const char *file,
- const char *func,
- int line)
-{
- if (!cs->section) {
- fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
- file, func, line);
- return -EPIPE;
- }
- cs->section = 0;
- if (cs->section_ndw != cs->section_cdw) {
- fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
- cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
- fprintf(stderr, "CS section end at (%s,%s,%d)\n",
- file, func, line);
- return -EPIPE;
- }
- return 0;
}
static int cs_gem_emit(struct radeon_cs *cs)
@@ -322,7 +288,6 @@ static int cs_gem_erase(struct radeon_cs *cs)
}
cs->relocs_total_size = 0;
cs->cdw = 0;
- cs->section = 0;
cs->crelocs = 0;
csg->chunks[0].length_dw = 0;
csg->chunks[1].length_dw = 0;
@@ -435,8 +400,7 @@ static void cs_gem_print(struct radeon_cs *cs, FILE *file)
static struct radeon_cs_funcs radeon_cs_gem_funcs = {
cs_gem_create,
cs_gem_write_reloc,
- cs_gem_begin,
- cs_gem_end,
+ cs_gem_realloc,
cs_gem_emit,
cs_gem_destroy,
cs_gem_erase,