author    Felix Kuehling <fxkuehl@gmx.de>    2005-03-06 01:41:06 +0000
committer Felix Kuehling <fxkuehl@gmx.de>    2005-03-06 01:41:06 +0000
commit    15995234b4d6cb848d919b0342b5697fffe80c89 (patch)
tree      04c8f9856711f58b458068ec123a9e386b67cc05
parent    26f04a16645edb366fce16060f6d5d01f2ff54b3 (diff)
Added support for command DMA on Savage4-based hardware. Unfortunately
command and vertex DMA don't work at the same time. Command DMA
performance is superior and works with all vertex formats. Bumped minor
version and driver date.
-rw-r--r--  shared-core/savage_bci.c    315
-rw-r--r--  shared-core/savage_drv.h     87
-rw-r--r--  shared-core/savage_state.c  184
3 files changed, 506 insertions(+), 80 deletions(-)
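
[Note: the patch funnels all command submission through a small macro layer
added in savage_drv.h. A minimal sketch of the intended usage pattern, pieced
together from the macros and their call sites below; the register and value
are illustrative only:

    DMA_LOCALS;                                  /* declares uint32_t *dma_ptr */
    BEGIN_DMA(2);                                /* reserve 2 dwords; may flush or wait */
    DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); /* 1 dword: set-register header */
    DMA_WRITE(0xffffffff);                       /* 1 dword: register value */
    DMA_COMMIT();                                /* debug check: reservation == use */
    DMA_FLUSH();                                 /* hand the buffer to the hardware */

DMA_FLUSH dispatches through dev_priv->dma_flush, which init points either at
savage_dma_flush (real command DMA) or savage_fake_dma_flush (fallback through
the BCI FIFO).]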
diff --git a/shared-core/savage_bci.c b/shared-core/savage_bci.c
index 3ddcccb6..8c58873a 100644
--- a/shared-core/savage_bci.c
+++ b/shared-core/savage_bci.c
@@ -47,6 +47,7 @@ savage_bci_wait_fifo_shadow(drm_savage_private_t *dev_priv, unsigned int n)
#endif
for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) {
+ DRM_MEMORYBARRIER();
status = dev_priv->status_ptr[0];
if ((status & mask) < threshold)
return 0;
@@ -120,6 +121,7 @@ savage_bci_wait_event_shadow(drm_savage_private_t *dev_priv, uint16_t e)
int i;
for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) {
+ DRM_MEMORYBARRIER();
status = dev_priv->status_ptr[1];
if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff ||
(status & 0xffff) == 0)
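
[Note: the comparison above is wrap-safe in 16 bits, treating any difference
(status - e) mod 2^16 of up to 0x7fff as "event passed". A worked instance
with illustrative values:

    /* e = 0xfffe was emitted just before the 16-bit counter wrapped;    */
    /* the hardware now reports status = 0x0002.                         */
    /* ((0x0002 - 0xfffe) & 0xffff) = 0x0004 <= 0x7fff  => e passed.     */
    /* An event not yet reached, e.g. e = 0x0005, gives                  */
    /* ((0x0002 - 0x0005) & 0xffff) = 0xfffd >  0x7fff  => keep polling. */]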
@@ -247,7 +249,7 @@ static drm_buf_t *savage_freelist_get(drm_device_t *dev)
event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
wrap = dev_priv->event_wrap;
if (event > dev_priv->event_counter)
- wrap--; /* hardware hasn't passed the last wrap yet */
+ wrap--; /* hardware hasn't passed the last wrap yet */
DRM_DEBUG(" tail=0x%04x %d\n", tail->age.event, tail->age.wrap);
DRM_DEBUG(" head=0x%04x %d\n", event, wrap);
@@ -286,6 +288,225 @@ void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf)
}
/*
+ * Command DMA
+ */
+static int savage_dma_init(drm_savage_private_t *dev_priv)
+{
+ unsigned int i;
+
+ dev_priv->nr_dma_pages = dev_priv->cmd_dma->size /
+ (SAVAGE_DMA_PAGE_SIZE*4);
+ dev_priv->dma_pages = drm_alloc(sizeof(drm_savage_dma_page_t) *
+ dev_priv->nr_dma_pages,
+ DRM_MEM_DRIVER);
+ if (dev_priv->dma_pages == NULL)
+ return DRM_ERR(ENOMEM);
+
+ for (i = 0; i < dev_priv->nr_dma_pages; ++i) {
+ dev_priv->dma_pages[i].age.event = 0;
+ dev_priv->dma_pages[i].age.wrap = 0;
+ dev_priv->dma_pages[i].used = 0;
+ }
+
+ dev_priv->first_dma_page = 0;
+ dev_priv->current_dma_page = 0;
+
+ return 0;
+}
+
+void savage_dma_reset(drm_savage_private_t *dev_priv)
+{
+ uint16_t event;
+ unsigned int wrap, i;
+ event = savage_bci_emit_event(dev_priv, 0);
+ wrap = dev_priv->event_wrap;
+ for (i = 0; i < dev_priv->nr_dma_pages; ++i) {
+ SET_AGE(&dev_priv->dma_pages[i].age, event, wrap);
+ dev_priv->dma_pages[i].used = 0;
+ }
+ dev_priv->first_dma_page = dev_priv->current_dma_page = 0;
+}
+
+void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page)
+{
+ uint16_t event;
+ unsigned int wrap;
+
+ /* Faked DMA buffer pages don't age. */
+ if (dev_priv->cmd_dma == &dev_priv->fake_dma)
+ return;
+
+ UPDATE_EVENT_COUNTER();
+ if (dev_priv->status_ptr)
+ event = dev_priv->status_ptr[1] & 0xffff;
+ else
+ event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
+ wrap = dev_priv->event_wrap;
+ if (event > dev_priv->event_counter)
+ wrap--; /* hardware hasn't passed the last wrap yet */
+
+ if (dev_priv->dma_pages[page].age.wrap >= wrap &&
+ dev_priv->dma_pages[page].age.event > event) {
+ if (dev_priv->wait_evnt(dev_priv,
+ dev_priv->dma_pages[page].age.event)
+ < 0)
+ DRM_ERROR("wait_evnt failed!\n");
+ }
+}
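+
+/* A worked instance of the wrap correction above (illustrative values):
+ *
+ *   Driver state: event_counter = 0x0010, event_wrap = 3 (the driver
+ *   has already counted the wrap).  Hardware reports event = 0xfff0,
+ *   and 0xfff0 > 0x0010 means it is still on the previous wrap, so the
+ *   effective wrap is 2.
+ *   A page aged (event 0xfff8, wrap 2): age.wrap >= 2 and
+ *   0xfff8 > 0xfff0, so the page is still in flight and we wait
+ *   on event 0xfff8. */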
+
+uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, unsigned int n)
+{
+ unsigned int cur = dev_priv->current_dma_page;
+ unsigned int rest = SAVAGE_DMA_PAGE_SIZE -
+ dev_priv->dma_pages[cur].used;
+ unsigned int nr_pages = (n - rest + SAVAGE_DMA_PAGE_SIZE-1) /
+ SAVAGE_DMA_PAGE_SIZE;
+ uint32_t *dma_ptr;
+ unsigned int i;
+
+ DRM_DEBUG("cur=%u, cur->used=%u, n=%u, rest=%u, nr_pages=%u\n",
+ cur, dev_priv->dma_pages[cur].used, n, rest, nr_pages);
+
+ if (cur + nr_pages < dev_priv->nr_dma_pages) {
+ dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +
+ cur*SAVAGE_DMA_PAGE_SIZE +
+ dev_priv->dma_pages[cur].used;
+ if (n < rest)
+ rest = n;
+ dev_priv->dma_pages[cur].used += rest;
+ n -= rest;
+ cur++;
+ } else {
+ dev_priv->dma_flush(dev_priv);
+ nr_pages = (n + SAVAGE_DMA_PAGE_SIZE-1) / SAVAGE_DMA_PAGE_SIZE;
+ for (i = cur+1; i < dev_priv->nr_dma_pages; ++i) {
+ dev_priv->dma_pages[i].age =
+ dev_priv->dma_pages[cur].age;
+ dev_priv->dma_pages[i].used = 0;
+ }
+ dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle;
+ dev_priv->first_dma_page = cur = 0;
+ }
+ for (i = cur; nr_pages > 0; ++i, --nr_pages) {
+#if SAVAGE_DMA_DEBUG
+ if (dev_priv->dma_pages[i].used) {
+ DRM_ERROR("unflushed page %u: used=%u\n",
+ i, dev_priv->dma_pages[i].used);
+ }
+#endif
+ if (n > SAVAGE_DMA_PAGE_SIZE)
+ dev_priv->dma_pages[i].used = SAVAGE_DMA_PAGE_SIZE;
+ else
+ dev_priv->dma_pages[i].used = n;
+ n -= SAVAGE_DMA_PAGE_SIZE;
+ }
+ dev_priv->current_dma_page = --i;
+
+ DRM_DEBUG("cur=%u, cur->used=%u, n=%u\n",
+ i, dev_priv->dma_pages[i].used, n);
+
+ savage_dma_wait(dev_priv, dev_priv->current_dma_page);
+
+ return dma_ptr;
+}
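+
+/* A worked allocation, using SAVAGE_DMA_PAGE_SIZE = 1024 dwords as
+ * defined in savage_drv.h below (numbers illustrative):
+ *
+ *   Request n = 100 with cur->used = 1000: rest = 24 and
+ *   nr_pages = (100 - 24 + 1023) / 1024 = 1.
+ *   If cur + 1 < nr_dma_pages, dma_ptr points at the 24 free dwords of
+ *   cur (which becomes full) and page cur+1 absorbs the remaining 76;
+ *   the pages are contiguous in the mapping, so the caller writes all
+ *   100 dwords through one pointer.
+ *   Otherwise everything pending is flushed first and the allocation
+ *   restarts at page 0. */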
+
+static void savage_dma_flush(drm_savage_private_t *dev_priv)
+{
+ BCI_LOCALS;
+ unsigned int cur = dev_priv->current_dma_page;
+ uint16_t event;
+ unsigned int wrap, pad, len, i;
+ unsigned long phys_addr;
+
+ if (dev_priv->first_dma_page == dev_priv->current_dma_page &&
+ dev_priv->dma_pages[dev_priv->current_dma_page].used == 0)
+ return;
+
+ /* pad to multiples of 8 entries (really needed? 2 should do it) */
+ pad = -dev_priv->dma_pages[cur].used & 7;
+ DRM_DEBUG("used=%d, pad=%u\n", dev_priv->dma_pages[cur].used, pad);
+
+ if (pad) {
+ uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +
+ cur * SAVAGE_DMA_PAGE_SIZE +
+ dev_priv->dma_pages[cur].used;
+ dev_priv->dma_pages[cur].used += pad;
+ while(pad != 0) {
+ *dma_ptr++ = BCI_CMD_WAIT;
+ pad--;
+ }
+ }
+
+ DRM_MEMORYBARRIER();
+
+ /* do flush ... */
+ phys_addr = dev_priv->cmd_dma->offset +
+ dev_priv->first_dma_page * SAVAGE_DMA_PAGE_SIZE*4;
+ len = (cur - dev_priv->first_dma_page) * SAVAGE_DMA_PAGE_SIZE +
+ dev_priv->dma_pages[cur].used;
+
+ DRM_DEBUG("phys_addr=%lx, len=%u\n",
+ phys_addr | dev_priv->dma_type, len);
+
+ BEGIN_BCI(3);
+ BCI_SET_REGISTERS(SAVAGE_DMABUFADDR, 1);
+ BCI_WRITE(phys_addr | dev_priv->dma_type);
+ BCI_DMA(len);
+
+ /* age DMA pages */
+ event = savage_bci_emit_event(dev_priv, 0);
+ wrap = dev_priv->event_wrap;
+ for (i = dev_priv->first_dma_page;
+ i <= dev_priv->current_dma_page; ++i) {
+ SET_AGE(&dev_priv->dma_pages[i].age, event, wrap);
+ dev_priv->dma_pages[i].used = 0;
+ }
+
+ /* advance to next page */
+ if (i == dev_priv->nr_dma_pages)
+ i = 0;
+ dev_priv->first_dma_page = dev_priv->current_dma_page = i;
+}
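+
+/* How the padding and the DMA kick fit together, with illustrative
+ * numbers (the length encoding is inferred from the BCI_DMA macro in
+ * savage_drv.h):
+ *
+ *   used = 13 dwords in the last page: pad = -13 & 7 = 3, so three
+ *   BCI_CMD_WAIT no-ops round used up to 16.
+ *   With first_dma_page == cur, len = 16 dwords and BCI_DMA(16) emits
+ *   0xa8000000 | ((16 >> 1) - 1); the length field appears to count
+ *   64-bit units minus one, which is why len must at least be even. */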
+
+static void savage_fake_dma_flush(drm_savage_private_t *dev_priv)
+{
+ BCI_LOCALS;
+ unsigned int i, j;
+ if (dev_priv->first_dma_page == dev_priv->current_dma_page &&
+ dev_priv->dma_pages[dev_priv->current_dma_page].used == 0)
+ return;
+
+ DRM_DEBUG("first=%u, cur=%u, cur->used=%u\n",
+ dev_priv->first_dma_page, dev_priv->current_dma_page,
+ dev_priv->dma_pages[dev_priv->current_dma_page].used);
+
+ for (i = dev_priv->first_dma_page;
+ i <= dev_priv->current_dma_page && dev_priv->dma_pages[i].used;
+ ++i) {
+ uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +
+ i * SAVAGE_DMA_PAGE_SIZE;
+#if SAVAGE_DMA_DEBUG
+ /* Sanity check: all pages except the last one must be full. */
+ if (i < dev_priv->current_dma_page &&
+ dev_priv->dma_pages[i].used != SAVAGE_DMA_PAGE_SIZE) {
+ DRM_ERROR("partial DMA page %u: used=%u",
+ i, dev_priv->dma_pages[i].used);
+ }
+#endif
+ BEGIN_BCI(dev_priv->dma_pages[i].used);
+ for (j = 0; j < dev_priv->dma_pages[i].used; ++j) {
+ BCI_WRITE(dma_ptr[j]);
+ }
+ dev_priv->dma_pages[i].used = 0;
+ }
+
+ /* advance to next page */
+ if (i == dev_priv->nr_dma_pages)
+ i = 0;
+ dev_priv->first_dma_page = dev_priv->current_dma_page = i;
+}
+
+/*
* Initialize permanent mappings. On Savage4 and SavageIX the alignment
* and size of the aperture is not suitable for automatic MTRR setup
* in drm_initmap. Therefore we do it manually before the maps are
@@ -464,14 +685,20 @@ static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init)
} else {
dev_priv->status = NULL;
}
- if (dev_priv->dma_type == SAVAGE_DMA_AGP) {
+ if (dev_priv->dma_type == SAVAGE_DMA_AGP && init->buffers_offset) {
dev->agp_buffer_map = drm_core_findmap(dev,
init->buffers_offset);
if (!dev->agp_buffer_map) {
- DRM_ERROR("could not find dma buffer region!\n");
+ DRM_ERROR("could not find DMA buffer region!\n");
savage_do_cleanup_bci(dev);
return DRM_ERR(EINVAL);
}
+ drm_core_ioremap(dev->agp_buffer_map, dev);
+ if (!dev->agp_buffer_map) {
+ DRM_ERROR("failed to ioremap DMA buffer region!\n");
+ savage_do_cleanup_bci(dev);
+ return DRM_ERR(ENOMEM);
+ }
}
if (init->agp_textures_offset) {
dev_priv->agp_textures =
@@ -484,25 +711,65 @@ static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init)
} else {
dev_priv->agp_textures = NULL;
}
- if (0 && !S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
- /* command DMA not implemented yet */
+
+ if (init->cmd_dma_offset) {
+ if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+ DRM_ERROR("command DMA not supported on "
+ "Savage3D/MX/IX.\n");
+ savage_do_cleanup_bci(dev);
+ return DRM_ERR(EINVAL);
+ }
+ if (dev->dma && dev->dma->buflist) {
+ DRM_ERROR("command and vertex DMA not supported "
+ "at the same time.\n");
+ savage_do_cleanup_bci(dev);
+ return DRM_ERR(EINVAL);
+ }
dev_priv->cmd_dma = drm_core_findmap(dev, init->cmd_dma_offset);
if (!dev_priv->cmd_dma) {
DRM_ERROR("could not find command DMA region!\n");
savage_do_cleanup_bci(dev);
return DRM_ERR(EINVAL);
}
+ if (dev_priv->dma_type == SAVAGE_DMA_AGP) {
+ if (dev_priv->cmd_dma->type != _DRM_AGP) {
+ DRM_ERROR("AGP command DMA region is not a "
+ "_DRM_AGP map!\n");
+ savage_do_cleanup_bci(dev);
+ return DRM_ERR(EINVAL);
+ }
+ drm_core_ioremap(dev_priv->cmd_dma, dev);
+ if (!dev_priv->cmd_dma->handle) {
+ DRM_ERROR("failed to ioremap command "
+ "DMA region!\n");
+ savage_do_cleanup_bci(dev);
+ return DRM_ERR(ENOMEM);
+ }
+ } else if (dev_priv->cmd_dma->type != _DRM_CONSISTENT) {
+ DRM_ERROR("PCI command DMA region is not a "
+ "_DRM_CONSISTENT map!\n");
+ savage_do_cleanup_bci(dev);
+ return DRM_ERR(EINVAL);
+ }
} else {
dev_priv->cmd_dma = NULL;
}
- if (dev_priv->cmd_dma && dev_priv->dma_type == SAVAGE_DMA_AGP) {
- drm_core_ioremap(dev_priv->cmd_dma, dev);
- if (!dev_priv->cmd_dma->handle) {
- DRM_ERROR("failed to ioremap command DMA region!\n");
+ dev_priv->dma_flush = savage_dma_flush;
+ if (!dev_priv->cmd_dma) {
+ DRM_DEBUG("falling back to faked command DMA.\n");
+ dev_priv->fake_dma.offset = 0;
+ dev_priv->fake_dma.size = SAVAGE_FAKE_DMA_SIZE;
+ dev_priv->fake_dma.type = _DRM_SHM;
+ dev_priv->fake_dma.handle = drm_alloc(SAVAGE_FAKE_DMA_SIZE,
+ DRM_MEM_DRIVER);
+ if (!dev_priv->fake_dma.handle) {
+ DRM_ERROR("could not allocate faked DMA buffer!\n");
savage_do_cleanup_bci(dev);
return DRM_ERR(ENOMEM);
}
+ dev_priv->cmd_dma = &dev_priv->fake_dma;
+ dev_priv->dma_flush = savage_fake_dma_flush;
}
dev_priv->sarea_priv =
@@ -578,6 +845,12 @@ static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init)
return DRM_ERR(ENOMEM);
}
+ if (savage_dma_init(dev_priv) < 0) {
+ DRM_ERROR("could not initialize command DMA\n");
+ savage_do_cleanup_bci(dev);
+ return DRM_ERR(ENOMEM);
+ }
+
return 0;
}
@@ -585,9 +858,29 @@ int savage_do_cleanup_bci(drm_device_t *dev)
{
drm_savage_private_t *dev_priv = dev->dev_private;
- if (dev_priv->cmd_dma && dev_priv->dma_type == SAVAGE_DMA_AGP)
+ if (dev_priv->cmd_dma == &dev_priv->fake_dma) {
+ if (dev_priv->fake_dma.handle)
+ drm_free(dev_priv->fake_dma.handle,
+ SAVAGE_FAKE_DMA_SIZE, DRM_MEM_DRIVER);
+ } else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle &&
+ dev_priv->cmd_dma->type == _DRM_AGP &&
+ dev_priv->dma_type == SAVAGE_DMA_AGP)
drm_core_ioremapfree(dev_priv->cmd_dma, dev);
+ if (dev_priv->dma_type == SAVAGE_DMA_AGP &&
+ dev->agp_buffer_map && dev->agp_buffer_map->handle) {
+ drm_core_ioremapfree(dev->agp_buffer_map, dev);
+ /* make sure the next instance (which may be running
+ * in PCI mode) doesn't try to use an old
+ * agp_buffer_map. */
+ dev->agp_buffer_map = NULL;
+ }
+
+ if (dev_priv->dma_pages)
+ drm_free(dev_priv->dma_pages,
+ sizeof(drm_savage_dma_page_t)*dev_priv->nr_dma_pages,
+ DRM_MEM_DRIVER);
+
return 0;
}
@@ -651,7 +944,7 @@ int savage_bci_event_wait(DRM_IOCTL_ARGS)
hw_e = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
hw_w = dev_priv->event_wrap;
if (hw_e > dev_priv->event_counter)
- hw_w--; /* hardware hasn't passed the last wrap yet */
+ hw_w--; /* hardware hasn't passed the last wrap yet */
event_e = event.count & 0xffff;
event_w = event.count >> 16;
diff --git a/shared-core/savage_drv.h b/shared-core/savage_drv.h
index 2b44e529..ae37a48f 100644
--- a/shared-core/savage_drv.h
+++ b/shared-core/savage_drv.h
@@ -30,10 +30,10 @@
#define DRIVER_NAME "savage"
#define DRIVER_DESC "Savage3D/MX/IX, Savage4, SuperSavage, Twister, ProSavage[DDR]"
-#define DRIVER_DATE "20050222"
+#define DRIVER_DATE "20050305"
#define DRIVER_MAJOR 2
-#define DRIVER_MINOR 3
+#define DRIVER_MINOR 4
#define DRIVER_PATCHLEVEL 0
/* Interface history:
*
@@ -45,6 +45,8 @@
* 2.3 Event counters used by BCI_EVENT_EMIT/WAIT ioctls are now 32 bits
* wide and thus very long lived (unlikely to ever wrap). The size
* in the struct was 32 bits before, but only 16 bits were used
+ * 2.4 Implemented command DMA. Now drm_savage_init_t.cmd_dma_offset is
+ * actually used
*/
typedef struct drm_savage_age {
@@ -59,6 +61,16 @@ typedef struct drm_savage_buf_priv {
drm_buf_t *buf;
} drm_savage_buf_priv_t;
+typedef struct drm_savage_dma_page {
+ drm_savage_age_t age;
+ unsigned int used;
+} drm_savage_dma_page_t;
+#define SAVAGE_DMA_PAGE_SIZE 1024 /* in dwords */
+/* Fake DMA buffer size in bytes. 4 pages. Allows a maximum command
+ * size of 16kbytes or 4k entries. Minimum requirement would be
+ * 10kbytes for 255 40-byte vertices in one drawing command. */
+#define SAVAGE_FAKE_DMA_SIZE (SAVAGE_DMA_PAGE_SIZE*4*4)
+
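+/* Checking the size comment's arithmetic against the definitions above:
+ *   SAVAGE_FAKE_DMA_SIZE = 1024 dwords/page * 4 bytes/dword * 4 pages
+ *                        = 16384 bytes, i.e. 4096 dword entries.
+ *   255 vertices * 40 bytes = 10200 bytes (~10 kbytes), so one maximal
+ *   drawing command fits with room to spare. */
+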
/* interesting bits of hardware state that are saved in dev_priv */
typedef union {
struct drm_savage_common_state {
@@ -143,6 +155,7 @@ typedef struct drm_savage_private {
drm_local_map_t *status;
drm_local_map_t *agp_textures;
drm_local_map_t *cmd_dma;
+ drm_local_map_t fake_dma;
struct {
int handle;
@@ -155,6 +168,10 @@ typedef struct drm_savage_private {
uint16_t event_counter;
unsigned int event_wrap;
+ /* Savage4 command DMA */
+ drm_savage_dma_page_t *dma_pages;
+ unsigned int nr_dma_pages, first_dma_page, current_dma_page;
+
/* saved hw state for global/local check on S3D */
uint32_t hw_draw_ctrl, hw_zbuf_ctrl;
/* and for scissors (global, so don't emit if not changed) */
@@ -172,6 +189,7 @@ typedef struct drm_savage_private {
* Avoid unwanted macro expansion. */
void (*emit_clip_rect)(struct drm_savage_private *dev_priv,
drm_clip_rect_t *pbox);
+ void (*dma_flush)(struct drm_savage_private *dev_priv);
} drm_savage_private_t;
/* ioctls */
@@ -185,6 +203,10 @@ extern int savage_bci_buffers(DRM_IOCTL_ARGS);
extern uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv,
unsigned int flags);
extern void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf);
+extern void savage_dma_reset(drm_savage_private_t *dev_priv);
+extern void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page);
+extern uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv,
+ unsigned int n);
extern int savage_preinit(drm_device_t *dev, unsigned long chipset);
extern int savage_postcleanup(drm_device_t *dev);
extern int savage_do_cleanup_bci(drm_device_t *dev);
@@ -290,6 +312,7 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
/* common stuff */
#define SAVAGE_VERTBUFADDR 0x3e
#define SAVAGE_BITPLANEWTMASK 0xd7
+#define SAVAGE_DMABUFADDR 0x51
/* texture enable bits (needed for tex addr checking) */
#define SAVAGE_TEXCTRL_TEXEN_MASK 0x00010000 /* S3D */
@@ -408,6 +431,8 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
#define BCI_CMD_DRAW_NO_V1 0x00000080
#define BCI_CMD_DRAW_NO_UV1 0x000000c0
+#define BCI_CMD_DMA 0xa8000000
+
#define BCI_W_H(w, h) ((((h) << 16) | (w)) & 0x0FFF0FFF)
#define BCI_X_Y(x, y) ((((y) << 16) | (x)) & 0x0FFF0FFF)
#define BCI_X_W(x, y) ((((w) << 16) | (x)) & 0x0FFF0FFF)
@@ -431,10 +456,17 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
BCI_WRITE(BCI_CMD_SET_REGISTER | \
((uint32_t)(n) & 0xff) << 16 | \
((uint32_t)(first) & 0xffff))
+#define DMA_SET_REGISTERS( first, n ) \
+ DMA_WRITE(BCI_CMD_SET_REGISTER | \
+ ((uint32_t)(n) & 0xff) << 16 | \
+ ((uint32_t)(first) & 0xffff))
#define BCI_DRAW_PRIMITIVE(n, type, skip) \
BCI_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \
((n) << 16))
+#define DMA_DRAW_PRIMITIVE(n, type, skip) \
+ DMA_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \
+ ((n) << 16))
#define BCI_DRAW_INDICES_S3D(n, type, i0) \
BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) | \
@@ -444,6 +476,9 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) | \
(skip) | ((n) << 16))
+#define BCI_DMA(n) \
+ BCI_WRITE(BCI_CMD_DMA | (((n) >> 1) - 1))
+
/*
* access to MMIO
*/
@@ -473,6 +508,54 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
} \
} while(0)
+/*
+ * command DMA support
+ */
+#define SAVAGE_DMA_DEBUG 1
+
+#define DMA_LOCALS uint32_t *dma_ptr;
+
+#define BEGIN_DMA( n ) do { \
+ unsigned int cur = dev_priv->current_dma_page; \
+ unsigned int rest = SAVAGE_DMA_PAGE_SIZE - \
+ dev_priv->dma_pages[cur].used; \
+ if ((n) > rest) { \
+ dma_ptr = savage_dma_alloc(dev_priv, (n)); \
+ } else { /* fast path for small allocations */ \
+ dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + \
+ cur * SAVAGE_DMA_PAGE_SIZE + \
+ dev_priv->dma_pages[cur].used; \
+ if (dev_priv->dma_pages[cur].used == 0) \
+ savage_dma_wait(dev_priv, cur); \
+ dev_priv->dma_pages[cur].used += (n); \
+ } \
+} while(0)
+
+#define DMA_WRITE( val ) *dma_ptr++ = (uint32_t)(val)
+
+#define DMA_COPY_FROM_USER(src,n) do { \
+ DRM_COPY_FROM_USER_UNCHECKED(dma_ptr, (src), (n)*4); \
+ dma_ptr += n; \
+} while(0)
+
+#if SAVAGE_DMA_DEBUG
+#define DMA_COMMIT() do { \
+ unsigned int cur = dev_priv->current_dma_page; \
+ uint32_t *expected = (uint32_t *)dev_priv->cmd_dma->handle + \
+ cur * SAVAGE_DMA_PAGE_SIZE + \
+ dev_priv->dma_pages[cur].used; \
+ if (dma_ptr != expected) { \
+ DRM_ERROR("DMA allocation and use don't match: " \
+ "%p != %p\n", expected, dma_ptr); \
+ savage_dma_reset(dev_priv); \
+ } \
+} while(0)
+#else
+#define DMA_COMMIT() do {/* nothing */} while(0)
+#endif
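+
+/* What the debug variant of DMA_COMMIT catches, sketched with
+ * hypothetical numbers:
+ *   BEGIN_DMA(4) bumped cur->used by 4, but only three DMA_WRITEs
+ *   followed: dma_ptr lags the expected position by one dword.
+ *   DMA_COMMIT logs the mismatch and calls savage_dma_reset(),
+ *   discarding the buffered commands rather than sending a stale
+ *   dword to the hardware. */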
+
+#define DMA_FLUSH() dev_priv->dma_flush(dev_priv)
+
/* Buffer aging via event tag
*/
diff --git a/shared-core/savage_state.c b/shared-core/savage_state.c
index f1e424a7..cc386527 100644
--- a/shared-core/savage_state.c
+++ b/shared-core/savage_state.c
@@ -39,15 +39,16 @@ void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv,
((((uint32_t)pbox->y2-1) << 16) & 0x07ff0000);
if (scstart != dev_priv->state.s3d.scstart ||
scend != dev_priv->state.s3d.scend) {
- BCI_LOCALS;
- BEGIN_BCI(4);
- BCI_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D);
- BCI_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
- BCI_WRITE(scstart);
- BCI_WRITE(scend);
+ DMA_LOCALS;
+ BEGIN_DMA(4);
+ DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D);
+ DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
+ DMA_WRITE(scstart);
+ DMA_WRITE(scend);
dev_priv->state.s3d.scstart = scstart;
dev_priv->state.s3d.scend = scend;
dev_priv->waiting = 1;
+ DMA_COMMIT();
}
}
@@ -64,15 +65,16 @@ void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
((((uint32_t)pbox->y2-1) << 12) & 0x00fff000);
if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
drawctrl1 != dev_priv->state.s4.drawctrl1) {
- BCI_LOCALS;
- BEGIN_BCI(4);
- BCI_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D);
- BCI_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
- BCI_WRITE(drawctrl0);
- BCI_WRITE(drawctrl1);
+ DMA_LOCALS;
+ BEGIN_DMA(4);
+ DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D);
+ DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
+ DMA_WRITE(drawctrl0);
+ DMA_WRITE(drawctrl1);
dev_priv->state.s4.drawctrl0 = drawctrl0;
dev_priv->state.s4.drawctrl1 = drawctrl1;
dev_priv->waiting = 1;
+ DMA_COMMIT();
}
}
@@ -192,7 +194,7 @@ static int savage_dispatch_state(drm_savage_private_t *dev_priv,
const drm_savage_cmd_header_t *cmd_header,
const uint32_t __user *regs)
{
- BCI_LOCALS;
+ DMA_LOCALS;
unsigned int count = cmd_header->state.count;
unsigned int start = cmd_header->state.start;
unsigned int count2 = 0;
@@ -244,18 +246,18 @@ static int savage_dispatch_state(drm_savage_private_t *dev_priv,
bci_size = count + (count+254)/255 + count2 + (count2+254)/255;
if (cmd_header->state.global) {
- BEGIN_BCI(bci_size+1);
- BCI_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
+ BEGIN_DMA(bci_size+1);
+ DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
dev_priv->waiting = 1;
} else {
- BEGIN_BCI(bci_size);
+ BEGIN_DMA(bci_size);
}
do {
while (count > 0) {
unsigned int n = count < 255 ? count : 255;
- BCI_SET_REGISTERS(start, n);
- BCI_COPY_FROM_USER(regs, n);
+ DMA_SET_REGISTERS(start, n);
+ DMA_COPY_FROM_USER(regs, n);
count -= n;
start += n;
regs += n;
@@ -266,6 +268,8 @@ static int savage_dispatch_state(drm_savage_private_t *dev_priv,
count2 = 0;
} while (count);
+ DMA_COMMIT();
+
return 0;
}
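
[Note: the bci_size computation above budgets one set-register header per run
of at most 255 registers. A worked instance with illustrative counts:

    /* count = 300 contiguous registers:                               */
    /* headers = (300 + 254) / 255 = 2, so bci_size = 300 + 2          */
    /* (plus the matching terms for count2 when the range is split).   */]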
@@ -281,6 +285,11 @@ static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv,
unsigned int start = cmd_header->prim.start;
unsigned int i;
+ if (!dmabuf) {
+ DRM_ERROR("called without dma buffers!\n");
+ return DRM_ERR(EINVAL);
+ }
+
if (!n)
return 0;
@@ -335,6 +344,11 @@ static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv,
return DRM_ERR(EINVAL);
}
+ /* Vertex DMA doesn't work with command DMA at the same time,
+ * so we use BCI_... to submit commands here. Flush buffered
+ * faked DMA first. */
+ DMA_FLUSH();
+
if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
BEGIN_BCI(2);
BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
@@ -405,7 +419,7 @@ static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv,
unsigned int vb_size,
unsigned int vb_stride)
{
- BCI_LOCALS;
+ DMA_LOCALS;
unsigned char reorder = 0;
unsigned int prim = cmd_header->prim.prim;
unsigned int skip = cmd_header->prim.skip;
@@ -482,28 +496,32 @@ static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv,
int reorder[3] = {-1, -1, -1};
reorder[start%3] = 2;
- BEGIN_BCI(count*vtx_size+1);
- BCI_DRAW_PRIMITIVE(count, prim, skip);
+ BEGIN_DMA(count*vtx_size+1);
+ DMA_DRAW_PRIMITIVE(count, prim, skip);
for (i = start; i < start+count; ++i) {
unsigned int j = i + reorder[i % 3];
- BCI_COPY_FROM_USER(&vtxbuf[vb_stride*j],
+ DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j],
vtx_size);
}
+
+ DMA_COMMIT();
} else {
- BEGIN_BCI(count*vtx_size+1);
- BCI_DRAW_PRIMITIVE(count, prim, skip);
+ BEGIN_DMA(count*vtx_size+1);
+ DMA_DRAW_PRIMITIVE(count, prim, skip);
if (vb_stride == vtx_size) {
- BCI_COPY_FROM_USER(&vtxbuf[vb_stride*start],
+ DMA_COPY_FROM_USER(&vtxbuf[vb_stride*start],
vtx_size*count);
} else {
for (i = start; i < start+count; ++i) {
- BCI_COPY_FROM_USER(
+ DMA_COPY_FROM_USER(
&vtxbuf[vb_stride*i],
vtx_size);
}
}
+
+ DMA_COMMIT();
}
start += count;
@@ -527,6 +545,11 @@ static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv,
unsigned int n = cmd_header->idx.count;
unsigned int i;
+ if (!dmabuf) {
+ DRM_ERROR("called without dma buffers!\n");
+ return DRM_ERR(EINVAL);
+ }
+
if (!n)
return 0;
@@ -575,6 +598,11 @@ static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv,
}
}
+ /* Vertex DMA doesn't work with command DMA at the same time,
+ * so we use BCI_... to submit commands here. Flush buffered
+ * faked DMA first. */
+ DMA_FLUSH();
+
if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
BEGIN_BCI(2);
BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
@@ -658,7 +686,7 @@ static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv,
unsigned int vb_size,
unsigned int vb_stride)
{
- BCI_LOCALS;
+ DMA_LOCALS;
unsigned char reorder = 0;
unsigned int prim = cmd_header->idx.prim;
unsigned int skip = cmd_header->idx.skip;
@@ -740,23 +768,27 @@ static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv,
* for correct culling. Only on Savage3D. */
int reorder[3] = {2, -1, -1};
- BEGIN_BCI(count*vtx_size+1);
- BCI_DRAW_PRIMITIVE(count, prim, skip);
+ BEGIN_DMA(count*vtx_size+1);
+ DMA_DRAW_PRIMITIVE(count, prim, skip);
for (i = 0; i < count; ++i) {
unsigned int j = idx[i + reorder[i % 3]];
- BCI_COPY_FROM_USER(&vtxbuf[vb_stride*j],
+ DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j],
vtx_size);
}
+
+ DMA_COMMIT();
} else {
- BEGIN_BCI(count*vtx_size+1);
- BCI_DRAW_PRIMITIVE(count, prim, skip);
+ BEGIN_DMA(count*vtx_size+1);
+ DMA_DRAW_PRIMITIVE(count, prim, skip);
for (i = 0; i < count; ++i) {
unsigned int j = idx[i];
- BCI_COPY_FROM_USER(&vtxbuf[vb_stride*j],
+ DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j],
vtx_size);
}
+
+ DMA_COMMIT();
}
usr_idx += count;
@@ -774,7 +806,7 @@ static int savage_dispatch_clear(drm_savage_private_t *dev_priv,
unsigned int nbox,
const drm_clip_rect_t __user *usr_boxes)
{
- BCI_LOCALS;
+ DMA_LOCALS;
unsigned int flags = cmd_header->clear0.flags, mask, value;
unsigned int clear_cmd;
unsigned int i, nbufs;
@@ -799,9 +831,10 @@ static int savage_dispatch_clear(drm_savage_private_t *dev_priv,
if (mask != 0xffffffff) {
/* set mask */
- BEGIN_BCI(2);
- BCI_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
- BCI_WRITE(mask);
+ BEGIN_DMA(2);
+ DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
+ DMA_WRITE(mask);
+ DMA_COMMIT();
}
for (i = 0; i < nbox; ++i) {
drm_clip_rect_t box;
@@ -811,35 +844,37 @@ static int savage_dispatch_clear(drm_savage_private_t *dev_priv,
x = box.x1, y = box.y1;
w = box.x2 - box.x1;
h = box.y2 - box.y1;
- BEGIN_BCI(nbufs*6);
+ BEGIN_DMA(nbufs*6);
for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
if (!(flags & buf))
continue;
- BCI_WRITE(clear_cmd);
+ DMA_WRITE(clear_cmd);
switch(buf) {
case SAVAGE_FRONT:
- BCI_WRITE(dev_priv->front_offset);
- BCI_WRITE(dev_priv->front_bd);
+ DMA_WRITE(dev_priv->front_offset);
+ DMA_WRITE(dev_priv->front_bd);
break;
case SAVAGE_BACK:
- BCI_WRITE(dev_priv->back_offset);
- BCI_WRITE(dev_priv->back_bd);
+ DMA_WRITE(dev_priv->back_offset);
+ DMA_WRITE(dev_priv->back_bd);
break;
case SAVAGE_DEPTH:
- BCI_WRITE(dev_priv->depth_offset);
- BCI_WRITE(dev_priv->depth_bd);
+ DMA_WRITE(dev_priv->depth_offset);
+ DMA_WRITE(dev_priv->depth_bd);
break;
}
- BCI_WRITE(value);
- BCI_WRITE(BCI_X_Y(x, y));
- BCI_WRITE(BCI_W_H(w, h));
+ DMA_WRITE(value);
+ DMA_WRITE(BCI_X_Y(x, y));
+ DMA_WRITE(BCI_W_H(w, h));
}
+ DMA_COMMIT();
}
if (mask != 0xffffffff) {
/* reset mask */
- BEGIN_BCI(2);
- BCI_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
- BCI_WRITE(0xffffffff);
+ BEGIN_DMA(2);
+ DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
+ DMA_WRITE(0xffffffff);
+ DMA_COMMIT();
}
return 0;
@@ -849,7 +884,7 @@ static int savage_dispatch_swap(drm_savage_private_t *dev_priv,
unsigned int nbox,
const drm_clip_rect_t __user *usr_boxes)
{
- BCI_LOCALS;
+ DMA_LOCALS;
unsigned int swap_cmd;
unsigned int i;
@@ -864,13 +899,14 @@ static int savage_dispatch_swap(drm_savage_private_t *dev_priv,
drm_clip_rect_t box;
DRM_COPY_FROM_USER_UNCHECKED(&box, &usr_boxes[i], sizeof(box));
- BEGIN_BCI(6);
- BCI_WRITE(swap_cmd);
- BCI_WRITE(dev_priv->back_offset);
- BCI_WRITE(dev_priv->back_bd);
- BCI_WRITE(BCI_X_Y(box.x1, box.y1));
- BCI_WRITE(BCI_X_Y(box.x1, box.y1));
- BCI_WRITE(BCI_W_H(box.x2-box.x1, box.y2-box.y1));
+ BEGIN_DMA(6);
+ DMA_WRITE(swap_cmd);
+ DMA_WRITE(dev_priv->back_offset);
+ DMA_WRITE(dev_priv->back_bd);
+ DMA_WRITE(BCI_X_Y(box.x1, box.y1));
+ DMA_WRITE(BCI_X_Y(box.x1, box.y1));
+ DMA_WRITE(BCI_W_H(box.x2-box.x1, box.y2-box.y1));
+ DMA_COMMIT();
}
return 0;
@@ -967,12 +1003,16 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS)
DRM_COPY_FROM_USER_IOCTL(cmdbuf, (drm_savage_cmdbuf_t __user *)data,
sizeof(cmdbuf));
- if (cmdbuf.dma_idx > dma->buf_count) {
- DRM_ERROR("vertex buffer index %u out of range (0-%u)\n",
- cmdbuf.dma_idx, dma->buf_count-1);
- return DRM_ERR(EINVAL);
+ if (dma && dma->buflist) {
+ if (cmdbuf.dma_idx > dma->buf_count) {
+ DRM_ERROR("vertex buffer index %u out of range (0-%u)\n",
+ cmdbuf.dma_idx, dma->buf_count-1);
+ return DRM_ERR(EINVAL);
+ }
+ dmabuf = dma->buflist[cmdbuf.dma_idx];
+ } else {
+ dmabuf = NULL;
}
- dmabuf = dma->buflist[cmdbuf.dma_idx];
usr_cmdbuf = (drm_savage_cmd_header_t __user *)cmdbuf.cmd_addr;
usr_vtxbuf = (unsigned int __user *)cmdbuf.vb_addr;
@@ -1011,6 +1051,7 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS)
if (i + j > cmdbuf.size) {
DRM_ERROR("indexed drawing command extends "
"beyond end of command buffer\n");
+ DMA_FLUSH();
return DRM_ERR(EINVAL);
}
/* fall through */
@@ -1042,6 +1083,7 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS)
if (i + j > cmdbuf.size) {
DRM_ERROR("command SAVAGE_CMD_STATE extends "
"beyond end of command buffer\n");
+ DMA_FLUSH();
return DRM_ERR(EINVAL);
}
ret = savage_dispatch_state(
@@ -1054,6 +1096,7 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS)
if (i + 1 > cmdbuf.size) {
DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
"beyond end of command buffer\n");
+ DMA_FLUSH();
return DRM_ERR(EINVAL);
}
ret = savage_dispatch_clear(dev_priv, &cmd_header,
@@ -1068,11 +1111,14 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS)
break;
default:
DRM_ERROR("invalid command 0x%x\n", cmd_header.cmd.cmd);
+ DMA_FLUSH();
return DRM_ERR(EINVAL);
}
- if (ret != 0)
+ if (ret != 0) {
+ DMA_FLUSH();
return ret;
+ }
}
if (first_draw_cmd) {
@@ -1080,11 +1126,15 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS)
dev_priv, first_draw_cmd, usr_cmdbuf, dmabuf,
usr_vtxbuf, cmdbuf.vb_size, cmdbuf.vb_stride,
cmdbuf.nbox, usr_boxes);
- if (ret != 0)
+ if (ret != 0) {
+ DMA_FLUSH();
return ret;
+ }
}
- if (cmdbuf.discard) {
+ DMA_FLUSH();
+
+ if (dmabuf && cmdbuf.discard) {
drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
uint16_t event;
event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);