author | Leif Delgass <ldelgass@users.sourceforge.net> | 2002-05-18 08:57:54 +0000 |
---|---|---|
committer | Leif Delgass <ldelgass@users.sourceforge.net> | 2002-05-18 08:57:54 +0000 |
commit | 3a83c18c24d651083258c32e1b0116044359ddf7 (patch) | |
tree | 40f524c6bdee7cfe3a92d2f0f1ad0de49daa3568 | |
parent | c3e3c95731905f973995e3bb1c7afb9f7e5d14df (diff) |
Checkpoint commit of async DMA, blits and AGP texturing. Buffer aging is
done with the pattern registers, which is not ideal, but works. There
are still lots of places where optimization is needed. We need to do the
minimum required to sync with the X server on context switches, since
right now things slow down whenever the mouse is moved.
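The buffer-aging scheme mentioned above works by having the engine write a monotonically increasing dispatch counter into a spare register as the final command of each batch (the patch repurposes PAT_REG0/PAT_REG1 as the LAST_FRAME/LAST_DISPATCH scratch registers); a pending buffer can be recycled once the value read back from that register has caught up with the counter value stamped on the buffer. A minimal sketch of the idea, using hypothetical names rather than the driver's actual structures:

```c
/*
 * Simplified sketch of buffer aging via a hardware scratch register.
 * Names are hypothetical; the real driver keys this off MACH64_PAT_REG1
 * through the MACH64_LAST_DISPATCH_REG alias introduced in this patch.
 */
struct buf_entry {
	struct buf_entry *next;
	unsigned int age;	/* dispatch counter value when queued */
	int pending;
};

static unsigned int last_dispatch;	/* CPU-side dispatch counter */

/* When a batch is handed to the engine, stamp its buffers with the new
 * counter value and arrange for the engine to write that same value to
 * the scratch register as the last command of the batch. */
void dispatch_batch(struct buf_entry *bufs)
{
	struct buf_entry *e;

	++last_dispatch;
	for (e = bufs; e; e = e->next) {
		e->age = last_dispatch;
		e->pending = 1;
	}
	/* ... append "write last_dispatch to the scratch register" to the
	 * batch, then kick off the DMA transfer ... */
}

/* A pending buffer is reusable once the engine has processed the batch
 * it belonged to, i.e. once the value read back from the scratch
 * register has caught up to the buffer's age. */
int buffer_done(struct buf_entry *e, unsigned int hw_done_age)
{
	return e->pending && e->age <= hw_done_age;
}
```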
-rw-r--r-- | linux-core/mach64_drv.c | 22 |
-rw-r--r-- | linux/drm.h | 3 |
-rw-r--r-- | linux/mach64.h | 23 |
-rw-r--r-- | linux/mach64_dma.c | 645 |
-rw-r--r-- | linux/mach64_drm.h | 8 |
-rw-r--r-- | linux/mach64_drv.c | 22 |
-rw-r--r-- | linux/mach64_drv.h | 168 |
-rw-r--r-- | linux/mach64_state.c | 490 |
-rw-r--r-- | shared-core/drm.h | 3 |
-rw-r--r-- | shared/drm.h | 3 |
10 files changed, 920 insertions, 467 deletions
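The largest change in the diff below turns the one-shot descriptor table into a 16 KB ring: mach64_do_dispatch_real_dma() keeps start/end offsets and a write pointer into the table, wraps them back to the table base when they pass table_addr + table_size, and busy-waits (updating its view of the hardware position through BM_GUI_TABLE) when the write position would catch up with the descriptor the engine is still fetching. A simplified, hypothetical sketch of that wraparound bookkeeping, not the driver's actual code:

```c
/* Hypothetical sketch of the 16 KB GUI-master descriptor ring: each
 * descriptor is four 32-bit words (frame-buffer offset, system memory
 * address, command, reserved), and the write offset wraps back to the
 * start of the table when it reaches the end. */
#include <stdint.h>
#include <stddef.h>

#define TABLE_SIZE	0x4000		/* 16 KB ring, as in the patch */
#define DESC_DWORDS	4		/* dwords per descriptor */

struct desc_ring {
	uint32_t *base;			/* CPU address of the table */
	uint32_t  head;			/* byte offset the engine has reached */
	uint32_t  tail;			/* byte offset of the next free slot */
};

/* Return a pointer to the next free descriptor, or NULL if writing it
 * would overrun the slot the engine is still processing (the driver
 * instead busy-waits and refreshes head from the hardware). */
static uint32_t *ring_get_slot(struct desc_ring *ring)
{
	uint32_t next = ring->tail + DESC_DWORDS * sizeof(uint32_t);

	if (next >= TABLE_SIZE)		/* need to wrap? */
		next = 0;
	if (next == ring->head)		/* would catch up to the hardware */
		return NULL;

	return ring->base + ring->tail / sizeof(uint32_t);
}

/* Advance the write offset past the descriptor just filled in. */
static void ring_advance(struct desc_ring *ring)
{
	ring->tail += DESC_DWORDS * sizeof(uint32_t);
	if (ring->tail >= TABLE_SIZE)
		ring->tail = 0;
}
```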
diff --git a/linux-core/mach64_drv.c b/linux-core/mach64_drv.c index 4b44f182..400b9357 100644 --- a/linux-core/mach64_drv.c +++ b/linux-core/mach64_drv.c @@ -44,18 +44,16 @@ #define DRIVER_PATCHLEVEL 0 -#define DRIVER_IOCTLS \ - [DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { mach64_dma_buffers, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_INIT)] = { mach64_dma_init, 1, 1 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_CLEAR)] = { mach64_dma_clear, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_SWAP)] = { mach64_dma_swap, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_IDLE)] = { mach64_dma_idle, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_VERTEX)] = { mach64_dma_vertex, 1, 0 } - -#if 0 - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_BLIT)] = { mach64_blit, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_FLUSH)] = { mach64_flush, 1, 0 }, -#endif +#define DRIVER_IOCTLS \ + [DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { mach64_dma_buffers, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_INIT)] = { mach64_dma_init, 1, 1 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_CLEAR)] = { mach64_dma_clear, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_SWAP)] = { mach64_dma_swap, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_IDLE)] = { mach64_dma_idle, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_RESET)] = { mach64_engine_reset, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_VERTEX)] = { mach64_dma_vertex, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_BLIT)] = { mach64_dma_blit, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_FLUSH)] = { mach64_dma_flush, 1, 0 }, #include "drm_agpsupport.h" #include "drm_auth.h" diff --git a/linux/drm.h b/linux/drm.h index a37870a2..993df3b5 100644 --- a/linux/drm.h +++ b/linux/drm.h @@ -523,7 +523,6 @@ typedef struct drm_scatter_gather { #define DRM_IOCTL_MACH64_SWAP DRM_IO( 0x43) #define DRM_IOCTL_MACH64_CLEAR DRM_IOW( 0x44, drm_mach64_clear_t) #define DRM_IOCTL_MACH64_VERTEX DRM_IOW( 0x45, drm_mach64_vertex_t) -#if 0 #define DRM_IOCTL_MACH64_BLIT DRM_IOW( 0x46, drm_mach64_blit_t) -#endif +#define DRM_IOCTL_MACH64_FLUSH DRM_IO( 0x47) #endif diff --git a/linux/mach64.h b/linux/mach64.h index 244667f4..ad710851 100644 --- a/linux/mach64.h +++ b/linux/mach64.h @@ -46,9 +46,12 @@ /* DMA customization: */ #define __HAVE_DMA 1 +#define __HAVE_DMA_FREELIST 0 +#if 0 #define __HAVE_DMA_IRQ 1 #define __HAVE_DMA_IRQ_BH 1 #define __HAVE_SHARED_IRQ 1 +#endif /* called before installing service routine in _irq_install */ #define DRIVER_PREINSTALL() \ @@ -59,14 +62,16 @@ do { \ tmp = MACH64_READ(MACH64_CRTC_INT_CNTL); \ DRM_DEBUG("Before PREINSTALL: CRTC_INT_CNTL = 0x%08x\n", tmp); \ /* clear active interrupts */ \ - if ( tmp & (MACH64_VBLANK_INT | MACH64_BUSMASTER_EOL_INT) ) { \ + if ( tmp & (MACH64_CRTC_VBLANK_INT \ + | MACH64_CRTC_BUSMASTER_EOL_INT) ) { \ /* ack bits are the same as active interrupt bits, */ \ /* so write back tmp to clear active interrupts */ \ MACH64_WRITE( MACH64_CRTC_INT_CNTL, tmp ); \ } \ \ /* disable interrupts */ \ - tmp &= ~(MACH64_VBLANK_INT_EN | MACH64_BUSMASTER_EOL_INT_EN); \ + tmp &= ~(MACH64_CRTC_VBLANK_INT_EN \ + | MACH64_CRTC_BUSMASTER_EOL_INT_EN); \ MACH64_WRITE( MACH64_CRTC_INT_CNTL, tmp ); \ DRM_DEBUG("After PREINSTALL: CRTC_INT_CNTL = 0x%08x\n", tmp); \ \ @@ -82,14 +87,16 @@ do { \ tmp = MACH64_READ(MACH64_CRTC_INT_CNTL); \ DRM_DEBUG("Before POSTINSTALL: CRTC_INT_CNTL = 0x%08x\n", tmp); \ /* clear active interrupts */ \ - if ( tmp & (MACH64_VBLANK_INT | MACH64_BUSMASTER_EOL_INT) ) { \ + if ( tmp & (MACH64_CRTC_VBLANK_INT \ + | MACH64_CRTC_BUSMASTER_EOL_INT) ) { \ /* ack bits are the same as active interrupt bits, */ \ /* so write back tmp to 
clear active interrupts */ \ - MACH64_WRITE( MACH64_CRTC_INT_CNTL, tmp ); \ + MACH64_WRITE( MACH64_CRTC_INT_CNTL, tmp ); \ } \ \ /* enable interrupts */ \ - tmp |= MACH64_VBLANK_INT_EN | MACH64_BUSMASTER_EOL_INT_EN; \ + tmp |= (MACH64_CRTC_VBLANK_INT_EN \ + | MACH64_CRTC_BUSMASTER_EOL_INT_EN); \ MACH64_WRITE( MACH64_CRTC_INT_CNTL, tmp ); \ DRM_DEBUG("After POSTINSTALL: CRTC_INT_CNTL = 0x%08x\n", tmp); \ \ @@ -104,14 +111,16 @@ do { \ tmp = MACH64_READ(MACH64_CRTC_INT_CNTL); \ DRM_DEBUG("Before UNINSTALL: CRTC_INT_CNTL = 0x%08x\n", tmp); \ /* clear active interrupts */ \ - if ( tmp & (MACH64_VBLANK_INT | MACH64_BUSMASTER_EOL_INT) ) { \ + if ( tmp & (MACH64_CRTC_VBLANK_INT \ + | MACH64__CRTCBUSMASTER_EOL_INT) ) { \ /* ack bits are the same as active interrupt bits, */ \ /* so write back tmp to clear active interrupts */ \ MACH64_WRITE( MACH64_CRTC_INT_CNTL, tmp ); \ } \ \ /* disable interrupts */ \ - tmp &= ~(MACH64_VBLANK_INT_EN | MACH64_BUSMASTER_EOL_INT_EN); \ + tmp &= ~(MACH64_CRTC_VBLANK_INT_EN \ + | MACH64_CRTC_BUSMASTER_EOL_INT_EN); \ MACH64_WRITE( MACH64_CRTC_INT_CNTL, tmp ); \ DRM_DEBUG("After UNINSTALL: CRTC_INT_CNTL = 0x%08x\n", tmp); \ } \ diff --git a/linux/mach64_dma.c b/linux/mach64_dma.c index 56b2126a..6bc7513d 100644 --- a/linux/mach64_dma.c +++ b/linux/mach64_dma.c @@ -39,11 +39,11 @@ int mach64_do_cleanup_dma( drm_device_t *dev ); int mach64_handle_dma( drm_mach64_private_t *dev_priv ); -int mach64_do_dispatch_dma( drm_mach64_private_t *dev_priv ); int mach64_do_complete_blit( drm_mach64_private_t *dev_priv ); int mach64_do_wait_for_dma( drm_mach64_private_t *dev_priv ); int mach64_do_release_used_buffers( drm_mach64_private_t *dev_priv ); int mach64_init_freelist( drm_device_t *dev ); +void mach64_destroy_freelist( drm_device_t *dev ); static DECLARE_WAIT_QUEUE_HEAD(read_wait); @@ -63,18 +63,18 @@ void mach64_dma_service(int irq, void *device, struct pt_regs *regs) and ack the interrupt accordingly... Set flags for the handler to know that it needs to process accordingly... */ flags = MACH64_READ(MACH64_CRTC_INT_CNTL); - if (flags & MACH64_VBLANK_INT) + if (flags & MACH64_CRTC_VBLANK_INT) { /* VBLANK -- GUI-master dispatch and polling... */ - MACH64_WRITE(MACH64_CRTC_INT_CNTL, flags | MACH64_VBLANK_INT_AK); + MACH64_WRITE(MACH64_CRTC_INT_CNTL, flags | MACH64_CRTC_VBLANK_INT_AK); atomic_inc(&dev_priv->do_gui); - } - if (flags & MACH64_BUSMASTER_EOL_INT) + } + if (flags & MACH64_CRTC_BUSMASTER_EOL_INT) { /* Completion of BLIT op */ - MACH64_WRITE(MACH64_CRTC_INT_CNTL, flags | MACH64_BUSMASTER_EOL_INT_AK); + MACH64_WRITE(MACH64_CRTC_INT_CNTL, flags | MACH64_CRTC_BUSMASTER_EOL_INT_AK); atomic_inc(&dev_priv->do_blit); - } + } /* Check for an error condition in the engine... */ if (MACH64_READ(MACH64_FIFO_STAT) & 0x80000000) { @@ -116,7 +116,7 @@ void mach64_dma_immediate_bh(void *device) /* Check to see if we've been told to handle gui-mastering... */ if (atomic_read(&dev_priv->do_gui) > 0) { - atomic_set(&dev_priv->do_gui, 0); + atomic_set(&dev_priv->do_gui, 0); /* mach64_handle_dma(dev_priv); */ } @@ -187,6 +187,21 @@ int mach64_do_wait_for_dma( drm_mach64_private_t *dev_priv ) return ret; } +int mach64_do_dma_idle( drm_mach64_private_t *dev_priv ) { + int ret; + + /* wait for completion */ + if ( (ret = mach64_do_wait_for_idle( dev_priv )) < 0 ) { + DRM_ERROR( "%s failed\n", __FUNCTION__ ); + mach64_do_release_used_buffers( dev_priv ); + return ret; + } + + /* clean up after pass */ + mach64_do_release_used_buffers( dev_priv ); + return 0; +} + /* Reset the engine. 
This will stop the DMA if it is running. */ int mach64_do_engine_reset( drm_mach64_private_t *dev_priv ) @@ -282,6 +297,9 @@ void mach64_dump_engine_info( drm_mach64_private_t *dev_priv ) DRM_INFO( " MEM_ADDR_CONFIG = 0x%08x\n", MACH64_READ( MACH64_MEM_ADDR_CONFIG ) ); DRM_INFO( " MEM_BUF_CNTL = 0x%08x\n", MACH64_READ( MACH64_MEM_BUF_CNTL ) ); DRM_INFO( "\n" ); + DRM_INFO( " PAT_REG0 = 0x%08x\n", MACH64_READ( MACH64_PAT_REG0 ) ); + DRM_INFO( " PAT_REG1 = 0x%08x\n", MACH64_READ( MACH64_PAT_REG1 ) ); + DRM_INFO( "\n" ); DRM_INFO( " SCALE_3D_CNTL = 0x%08x\n", MACH64_READ( MACH64_SCALE_3D_CNTL ) ); DRM_INFO( " SCRATCH_REG0 = 0x%08x\n", MACH64_READ( MACH64_SCRATCH_REG0 ) ); DRM_INFO( " SCRATCH_REG1 = 0x%08x\n", MACH64_READ( MACH64_SCRATCH_REG1 ) ); @@ -309,8 +327,7 @@ static int mach64_bm_dma_test( drm_device_t *dev ) u32 data_addr; u32 *table, *data; u32 regs[3], expected[3]; - u32 src_cntl; - int i; + int i, count; DRM_DEBUG( "%s\n", __FUNCTION__ ); @@ -327,32 +344,39 @@ static int mach64_bm_dma_test( drm_device_t *dev ) data_addr = (u32) data_handle; } - src_cntl = MACH64_READ( MACH64_SRC_CNTL ); - src_cntl &= ~MACH64_SRC_BM_ENABLE; - MACH64_WRITE( MACH64_SRC_CNTL, src_cntl ); + MACH64_WRITE( MACH64_SRC_CNTL, 0 ); MACH64_WRITE( MACH64_VERTEX_1_S, 0x00000000 ); MACH64_WRITE( MACH64_VERTEX_1_T, 0x00000000 ); MACH64_WRITE( MACH64_VERTEX_1_W, 0x00000000 ); - + for (i=0; i < 3; i++) { DRM_DEBUG( "(Before DMA Transfer) reg %d = 0x%08x\n", i, MACH64_READ( (MACH64_VERTEX_1_S + i*4) ) ); } - /* 1_90 = VERTEX_1_S, setup 3 sequential reg writes */ /* use only s,t,w vertex registers so we don't have to mask any results */ - data[0] = cpu_to_le32(0x00020190); - data[1] = expected[0] = 0x11111111; - data[2] = expected[1] = 0x22222222; - data[3] = expected[2] = 0x33333333; - data[4] = cpu_to_le32(0x0000006d); /* SRC_CNTL */ - data[5] = cpu_to_le32(src_cntl); + /* fill up a buffer with sets of 3 consecutive writes starting with VERTEX_1_S */ + count = 0; + + data[count++] = cpu_to_le32(0x00020190); /* 1_90 = VERTEX_1_S */ + data[count++] = expected[0] = 0x11111111; + data[count++] = expected[1] = 0x22222222; + data[count++] = expected[2] = 0x33333333; + + while (count < 1020) { + data[count++] = cpu_to_le32(0x00020190); + data[count++] = 0x11111111; + data[count++] = 0x22222222; + data[count++] = 0x33333333; + } + data[count++] = cpu_to_le32(0x0000006d); /* SRC_CNTL */ + data[count++] = 0; DRM_DEBUG( "Preparing table ...\n" ); table[0] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET); table[1] = cpu_to_le32(data_addr); - table[2] = cpu_to_le32(6 * sizeof( u32 ) | 0x80000000 | 0x40000000); + table[2] = cpu_to_le32(count * sizeof( u32 ) | 0x80000000 | 0x40000000); table[3] = 0; DRM_DEBUG( "table[0] = 0x%08x\n", table[0] ); @@ -360,7 +384,7 @@ static int mach64_bm_dma_test( drm_device_t *dev ) DRM_DEBUG( "table[2] = 0x%08x\n", table[2] ); DRM_DEBUG( "table[3] = 0x%08x\n", table[3] ); - for ( i = 0 ; i < 6 ; i++) { + for ( i = 0 ; i < count ; i++) { DRM_DEBUG( " data[%d] = 0x%08x\n", i, data[i] ); } @@ -388,7 +412,7 @@ static int mach64_bm_dma_test( drm_device_t *dev ) MACH64_WRITE( MACH64_BM_GUI_TABLE_CMD, dev_priv->table_addr | MACH64_CIRCULAR_BUF_SIZE_16KB ); - + MACH64_WRITE( MACH64_SRC_CNTL, MACH64_SRC_BM_ENABLE | MACH64_SRC_BM_SYNC | MACH64_SRC_BM_OP_SYSTEM_TO_REG ); @@ -398,6 +422,7 @@ static int mach64_bm_dma_test( drm_device_t *dev ) MACH64_WRITE( MACH64_DST_HEIGHT_WIDTH, 0 ); DRM_INFO( "waiting for idle...\n" ); + if ( ( i = mach64_do_wait_for_idle( dev_priv ) ) ) { /* engine locked up, dump register 
state and reset */ DRM_INFO( "mach64_do_wait_for_idle failed (result=%d)\n", i); @@ -409,8 +434,9 @@ static int mach64_bm_dma_test( drm_device_t *dev ) DRM_INFO( "returning ...\n" ); return i; } - DRM_INFO( "waiting for idle...done\n" ); + DRM_INFO( "waiting for idle...done\n" ); + /* Check register values to see if the GUI master operation succeeded */ for ( i = 0; i < 3; i++ ) { regs[i] = MACH64_READ( (MACH64_VERTEX_1_S + i*4) ); @@ -523,6 +549,8 @@ static int mach64_do_dma_init( drm_device_t *dev, drm_mach64_init_t *init ) } } + dev->dev_private = (void *) dev_priv; + if ( !init->pseudo_dma ) { /* enable block 1 registers and bus mastering */ MACH64_WRITE( MACH64_BUS_CNTL, @@ -554,11 +582,12 @@ static int mach64_do_dma_init( drm_device_t *dev, drm_mach64_init_t *init ) /* create pci pool for descriptor memory */ DRM_INFO( "Creating pci pool\n"); + dev_priv->table_size = 0x4000; dev_priv->pool = pci_pool_create( "mach64", /* name */ NULL, /* dev */ - 0x4000, /* size - 16KB */ - 0x4000, /* align - 16KB */ - 0x4000, /* alloc - 16KB */ + dev_priv->table_size, /* size - 16KB */ + dev_priv->table_size, /* align - 16KB */ + dev_priv->table_size, /* alloc - 16KB */ SLAB_ATOMIC /* flags */ ); @@ -591,12 +620,23 @@ static int mach64_do_dma_init( drm_device_t *dev, drm_mach64_init_t *init ) MACH64_WRITE( MACH64_BM_GUI_TABLE_CMD, ( dev_priv->table_addr | MACH64_CIRCULAR_BUF_SIZE_16KB ) ); + /* setup offsets for physical address of table start and end */ + dev_priv->table_start = dev_priv->table_addr; + dev_priv->table_end = dev_priv->table_start; + /* setup write pointer to descriptor table */ + dev_priv->table_wptr = (u32 *) dev_priv->cpu_addr_table; + /* try a DMA GUI-mastering pass and fall back to MMIO if it fails */ dev->dev_private = (void *) dev_priv; DRM_INFO( "Starting DMA test...\n"); if ( (ret=mach64_bm_dma_test( dev )) == 0 ) { +#if 1 + dev_priv->driver_mode = MACH64_MODE_DMA_ASYNC; + DRM_INFO( "DMA test succeeded, using asynchronous DMA mode\n"); +#else dev_priv->driver_mode = MACH64_MODE_DMA_SYNC; DRM_INFO( "DMA test succeeded, using synchronous DMA mode\n"); +#endif } else { dev_priv->driver_mode = MACH64_MODE_MMIO; DRM_INFO( "DMA test failed (ret=%d), using pseudo-DMA mode\n", ret ); @@ -606,6 +646,12 @@ static int mach64_do_dma_init( drm_device_t *dev, drm_mach64_init_t *init ) DRM_INFO( "Forcing pseudo-DMA mode\n"); } + dev_priv->sarea_priv->last_frame = 0; + MACH64_WRITE( MACH64_LAST_FRAME_REG, dev_priv->sarea_priv->last_frame ); + + dev_priv->sarea_priv->last_dispatch = 0; + MACH64_WRITE( MACH64_LAST_DISPATCH_REG, dev_priv->sarea_priv->last_dispatch ); + /* Set up the freelist, empty (placeholder), pending, and DMA request queues... 
*/ INIT_LIST_HEAD(&dev_priv->free_list); INIT_LIST_HEAD(&dev_priv->empty_list); @@ -614,11 +660,14 @@ static int mach64_do_dma_init( drm_device_t *dev, drm_mach64_init_t *init ) mach64_init_freelist( dev ); +#if 0 /* Set up for interrupt handling proper- clear state on the handler - * The handler is enabled by the DDX via the DRM(control) ioctl once we return */ + * The handler is enabled by the DDX via the DRM(control) ioctl once we return + */ atomic_set(&dev_priv->do_gui, 0); atomic_set(&dev_priv->do_blit, 0); - atomic_set(&dev_priv->dma_timeout, -1); + atomic_set(&dev_priv->dma_timeout, -1); +#endif dev->dev_private = (void *) dev_priv; @@ -637,9 +686,7 @@ static int mach64_do_dma_init( drm_device_t *dev, drm_mach64_init_t *init ) */ int mach64_handle_dma( drm_mach64_private_t *dev_priv ) { - struct list_head *ptr; - int i; - int timeout; + int timeout; timeout = atomic_read(&dev_priv->dma_timeout); @@ -657,17 +704,6 @@ int mach64_handle_dma( drm_mach64_private_t *dev_priv ) /* Now, check for queued buffers... */ if (!list_empty(&dev_priv->dma_queue)) { - ptr = dev_priv->dma_queue.next; - for(i = 0; i < MACH64_DMA_SIZE && !list_empty(&dev_priv->dma_queue); i++) - { - /* FIXME -- We REALLY need to be doing this based off of not just - a DMA-able size that's tolerable, but also rounding up/down by - what was submitted to us- if the client's submitting 3 buffer - submits, we really want to push all three at the same time to - the DMA channel. */ - list_del(ptr); - list_add_tail(ptr, &dev_priv->pending); - } atomic_set(&dev_priv->dma_timeout, 0); } @@ -675,17 +711,18 @@ int mach64_handle_dma( drm_mach64_private_t *dev_priv ) if (atomic_read(&dev_priv->dma_timeout) == 0) { /* Make sure we're locked and fire off the prepped pass */ - mach64_do_dispatch_dma(dev_priv); + mach64_do_dma_flush(dev_priv); } } else { /* Check to see if we've got a GUI-Master going... */ - if ((timeout > -1) && !(MACH64_READ( MACH64_BUS_CNTL ) & MACH64_BUS_MASTER_DIS)) + if ((timeout > -1) && (MACH64_READ( MACH64_SRC_CNTL ) & MACH64_SRC_BM_ENABLE)) { /* Check for DMA timeout */ if (timeout > MACH64_DMA_TIMEOUT) { + DRM_INFO("%s, dma timed out at: %d", __FUNCTION__, timeout); /* Assume the engine's hung bigtime... */ mach64_do_engine_reset(dev_priv); mach64_do_release_used_buffers(dev_priv); @@ -718,32 +755,44 @@ int mach64_do_complete_blit( drm_mach64_private_t *dev_priv ) /* - Take the pending list and build up a descriptor table for - GUI-Master use, then fire off the DMA engine with the list. - (The list includes a register reset buffer that the DRM - only controls) -*/ - -/* FIXME: need to add commands to terminate DMA at the end of the stream */ - -int mach64_do_dispatch_dma( drm_mach64_private_t *dev_priv ) + * Take the pending list and build up a descriptor table for + * GUI-Master use, then fire off the DMA engine with the list. + * (We add a register reset buffer that the DRM only controls) + */ +static int mach64_do_dispatch_real_dma( drm_mach64_private_t *dev_priv ) { - u32 *table_ptr = (u32 *) dev_priv->cpu_addr_table; - struct list_head *ptr; - drm_mach64_freelist_t *entry; + u32 *table_ptr = dev_priv->table_wptr; + u32 table_start = dev_priv->table_end; + u32 table_end; + int wrapped = 0; + struct list_head *ptr, *tmp; + drm_mach64_freelist_t *entry; drm_buf_t *buf; - int size, i, pages, remainder, tableDwords; + int bytes, pages, remainder, tableDwords; u32 address, page, end_flag; u32 *p; + u32 reg; + int ret, i, t; + + /* Need to wrap ? 
*/ + if ( table_start >= (dev_priv->table_addr + dev_priv->table_size) ) { + table_start = dev_priv->table_addr; + table_ptr = (u32 *)dev_priv->cpu_addr_table; + wrapped = 1; + } + table_end = table_start; tableDwords = 0; - /* Iterate the pending list build a descriptor table accordingly... */ - list_for_each(ptr, &dev_priv->pending) + /* bump the counter for buffer aging */ + dev_priv->sarea_priv->last_dispatch++; + + /* Iterate the queue and build a descriptor table accordingly... */ + list_for_each(ptr, &dev_priv->dma_queue) { entry = list_entry(ptr, drm_mach64_freelist_t, list); buf = entry->buf; - size = buf->used; + bytes = buf->used; if (dev_priv->is_pci) { address = (u32) virt_to_bus((void *)buf->address); @@ -754,10 +803,44 @@ int mach64_do_dispatch_dma( drm_mach64_private_t *dev_priv ) buf->offset); } - pages = (size + DMA_CHUNKSIZE - 1) / DMA_CHUNKSIZE; + if (ptr->next == &dev_priv->dma_queue) { + /* FIXME: Make sure we don't overflow */ + if (MACH64_BUFFER_SIZE - buf->used < 24) { + DRM_ERROR("buffer overflow\n"); + return 0; + } + p[(bytes/4) ] = cpu_to_le32(DMAREG(MACH64_LAST_DISPATCH_REG)); + p[(bytes/4)+1] = cpu_to_le32(dev_priv->sarea_priv->last_dispatch); + reg = MACH64_READ( MACH64_BUS_CNTL ); + reg |= MACH64_BUS_MASTER_DIS | MACH64_BUS_EXT_REG_EN; + p[(bytes/4)+2] = cpu_to_le32(DMAREG(MACH64_BUS_CNTL)); + p[(bytes/4)+3] = cpu_to_le32(reg); + p[(bytes/4)+4] = cpu_to_le32(DMAREG(MACH64_SRC_CNTL)); + p[(bytes/4)+5] = cpu_to_le32(0); + bytes += 24; + } + + pages = (bytes + DMA_CHUNKSIZE - 1) / DMA_CHUNKSIZE; for ( i = 0 ; i < pages-1 ; i++ ) { page = address + i * DMA_CHUNKSIZE; + /* Check to see if we caught up to the last pass */ + for (t = 0; (table_end == dev_priv->table_start) && + t < dev_priv->usec_timeout; t++) { + /* update the hardware's current position */ + GET_RING_HEAD( dev_priv ); + udelay( 1 ); + } + /* See if we timed out */ + if (t == dev_priv->usec_timeout) { + DRM_INFO( "%s: ring wait failed pre-dispatch, resetting...\n", + __FUNCTION__); + mach64_dump_engine_info( dev_priv ); + mach64_do_engine_reset( dev_priv ); + mach64_do_release_used_buffers( dev_priv ); + return -EBUSY; + } + table_ptr[DMA_FRAME_BUF_OFFSET] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET); table_ptr[DMA_SYS_MEM_ADDR] = cpu_to_le32(page); @@ -766,14 +849,38 @@ int mach64_do_dispatch_dma( drm_mach64_private_t *dev_priv ) tableDwords += 4; table_ptr += 4; + table_end += 16; + /* Need to wrap ? */ + if ( table_end >= (dev_priv->table_addr + dev_priv->table_size) ) { + table_end = dev_priv->table_addr; + table_ptr = (u32 *)dev_priv->cpu_addr_table; + wrapped = 1; + } + } + + /* Check to see if we caught up to the last pass */ + for (t = 0; (table_end == dev_priv->table_start) && + t < dev_priv->usec_timeout; t++) { + /* update the hardware's current position */ + GET_RING_HEAD( dev_priv ); + udelay( 1 ); + } + /* See if we timed out */ + if (t == dev_priv->usec_timeout) { + DRM_INFO( "%s: ring wait failed pre-dispatch, resetting...\n", + __FUNCTION__); + mach64_dump_engine_info( dev_priv ); + mach64_do_engine_reset( dev_priv ); + mach64_do_release_used_buffers( dev_priv ); + return -EBUSY; } /* if this is the last buffer, we need to set the final descriptor flag */ - end_flag = (ptr->next == &dev_priv->pending) ? 0x80000000 : 0; + end_flag = (ptr->next == &dev_priv->dma_queue) ? 
0x80000000 : 0; /* generate the final descriptor for any remaining commands in this buffer */ page = address + i * DMA_CHUNKSIZE; - remainder = size - i * DMA_CHUNKSIZE; + remainder = bytes - i * DMA_CHUNKSIZE; table_ptr[DMA_FRAME_BUF_OFFSET] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET); table_ptr[DMA_SYS_MEM_ADDR] = cpu_to_le32(page); table_ptr[DMA_COMMAND] = cpu_to_le32(remainder | end_flag | 0x40000000); @@ -781,24 +888,59 @@ int mach64_do_dispatch_dma( drm_mach64_private_t *dev_priv ) tableDwords += 4; table_ptr += 4; + table_end += 16; + /* Need to wrap ? */ + if ( !end_flag && (table_end >= (dev_priv->table_addr + dev_priv->table_size)) ) { + table_end = dev_priv->table_addr; + table_ptr = (u32 *)dev_priv->cpu_addr_table; + wrapped = 1; + } } - + + dev_priv->table_wptr = table_ptr; + dev_priv->table_start = table_start; + dev_priv->table_end = table_end; + /* Now, dispatch the whole lot to the gui-master engine */ /* flush write combining */ mach64_flush_write_combine(); - mach64_do_wait_for_idle( dev_priv ); - - /* enable bus mastering */ + + /* Ensure last pass completed without locking up */ + if ((ret=mach64_do_wait_for_idle( dev_priv )) < 0) { + DRM_INFO( "%s: idle failed before dispatch, resetting engine\n", + __FUNCTION__); + mach64_dump_engine_info( dev_priv ); + mach64_do_engine_reset( dev_priv ); + mach64_do_release_used_buffers( dev_priv ); + return ret; + } + + /* release completed buffers from the last pass */ + mach64_do_release_used_buffers( dev_priv ); + + /* Move everything in the queue to the pending list */ + i = 0; + list_for_each_safe(ptr, tmp, &dev_priv->dma_queue) + { + entry = list_entry(ptr, drm_mach64_freelist_t, list); + entry->age = dev_priv->sarea_priv->last_dispatch; + list_del(ptr); + entry->buf->waiting = 0; + entry->buf->pending = 1; + list_add_tail(ptr, &dev_priv->pending); + i++; + } + + /* enable bus mastering and block 1 registers */ MACH64_WRITE( MACH64_BUS_CNTL, - ( MACH64_READ(MACH64_BUS_CNTL) - & ~MACH64_BUS_MASTER_DIS ) ); - /* enable VBLANK interrupt */ - MACH64_WRITE( MACH64_CRTC_INT_CNTL, MACH64_READ(MACH64_CRTC_INT_CNTL) | - MACH64_VBLANK_INT_EN); - /* reset descriptor table head */ - MACH64_WRITE( MACH64_BM_GUI_TABLE_CMD, ( dev_priv->table_addr + ( MACH64_READ(MACH64_BUS_CNTL) & ~MACH64_BUS_MASTER_DIS ) + | MACH64_BUS_EXT_REG_EN ); + + /* reset descriptor table ring head */ + MACH64_WRITE( MACH64_BM_GUI_TABLE_CMD, ( dev_priv->table_start | MACH64_CIRCULAR_BUF_SIZE_16KB ) ); + /* enable GUI-master operation */ MACH64_WRITE( MACH64_SRC_CNTL, MACH64_SRC_BM_ENABLE | MACH64_SRC_BM_SYNC | @@ -806,38 +948,149 @@ int mach64_do_dispatch_dma( drm_mach64_private_t *dev_priv ) /* kick off the transfer */ MACH64_WRITE( MACH64_DST_HEIGHT_WIDTH, 0 ); -#if 0 - if ( dev_priv->driver_mode == MACH64_MODE_DMA_SYNC ) - if ( mach64_do_wait_for_idle( dev_priv ) ) { - DRM_INFO( "mach64_do_wait_for_idle failed\n" ); - DRM_INFO( "resetting engine ...\n" ); + DRM_DEBUG( "%s: dispatched %d buffers\n", __FUNCTION__, i ); + DRM_DEBUG( "%s: table start:0x%08x end:0x%08x wptr:0x%x %s\n", __FUNCTION__, + dev_priv->table_start, + dev_priv->table_end, + (u32) dev_priv->table_wptr, + wrapped ? 
"wrapped" : ""); + + if ( dev_priv->driver_mode == MACH64_MODE_DMA_SYNC ) { + if ( (ret = mach64_do_dma_idle( dev_priv )) < 0 ) { + DRM_INFO( "%s: idle failed after dispatch, resetting engine\n", + __FUNCTION__); mach64_dump_engine_info( dev_priv ); mach64_do_engine_reset( dev_priv ); - return -EBUSY; + mach64_do_release_used_buffers( dev_priv ); + return ret; } + mach64_do_release_used_buffers( dev_priv ); } -#endif + return 0; } +static int mach64_do_dispatch_pseudo_dma( drm_mach64_private_t *dev_priv ) +{ + struct list_head *ptr; + struct list_head *tmp; + drm_mach64_freelist_t *entry; + drm_buf_t *buf; + u32 *p; + u32 used, fifo; + int ret; -/* - Release completed, releaseable buffers to the freelist, currently - ignore flags for buffers that aren't flagged for release (shouldn't - be any, but you never know what someone's going to do to us...). -*/ -int mach64_do_release_used_buffers( drm_mach64_private_t *dev_priv ) + if ( (ret=mach64_do_wait_for_idle( dev_priv )) < 0) { + DRM_INFO( "%s: idle failed before dispatch, resetting engine\n", + __FUNCTION__); + mach64_dump_engine_info( dev_priv ); + mach64_do_engine_reset( dev_priv ); + mach64_do_release_used_buffers( dev_priv ); + return ret; + } + + list_for_each_safe(ptr, tmp, &dev_priv->dma_queue) + { + entry = list_entry(ptr, drm_mach64_freelist_t, list); + buf = entry->buf; + + /* Hand feed the buffer to the card via MMIO, waiting for the fifo + * every 16 writes + */ + used = buf->used >> 2; + fifo = 0; + + if (dev_priv->is_pci) { + p = (u32 *) buf->address; + } else { + p = (u32 *)((char *)dev_priv->buffers->handle + buf->offset); + } + + while ( used ) { + u32 reg, count; + + reg = le32_to_cpu(*p++); + used--; + + count = (reg >> 16) + 1; + reg = reg & 0xffff; + reg = MMSELECT( reg ); + while ( count && used ) { + if ( !fifo ) { + if ( (ret=mach64_do_wait_for_fifo( dev_priv, 16 )) < 0 ) { + return ret; + } + + fifo = 16; + } + --fifo; + /* data is already little-endian */ + MACH64_WRITE(reg, le32_to_cpu(*p++)); + used--; + + reg += 4; + count--; + } + } + + list_del(ptr); + entry->buf->waiting = 0; + entry->buf->pending = 1; + list_add_tail(ptr, &dev_priv->pending); + + } + + /* free the "pending" list, since we're done */ + mach64_do_release_used_buffers( dev_priv ); + + DRM_DEBUG( "%s completed\n", __FUNCTION__ ); + return 0; +} + +int mach64_do_dma_flush( drm_mach64_private_t *dev_priv ) { - struct list_head *ptr; - struct list_head *tmp; + DRM_DEBUG("%s\n", __FUNCTION__); + + if (list_empty(&dev_priv->dma_queue)) + return 0; + + dev_priv->sarea_priv->dirty |= (MACH64_UPLOAD_CONTEXT | + MACH64_UPLOAD_MISC | + MACH64_UPLOAD_CLIPRECTS); - /* Iterate the pending list and shove the whole lot into the freelist... */ + + if (dev_priv->driver_mode == MACH64_MODE_MMIO) + return mach64_do_dispatch_pseudo_dma( dev_priv ); + else + return mach64_do_dispatch_real_dma( dev_priv ); +} + +/* IMPORTANT: This function should only be called when the engine is idle or locked up, + * as it assumes all buffers in the pending list have been completed by the hardware. + */ +int mach64_do_release_used_buffers( drm_mach64_private_t *dev_priv ) +{ + struct list_head *ptr; + struct list_head *tmp; + drm_mach64_freelist_t *entry; + int i; + + if ( list_empty(&dev_priv->pending) ) + return 0; + + /* Iterate the pending list and move all buffers into the freelist... 
*/ + i = 0; list_for_each_safe(ptr, tmp, &dev_priv->pending) { + entry = list_entry(ptr, drm_mach64_freelist_t, list); + entry->buf->pending = 0; list_del(ptr); list_add_tail(ptr, &dev_priv->free_list); + i++; } - + + DRM_DEBUG( "%s: released %d buffers from pending list\n", __FUNCTION__, i ); + return 0; } @@ -869,6 +1122,9 @@ int mach64_do_cleanup_dma( drm_device_t *dev ) DRM_IOREMAPFREE( dev_priv->buffers ); } + DRM_INFO( "destroying dma buffer freelist\n" ); + mach64_destroy_freelist( dev ); + DRM(free)( dev_priv, sizeof(drm_mach64_private_t), DRM_MEM_DRIVER ); dev->dev_private = NULL; @@ -913,8 +1169,24 @@ int mach64_dma_idle( struct inode *inode, struct file *filp, DRM_DEBUG( "%s\n", __FUNCTION__ ); LOCK_TEST_WITH_RETURN( dev ); - - return mach64_do_wait_for_idle( dev_priv ); + + return mach64_do_dma_idle( dev_priv ); +} + +int mach64_dma_flush( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->dev; + drm_mach64_private_t *dev_priv = dev->dev_private; + + DRM_DEBUG( "%s\n", __FUNCTION__ ); + + LOCK_TEST_WITH_RETURN( dev ); + + VB_AGE_TEST_WITH_RETURN( dev_priv ); + + return mach64_do_dma_flush( dev_priv ); } int mach64_engine_reset( struct inode *inode, struct file *filp, @@ -940,59 +1212,149 @@ int mach64_engine_reset( struct inode *inode, struct file *filp, int mach64_init_freelist( drm_device_t *dev ) { + drm_device_dma_t *dma = dev->dma; + drm_mach64_private_t *dev_priv = dev->dev_private; + drm_mach64_freelist_t *entry; + struct list_head *ptr; + int i; + + DRM_DEBUG("%s: adding %d buffers to freelist\n", __FUNCTION__, dma->buf_count); + + for ( i = 0 ; i < dma->buf_count ; i++ ) { + if ((entry = (drm_mach64_freelist_t *) DRM(alloc)(sizeof(drm_mach64_freelist_t), DRM_MEM_BUFLISTS)) == NULL) + return -ENOMEM; + memset( entry, 0, sizeof(drm_mach64_freelist_t) ); + entry->buf = dma->buflist[i]; + entry->age = 0; + ptr = &entry->list; + list_add_tail(ptr, &dev_priv->free_list); + } + return 0; } -drm_buf_t *mach64_freelist_get( drm_device_t *dev ) +void mach64_destroy_freelist( drm_device_t *dev ) { - drm_device_dma_t *dma = dev->dma; drm_mach64_private_t *dev_priv = dev->dev_private; - drm_mach64_buf_priv_t *buf_priv; - drm_buf_t *buf; - int i, t; + drm_mach64_freelist_t *entry; + struct list_head *ptr; + struct list_head *tmp; - /* FIXME: Optimize -- use freelist code */ + DRM_DEBUG("%s\n", __FUNCTION__); - for ( i = 0 ; i < dma->buf_count ; i++ ) { - buf = dma->buflist[i]; - buf_priv = buf->dev_private; - if ( buf->pid == 0 ) - return buf; + list_for_each_safe(ptr, tmp, &dev_priv->pending) + { + list_del(ptr); + entry = list_entry(ptr, drm_mach64_freelist_t, list); + DRM(free)(entry, sizeof(*entry), DRM_MEM_BUFLISTS); } -#if 0 - for ( t = 0 ; t < dev_priv->usec_timeout ; t++ ) { - u32 done_age = MACH64_READ( MACH64_LAST_DISPATCH_REG ); - - for ( i = 0 ; i < dma->buf_count ; i++ ) { - buf = dma->buflist[i]; - buf_priv = buf->dev_private; - if ( buf->pending && buf_priv->age <= done_age ) { - /* The buffer has been processed, so it - * can now be used. 
- */ - buf->pending = 0; - return buf; + list_for_each_safe(ptr, tmp, &dev_priv->dma_queue) + { + list_del(ptr); + entry = list_entry(ptr, drm_mach64_freelist_t, list); + DRM(free)(entry, sizeof(*entry), DRM_MEM_BUFLISTS); + } + + list_for_each_safe(ptr, tmp, &dev_priv->empty_list) + { + list_del(ptr); + entry = list_entry(ptr, drm_mach64_freelist_t, list); + DRM(free)(entry, sizeof(*entry), DRM_MEM_BUFLISTS); + } + + list_for_each_safe(ptr, tmp, &dev_priv->free_list) + { + list_del(ptr); + entry = list_entry(ptr, drm_mach64_freelist_t, list); + DRM(free)(entry, sizeof(*entry), DRM_MEM_BUFLISTS); + } +} + +drm_buf_t *mach64_freelist_get( drm_mach64_private_t *dev_priv ) +{ + drm_mach64_freelist_t *entry; + struct list_head *ptr; + struct list_head *tmp; + int t; + + if ( list_empty(&dev_priv->free_list) ) { + u32 done_age = 0; + if ( list_empty( &dev_priv->pending ) ) { + /* All 3 lists should never be empty - this is here for debugging */ + if ( list_empty( &dev_priv->dma_queue ) ) { + DRM_ERROR( "Couldn't get buffer - all lists empty\n" ); + return NULL; + } else { + /* There's nothing to recover, so flush the queue */ + mach64_do_dma_flush( dev_priv ); } } - udelay( 1 ); + + for ( t = 0 ; t < dev_priv->usec_timeout ; t++ ) { + done_age = MACH64_READ( MACH64_LAST_DISPATCH_REG ); + /* Look for a completed buffer and bail out of the loop + * as soon as we find one -- don't waste time trying + * to free extra bufs here, leave that to do_release_used_buffers + */ + list_for_each_safe(ptr, tmp, &dev_priv->pending) { + entry = list_entry(ptr, drm_mach64_freelist_t, list); + if (entry->age <= done_age && done_age > 0) { + /* found a processed buffer */ + entry->buf->pending = 0; + list_del(ptr); + list_add_tail(ptr, &dev_priv->free_list); + DRM_DEBUG( "%s: freed processed buffer.\n", __FUNCTION__ ); + goto _freelist_entry_found; + } + } + udelay( 1 ); + } + + DRM_ERROR( "timeout waiting for buffers: last dispatch reg: %d last_dispatch: %d\n", done_age, dev_priv->sarea_priv->last_dispatch ); + return NULL; } -#endif - DRM_ERROR( "returning NULL!\n" ); - return NULL; +_freelist_entry_found: + ptr = dev_priv->free_list.next; + list_del(ptr); + entry = list_entry(ptr, drm_mach64_freelist_t, list); + entry->buf->used = 0; + list_add_tail(ptr, &dev_priv->empty_list); + return entry->buf; } -void mach64_freelist_reset( drm_device_t *dev ) +/* Engine must be idle and buffers recleaimed before calling this */ +void mach64_freelist_reset( drm_mach64_private_t *dev_priv ) { - drm_device_dma_t *dma = dev->dma; - int i; + drm_mach64_freelist_t *entry; + struct list_head *ptr; - for ( i = 0 ; i < dma->buf_count ; i++ ) { - drm_buf_t *buf = dma->buflist[i]; - drm_mach64_buf_priv_t *buf_priv = buf->dev_private; - buf_priv->age = 0; + DRM_DEBUG("%s\n", __FUNCTION__); + + list_for_each(ptr, &dev_priv->dma_queue) + { + entry = list_entry(ptr, drm_mach64_freelist_t, list); + entry->age = 0; } + + list_for_each(ptr, &dev_priv->empty_list) + { + entry = list_entry(ptr, drm_mach64_freelist_t, list); + entry->age = 0; + } + + list_for_each(ptr, &dev_priv->free_list) + { + entry = list_entry(ptr, drm_mach64_freelist_t, list); + entry->age = 0; + } + + /* Pending list should be empty if engine is idle and buffers were released */ + if (!list_empty(&dev_priv->pending)) { + DRM_ERROR("Pending list not empty in freelist_reset!\n"); + } + } @@ -1004,9 +1366,10 @@ static int mach64_dma_get_buffers( drm_device_t *dev, drm_dma_t *d ) { int i; drm_buf_t *buf; + drm_mach64_private_t *dev_priv = dev->dev_private; for ( i = 
d->granted_count ; i < d->request_count ; i++ ) { - buf = mach64_freelist_get( dev ); + buf = mach64_freelist_get( dev_priv ); if ( !buf ) return -EAGAIN; buf->pid = current->pid; @@ -1042,11 +1405,10 @@ int mach64_dma_buffers( struct inode *inode, struct file *filp, drm_device_t *dev = priv->dev; drm_mach64_private_t *dev_priv = dev->dev_private; drm_device_dma_t *dma = dev->dma; - struct list_head *ptr; - drm_mach64_freelist_t *entry; drm_dma_t d; int ret = 0; int i; + drm_buf_t *buf; LOCK_TEST_WITH_RETURN( dev ); @@ -1061,18 +1423,13 @@ int mach64_dma_buffers( struct inode *inode, struct file *filp, { for (i = 0; i < d.send_count ; i++) { - if (!list_empty(&dev_priv->empty_list)) - { - ptr = dev_priv->empty_list.next; - list_del(ptr); - entry = list_entry(ptr, drm_mach64_freelist_t, list); - entry->buf = dma->buflist[d.send_indices[i]]; - list_add_tail(ptr, &dev_priv->dma_queue); - } - else - { - return -EFAULT; + buf = dma->buflist[d.send_indices[i]]; + if (buf->pending) { + DRM_ERROR( "sending pending buffer %d\n", d.send_indices[i] ); + return -EINVAL; } + /* Add buf to queue */ + DMAADVANCE( dev_priv ); } } else diff --git a/linux/mach64_drm.h b/linux/mach64_drm.h index 83b5c6ee..52a49cf8 100644 --- a/linux/mach64_drm.h +++ b/linux/mach64_drm.h @@ -73,6 +73,10 @@ */ #define MACH64_BUFFER_SIZE 16384 +/* Byte offsets for host blit buffer data + */ +#define MACH64_HOSTDATA_BLIT_OFFSET 104 + /* Keep these small for testing. */ #define MACH64_NR_SAREA_CLIPRECTS 8 @@ -191,7 +195,7 @@ typedef struct drm_mach64_vertex { int count; /* Number of vertices in buffer */ int discard; /* Client finished with buffer? */ } drm_mach64_vertex_t; -#if 0 + typedef struct drm_mach64_blit { int idx; int pitch; @@ -200,5 +204,5 @@ typedef struct drm_mach64_blit { unsigned short x, y; unsigned short width, height; } drm_mach64_blit_t; -#endif + #endif diff --git a/linux/mach64_drv.c b/linux/mach64_drv.c index 4b44f182..400b9357 100644 --- a/linux/mach64_drv.c +++ b/linux/mach64_drv.c @@ -44,18 +44,16 @@ #define DRIVER_PATCHLEVEL 0 -#define DRIVER_IOCTLS \ - [DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { mach64_dma_buffers, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_INIT)] = { mach64_dma_init, 1, 1 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_CLEAR)] = { mach64_dma_clear, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_SWAP)] = { mach64_dma_swap, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_IDLE)] = { mach64_dma_idle, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_VERTEX)] = { mach64_dma_vertex, 1, 0 } - -#if 0 - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_BLIT)] = { mach64_blit, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_MACH64_FLUSH)] = { mach64_flush, 1, 0 }, -#endif +#define DRIVER_IOCTLS \ + [DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { mach64_dma_buffers, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_INIT)] = { mach64_dma_init, 1, 1 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_CLEAR)] = { mach64_dma_clear, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_SWAP)] = { mach64_dma_swap, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_IDLE)] = { mach64_dma_idle, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_RESET)] = { mach64_engine_reset, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_VERTEX)] = { mach64_dma_vertex, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_BLIT)] = { mach64_dma_blit, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_MACH64_FLUSH)] = { mach64_dma_flush, 1, 0 }, #include "drm_agpsupport.h" #include "drm_auth.h" diff --git a/linux/mach64_drv.h b/linux/mach64_drv.h index 09a89ecc..01947d26 100644 --- a/linux/mach64_drv.h +++ b/linux/mach64_drv.h @@ -37,6 +37,7 @@ typedef struct 
drm_mach64_freelist { struct list_head list; /* Linux LIST structure... */ drm_buf_t *buf; + unsigned int age; } drm_mach64_freelist_t; typedef struct drm_mach64_private { @@ -64,11 +65,16 @@ typedef struct drm_mach64_private { atomic_t dma_timeout; /* Number of interrupt dispatches since last DMA dispatch... */ atomic_t do_gui; /* Flag for the bottom half to know what to do... */ atomic_t do_blit; /* Flag for the bottom half to know what to do... */ - - struct pci_pool *pool; - dma_addr_t table_handle; - void *cpu_addr_table; - u32 table_addr; + + /* DMA descriptor table (ring buffer) */ + struct pci_pool *pool; /* DMA memory pool */ + int table_size; /* size of table (ring buffer) in bytes */ + dma_addr_t table_handle; /* handle returned by pci_pool_alloc */ + void *cpu_addr_table; /* virtual address of table head */ + u32 table_addr; /* physical address of table head */ + u32 table_start; /* physical address of start of table ring */ + u32 table_end; /* physical address of end of table ring */ + u32 *table_wptr; /* write pointer to table (ring tail) */ struct list_head free_list; /* Free-list head */ struct list_head empty_list; /* Free-list placeholder list */ @@ -95,13 +101,15 @@ extern int mach64_dma_init( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); extern int mach64_dma_idle( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); +extern int mach64_dma_flush( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ); extern int mach64_engine_reset( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); extern int mach64_dma_buffers( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); -extern void mach64_freelist_reset( drm_device_t *dev ); -extern drm_buf_t *mach64_freelist_get( drm_device_t *dev ); +extern void mach64_freelist_reset( drm_mach64_private_t *dev_priv ); +extern drm_buf_t *mach64_freelist_get( drm_mach64_private_t *dev_priv ); extern int mach64_do_wait_for_fifo( drm_mach64_private_t *dev_priv, int entries ); @@ -109,6 +117,9 @@ extern int mach64_do_wait_for_idle( drm_mach64_private_t *dev_priv ); extern void mach64_dump_engine_info( drm_mach64_private_t *dev_priv ); extern int mach64_do_engine_reset( drm_mach64_private_t *dev_priv ); +extern int mach64_do_dma_idle( drm_mach64_private_t *dev_priv ); +extern int mach64_do_dma_flush( drm_mach64_private_t *dev_priv ); + /* mach64_state.c */ extern int mach64_dma_clear( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); @@ -116,7 +127,8 @@ extern int mach64_dma_swap( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); extern int mach64_dma_vertex( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); - +extern int mach64_dma_blit( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ); /* ================================================================ * Registers @@ -283,6 +295,7 @@ extern int mach64_dma_vertex( struct inode *inode, struct file *filp, #define MACH64_ONE_OVER_AREA_UC 0x0300 #define MACH64_PAT_REG0 0x0680 +#define MACH64_PAT_REG1 0x0684 #define MACH64_SC_LEFT_RIGHT 0x06a8 #define MACH64_SC_TOP_BOTTOM 0x06b4 @@ -347,12 +360,25 @@ extern int mach64_dma_vertex( struct inode *inode, struct file *filp, #define MACH64_Z_OFF_PITCH 0x0548 #define MACH64_CRTC_INT_CNTL 0x0418 -# define MACH64_VBLANK_INT_EN (1 << 1) -# define MACH64_VBLANK_INT (1 << 2) -# define MACH64_VBLANK_INT_AK (1 
<< 2) -# define MACH64_BUSMASTER_EOL_INT_EN (1 << 24) -# define MACH64_BUSMASTER_EOL_INT (1 << 25) -# define MACH64_BUSMASTER_EOL_INT_AK (1 << 25) +# define MACH64_CRTC_VBLANK_INT_EN (1 << 1) +# define MACH64_CRTC_VBLANK_INT (1 << 2) +# define MACH64_CRTC_VBLANK_INT_AK (1 << 2) +# define MACH64_CRTC_VLINE_INT_EN (1 << 3) +# define MACH64_CRTC_VLINE_INT (1 << 4) +# define MACH64_CRTC_VLINE_INT_AK (1 << 4) +# define MACH64_CRTC_VLINE_SYNC (1 << 5) +# define MACH64_CRTC_FRAME (1 << 6) +# define MACH64_CRTC_SNAPSHOT_INT_EN (1 << 7) +# define MACH64_CRTC_SNAPSHOT_INT (1 << 8) +# define MACH64_CRTC_SNAPSHOT_INT_AK (1 << 8) +# define MACH64_CRTC_BUSMASTER_EOL_INT_EN (1 << 24) +# define MACH64_CRTC_BUSMASTER_EOL_INT (1 << 25) +# define MACH64_CRTC_BUSMASTER_EOL_INT_AK (1 << 25) +# define MACH64_CRTC_GP_INT_EN (1 << 26) +# define MACH64_CRTC_GP_INT (1 << 27) +# define MACH64_CRTC_GP_INT_AK (1 << 27) +# define MACH64_CRTC_VBLANK2_INT (1 << 31) +# define MACH64_CRTC_VBLANK2_INT_AK (1 << 31) #define MACH64_DATATYPE_CI8 2 #define MACH64_DATATYPE_ARGB1555 3 @@ -362,6 +388,10 @@ extern int mach64_dma_vertex( struct inode *inode, struct file *filp, #define MACH64_DATATYPE_RGB8 9 #define MACH64_DATATYPE_ARGB4444 15 +/* Constants */ +#define MACH64_LAST_FRAME_REG MACH64_PAT_REG0 +#define MACH64_LAST_DISPATCH_REG MACH64_PAT_REG1 +#define MACH64_MAX_VB_AGE 0x7fffffff #define MACH64_BASE(reg) ((u32)(dev_priv->mmio->handle)) @@ -403,6 +433,24 @@ do { \ } \ } while (0) +/* Check for high water mark and flush if reached */ +/* FIXME: right now this is needed to ensure free buffers for state emits */ +#define QUEUE_SPACE_TEST_WITH_RETURN( dev_priv ) \ +do { \ + struct list_head *ptr; \ + int ret, queued = 0; \ + if (list_empty(&dev_priv->dma_queue)) goto __queue_space_done; \ + list_for_each(ptr, &dev_priv->dma_queue) { \ + queued++; \ + } \ + if (queued >= MACH64_DMA_SIZE) { \ + DRM_DEBUG("%s: high mark reached: %d\n", __FUNCTION__, MACH64_DMA_SIZE); \ + if ((ret=mach64_do_dma_flush( dev_priv )) < 0) \ + return ret; \ + } \ +__queue_space_done: \ +} while (0) + #define VB_AGE_TEST_WITH_RETURN( dev_priv ) \ do { \ drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv; \ @@ -410,7 +458,7 @@ do { \ int __ret = mach64_do_dma_idle( dev_priv ); \ if ( __ret < 0 ) return __ret; \ sarea_priv->last_dispatch = 0; \ - mach64_freelist_reset( dev ); \ + mach64_freelist_reset( dev_priv ); \ } \ } while (0) @@ -425,7 +473,7 @@ do { \ #define DMA_RESERVED 3 #define MACH64_DMA_TIMEOUT 10 /* 10 vertical retraces should be enough */ -#define MACH64_DMA_SIZE 64 /* 1 MB (64*16kB) should be enough */ +#define MACH64_DMA_SIZE 96 /* Queue high water mark (number of buffers) */ #define DMA_CHUNKSIZE 0x1000 /* 4kB per DMA descriptor */ #define APERTURE_OFFSET 0x7ff800 @@ -434,31 +482,89 @@ do { \ #define mach64_flush_write_combine() mb() -#define DMALOCALS +#define GET_RING_HEAD( dev_priv ) \ +do { \ + dev_priv->table_start = (MACH64_READ(MACH64_BM_GUI_TABLE) & 0xfffffff0); \ + /* BM_GUI_TABLE points to the next descriptor to be processed (pre-incremented) */ \ + if (dev_priv->table_start == dev_priv->table_addr) \ + dev_priv->table_start += (dev_priv->table_size - sizeof(u32)); \ + else \ + dev_priv->table_start -= sizeof(u32); \ +} while (0) + +#define DMALOCALS drm_buf_t *buf = NULL; u32 *p; int outcount = 0 + +#define GETBUFPTR( dev_priv, _p, _buf ) \ +do { \ + if (dev_priv->is_pci) \ + (_p) = (u32 *) (_buf)->address; \ + else \ + (_p) = (u32 *)((char *)dev_priv->buffers->handle + (_buf)->offset); \ +} while(0) +/* FIXME: use a 
private set of smaller buffers for state emits, clears, and swaps? */ #define DMAGETPTR( dev_priv, n ) \ do { \ if ( MACH64_VERBOSE ) { \ DRM_INFO( "DMAGETPTR( %d ) in %s\n", \ n, __FUNCTION__ ); \ } \ - mach64_do_wait_for_fifo( dev_priv, n ); \ + buf = mach64_freelist_get( dev_priv ); \ + if (buf == NULL) { \ + DRM_ERROR("%s: couldn't get buffer in DMAGETPTR\n", \ + __FUNCTION__ ); \ + return -EAGAIN; \ + } \ + buf->pid = current->pid; \ + outcount = 0; \ + \ + GETBUFPTR( dev_priv, p, buf ); \ } while (0) -#define DMAOUTREG( reg, val ) \ -do { \ - if ( MACH64_VERBOSE ) { \ - DRM_INFO( " DMAOUTREG( 0x%x = 0x%08x )\n", \ - reg, val ); \ - } \ - MACH64_WRITE( reg, val ); \ +#define DMAOUTREG( reg, val ) \ +do { \ + if ( MACH64_VERBOSE ) { \ + DRM_INFO( " DMAOUTREG( 0x%x = 0x%08x )\n", \ + reg, val ); \ + } \ + p[outcount++] = cpu_to_le32(DMAREG(reg)); \ + p[outcount++] = cpu_to_le32((val)); \ + buf->used += 8; \ } while (0) -#define DMAADVANCE( dev_priv ) \ -do { \ - if ( MACH64_VERBOSE ) { \ - DRM_INFO( "DMAADVANCE() in %s\n", __FUNCTION__ ); \ - } \ +#define DMAADVANCE( dev_priv ) \ +do { \ + struct list_head *ptr; \ + drm_mach64_freelist_t *entry; \ + \ + if ( MACH64_VERBOSE ) { \ + DRM_INFO( "DMAADVANCE() in %s\n", __FUNCTION__ ); \ + } \ + \ + if (list_empty(&dev_priv->empty_list)) { \ + DRM_ERROR( "%s: empty placeholder list in DMAADVANCE()\n", \ + __FUNCTION__ ); \ + return -EFAULT; \ + } \ + \ + /* Add the buffer to the DMA queue */ \ + ptr = dev_priv->empty_list.next; \ + list_del(ptr); \ + entry = list_entry(ptr, drm_mach64_freelist_t, list); \ + entry->buf = buf; \ + entry->buf->waiting = 1; \ + list_add_tail(ptr, &dev_priv->dma_queue); \ + \ +} while (0) + +#define DMAFLUSH( dev_priv ) \ +do { \ + int ret; \ + if ( MACH64_VERBOSE ) { \ + DRM_INFO( "DMAFLUSH() in %s\n", __FUNCTION__ ); \ + } \ + if ((ret=mach64_do_dma_flush( dev_priv )) < 0) \ + return ret; \ } while (0) -#endif +#endif /* __MACH64_DRV_H__ */ diff --git a/linux/mach64_state.c b/linux/mach64_state.c index 13cfbc86..443f75f1 100644 --- a/linux/mach64_state.c +++ b/linux/mach64_state.c @@ -37,48 +37,49 @@ * DMA hardware state programming functions */ -static inline void mach64_emit_texture( drm_mach64_private_t *dev_priv ) +static void mach64_print_dirty( const char *msg, unsigned int flags ) { - drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv; - drm_mach64_context_regs_t *regs = &sarea_priv->context_state; - u32 offset = ((regs->tex_size_pitch & 0xf0) >> 2); - - DMALOCALS; - - DMAGETPTR( dev_priv, 4 ); - - DMAOUTREG( MACH64_TEX_SIZE_PITCH, regs->tex_size_pitch ); - DMAOUTREG( MACH64_TEX_CNTL, regs->tex_cntl ); - DMAOUTREG( MACH64_SECONDARY_TEX_OFF, regs->secondary_tex_off ); - DMAOUTREG( MACH64_TEX_0_OFF + offset, regs->tex_offset ); - - DMAADVANCE( dev_priv ); + DRM_DEBUG( "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s\n", + msg, + flags, + (flags & MACH64_UPLOAD_DST_OFF_PITCH) ? "dst_off_pitch, " : "", + (flags & MACH64_UPLOAD_Z_ALPHA_CNTL) ? "z_alpha_cntl, " : "", + (flags & MACH64_UPLOAD_SCALE_3D_CNTL) ? "scale_3d_cntl, " : "", + (flags & MACH64_UPLOAD_DP_FOG_CLR) ? "dp_fog_clr, " : "", + (flags & MACH64_UPLOAD_DP_WRITE_MASK) ? "dp_write_mask, " : "", + (flags & MACH64_UPLOAD_DP_PIX_WIDTH) ? "dp_pix_width, " : "", + (flags & MACH64_UPLOAD_SETUP_CNTL) ? "setup_cntl, " : "", + (flags & MACH64_UPLOAD_MISC) ? "misc, " : "", + (flags & MACH64_UPLOAD_TEXTURE) ? "texture, " : "", + (flags & MACH64_UPLOAD_TEX0IMAGE) ? "tex0 image, " : "", + (flags & MACH64_UPLOAD_TEX1IMAGE) ? 
"tex1 image, " : "", + (flags & MACH64_UPLOAD_CLIPRECTS) ? "cliprects, " : "" ); } -static inline void mach64_emit_state( drm_mach64_private_t *dev_priv ) +static inline int mach64_emit_state( drm_mach64_private_t *dev_priv ) { drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mach64_context_regs_t *regs = &sarea_priv->context_state; unsigned int dirty = sarea_priv->dirty; + u32 offset = ((regs->tex_size_pitch & 0xf0) >> 2); DMALOCALS; - DRM_DEBUG( "%s: dirty=0x%08x\n", __FUNCTION__, dirty ); + if ( MACH64_VERBOSE ) { + mach64_print_dirty( __FUNCTION__, dirty ); + } else { + DRM_DEBUG( "%s: dirty=0x%08x\n", __FUNCTION__, dirty ); + } + + DMAGETPTR( dev_priv, 19 ); /* returns on failure to get buffer */ if ( dirty & MACH64_UPLOAD_MISC ) { - DMAGETPTR( dev_priv, 4 ); - DMAOUTREG( MACH64_DP_MIX, regs->dp_mix ); DMAOUTREG( MACH64_DP_SRC, regs->dp_src ); DMAOUTREG( MACH64_CLR_CMP_CNTL, regs->clr_cmp_cntl ); DMAOUTREG( MACH64_GUI_TRAJ_CNTL, regs->gui_traj_cntl ); - - DMAADVANCE( dev_priv ); - sarea_priv->dirty &= ~MACH64_UPLOAD_MISC; } - DMAGETPTR( dev_priv, 9 ); - if ( dirty & MACH64_UPLOAD_DST_OFF_PITCH ) { DMAOUTREG( MACH64_DST_OFF_PITCH, regs->dst_off_pitch ); sarea_priv->dirty &= ~MACH64_UPLOAD_DST_OFF_PITCH; @@ -113,51 +114,34 @@ static inline void mach64_emit_state( drm_mach64_private_t *dev_priv ) sarea_priv->dirty &= ~MACH64_UPLOAD_SETUP_CNTL; } - DMAADVANCE( dev_priv ); - if ( dirty & MACH64_UPLOAD_TEXTURE ) { - mach64_emit_texture( dev_priv ); + DMAOUTREG( MACH64_TEX_SIZE_PITCH, regs->tex_size_pitch ); + DMAOUTREG( MACH64_TEX_CNTL, regs->tex_cntl ); + DMAOUTREG( MACH64_SECONDARY_TEX_OFF, regs->secondary_tex_off ); + DMAOUTREG( MACH64_TEX_0_OFF + offset, regs->tex_offset ); sarea_priv->dirty &= ~MACH64_UPLOAD_TEXTURE; } if ( dirty & MACH64_UPLOAD_CLIPRECTS ) { - DMAGETPTR( dev_priv, 2 ); - DMAOUTREG( MACH64_SC_LEFT_RIGHT, regs->sc_left_right ); DMAOUTREG( MACH64_SC_TOP_BOTTOM, regs->sc_top_bottom ); - - DMAADVANCE( dev_priv ); - sarea_priv->dirty &= ~MACH64_UPLOAD_CLIPRECTS; } -} + DMAADVANCE( dev_priv ); + + sarea_priv->dirty = 0; + + return 0; + +} /* ================================================================ * DMA command dispatch functions */ -static void mach64_print_dirty( const char *msg, unsigned int flags ) -{ - DRM_INFO( "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s\n", - msg, - flags, - (flags & MACH64_UPLOAD_DST_OFF_PITCH) ? "dst_off_pitch, " : "", - (flags & MACH64_UPLOAD_Z_ALPHA_CNTL) ? "z_alpha_cntl, " : "", - (flags & MACH64_UPLOAD_SCALE_3D_CNTL) ? "scale_3d_cntl, " : "", - (flags & MACH64_UPLOAD_DP_FOG_CLR) ? "dp_fog_clr, " : "", - (flags & MACH64_UPLOAD_DP_WRITE_MASK) ? "dp_write_mask, " : "", - (flags & MACH64_UPLOAD_DP_PIX_WIDTH) ? "dp_pix_width, " : "", - (flags & MACH64_UPLOAD_SETUP_CNTL) ? "setup_cntl, " : "", - (flags & MACH64_UPLOAD_MISC) ? "misc, " : "", - (flags & MACH64_UPLOAD_TEXTURE) ? "texture, " : "", - (flags & MACH64_UPLOAD_TEX0IMAGE) ? "tex0 image, " : "", - (flags & MACH64_UPLOAD_TEX1IMAGE) ? "tex1 image, " : "", - (flags & MACH64_UPLOAD_CLIPRECTS) ? 
"cliprects, " : "" ); -} - -static void mach64_dma_dispatch_clear( drm_device_t *dev, +static int mach64_dma_dispatch_clear( drm_device_t *dev, unsigned int flags, int cx, int cy, int cw, int ch, unsigned int clear_color, @@ -171,6 +155,7 @@ static void mach64_dma_dispatch_clear( drm_device_t *dev, u32 fb_bpp, depth_bpp; int i; DMALOCALS; + DRM_DEBUG( "%s\n", __FUNCTION__ ); switch ( dev_priv->fb_bpp ) { @@ -181,7 +166,7 @@ static void mach64_dma_dispatch_clear( drm_device_t *dev, fb_bpp = MACH64_DATATYPE_ARGB8888; break; default: - return; + return -EINVAL; } switch ( dev_priv->depth_bpp ) { case 16: @@ -192,9 +177,11 @@ static void mach64_dma_dispatch_clear( drm_device_t *dev, depth_bpp = MACH64_DATATYPE_ARGB8888; break; default: - return; + return -EINVAL; } - + + DMAGETPTR( dev_priv, 100 ); /* returns on failure to get buffer */ + for ( i = 0 ; i < nbox ; i++ ) { int x = pbox[i].x1; int y = pbox[i].y1; @@ -208,7 +195,6 @@ static void mach64_dma_dispatch_clear( drm_device_t *dev, if ( flags & (MACH64_FRONT | MACH64_BACK) ) { /* Setup for color buffer clears */ - DMAGETPTR( dev_priv, 14 ); DMAOUTREG( MACH64_Z_CNTL, 0 ); DMAOUTREG( MACH64_SCALE_3D_CNTL, 0 ); @@ -228,8 +214,6 @@ static void mach64_dma_dispatch_clear( drm_device_t *dev, (fb_bpp << 28)) ); DMAOUTREG( MACH64_DP_FRGD_CLR, clear_color ); - /* FIXME: Use color mask from state info */ - /*DMAOUTREG( MACH64_DP_WRITE_MASK, 0xffffffff );*/ DMAOUTREG( MACH64_DP_WRITE_MASK, ctx->dp_write_mask ); DMAOUTREG( MACH64_DP_MIX, (MACH64_BKGD_MIX_D | MACH64_FRGD_MIX_S) ); @@ -237,13 +221,11 @@ static void mach64_dma_dispatch_clear( drm_device_t *dev, MACH64_FRGD_SRC_FRGD_CLR | MACH64_MONO_SRC_ONE) ); - DMAADVANCE( dev_priv ); - + } if ( flags & MACH64_FRONT ) { - DMAGETPTR( dev_priv, 3 ); - + DMAOUTREG( MACH64_DST_OFF_PITCH, dev_priv->front_offset_pitch ); DMAOUTREG( MACH64_DST_X_Y, @@ -251,12 +233,10 @@ static void mach64_dma_dispatch_clear( drm_device_t *dev, DMAOUTREG( MACH64_DST_WIDTH_HEIGHT, (h << 16) | w ); - DMAADVANCE( dev_priv ); } if ( flags & MACH64_BACK ) { - DMAGETPTR( dev_priv, 3 ); - + DMAOUTREG( MACH64_DST_OFF_PITCH, dev_priv->back_offset_pitch ); DMAOUTREG( MACH64_DST_X_Y, @@ -264,14 +244,11 @@ static void mach64_dma_dispatch_clear( drm_device_t *dev, DMAOUTREG( MACH64_DST_WIDTH_HEIGHT, (h << 16) | w ); - DMAADVANCE( dev_priv ); } - + if ( flags & MACH64_DEPTH ) { /* Setup for depth buffer clear */ - DMAGETPTR( dev_priv, 14 ); - DMAOUTREG( MACH64_Z_CNTL, 0 ); DMAOUTREG( MACH64_SCALE_3D_CNTL, 0 ); @@ -303,13 +280,15 @@ static void mach64_dma_dispatch_clear( drm_device_t *dev, (y << 16) | x ); DMAOUTREG( MACH64_DST_WIDTH_HEIGHT, (h << 16) | w ); - - DMAADVANCE( dev_priv ); } } + + DMAADVANCE( dev_priv ); + + return 0; } -static void mach64_dma_dispatch_swap( drm_device_t *dev ) +static int mach64_dma_dispatch_swap( drm_device_t *dev ) { drm_mach64_private_t *dev_priv = dev->dev_private; drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv; @@ -318,6 +297,7 @@ static void mach64_dma_dispatch_swap( drm_device_t *dev ) u32 fb_bpp; int i; DMALOCALS; + DRM_DEBUG( "%s\n", __FUNCTION__ ); switch ( dev_priv->fb_bpp ) { @@ -330,7 +310,7 @@ static void mach64_dma_dispatch_swap( drm_device_t *dev ) break; } - DMAGETPTR( dev_priv, 12 ); + DMAGETPTR( dev_priv, 13 + nbox * 4 ); /* returns on failure to get buffer */ DMAOUTREG( MACH64_Z_CNTL, 0 ); DMAOUTREG( MACH64_SCALE_3D_CNTL, 0 ); @@ -358,8 +338,6 @@ static void mach64_dma_dispatch_swap( drm_device_t *dev ) DMAOUTREG( MACH64_SRC_OFF_PITCH, dev_priv->back_offset_pitch ); DMAOUTREG( 
MACH64_DST_OFF_PITCH, dev_priv->front_offset_pitch ); - DMAADVANCE( dev_priv ); - for ( i = 0 ; i < nbox ; i++ ) { int x = pbox[i].x1; int y = pbox[i].y1; @@ -370,56 +348,48 @@ static void mach64_dma_dispatch_swap( drm_device_t *dev ) pbox[i].x1, pbox[i].y1, pbox[i].x2, pbox[i].y2 ); - DMAGETPTR( dev_priv, 4 ); - DMAOUTREG( MACH64_SRC_WIDTH1, w ); DMAOUTREG( MACH64_SRC_Y_X, (x << 16) | y ); DMAOUTREG( MACH64_DST_Y_X, (x << 16) | y ); DMAOUTREG( MACH64_DST_WIDTH_HEIGHT, (h << 16) | w ); - DMAADVANCE( dev_priv ); } -#if 0 /* Increment the frame counter. The client-side 3D driver must * throttle the framerate by waiting for this value before * performing the swapbuffer ioctl. */ dev_priv->sarea_priv->last_frame++; - BEGIN_RING( 2 ); - - OUT_RING( CCE_PACKET0( MACH64_LAST_FRAME_REG, 0 ) ); - OUT_RING( dev_priv->sarea_priv->last_frame ); + DMAOUTREG( MACH64_LAST_FRAME_REG, dev_priv->sarea_priv->last_frame ); + DMAADVANCE( dev_priv ); - ADVANCE_RING(); -#endif + DMAFLUSH( dev_priv ); + return 0; } static int mach64_dma_dispatch_vertex( drm_device_t *dev, drm_buf_t *buf ) { drm_mach64_private_t *dev_priv = dev->dev_private; - drm_mach64_buf_priv_t *buf_priv = buf->dev_private; drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv; - int size = buf->used; - int i = 0; + int ret, i = 0; + /* Don't need DMALOCALS, since buf is a parameter */ + DRM_DEBUG( "%s: buf=%d nbox=%d\n", __FUNCTION__, buf->idx, sarea_priv->nbox ); - if ( 0 ) - mach64_print_dirty( "dispatch_vertex", sarea_priv->dirty ); - if ( buf->used ) { #if 0 - buf_priv->dispatched = 1; - if ( sarea_priv->dirty & ~MACH64_UPLOAD_CLIPRECTS ) { + mach64_emit_state( dev_priv ); + } #else if ( sarea_priv->dirty ) { -#endif - mach64_emit_state( dev_priv ); + ret = mach64_emit_state( dev_priv ); + if (ret < 0) return ret; } +#endif do { #if 0 @@ -429,161 +399,125 @@ static int mach64_dma_dispatch_vertex( drm_device_t *dev, &sarea_priv->boxes[i]); } #endif + /* Add the buffer to the DMA queue */ + DMAADVANCE( dev_priv ); - - if (dev_priv->driver_mode != MACH64_MODE_MMIO) { - int i, pages, remainder, tableDwords; - u32 address, page; - u32 *table_ptr = (u32 *) dev_priv->cpu_addr_table; - u32 *p; - - if (dev_priv->is_pci) { - address = (u32) virt_to_bus((void *)buf->address); - p = (u32 *) buf->address; - } else { - address = (u32) buf->bus_address; - p = (u32 *)((char *)dev_priv->buffers->handle + - buf->offset); - } - - /* SRC_CNTL */ - p[(size/4) ] = cpu_to_le32(0x0000006d); - p[(size/4)+1] = 0x00000000; - size += 8; - pages = (size + DMA_CHUNKSIZE - 1) / DMA_CHUNKSIZE; - tableDwords = 0; - for ( i = 0 ; i < pages-1 ; i++ ) { - page = address + i * DMA_CHUNKSIZE; - - table_ptr[DMA_FRAME_BUF_OFFSET] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET); - table_ptr[DMA_SYS_MEM_ADDR] = cpu_to_le32(page); - table_ptr[DMA_COMMAND] = cpu_to_le32(DMA_CHUNKSIZE | 0x40000000); - table_ptr[DMA_RESERVED] = 0; - - tableDwords += 4; - table_ptr += 4; - } - - /* generate the final descriptor for any remaining commands */ - page = address + i * DMA_CHUNKSIZE; - remainder = size - i * DMA_CHUNKSIZE; - table_ptr[DMA_FRAME_BUF_OFFSET] = cpu_to_le32(MACH64_BM_ADDR + APERTURE_OFFSET); - table_ptr[DMA_SYS_MEM_ADDR] = cpu_to_le32(page); - table_ptr[DMA_COMMAND] = cpu_to_le32(remainder | 0x80000000 | 0x40000000); - table_ptr[DMA_RESERVED] = 0; - - tableDwords += 4; -#if MACH64_VERBOSE - DRM_INFO( "%d bytes, buffer addr: 0x%08x\n", size, address); - table_ptr = (u32 *) dev_priv->cpu_addr_table; - for ( i = 0 ; i < tableDwords / 4 ; i++ ) { - DRM_INFO( " entry: %x addr: %p 
cmd: 0x%x\n", i, - table_ptr[4*i+1], table_ptr[4*i+2] ); - - /* dump the contents of the buffers */ - if (1) { - int entries = (table_ptr[4*i+2] & 0xffff) / 4, j; - - for ( j = 0 ; j < entries ; j++ ) { - DRM_INFO( " [0x%03x]: 0x%08x\n", j, p[1024*i+j] ); - } - } - } -#endif /* MACH64_VERBOSE */ - /* flush write combining */ - mach64_flush_write_combine(); - mach64_do_wait_for_idle( dev_priv ); - - MACH64_WRITE( MACH64_BUS_CNTL, - ( MACH64_READ(MACH64_BUS_CNTL) - & ~MACH64_BUS_MASTER_DIS ) ); - MACH64_WRITE( MACH64_BM_GUI_TABLE_CMD, ( dev_priv->table_addr - | MACH64_CIRCULAR_BUF_SIZE_16KB ) ); - MACH64_WRITE( MACH64_SRC_CNTL, - MACH64_SRC_BM_ENABLE | MACH64_SRC_BM_SYNC | - MACH64_SRC_BM_OP_SYSTEM_TO_REG ); - MACH64_WRITE( MACH64_DST_HEIGHT_WIDTH, 0 ); - - if ( mach64_do_wait_for_idle( dev_priv ) ) { - DRM_INFO( "mach64_do_wait_for_idle failed\n" ); - DRM_INFO( "resetting engine ...\n" ); - mach64_dump_engine_info( dev_priv ); - mach64_do_engine_reset( dev_priv ); - return -EBUSY; - } - } else { - /* Emit the vertex buffer rendering commands */ - u32 *p; - u32 used = buf->used >> 2; - u32 fifo = 0; - - if (dev_priv->is_pci) { - p = (u32 *) buf->address; - } else { - p = (u32 *)((char *)dev_priv->buffers->handle + buf->offset); - } - - while ( used ) { - u32 reg, count; - - reg = le32_to_cpu(*p++); - used--; - - count = (reg >> 16) + 1; - reg = reg & 0xffff; - reg = MMSELECT( reg ); - - while ( count && used ) { - if ( !fifo ) { - if ( mach64_do_wait_for_fifo( dev_priv, 16 ) < 0 ) - return -EBUSY; - - fifo = 16; - } - - --fifo; - /* data is already little-endian */ - MACH64_WRITE(reg, le32_to_cpu(*p++)); - used--; - - reg += 4; - count--; - } - } - } } while ( ++i < sarea_priv->nbox ); } -#if 0 - if ( buf_priv->discard ) { - buf_priv->age = dev_priv->sarea_priv->last_dispatch; + sarea_priv->dirty &= ~MACH64_UPLOAD_CLIPRECTS; + sarea_priv->nbox = 0; - /* Emit the vertex buffer age */ - DMAGETPTR( dev_priv, 1 ); + return 0; +} - DMAOUTREG( MACH64_LAST_DISPATCH_REG, buf_priv->age ); - - DMAADVANCE( dev_priv ); - buf->pending = 1; - buf->used = 0; - /* FIXME: Check dispatched field */ - buf_priv->dispatched = 0; +static int mach64_dma_dispatch_blit( drm_device_t *dev, + drm_mach64_blit_t *blit ) +{ + drm_mach64_private_t *dev_priv = dev->dev_private; + drm_device_dma_t *dma = dev->dma; + drm_mach64_buf_priv_t *buf_priv; + int dword_shift, dwords; + DMALOCALS; /* declares buf=NULL, p, outcount=0 */ + + /* The compiler won't optimize away a division by a variable, + * even if the only legal values are powers of two. Thus, we'll + * use a shift instead. + */ + switch ( blit->format ) { + case MACH64_DATATYPE_ARGB8888: + dword_shift = 0; + break; + case MACH64_DATATYPE_ARGB1555: + case MACH64_DATATYPE_RGB565: + case MACH64_DATATYPE_ARGB4444: + dword_shift = 1; + break; + case MACH64_DATATYPE_CI8: + case MACH64_DATATYPE_RGB8: + dword_shift = 2; + break; + default: + DRM_ERROR( "invalid blit format %d\n", blit->format ); + return -EINVAL; + } + + /* Dispatch the blit buffer. 
+ * We don't need DMAGETPTR, since we already have one + */ + buf = dma->buflist[blit->idx]; + buf_priv = buf->dev_private; + + if ( buf->pid != current->pid ) { + DRM_ERROR( "process %d using buffer owned by %d\n", + current->pid, buf->pid ); + return -EINVAL; } -#else - buf->used = 0; - buf->pid = 0; + + if ( buf->pending ) { + DRM_ERROR( "sending pending buffer %d\n", blit->idx ); + return -EINVAL; + } +#if 0 + buf_priv->discard = 1; #endif + + if (dev_priv->is_pci) { + p = (u32 *) buf->address; + } else { + p = (u32 *)((char *)dev_priv->buffers->handle + buf->offset); + } - dev_priv->sarea_priv->last_dispatch++; + dwords = (blit->width * blit->height) >> dword_shift; + /* Add in a command for every 16 dwords */ + dwords += ( ( dwords + 15 ) / 16 ); + buf->used = dwords << 2; - sarea_priv->dirty &= ~MACH64_UPLOAD_CLIPRECTS; - sarea_priv->nbox = 0; + /* Blit via the host data registers (gui-master) + * Add state setup at the start of the buffer -- + * the client leaves space for this based on MACH64_HOSTDATA_BLIT_OFFSET + */ + DMAOUTREG( MACH64_Z_CNTL, 0 ); + DMAOUTREG( MACH64_SCALE_3D_CNTL, 0 ); + DMAOUTREG( MACH64_SC_LEFT_RIGHT, 0 | ( 8191 << 16 ) ); /* no scissor */ + DMAOUTREG( MACH64_SC_TOP_BOTTOM, 0 | ( 16383 << 16 ) ); + + DMAOUTREG( MACH64_CLR_CMP_CNTL, 0 ); /* disable */ + DMAOUTREG( MACH64_GUI_TRAJ_CNTL, + MACH64_DST_X_LEFT_TO_RIGHT + | MACH64_DST_Y_TOP_TO_BOTTOM ); + + DMAOUTREG( MACH64_DP_PIX_WIDTH, + ( blit->format << 0 ) /* dst pix width */ + | ( blit->format << 4 ) /* composite pix width */ + | ( blit->format << 8 ) /* src pix width */ + | ( blit->format << 16 ) /* host data pix width */ + | ( blit->format << 28 ) /* scaler/3D pix width */ + ); + + DMAOUTREG( MACH64_DP_WRITE_MASK, 0xffffffff ); /* enable all planes */ + DMAOUTREG( MACH64_DP_MIX, + MACH64_BKGD_MIX_D + | MACH64_FRGD_MIX_S ); + DMAOUTREG( MACH64_DP_SRC, + MACH64_BKGD_SRC_BKGD_CLR + | MACH64_FRGD_SRC_HOST + | MACH64_MONO_SRC_ONE ); + + DMAOUTREG( MACH64_DST_OFF_PITCH, (blit->pitch << 22) | (blit->offset >> 3) ); + DMAOUTREG( MACH64_DST_X_Y, (blit->y << 16) | blit->x ); + DMAOUTREG( MACH64_DST_WIDTH_HEIGHT, (blit->height << 16) | blit->width ); + + DRM_DEBUG( "%s: %d bytes\n", __FUNCTION__, buf->used ); + + /* Add the buffer to the queue */ + DMAADVANCE( dev_priv ); + return 0; } - /* ================================================================ * IOCTL functions */ @@ -596,26 +530,32 @@ int mach64_dma_clear( struct inode *inode, struct file *filp, drm_mach64_private_t *dev_priv = dev->dev_private; drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_mach64_clear_t clear; - DRM_DEBUG( "%s\n", __FUNCTION__ ); + int ret; - LOCK_TEST_WITH_RETURN( dev ); + DRM_DEBUG( "%s: pid=%d\n", __FUNCTION__, current->pid ); + LOCK_TEST_WITH_RETURN( dev ); + if ( copy_from_user( &clear, (drm_mach64_clear_t *) arg, sizeof(clear) ) ) return -EFAULT; + VB_AGE_TEST_WITH_RETURN( dev_priv ); + QUEUE_SPACE_TEST_WITH_RETURN( dev_priv ); + if ( sarea_priv->nbox > MACH64_NR_SAREA_CLIPRECTS ) sarea_priv->nbox = MACH64_NR_SAREA_CLIPRECTS; - mach64_dma_dispatch_clear( dev, clear.flags, + ret = mach64_dma_dispatch_clear( dev, clear.flags, clear.x, clear.y, clear.w, clear.h, clear.clear_color, clear.clear_depth ); /* Make sure we restore the 3D state next time. 
*/ - dev_priv->sarea_priv->dirty |= (MACH64_UPLOAD_CONTEXT | - MACH64_UPLOAD_MISC); - return 0; + sarea_priv->dirty |= (MACH64_UPLOAD_CONTEXT | + MACH64_UPLOAD_MISC | + MACH64_UPLOAD_CLIPRECTS); + return ret; } int mach64_dma_swap( struct inode *inode, struct file *filp, @@ -625,20 +565,26 @@ int mach64_dma_swap( struct inode *inode, struct file *filp, drm_device_t *dev = priv->dev; drm_mach64_private_t *dev_priv = dev->dev_private; drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv; - DRM_DEBUG( "%s\n", __FUNCTION__ ); + int ret; + + DRM_DEBUG( "%s: pid=%d\n", __FUNCTION__, current->pid ); LOCK_TEST_WITH_RETURN( dev ); + VB_AGE_TEST_WITH_RETURN( dev_priv ); + QUEUE_SPACE_TEST_WITH_RETURN( dev_priv ); + if ( sarea_priv->nbox > MACH64_NR_SAREA_CLIPRECTS ) sarea_priv->nbox = MACH64_NR_SAREA_CLIPRECTS; - mach64_dma_dispatch_swap( dev ); + ret = mach64_dma_dispatch_swap( dev ); /* Make sure we restore the 3D state next time. */ - dev_priv->sarea_priv->dirty |= (MACH64_UPLOAD_CONTEXT | - MACH64_UPLOAD_MISC); - return 0; + sarea_priv->dirty |= (MACH64_UPLOAD_CONTEXT | + MACH64_UPLOAD_MISC | + MACH64_UPLOAD_CLIPRECTS); + return ret; } int mach64_dma_vertex( struct inode *inode, struct file *filp, @@ -667,24 +613,23 @@ int mach64_dma_vertex( struct inode *inode, struct file *filp, __FUNCTION__, current->pid, vertex.idx, vertex.count, vertex.discard ); -#if 0 if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) { DRM_ERROR( "buffer index %d (of %d max)\n", vertex.idx, dma->buf_count - 1 ); return -EINVAL; } +#if 0 if ( vertex.prim < 0 || vertex.prim > R128_CCE_VC_CNTL_PRIM_TYPE_TRI_TYPE2 ) { DRM_ERROR( "buffer prim %d\n", vertex.prim ); return -EINVAL; } - - VB_AGE_TEST_WITH_RETURN( dev_priv ); #endif - + VB_AGE_TEST_WITH_RETURN( dev_priv ); + QUEUE_SPACE_TEST_WITH_RETURN( dev_priv ); + buf = dma->buflist[vertex.idx]; - buf_priv = buf->dev_private; - + if ( buf->pid != current->pid ) { DRM_ERROR( "process %d using buffer owned by %d\n", current->pid, buf->pid ); @@ -697,8 +642,47 @@ int mach64_dma_vertex( struct inode *inode, struct file *filp, buf->used = vertex.count; #if 0 + buf_priv = buf->dev_private; buf_priv->prim = vertex.prim; buf_priv->discard = vertex.discard; #endif return mach64_dma_dispatch_vertex( dev, buf ); } + +int mach64_dma_blit( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->dev; + drm_device_dma_t *dma = dev->dma; + drm_mach64_private_t *dev_priv = dev->dev_private; + drm_mach64_blit_t blit; + int ret; + + LOCK_TEST_WITH_RETURN( dev ); + + if ( copy_from_user( &blit, (drm_mach64_blit_t *)arg, + sizeof(blit) ) ) + return -EFAULT; + + DRM_DEBUG( "%s: pid=%d index=%d\n", + __FUNCTION__, current->pid, blit.idx ); + + if ( blit.idx < 0 || blit.idx >= dma->buf_count ) { + DRM_ERROR( "buffer index %d (of %d max)\n", + blit.idx, dma->buf_count - 1 ); + return -EINVAL; + } + + VB_AGE_TEST_WITH_RETURN( dev_priv ); + QUEUE_SPACE_TEST_WITH_RETURN( dev_priv ); + + ret = mach64_dma_dispatch_blit( dev, &blit ); + + dev_priv->sarea_priv->dirty |= (MACH64_UPLOAD_CONTEXT | + MACH64_UPLOAD_MISC | + MACH64_UPLOAD_CLIPRECTS); + + return ret; + +} diff --git a/shared-core/drm.h b/shared-core/drm.h index a37870a2..993df3b5 100644 --- a/shared-core/drm.h +++ b/shared-core/drm.h @@ -523,7 +523,6 @@ typedef struct drm_scatter_gather { #define DRM_IOCTL_MACH64_SWAP DRM_IO( 0x43) #define DRM_IOCTL_MACH64_CLEAR DRM_IOW( 0x44, drm_mach64_clear_t) #define DRM_IOCTL_MACH64_VERTEX DRM_IOW( 0x45, 
drm_mach64_vertex_t) -#if 0 #define DRM_IOCTL_MACH64_BLIT DRM_IOW( 0x46, drm_mach64_blit_t) -#endif +#define DRM_IOCTL_MACH64_FLUSH DRM_IO( 0x47) #endif diff --git a/shared/drm.h b/shared/drm.h index a37870a2..993df3b5 100644 --- a/shared/drm.h +++ b/shared/drm.h @@ -523,7 +523,6 @@ typedef struct drm_scatter_gather { #define DRM_IOCTL_MACH64_SWAP DRM_IO( 0x43) #define DRM_IOCTL_MACH64_CLEAR DRM_IOW( 0x44, drm_mach64_clear_t) #define DRM_IOCTL_MACH64_VERTEX DRM_IOW( 0x45, drm_mach64_vertex_t) -#if 0 #define DRM_IOCTL_MACH64_BLIT DRM_IOW( 0x46, drm_mach64_blit_t) -#endif +#define DRM_IOCTL_MACH64_FLUSH DRM_IO( 0x47) #endif |
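
For reference, a minimal userspace sketch of how a client might drive the new DRM_IOCTL_MACH64_BLIT path added by this patch. The field names follow the drm_mach64_blit_t accesses in mach64_dma_dispatch_blit() above; the header names, the RGB565 format choice, the pitch/offset units, and the helper function itself are illustrative assumptions, not part of the commit.

#include <sys/ioctl.h>
#include "drm.h"          /* assumed userspace copies of the DRM headers */
#include "mach64_drm.h"   /* assumed to define drm_mach64_blit_t and MACH64_DATATYPE_* */

/* Submit a previously filled DMA buffer (index buf_idx) as a hostdata blit.
 * The buffer is assumed to already hold the pixel data, with room left at
 * its start for the state setup the kernel emits (the
 * MACH64_HOSTDATA_BLIT_OFFSET convention mentioned in the patch).
 */
static int mach64_submit_blit( int drm_fd, int buf_idx,
                               unsigned int dst_offset, unsigned int dst_pitch,
                               int x, int y, int w, int h )
{
	drm_mach64_blit_t blit;

	blit.idx    = buf_idx;                 /* DMA buffer holding the pixels */
	blit.offset = dst_offset;              /* destination surface offset */
	blit.pitch  = dst_pitch;               /* destination pitch (hardware units) */
	blit.format = MACH64_DATATYPE_RGB565;  /* must match the data in the buffer */
	blit.x      = x;
	blit.y      = y;
	blit.width  = w;
	blit.height = h;

	/* The kernel validates idx and format, computes buf->used from
	 * width*height, and queues the buffer as a gui-master blit;
	 * -EINVAL or -EFAULT come back on bad input.
	 */
	return ioctl( drm_fd, DRM_IOCTL_MACH64_BLIT, &blit );
}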