diff options
-rw-r--r-- | src/r600_exa.c | 602 | ||||
-rw-r--r-- | src/r600_state.h | 54 | ||||
-rw-r--r-- | src/r600_textured_videofuncs.c | 96 | ||||
-rw-r--r-- | src/r6xx_accel.c | 291 | ||||
-rw-r--r-- | src/radeon.h | 5 | ||||
-rw-r--r-- | src/radeon_dri2.c | 4 | ||||
-rw-r--r-- | src/radeon_exa.c | 8 | ||||
-rw-r--r-- | src/radeon_kms.c | 4 |
8 files changed, 824 insertions, 240 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c index d2df1db..86da68c 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -128,12 +128,22 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) uint32_t a, r, g, b; float ps_alu_consts[4]; + //return FALSE; + if (pPix->drawable.bitsPerPixel == 24) RADEON_FALLBACK(("24bpp unsupported\n")); if (!R600CheckBPP(pPix->drawable.bitsPerPixel)) RADEON_FALLBACK(("R600CheckDatatype failed\n")); - accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->dst_bo = radeon_get_pixmap_bo(pPix); + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + } else +#endif + accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height; accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); @@ -156,26 +166,33 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix)); #endif - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->solid_vs_offset; - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->solid_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->solid_vs_offset; + accel_state->ps_mc_addr = accel_state->solid_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->solid_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->solid_ps_offset; + } accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -183,16 +200,19 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 1; @@ -200,6 +220,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); /* Render setup */ @@ -211,14 +232,16 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) pmask |= 1; /* R */ if (pm & 0xff000000) pmask |= 8; /* A */ + BEGIN_BATCH(6); EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[alu]); + END_BATCH(); cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; cb_conf.h = pPix->drawable.height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; if (pPix->drawable.bitsPerPixel == 8) { cb_conf.format = COLOR_8; @@ -234,14 +257,9 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */ + BEGIN_BATCH(18); EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift)); EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); @@ -256,6 +274,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) FLAT_SHADE_bit | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit); + END_BATCH(); /* PS alu constants */ if (pPix->drawable.bitsPerPixel == 16) { @@ -303,8 +322,7 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) if (((accel_state->vb_index + 3) * 8) > accel_state->vb_total) { R600DoneSolid(pPix); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*8); @@ -349,10 +367,12 @@ R600DoneSolid(PixmapPtr pPix) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -360,6 +380,7 @@ R600DoneSolid(PixmapPtr pPix) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); /* Draw */ @@ -375,15 +396,18 @@ R600DoneSolid(PixmapPtr pPix) /* sync dst surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); } static void R600DoPrepareCopy(ScrnInfoPtr pScrn, - int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp, - int dst_pitch, int dst_width, int dst_height, uint32_t dst_offset, int dst_bpp, + int src_pitch, int src_width, int src_height, + uint32_t src_offset, struct radeon_bo *src_bo, int src_bpp, + int dst_pitch, int dst_width, int dst_height, + uint32_t dst_offset, struct radeon_bo *dst_bo, int dst_bpp, int rop, Pixel planemask) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -400,26 +424,49 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, CLEAR (vs_conf); CLEAR (ps_conf); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); + accel_state->src_mc_addr[0] = src_offset; + accel_state->src_pitch[0] = src_pitch; + accel_state->src_width[0] = src_width; + accel_state->src_height[0] = src_height; + accel_state->src_bpp[0] = src_bpp; + accel_state->src_bo[0] = src_bo; + accel_state->src_bo[1] = NULL; + + accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); + accel_state->dst_mc_addr = dst_offset; + accel_state->dst_pitch = dst_pitch; + accel_state->dst_height = dst_height; + accel_state->dst_bpp = dst_bpp; + accel_state->dst_bo = dst_bo; + + r600_cp_start(pScrn); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); set_window_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->copy_vs_offset; - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->copy_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->copy_vs_offset; + accel_state->ps_mc_addr = accel_state->copy_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->copy_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->copy_ps_offset; + } accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -427,16 +474,19 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 1; @@ -444,18 +494,13 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); - accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); - accel_state->src_mc_addr[0] = src_offset; - accel_state->src_pitch[0] = src_pitch; - accel_state->src_width[0] = src_width; - accel_state->src_height[0] = src_height; - accel_state->src_bpp[0] = src_bpp; - /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->src_size[0], accel_state->src_mc_addr[0]); + accel_state->src_size[0], accel_state->src_mc_addr[0], + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); /* Texture */ tex_res.id = 0; @@ -466,6 +511,8 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; + tex_res.bo = accel_state->src_bo[0]; + tex_res.mip_bo = accel_state->src_bo[0]; if (src_bpp == 8) { tex_res.format = FMT_8; tex_res.dst_sel_x = SQ_SEL_1; /* R */ @@ -512,20 +559,16 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, pmask |= 1; /* R */ if (planemask & 0xff000000) pmask |= 8; /* A */ + BEGIN_BATCH(6); EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]); - - accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); - accel_state->dst_mc_addr = dst_offset; - accel_state->dst_pitch = dst_pitch; - accel_state->dst_height = dst_height; - accel_state->dst_bpp = dst_bpp; + END_BATCH(); cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; cb_conf.h = dst_height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; if (dst_bpp == 8) { cb_conf.format = COLOR_8; cb_conf.comp_swap = 3; /* A */ @@ -540,14 +583,9 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ /* export tex coord from VS */ + BEGIN_BATCH(18); EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); @@ -561,6 +599,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, (0x01 << DEFAULT_VAL_shift) | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); + END_BATCH(); } static void @@ -589,10 +628,12 @@ R600DoCopy(ScrnInfoPtr pScrn) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -600,6 +641,7 @@ R600DoCopy(ScrnInfoPtr pScrn) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; @@ -614,7 +656,8 @@ R600DoCopy(ScrnInfoPtr pScrn) /* sync dst surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -631,8 +674,7 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoCopy(pScrn); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); @@ -665,6 +707,8 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; + //return FALSE; + if (pSrc->drawable.bitsPerPixel == 24) RADEON_FALLBACK(("24bpp unsupported\n")); if (pDst->drawable.bitsPerPixel == 24) @@ -676,9 +720,25 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); - - accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; - accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + accel_state->same_surface = FALSE; + +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->src_mc_addr[0] = 0; + accel_state->dst_mc_addr = 0; + accel_state->src_bo[0] = radeon_get_pixmap_bo(pSrc); + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = radeon_get_pixmap_bo(pDst); + if (accel_state->dst_bo == accel_state->src_bo[0]) + accel_state->same_surface = TRUE; + } else +#endif + { + accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; + accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) + accel_state->same_surface = TRUE; + } accel_state->src_width[0] = pSrc->drawable.width; accel_state->src_height[0] = pSrc->drawable.height; @@ -711,27 +771,48 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, accel_state->rop = rop; accel_state->planemask = planemask; - if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) { + if (accel_state->same_surface == TRUE) { unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8; - accel_state->same_surface = TRUE; - if (accel_state->copy_area) { - exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); - accel_state->copy_area = NULL; +#if defined(XF86DRM_MODE) + if (info->cs) { + if (accel_state->copy_area_bo) { + radeon_bo_unref(accel_state->copy_area_bo); + accel_state->copy_area_bo = NULL; + } + accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0, + RADEON_GEM_DOMAIN_VRAM, + 0); + if (accel_state->copy_area_bo == NULL) { + R600IBDiscard(pScrn, accel_state->ib); + return FALSE; + } + radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, + 0, RADEON_GEM_DOMAIN_VRAM); + if (radeon_cs_space_check(info->cs)) { + radeon_bo_unref(accel_state->copy_area_bo); + accel_state->copy_area_bo = NULL; + R600IBDiscard(pScrn, accel_state->ib); + return FALSE; + } + accel_state->copy_area = (void*)accel_state->copy_area_bo; + } else +#endif + { + if (accel_state->copy_area) { + exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); + accel_state->copy_area = NULL; + } + accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); } - accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); - } else { - accel_state->same_surface = FALSE; - + } else R600DoPrepareCopy(pScrn, accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height, - accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel, + accel_state->src_mc_addr[0], accel_state->src_bo[0], pSrc->drawable.bitsPerPixel, accel_state->dst_pitch, pDst->drawable.width, pDst->drawable.height, - accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel, + accel_state->dst_mc_addr, accel_state->dst_bo, pDst->drawable.bitsPerPixel, rop, planemask); - } - return TRUE; } @@ -759,9 +840,20 @@ R600OverlapCopy(PixmapPtr pDst, uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; int i, hchunk, vchunk; + struct radeon_bo *dst_bo = NULL; + +#if defined(XF86DRM_MODE) + if (info->cs) { + dst_offset = 0; + dst_bo = radeon_get_pixmap_bo(pDst); + radeon_cs_space_add_persistent_bo(info->cs, dst_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } +#endif - if (is_overlap(srcX, srcX + w, srcY, srcY + h, - dstX, dstX + w, dstY, dstY + h)) { + if (is_overlap(srcX, srcX + (w - 1), srcY, srcY + (h - 1), + dstX, dstX + (w - 1), dstY, dstY + (h - 1))) { /* Calculate height/width of non-overlapping area */ hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX); vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY); @@ -774,8 +866,10 @@ R600OverlapCopy(PixmapPtr pDst, if ((w / hchunk) <= (h / vchunk)) { /* reduce to horizontal */ if (srcY > dstY ) { /* diagonal up */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk); R600DoCopy(pScrn); @@ -784,8 +878,10 @@ R600OverlapCopy(PixmapPtr pDst, dstY = dstY + vchunk; } else { /* diagonal down */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk); R600DoCopy(pScrn); @@ -795,8 +891,10 @@ R600OverlapCopy(PixmapPtr pDst, } else { /* reduce to vertical */ if (srcX > dstX ) { /* diagonal left */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h); R600DoCopy(pScrn); @@ -805,8 +903,10 @@ R600OverlapCopy(PixmapPtr pDst, dstX = dstX + hchunk; } else { /* diagonal right */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h); R600DoCopy(pScrn); @@ -821,8 +921,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy right to left */ for (i = w; i > 0; i -= hchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h); R600DoCopy(pScrn); @@ -831,8 +933,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy left to right */ for (i = 0; i < w; i += hchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h); @@ -844,8 +948,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy top to bottom */ for (i = 0; i < h; i += vchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); if (vchunk > h - i) vchunk = h - i; @@ -856,8 +962,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy bottom to top */ for (i = h; i > 0; i -= vchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); if (vchunk > i) vchunk = i; @@ -868,8 +976,10 @@ R600OverlapCopy(PixmapPtr pDst, } } else { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); @@ -886,27 +996,46 @@ R600Copy(PixmapPtr pDst, ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_bo *bo = NULL; if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) return; - if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { +#if defined(XF86DRM_MODE) + if (info->cs) + bo = radeon_get_pixmap_bo(pDst); +#endif + + if (accel_state->same_surface && + is_overlap(srcX, srcX + (w - 1), srcY, srcY + (h - 1), + dstX, dstX + (w - 1), dstY, dstY + (h - 1))) { if (accel_state->copy_area) { uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); uint32_t orig_offset, tmp_offset; - tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; - orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; - +#if defined(XF86DRM_MODE) + if (info->cs) { + tmp_offset = 0; + orig_offset = 0; + } else +#endif + { + tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; + orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + } R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + orig_offset, bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + tmp_offset, accel_state->copy_area_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); R600DoCopy(pScrn); R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + tmp_offset, accel_state->copy_area_bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + orig_offset, bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); R600DoCopy(pScrn); @@ -914,11 +1043,20 @@ R600Copy(PixmapPtr pDst, R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h); } else if (accel_state->same_surface) { uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); - uint32_t offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + uint32_t offset; + +#if defined(XF86DRM_MODE) + if (info->cs) + offset = 0; + else +#endif + offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + offset, bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + offset, bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); R600DoCopy(pScrn); @@ -939,7 +1077,8 @@ R600DoneCopy(PixmapPtr pDst) R600DoCopy(pScrn); if (accel_state->copy_area) { - exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); + if (!info->cs) + exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); accel_state->copy_area = NULL; } @@ -1119,7 +1258,16 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, CLEAR (tex_res); CLEAR (tex_samp); - accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->src_mc_addr[unit] = 0; + accel_state->src_bo[unit] = radeon_get_pixmap_bo(pPix); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[unit], + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } else +#endif + accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * pPix->drawable.height; @@ -1138,7 +1286,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->src_size[unit], accel_state->src_mc_addr[unit]); + accel_state->src_size[unit], accel_state->src_mc_addr[unit], + accel_state->src_bo[unit], RADEON_GEM_DOMAIN_VRAM, 0); /* Texture */ tex_res.id = unit; @@ -1150,6 +1299,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, tex_res.base = accel_state->src_mc_addr[unit]; tex_res.mip_base = accel_state->src_mc_addr[unit]; tex_res.format = R600TexFormats[i].card_fmt; + tex_res.bo = accel_state->src_bo[unit]; + tex_res.mip_bo = accel_state->src_bo[unit]; tex_res.request_size = 1; /* component swizzles */ @@ -1418,8 +1569,12 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_config_t cb_conf; shader_config_t vs_conf, ps_conf; + //return FALSE; /* return FALSE; */ + if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8) + return FALSE; + if (pMask) { accel_state->msk_pic = pMaskPicture; if (pMaskPicture->componentAlpha) { @@ -1438,7 +1593,15 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, accel_state->src_alpha = FALSE; } - accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->dst_bo = radeon_get_pixmap_bo(pDst); + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + } else +#endif + accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height; @@ -1455,18 +1618,17 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, CLEAR (vs_conf); CLEAR (ps_conf); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); @@ -1488,16 +1650,31 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, if (pMask) { set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0)); - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->comp_mask_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->ps_mc_addr = accel_state->comp_mask_ps_offset; + else +#endif + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_mask_ps_offset; } else { set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0)); - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->ps_mc_addr = accel_state->comp_ps_offset; + else +#endif + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + accel_state->comp_ps_offset; } - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->comp_vs_offset; +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->vs_mc_addr = accel_state->comp_vs_offset; + else +#endif + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_vs_offset; accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -1506,16 +1683,19 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 3; vs_conf.stack_size = 1; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 3; @@ -1523,10 +1703,11 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); + BEGIN_BATCH(9); EREG(accel_state->ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); @@ -1540,12 +1721,14 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, PER_MRT_BLEND_bit)); EREG(accel_state->ib, CB_BLEND0_CONTROL, blendcntl); } + END_BATCH(); cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; cb_conf.h = pDst->drawable.height; cb_conf.base = accel_state->dst_mc_addr; cb_conf.format = dst_format; + cb_conf.bo = accel_state->dst_bo; switch (pDstPicture->format) { case PICT_a8r8g8b8: @@ -1566,13 +1749,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ + BEGIN_BATCH(21); if (pMask) { /* export 2 tex coords from VS */ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift)); @@ -1599,6 +1777,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, (0x01 << DEFAULT_VAL_shift) | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); + END_BATCH(); return TRUE; } @@ -1632,8 +1811,7 @@ static void R600Composite(PixmapPtr pDst, if (((accel_state->vb_index + 3) * 24) > accel_state->vb_total) { R600DoneComposite(pDst); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*24); @@ -1671,8 +1849,7 @@ static void R600Composite(PixmapPtr pDst, } else { if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoneComposite(pDst); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); @@ -1722,6 +1899,7 @@ static void R600DoneComposite(PixmapPtr pDst) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; } else { accel_state->vb_size = accel_state->vb_index * 16; vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -1729,6 +1907,7 @@ static void R600DoneComposite(PixmapPtr pDst) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; } /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || @@ -1737,12 +1916,14 @@ static void R600DoneComposite(PixmapPtr pDst) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); - set_vtx_resource (pScrn, accel_state->ib, &vtx_res); + set_vtx_resource(pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; @@ -1755,9 +1936,11 @@ static void R600DoneComposite(PixmapPtr pDst) wait_3d_idle_clean(pScrn, accel_state->ib); cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); + } Bool @@ -1774,6 +1957,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn, int scratch_offset = 0, hpass, temph; char *dst; drmBufPtr scratch; + struct radeon_bo *bo = NULL; if (dst_pitch & 7) return FALSE; @@ -1816,8 +2000,10 @@ R600CopyToVRAM(ScrnInfoPtr pScrn, } /* blit from scratch to vram */ R600DoPrepareCopy(pScrn, - scratch_pitch, w, oldhpass, offset, bpp, - dst_pitch, dst_width, dst_height, dst_mc_addr, bpp, + scratch_pitch, w, oldhpass, + offset, bo, bpp, + dst_pitch, dst_width, dst_height, + dst_mc_addr, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass); R600DoCopy(pScrn); @@ -1863,6 +2049,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); int wpass = w * (bpp/8); drmBufPtr scratch; + struct radeon_bo *bo = NULL; /* RV740 seems to be particularly problematic with small xfers */ if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32)) @@ -1880,8 +2067,10 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, /* blit from vram to scratch */ R600DoPrepareCopy(pScrn, - src_pitch, src_width, src_height, src_mc_addr, bpp, - scratch_pitch, src_width, hpass, scratch_mc_addr, bpp, + src_pitch, src_width, src_height, + src_mc_addr, bo, bpp, + scratch_pitch, src_width, hpass, + scratch_mc_addr, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); R600DoCopy(pScrn); @@ -1897,8 +2086,10 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, scratch_offset = scratch->total/2 - scratch_offset; /* blit from vram to scratch */ R600DoPrepareCopy(pScrn, - src_pitch, src_width, src_height, src_mc_addr, bpp, - scratch_pitch, src_width, hpass, scratch_mc_addr + scratch_offset, bpp, + src_pitch, src_width, src_height, + src_mc_addr, bo, bpp, + scratch_pitch, src_width, hpass, + scratch_mc_addr + scratch_offset, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); R600DoCopy(pScrn); @@ -1940,7 +2131,12 @@ R600Sync(ScreenPtr pScreen, int marker) struct radeon_accel_state *accel_state = info->accel_state; if (accel_state->exaMarkerSynced != marker) { - RADEONWaitForIdleCP(pScrn); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (!info->cs) +#endif +#endif + RADEONWaitForIdleCP(pScrn); accel_state->exaMarkerSynced = marker; } @@ -1957,11 +2153,27 @@ R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) accel_state->shaders = NULL; - accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, - TRUE, NULL, NULL); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->shaders_bo == NULL) { + ErrorF("Allocating shader failed\n"); + return FALSE; + } + return TRUE; + } else +#endif +#endif + { + accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, + TRUE, NULL, NULL); + + if (accel_state->shaders == NULL) + return FALSE; + } - if (accel_state->shaders == NULL) - return FALSE; return TRUE; } @@ -1972,8 +2184,21 @@ R600LoadShaders(ScrnInfoPtr pScrn) struct radeon_accel_state *accel_state = info->accel_state; RADEONChipFamily ChipSet = info->ChipFamily; uint32_t *shader; - - shader = (pointer)((char *)info->FB + accel_state->shaders->offset); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + int ret; + + if (info->cs) { + ret = radeon_bo_map(accel_state->shaders_bo, 1); + if (ret) { + FatalError("failed to map shader %d\n", ret); + return FALSE; + } + shader = accel_state->shaders_bo->ptr; + } else +#endif +#endif + shader = (pointer)((char *)info->FB + accel_state->shaders->offset); /* solid vs --------------------------------------- */ accel_state->solid_vs_offset = 0; @@ -2011,6 +2236,14 @@ R600LoadShaders(ScrnInfoPtr pScrn) accel_state->xv_ps_offset = 4096; R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + radeon_bo_unmap(accel_state->shaders_bo); + } +#endif +#endif + return TRUE; } @@ -2039,7 +2272,6 @@ R600FinishAccess(PixmapPtr pPix, int index) } - Bool R600DrawInit(ScreenPtr pScreen) { @@ -2065,19 +2297,45 @@ R600DrawInit(ScreenPtr pScreen) info->accel_state->exa->MarkSync = R600MarkSync; info->accel_state->exa->WaitMarker = R600Sync; - info->accel_state->exa->PrepareAccess = R600PrepareAccess; - info->accel_state->exa->FinishAccess = R600FinishAccess; - - /* AGP seems to have problems with gart transfers */ - if (info->accelDFS) { - info->accel_state->exa->UploadToScreen = R600UploadToScreen; - info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap; + info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap; + info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen; + info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS; + info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS; + info->accel_state->exa->UploadToScreen = NULL; + info->accel_state->exa->DownloadFromScreen = NULL; + } else +#endif +#endif + { + info->accel_state->exa->PrepareAccess = R600PrepareAccess; + info->accel_state->exa->FinishAccess = R600FinishAccess; + + /* AGP seems to have problems with gart transfers */ + if (info->accelDFS) { + info->accel_state->exa->UploadToScreen = R600UploadToScreen; + info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; + } } info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; #ifdef EXA_SUPPORTS_PREPARE_AUX info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX; #endif + +#ifdef XF86DRM_MODE +#ifdef EXA_HANDLES_PIXMAPS + if (info->cs) { + info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS; +//#ifdef EXA_MIXED_PIXMAPS +// info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS; +//#endif + } +#endif +#endif info->accel_state->exa->pixmapOffsetAlign = 256; info->accel_state->exa->pixmapPitchAlign = 256; @@ -2104,11 +2362,21 @@ R600DrawInit(ScreenPtr pScreen) return FALSE; } - if (!info->gartLocation) - return FALSE; +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (!info->cs) +#endif +#endif + if (!info->gartLocation) + return FALSE; info->accel_state->XInited3D = FALSE; info->accel_state->copy_area = NULL; + info->accel_state->src_bo[0] = NULL; + info->accel_state->src_bo[1] = NULL; + info->accel_state->dst_bo = NULL; + info->accel_state->copy_area_bo = NULL; + info->accel_state->vb_bo = NULL; if (!R600AllocShaders(pScrn, pScreen)) return FALSE; diff --git a/src/r600_state.h b/src/r600_state.h index 8f20e42..cb039d4 100644 --- a/src/r600_state.h +++ b/src/r600_state.h @@ -50,6 +50,7 @@ typedef struct { int round_mode; int tile_compact; int source_format; + struct radeon_bo *bo; } cb_config_t; /* Depth buffer */ @@ -63,6 +64,7 @@ typedef struct { int tile_surface_en; int tile_compact; int zrange_precision; + struct radeon_bo *bo; } db_config_t; /* Shader */ @@ -79,6 +81,7 @@ typedef struct { int clamp_consts; int export_mode; int uncached_first_inst; + struct radeon_bo *bo; } shader_config_t; /* Vertex buffer / vtx resource */ @@ -94,6 +97,7 @@ typedef struct { int srf_mode_all; int endian; int mem_req_size; + struct radeon_bo *bo; } vtx_resource_t; /* Texture resource */ @@ -129,6 +133,8 @@ typedef struct { int mpeg_clamp; int perf_modulation; int interlaced; + struct radeon_bo *bo; + struct radeon_bo *mip_bo; } tex_resource_t; /* Texture sampler */ @@ -170,12 +176,46 @@ typedef struct { uint32_t num_indices; } draw_config_t; +#if defined(XF86DRM_MODE) +#define BEGIN_BATCH(n) \ +do { \ + if (info->cs) \ + radeon_ddx_cs_start(pScrn, (n), __FILE__, __func__, __LINE__); \ +} while(0) +#define END_BATCH() \ +do { \ + if (info->cs) \ + radeon_cs_end(info->cs, __FILE__, __func__, __LINE__); \ +} while(0) +#define RELOC_BATCH(bo, rd, wd) \ +do { \ + if (info->cs) { \ + int _ret; \ + _ret = radeon_cs_write_reloc(info->cs, (bo), (rd), (wd), 0); \ + if (_ret) ErrorF("reloc emit failure %d (%s %d)\n", _ret, __func__, __LINE__); \ + } \ +} while(0) +#define E32(ib, dword) \ +do { \ + if (info->cs) \ + radeon_cs_write_dword(info->cs, (dword)); \ + else { \ + uint32_t *ib_head = (pointer)(char*)(ib)->address; \ + ib_head[(ib)->used >> 2] = (dword); \ + (ib)->used += 4; \ + } \ +} while (0) +#else +#define BEGIN_BATCH(n) do {} while(0) +#define END_BATCH() do {} while(0) +#define RELOC_BATCH(bo, wd, rd) do {} while(0) #define E32(ib, dword) \ do { \ uint32_t *ib_head = (pointer)(char*)(ib)->address; \ ib_head[(ib)->used >> 2] = (dword); \ (ib)->used += 4; \ } while (0) +#endif #define EFLOAT(ib, val) \ do { \ @@ -243,7 +283,8 @@ start_3d(ScrnInfoPtr pScrn, drmBufPtr ib); void set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf); void -cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr); +cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr, + struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain); void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, int crtc, int start, int stop); void @@ -279,9 +320,18 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i void draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf); -void +Bool r600_vb_get(ScrnInfoPtr pScrn); void r600_vb_discard(ScrnInfoPtr pScrn); +int +r600_cp_start(ScrnInfoPtr pScrn); + +extern Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index); +extern void RADEONFinishAccess_CS(PixmapPtr pPix, int index); +extern void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align); +extern void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv); +extern struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix); +extern Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix); #endif diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 5dc79c9..b6f7b39 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -80,10 +80,12 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -91,6 +93,7 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; @@ -105,7 +108,8 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) /* sync destination surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -216,6 +220,19 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) CLEAR (vs_conf); CLEAR (ps_conf); +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->src_mc_addr[0] = 0; + accel_state->src_bo[0] = pPriv->src_bo; + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = radeon_get_pixmap_bo(pPixmap); + } else +#endif + { + accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; + accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; + } accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); accel_state->src_pitch[0] = pPriv->src_pitch; @@ -233,27 +250,33 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) dstyoff = 0; #endif - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->xv_vs_offset; - - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->xv_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->xv_vs_offset; + accel_state->ps_mc_addr = accel_state->xv_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->xv_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->xv_ps_offset; + } /* PS bool constant */ switch(pPriv->id) { @@ -275,16 +298,19 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 3; @@ -292,6 +318,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); /* PS alu constants */ @@ -302,12 +329,12 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) switch(pPriv->id) { case FOURCC_YV12: case FOURCC_I420: - accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], - accel_state->src_mc_addr[0]); + accel_state->src_mc_addr[0], + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); /* Y texture */ tex_res.id = 0; @@ -318,6 +345,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; + tex_res.bo = accel_state->src_bo[0]; + tex_res.mip_bo = accel_state->src_bo[0]; tex_res.format = FMT_8; tex_res.dst_sel_x = SQ_SEL_X; /* Y */ @@ -349,7 +378,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* U or V texture */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0] / 4, - accel_state->src_mc_addr[0] + pPriv->planev_offset); + accel_state->src_mc_addr[0] + pPriv->planev_offset, + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); tex_res.id = 1; tex_res.format = FMT_8; @@ -373,7 +403,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* U or V texture */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0] / 4, - accel_state->src_mc_addr[0] + pPriv->planeu_offset); + accel_state->src_mc_addr[0] + pPriv->planeu_offset, + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); tex_res.id = 2; tex_res.format = FMT_8; @@ -397,12 +428,12 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) case FOURCC_UYVY: case FOURCC_YUY2: default: - accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], - accel_state->src_mc_addr[0]); + accel_state->src_mc_addr[0], + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); /* Y texture */ tex_res.id = 0; @@ -413,6 +444,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; + tex_res.bo = accel_state->src_bo[0]; + tex_res.mip_bo = accel_state->src_bo[0]; tex_res.format = FMT_8_8; if (pPriv->id == FOURCC_UYVY) @@ -472,17 +505,16 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) } /* Render setup */ + BEGIN_BATCH(6); EREG(accel_state->ib, CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); EREG(accel_state->ib, CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */ + END_BATCH(); cb_conf.id = 0; - - accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; - cb_conf.w = accel_state->dst_pitch; cb_conf.h = pPixmap->drawable.height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; switch (pPixmap->drawable.bitsPerPixel) { case 16: @@ -506,14 +538,9 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ /* export tex coords from VS */ + BEGIN_BATCH(18); EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); @@ -525,7 +552,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) (0x03 << DEFAULT_VAL_shift) | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); - + END_BATCH(); vs_alu_consts[0] = 1.0 / pPriv->w; vs_alu_consts[1] = 1.0 / pPriv->h; @@ -559,8 +586,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoneTexturedVideo(pScrn); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index 0457f7d..985595e 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -39,6 +39,29 @@ #include "radeon_drm.h" +#if defined(XF86DRM_MODE) +void r600_cs_flush_indirect(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + int ret; + + if (!info->cs->cdw) + return; + + if (info->accel_state->vb_bo) { + radeon_bo_unmap(info->accel_state->vb_bo); + info->accel_state->vb_ptr = NULL; + } + + radeon_cs_emit(info->cs); + radeon_cs_erase(info->cs); + + ret = radeon_cs_space_check(info->cs); + if (ret) + ErrorF("space check failed in flush\n"); +} +#endif + /* Flush the indirect buffer to the kernel for submission to the card */ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) { @@ -47,13 +70,22 @@ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) int start = 0; drm_radeon_indirect_t indirect; +#if defined(XF86DRM_MODE) + if (info->cs) { + r600_cs_flush_indirect(pScrn); + return; + } +#endif + if (!buffer) return; //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", // buffer->idx); while (buffer->used & 0x3c){ + BEGIN_BATCH(1); E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */ + END_BATCH(); } //ErrorF("buffer bytes: %d\n", buffer->used); @@ -70,6 +102,23 @@ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) { +#if defined(XF86DRM_MODE) + int ret; + RADEONInfoPtr info = RADEONPTR(pScrn); + if (info->cs) { + if (info->accel_state->vb_bo) + radeon_bo_unmap(info->accel_state->vb_bo); + info->accel_state->vb_bo = NULL; + if (CS_FULL(info->cs)) { + r600_cs_flush_indirect(pScrn); + return; + } + radeon_cs_erase(info->cs); + ret = radeon_cs_space_check(info->cs); + if (ret) + ErrorF("space check failed in flush\n"); + } +#endif if (!ib) return; ib->used = 0; @@ -79,21 +128,26 @@ void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) void wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) { + RADEONInfoPtr info = RADEONPTR(pScrn); //flush caches, don't generate timestamp + BEGIN_BATCH(5); PACK3(ib, IT_EVENT_WRITE, 1); E32(ib, CACHE_FLUSH_AND_INV_EVENT); // wait for 3D idle clean EREG(ib, WAIT_UNTIL, (WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit)); + END_BATCH(); } void wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(3); EREG(ib, WAIT_UNTIL, WAIT_3D_IDLE_bit); - + END_BATCH(); } void @@ -102,13 +156,16 @@ start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) RADEONInfoPtr info = RADEONPTR(pScrn); if (info->ChipFamily < CHIP_FAMILY_RV770) { + BEGIN_BATCH(5); PACK3(ib, IT_START_3D_CMDBUF, 1); E32(ib, 0); - } + } else + BEGIN_BATCH(3); PACK3(ib, IT_CONTEXT_CONTROL, 2); E32(ib, 0x80000000); E32(ib, 0x80000000); + END_BATCH(); wait_3d_idle_clean (pScrn, ib); } @@ -158,6 +215,7 @@ sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); + BEGIN_BATCH(8); PACK0(ib, SQ_CONFIG, 6); E32(ib, sq_config); E32(ib, sq_gpr_resource_mgmt_1); @@ -165,7 +223,7 @@ sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) E32(ib, sq_thread_resource_mgmt); E32(ib, sq_stack_resource_mgmt_1); E32(ib, sq_stack_resource_mgmt_2); - + END_BATCH(); } void @@ -204,14 +262,19 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) h = (cb_conf->h + 7) & ~7; slice = ((cb_conf->w * h) / 64) - 1; + BEGIN_BATCH(3 + 2); EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); + RELOC_BATCH(cb_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); // rv6xx workaround if ((info->ChipFamily > CHIP_FAMILY_R600) && (info->ChipFamily < CHIP_FAMILY_RV770)) { + BEGIN_BATCH(20); PACK3(ib, IT_SURFACE_BASE_UPDATE, 1); E32(ib, (2 << cb_conf->id)); - } + } else + BEGIN_BATCH(18); // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | @@ -223,22 +286,28 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | (0 << FMASK_TILE_MAX_shift))); + END_BATCH(); } void -cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr) +cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr, + struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t cp_coher_size; if (size == 0xffffffff) cp_coher_size = 0xffffffff; else cp_coher_size = ((size + 255) >> 8); + BEGIN_BATCH(5 + 2); PACK3(ib, IT_SURFACE_SYNC, 4); E32(ib, sync_type); E32(ib, cp_coher_size); E32(ib, (mc_addr >> 8)); E32(ib, 10); /* poll interval */ + RELOC_BATCH(bo, rdomains, wdomain); + END_BATCH(); } /* inserts a wait for vline in the command stream */ @@ -250,6 +319,12 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, uint32_t offset; RADEONCrtcPrivatePtr radeon_crtc; + //XXX FIXME +#if defined(XF86DRM_MODE) + if (info->cs) + return; +#endif + if ((crtc < 0) || (crtc > 1)) return; @@ -278,6 +353,7 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, radeon_crtc = xf86_config->crtc[crtc]->driver_private; + BEGIN_BATCH(10); /* set the VLINE range */ EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset, (start << AVIVO_D1MODE_VLINE_START_SHIFT) | @@ -291,11 +367,13 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, E32(ib, 0); // Ref value E32(ib, AVIVO_D1MODE_VLINE_STAT); // Mask E32(ib, 10); // Wait interval + END_BATCH(); } void fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | @@ -304,14 +382,21 @@ fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) if (fs_conf->dx10_clamp) sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; + BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8); + RELOC_BATCH(fs_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(6); EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources); EREG(ib, SQ_PGM_CF_OFFSET_FS, 0); + END_BATCH(); } void vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | @@ -324,14 +409,21 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) if (vs_conf->uncached_first_inst) sq_pgm_resources |= UNCACHED_FIRST_INST_bit; + BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); + RELOC_BATCH(vs_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(6); EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources); EREG(ib, SQ_PGM_CF_OFFSET_VS, 0); + END_BATCH(); } void ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | @@ -346,35 +438,48 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) if (ps_conf->clamp_consts) sq_pgm_resources |= CLAMP_CONSTS_bit; + BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); + RELOC_BATCH(ps_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(9); EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources); EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode); EREG(ib, SQ_PGM_CF_OFFSET_PS, 0); + END_BATCH(); } void set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf) { + RADEONInfoPtr info = RADEONPTR(pScrn); int i; const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); + BEGIN_BATCH(2 + countreg); PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); for (i = 0; i < countreg; i++) EFLOAT(ib, const_buf[i]); + END_BATCH(); } void set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) { + RADEONInfoPtr info = RADEONPTR(pScrn); /* bool register order is: ps, vs, gs; one register each * 1 bits per bool; 32 bools each for ps, vs, gs. */ + BEGIN_BATCH(3); EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); + END_BATCH(); } void set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_vtx_constant_word2; sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | @@ -391,6 +496,7 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) if (res->srf_mode_all) sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; + BEGIN_BATCH(9 + 2); PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); E32(ib, res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS E32(ib, (res->vtx_num_entries << 2) - 1); // 1: SIZE @@ -399,11 +505,14 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) E32(ib, 0); // 4: n/a E32(ib, 0); // 5: n/a E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE + RELOC_BATCH(res->bo, RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); } void set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; uint32_t sq_tex_resource_word5, sq_tex_resource_word6; @@ -453,6 +562,7 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) if (tex_res->interlaced) sq_tex_resource_word6 |= INTERLACED_bit; + BEGIN_BATCH(9 + 4); PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); E32(ib, sq_tex_resource_word0); E32(ib, sq_tex_resource_word1); @@ -461,11 +571,15 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) E32(ib, sq_tex_resource_word4); E32(ib, sq_tex_resource_word5); E32(ib, sq_tex_resource_word6); + RELOC_BATCH(tex_res->bo, RADEON_GEM_DOMAIN_VRAM, 0); + RELOC_BATCH(tex_res->mip_bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); } void set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | @@ -505,27 +619,34 @@ set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) if (s->type) sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; + BEGIN_BATCH(5); PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); E32(ib, sq_tex_sampler_word0); E32(ib, sq_tex_sampler_word1); E32(ib, sq_tex_sampler_word2); + END_BATCH(); } //XXX deal with clip offsets in clip setup void set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_SCREEN_SCISSOR_TL, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); EREG(ib, PA_SC_SCREEN_SCISSOR_BR, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); + END_BATCH(); } void set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | @@ -533,40 +654,50 @@ set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x EREG(ib, PA_SC_VPORT_SCISSOR_0_BR + id * PA_SC_VPORT_SCISSOR_0_BR_offset, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); + END_BATCH(); } void set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | WINDOW_OFFSET_DISABLE_bit)); EREG(ib, PA_SC_GENERIC_SCISSOR_BR, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); + END_BATCH(); } void set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_WINDOW_SCISSOR_TL, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | WINDOW_OFFSET_DISABLE_bit)); EREG(ib, PA_SC_WINDOW_SCISSOR_BR, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); + END_BATCH(); } void set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); EREG(ib, PA_SC_CLIPRECT_0_BR + id * PA_SC_CLIPRECT_0_BR_offset, ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); + END_BATCH(); } /* @@ -594,6 +725,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) wait_3d_idle(pScrn, ib); // ASIC specific setup, see drm + BEGIN_BATCH(15); if (info->ChipFamily < CHIP_FAMILY_RV770) { EREG(ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | (28 << TD_FIFO_CREDIT_shift))); @@ -619,6 +751,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) (4 << DEPTH_CACHELINE_FREE_shift) | 0)); } + END_BATCH(); // SQ sq_conf.ps_prio = 0; @@ -744,6 +877,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) sq_setup(pScrn, ib, &sq_conf); + BEGIN_BATCH(59); EREG(ib, SQ_VTX_BASE_VTX_LOC, 0); EREG(ib, SQ_VTX_START_INST_LOC, 0); @@ -775,6 +909,11 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) (2 << ALPHA_TO_MASK_OFFSET2_shift) | (2 << ALPHA_TO_MASK_OFFSET3_shift))); + + EREG(ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ + DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ + + // SX EREG(ib, SX_ALPHA_TEST_CONTROL, 0); EREG(ib, SX_ALPHA_REF, 0); @@ -785,68 +924,86 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) E32(ib, 0x00000000); E32(ib, 0x00000000); E32(ib, 0x00000000); + END_BATCH(); if (info->ChipFamily < CHIP_FAMILY_RV770) { + BEGIN_BATCH(11); PACK0(ib, CB_FOG_RED, 3); E32(ib, 0x00000000); E32(ib, 0x00000000); E32(ib, 0x00000000); + PACK0(ib, CB_CLEAR_RED, 4); + EFLOAT(ib, 1.0); /* WTF? */ + EFLOAT(ib, 0.0); + EFLOAT(ib, 1.0); + EFLOAT(ib, 1.0); + END_BATCH(); } + BEGIN_BATCH(18); PACK0(ib, CB_CLRCMP_CONTROL, 4); E32(ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC E32(ib, 0); // CB_CLRCMP_SRC E32(ib, 0); // CB_CLRCMP_DST E32(ib, 0); // CB_CLRCMP_MSK - - if (info->ChipFamily < CHIP_FAMILY_RV770) { - PACK0(ib, CB_CLEAR_RED, 4); - EFLOAT(ib, 1.0); /* WTF? */ - EFLOAT(ib, 0.0); - EFLOAT(ib, 1.0); - EFLOAT(ib, 1.0); - } EREG(ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift)); + EREG(ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + // SC EREG(ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | (0 << WINDOW_Y_OFFSET_shift))); EREG(ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); + END_BATCH(); /* clip boolean is set to always visible -> doesn't matter */ for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192); + BEGIN_BATCH(3); if (info->ChipFamily < CHIP_FAMILY_RV770) EREG(ib, R7xx_PA_SC_EDGERULE, 0x00000000); else EREG(ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); + END_BATCH(); for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) { set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192); + BEGIN_BATCH(4); PACK0(ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2); EFLOAT(ib, 0.0); EFLOAT(ib, 1.0); + END_BATCH(); } + BEGIN_BATCH(15); if (info->ChipFamily < CHIP_FAMILY_RV770) EREG(ib, PA_SC_MODE_CNTL, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); else EREG(ib, PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | 0x00500000)); /* ? */ + EREG(ib, PA_SU_SC_MODE_CNTL, (FACE_bit | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); + + EREG(ib, PA_SC_LINE_CNTL, 0); EREG(ib, PA_SC_AA_CONFIG, 0); EREG(ib, PA_SC_AA_MASK, 0xFFFFFFFF); + END_BATCH(); //XXX: double check this if (info->ChipFamily > CHIP_FAMILY_R600) { + BEGIN_BATCH(6); EREG(ib, PA_SC_AA_SAMPLE_LOCS_MCTX, 0); EREG(ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M, 0); + END_BATCH(); } + BEGIN_BATCH(83); EREG(ib, PA_SC_LINE_STIPPLE, 0); EREG(ib, PA_SC_MPASS_PS_CNTL, 0); @@ -867,6 +1024,10 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ + /* Scissor / viewport */ + EREG(ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + EREG(ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); + // SU EREG(ib, PA_SU_SC_MODE_CNTL, FACE_bit); EREG(ib, PA_SU_POINT_SIZE, 0); @@ -892,17 +1053,19 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) EREG(ib, SPI_FOG_CNTL, 0); EREG(ib, SPI_FOG_FUNC_SCALE, 0); EREG(ib, SPI_FOG_FUNC_BIAS, 0); + END_BATCH(); // clear FS + fs_conf.bo = accel_state->shaders_bo; fs_setup(pScrn, ib, &fs_conf); // VGT + BEGIN_BATCH(75); EREG(ib, VGT_MAX_VTX_INDX, 2048); /* XXX set to a reasonably large number of indices */ EREG(ib, VGT_MIN_VTX_INDX, 0); EREG(ib, VGT_INDX_OFFSET, 0); EREG(ib, VGT_INSTANCE_STEP_RATE_0, 0); EREG(ib, VGT_INSTANCE_STEP_RATE_1, 0); - EREG(ib, VGT_MULTI_PRIM_IB_RESET_INDX, 0); EREG(ib, VGT_OUTPUT_PATH_CNTL, 0); EREG(ib, VGT_GS_MODE, 0); @@ -923,7 +1086,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) EREG(ib, VGT_REUSE_OFF, 0); EREG(ib, VGT_VTX_CNT_EN, 0); EREG(ib, VGT_STRMOUT_BUFFER_EN, 0); - + END_BATCH(); } @@ -934,14 +1097,9 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) void draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t i, count; - EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); - PACK3(ib, IT_INDEX_TYPE, 1); - E32(ib, draw_conf->index_type); - PACK3(ib, IT_NUM_INSTANCES, 1); - E32(ib, draw_conf->num_instances); - // calculate num of packets count = 2; if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) @@ -949,6 +1107,13 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i else count += draw_conf->num_indices; + BEGIN_BATCH(8 + count); + EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); + PACK3(ib, IT_INDEX_TYPE, 1); + E32(ib, draw_conf->index_type); + PACK3(ib, IT_NUM_INSTANCES, 1); + E32(ib, draw_conf->num_instances); + PACK3(ib, IT_DRAW_INDEX_IMMD, count); E32(ib, draw_conf->num_indices); E32(ib, draw_conf->vgt_draw_initiator); @@ -964,12 +1129,15 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i for (i = 0; i < draw_conf->num_indices; i++) E32(ib, indices[i]); } + END_BATCH(); } void draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(10); EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); PACK3(ib, IT_INDEX_TYPE, 1); E32(ib, draw_conf->index_type); @@ -978,24 +1146,89 @@ draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) PACK3(ib, IT_DRAW_INDEX_AUTO, 2); E32(ib, draw_conf->num_indices); E32(ib, draw_conf->vgt_draw_initiator); + END_BATCH(); } -void +Bool r600_vb_get(ScrnInfoPtr pScrn) { RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; - - accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + - (accel_state->ib->idx * accel_state->ib->total) + - (accel_state->ib->total / 2); - accel_state->vb_total = (accel_state->ib->total / 2); - accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address + - (accel_state->ib->total / 2)); +#if defined(XF86DRM_MODE) + int ret; + if (info->cs) { + if (accel_state->vb_bo == NULL) { + accel_state->vb_mc_addr = 0; + accel_state->vb_bo = radeon_bo_open(info->bufmgr, 0, 16 * 1024, + 0, RADEON_GEM_DOMAIN_GTT, 0); + if (accel_state->vb_bo == NULL) + return FALSE; + accel_state->vb_total = 16 * 1024; + } + if (!accel_state->vb_ptr) { + ret = radeon_bo_map(accel_state->vb_bo, 1); + if (ret) { + FatalError("failed to vb %d\n", ret); + return FALSE; + } + accel_state->vb_ptr = accel_state->vb_bo->ptr; + } + } else +#endif + { + accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + + (accel_state->ib->idx*accel_state->ib->total)+ + (accel_state->ib->total / 2); + accel_state->vb_total = (accel_state->ib->total / 2); + accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address + + (accel_state->ib->total / 2)); + } accel_state->vb_index = 0; + return TRUE; } void r600_vb_discard(ScrnInfoPtr pScrn) { } + +int +r600_cp_start(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + +#if defined(XF86DRM_MODE) + if (info->cs) { + if (!r600_vb_get(pScrn)) + return -1; + radeon_cs_space_reset_bos(info->cs); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->src_bo[0]) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[0], + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->src_bo[1]) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[1], + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->dst_bo) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->vb_bo) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->vb_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (accel_state->copy_area_bo) + radeon_cs_space_add_persistent_bo(info->cs, + accel_state->copy_area_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } else +#endif + { + accel_state->ib = RADEONCPGetBuffer(pScrn); + if (!r600_vb_get(pScrn)) { + return -1; + } + } + return 0; +} diff --git a/src/radeon.h b/src/radeon.h index 3a3631e..7fdd8f5 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -696,9 +696,11 @@ struct radeon_accel_state { int vb_total; void *vb_ptr; uint32_t vb_size; + struct radeon_bo *vb_bo; // shader storage ExaOffscreenArea *shaders; + struct radeon_bo *shaders_bo; uint32_t solid_vs_offset; uint32_t solid_ps_offset; uint32_t copy_vs_offset; @@ -710,12 +712,14 @@ struct radeon_accel_state { uint32_t xv_ps_offset; //size/addr stuff + struct radeon_bo *src_bo[2]; uint32_t src_size[2]; uint64_t src_mc_addr[2]; uint32_t src_pitch[2]; uint32_t src_width[2]; uint32_t src_height[2]; uint32_t src_bpp[2]; + struct radeon_bo *dst_bo; uint32_t dst_size; uint64_t dst_mc_addr; uint32_t dst_pitch; @@ -731,6 +735,7 @@ struct radeon_accel_state { // copy ExaOffscreenArea *copy_area; + struct radeon_bo *copy_area_bo; Bool same_surface; int rop; uint32_t planemask; diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c index 051cc78..e94107c 100644 --- a/src/radeon_dri2.c +++ b/src/radeon_dri2.c @@ -362,7 +362,9 @@ radeon_dri2_screen_init(ScreenPtr pScreen) return FALSE; } - if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) { + if ( (info->ChipFamily >= CHIP_FAMILY_R600) ) { + dri2_info.driverName = R600_DRIVER_NAME; + } else if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) { dri2_info.driverName = R300_DRIVER_NAME; } else if ( info->ChipFamily >= CHIP_FAMILY_R200 ) { dri2_info.driverName = R200_DRIVER_NAME; diff --git a/src/radeon_exa.c b/src/radeon_exa.c index ca4c523..7b9164e 100644 --- a/src/radeon_exa.c +++ b/src/radeon_exa.c @@ -336,7 +336,7 @@ static void RADEONFinishAccess_BE(PixmapPtr pPix, int index) #endif /* X_BYTE_ORDER == X_BIG_ENDIAN */ #ifdef XF86DRM_MODE -static Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) +Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) { ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; struct radeon_exa_pixmap_priv *driver_priv; @@ -362,7 +362,7 @@ static Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) return TRUE; } -static void RADEONFinishAccess_CS(PixmapPtr pPix, int index) +void RADEONFinishAccess_CS(PixmapPtr pPix, int index) { struct radeon_exa_pixmap_priv *driver_priv; @@ -454,7 +454,7 @@ void *RADEONEXACreatePixmap2(ScreenPtr pScreen, int width, int height, return new_priv; } -static void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv) +void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv) { struct radeon_exa_pixmap_priv *driver_priv = driverPriv; @@ -487,7 +487,7 @@ void radeon_set_pixmap_bo(PixmapPtr pPix, struct radeon_bo *bo) } } -static Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix) +Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix) { struct radeon_exa_pixmap_priv *driver_priv; diff --git a/src/radeon_kms.c b/src/radeon_kms.c index cd398c6..faa0cfd 100644 --- a/src/radeon_kms.c +++ b/src/radeon_kms.c @@ -180,7 +180,7 @@ static Bool RADEONPreInitAccel_KMS(ScrnInfoPtr pScrn) xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to allocate accel_state rec!\n"); return FALSE; } - +#if 0 if (info->ChipFamily >= CHIP_FAMILY_R600) { xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Using shadowfb for KMS on R600+\n"); @@ -189,7 +189,7 @@ static Bool RADEONPreInitAccel_KMS(ScrnInfoPtr pScrn) info->r600_shadow_fb = FALSE; return TRUE; } - +#endif if ((info->ChipFamily == CHIP_FAMILY_RS100) || (info->ChipFamily == CHIP_FAMILY_RS200) || |