From b6368cc572c79bce9a9366242c727c13cab3f006 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Aug 2009 12:14:33 -0400 Subject: r6xx/r7xx: move more common state to default state setup --- src/r600_exa.c | 33 --------------------------------- src/r600_textured_videofuncs.c | 11 ----------- src/r6xx_accel.c | 16 ++++++++++++++++ 3 files changed, 16 insertions(+), 44 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index 3e77515..0a9a0c6 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -164,10 +164,6 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); @@ -212,7 +208,6 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) if (pm & 0xff000000) pmask |= 8; /* A */ EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[alu]); cb_conf.id = 0; @@ -234,12 +229,6 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift)); @@ -408,10 +397,6 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); set_window_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); @@ -513,7 +498,6 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, if (planemask & 0xff000000) pmask |= 8; /* A */ EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]); accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); @@ -540,12 +524,6 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ /* export tex coord from VS */ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); @@ -1463,10 +1441,6 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); @@ -1526,7 +1500,6 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, ps_setup (pScrn, accel_state->ib, &ps_conf); EREG(accel_state->ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); @@ -1566,12 +1539,6 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ if (pMask) { /* export 2 tex coords from VS */ diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 5dc79c9..4502ab3 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -241,10 +241,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); @@ -473,7 +469,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* Render setup */ EREG(accel_state->ib, CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); EREG(accel_state->ib, CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */ cb_conf.id = 0; @@ -506,12 +501,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ /* export tex coords from VS */ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index 0457f7d..55188a4 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -775,6 +775,11 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) (2 << ALPHA_TO_MASK_OFFSET2_shift) | (2 << ALPHA_TO_MASK_OFFSET3_shift))); + + EREG(ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ + DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ + + // SX EREG(ib, SX_ALPHA_TEST_CONTROL, 0); EREG(ib, SX_ALPHA_REF, 0); @@ -808,6 +813,8 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) EFLOAT(ib, 1.0); } EREG(ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift)); + EREG(ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + // SC EREG(ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | @@ -837,6 +844,11 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) EREG(ib, PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | 0x00500000)); /* ? */ + EREG(ib, PA_SU_SC_MODE_CNTL, (FACE_bit | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); + + EREG(ib, PA_SC_LINE_CNTL, 0); EREG(ib, PA_SC_AA_CONFIG, 0); EREG(ib, PA_SC_AA_MASK, 0xFFFFFFFF); @@ -867,6 +879,10 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ EFLOAT(ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ + /* Scissor / viewport */ + EREG(ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + EREG(ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); + // SU EREG(ib, PA_SU_SC_MODE_CNTL, FACE_bit); EREG(ib, PA_SU_POINT_SIZE, 0); -- cgit v1.2.3 From 9cf965bbc977f0523437c0ecf1d7363b17de2468 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Aug 2009 12:47:34 -0400 Subject: R6xx/r7xx: add begin/end batch macros --- src/r600_exa.c | 12 +++++ src/r600_state.h | 3 ++ src/r600_textured_videofuncs.c | 5 +- src/r6xx_accel.c | 107 ++++++++++++++++++++++++++++++++--------- 4 files changed, 102 insertions(+), 25 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index 0a9a0c6..555748b 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -207,8 +207,10 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) pmask |= 1; /* R */ if (pm & 0xff000000) pmask |= 8; /* A */ + BEGIN_BATCH(6); EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[alu]); + END_BATCH(); cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; @@ -231,6 +233,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) /* Interpolator setup */ /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */ + BEGIN_BATCH(18); EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift)); EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); @@ -245,6 +248,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) FLAT_SHADE_bit | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit); + END_BATCH(); /* PS alu constants */ if (pPix->drawable.bitsPerPixel == 16) { @@ -497,8 +501,10 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, pmask |= 1; /* R */ if (planemask & 0xff000000) pmask |= 8; /* A */ + BEGIN_BATCH(6); EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]); + END_BATCH(); accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); accel_state->dst_mc_addr = dst_offset; @@ -526,6 +532,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, /* Interpolator setup */ /* export tex coord from VS */ + BEGIN_BATCH(18); EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); @@ -539,6 +546,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, (0x01 << DEFAULT_VAL_shift) | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); + END_BATCH(); } static void @@ -1499,6 +1507,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, ps_conf.export_mode = 2; ps_setup (pScrn, accel_state->ib, &ps_conf); + BEGIN_BATCH(12); EREG(accel_state->ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift)); blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); @@ -1513,6 +1522,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, PER_MRT_BLEND_bit)); EREG(accel_state->ib, CB_BLEND0_CONTROL, blendcntl); } + END_BATCH(); cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; @@ -1540,6 +1550,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, set_render_target(pScrn, accel_state->ib, &cb_conf); /* Interpolator setup */ + BEGIN_BATCH(21); if (pMask) { /* export 2 tex coords from VS */ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift)); @@ -1566,6 +1577,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, (0x01 << DEFAULT_VAL_shift) | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); + END_BATCH(); return TRUE; } diff --git a/src/r600_state.h b/src/r600_state.h index 8f20e42..10b1022 100644 --- a/src/r600_state.h +++ b/src/r600_state.h @@ -170,6 +170,9 @@ typedef struct { uint32_t num_indices; } draw_config_t; +#define BEGIN_BATCH(n) do {} while(0) +#define END_BATCH() do {} while(0) + #define E32(ib, dword) \ do { \ uint32_t *ib_head = (pointer)(char*)(ib)->address; \ diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 4502ab3..6739616 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -468,8 +468,10 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) } /* Render setup */ + BEGIN_BATCH(6); EREG(accel_state->ib, CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift)); EREG(accel_state->ib, CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */ + END_BATCH(); cb_conf.id = 0; @@ -503,6 +505,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* Interpolator setup */ /* export tex coords from VS */ + BEGIN_BATCH(18); EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); @@ -514,7 +517,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) (0x03 << DEFAULT_VAL_shift) | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); - + END_BATCH(); vs_alu_consts[0] = 1.0 / pPriv->w; vs_alu_consts[1] = 1.0 / pPriv->h; diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index 55188a4..059c3cc 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -53,7 +53,9 @@ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) // buffer->idx); while (buffer->used & 0x3c){ + BEGIN_BATCH(); E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */ + END_BATCH(); } //ErrorF("buffer bytes: %d\n", buffer->used); @@ -81,19 +83,21 @@ wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) { //flush caches, don't generate timestamp + BEGIN_BATCH(5); PACK3(ib, IT_EVENT_WRITE, 1); E32(ib, CACHE_FLUSH_AND_INV_EVENT); // wait for 3D idle clean EREG(ib, WAIT_UNTIL, (WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit)); + END_BATCH(); } void wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) { - + BEGIN_BATCH(3); EREG(ib, WAIT_UNTIL, WAIT_3D_IDLE_bit); - + END_BATCH(); } void @@ -102,13 +106,16 @@ start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) RADEONInfoPtr info = RADEONPTR(pScrn); if (info->ChipFamily < CHIP_FAMILY_RV770) { + BEGIN_BATCH(5); PACK3(ib, IT_START_3D_CMDBUF, 1); E32(ib, 0); - } + } else + BEGIN_BATCH(3); PACK3(ib, IT_CONTEXT_CONTROL, 2); E32(ib, 0x80000000); E32(ib, 0x80000000); + END_BATCH(); wait_3d_idle_clean (pScrn, ib); } @@ -158,6 +165,7 @@ sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); + BEGIN_BATCH(8); PACK0(ib, SQ_CONFIG, 6); E32(ib, sq_config); E32(ib, sq_gpr_resource_mgmt_1); @@ -165,7 +173,7 @@ sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) E32(ib, sq_thread_resource_mgmt); E32(ib, sq_stack_resource_mgmt_1); E32(ib, sq_stack_resource_mgmt_2); - + END_BATCH(); } void @@ -204,6 +212,11 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) h = (cb_conf->h + 7) & ~7; slice = ((cb_conf->w * h) / 64) - 1; + if ((info->ChipFamily > CHIP_FAMILY_R600) && + (info->ChipFamily < CHIP_FAMILY_RV770)) + BEGIN_BATCH(23); + else + BEGIN_BATCH(21); EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); // rv6xx workaround @@ -223,6 +236,7 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | (0 << FMASK_TILE_MAX_shift))); + END_BATCH(); } void @@ -234,11 +248,13 @@ cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_ else cp_coher_size = ((size + 255) >> 8); + BEGIN_BATCH(5); PACK3(ib, IT_SURFACE_SYNC, 4); E32(ib, sync_type); E32(ib, cp_coher_size); E32(ib, (mc_addr >> 8)); E32(ib, 10); /* poll interval */ + END_BATCH(); } /* inserts a wait for vline in the command stream */ @@ -278,6 +294,7 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, radeon_crtc = xf86_config->crtc[crtc]->driver_private; + BEGIN_BATCH(10); /* set the VLINE range */ EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset, (start << AVIVO_D1MODE_VLINE_START_SHIFT) | @@ -291,6 +308,7 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, E32(ib, 0); // Ref value E32(ib, AVIVO_D1MODE_VLINE_STAT); // Mask E32(ib, 10); // Wait interval + END_BATCH(); } void @@ -304,9 +322,11 @@ fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) if (fs_conf->dx10_clamp) sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; + BEGIN_BATCH(9); EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8); EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources); EREG(ib, SQ_PGM_CF_OFFSET_FS, 0); + END_BATCH(); } void @@ -324,9 +344,11 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) if (vs_conf->uncached_first_inst) sq_pgm_resources |= UNCACHED_FIRST_INST_bit; + BEGIN_BATCH(9); EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources); EREG(ib, SQ_PGM_CF_OFFSET_VS, 0); + END_BATCH(); } void @@ -346,10 +368,12 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) if (ps_conf->clamp_consts) sq_pgm_resources |= CLAMP_CONSTS_bit; + BEGIN_BATCH(12); EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources); EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode); EREG(ib, SQ_PGM_CF_OFFSET_PS, 0); + END_BATCH(); } void @@ -358,9 +382,11 @@ set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *co int i; const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); + BEGIN_BATCH(2 + count_reg); PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); for (i = 0; i < countreg; i++) EFLOAT(ib, const_buf[i]); + END_BATCH(); } void @@ -369,7 +395,9 @@ set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) /* bool register order is: ps, vs, gs; one register each * 1 bits per bool; 32 bools each for ps, vs, gs. */ + BEGIN_BATCH(3); EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); + END_BATCH(); } void @@ -391,6 +419,7 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) if (res->srf_mode_all) sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; + BEGIN_BATCH(9); PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); E32(ib, res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS E32(ib, (res->vtx_num_entries << 2) - 1); // 1: SIZE @@ -399,6 +428,7 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) E32(ib, 0); // 4: n/a E32(ib, 0); // 5: n/a E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE + END_BATCH(); } void @@ -453,6 +483,7 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) if (tex_res->interlaced) sq_tex_resource_word6 |= INTERLACED_bit; + BEGIN_BATCH(9); PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); E32(ib, sq_tex_resource_word0); E32(ib, sq_tex_resource_word1); @@ -461,6 +492,7 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) E32(ib, sq_tex_resource_word4); E32(ib, sq_tex_resource_word5); E32(ib, sq_tex_resource_word6); + END_BATCH(); } void @@ -505,27 +537,31 @@ set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) if (s->type) sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; + BEGIN_BATCH(5); PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); E32(ib, sq_tex_sampler_word0); E32(ib, sq_tex_sampler_word1); E32(ib, sq_tex_sampler_word2); + END_BATCH(); } //XXX deal with clip offsets in clip setup void set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { - + BEGIN_BATCH(6); EREG(ib, PA_SC_SCREEN_SCISSOR_TL, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); EREG(ib, PA_SC_SCREEN_SCISSOR_BR, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); + END_BATCH(); } void set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) { + BEGIN_BATCH(6); EREG(ib, PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | @@ -533,40 +569,45 @@ set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x EREG(ib, PA_SC_VPORT_SCISSOR_0_BR + id * PA_SC_VPORT_SCISSOR_0_BR_offset, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); + END_BATCH(); } void set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { + BEGIN_BATCH(6); EREG(ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | WINDOW_OFFSET_DISABLE_bit)); EREG(ib, PA_SC_GENERIC_SCISSOR_BR, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); + END_BATCH(); } void set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { - + BEGIN_BATCH(6); EREG(ib, PA_SC_WINDOW_SCISSOR_TL, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | WINDOW_OFFSET_DISABLE_bit)); EREG(ib, PA_SC_WINDOW_SCISSOR_BR, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); + END_BATCH(); } void set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) { - + BEGIN_BATCH(6); EREG(ib, PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); EREG(ib, PA_SC_CLIPRECT_0_BR + id * PA_SC_CLIPRECT_0_BR_offset, ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); + END_BATCH(); } /* @@ -594,6 +635,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) wait_3d_idle(pScrn, ib); // ASIC specific setup, see drm + BEGIN_BATCH(15); if (info->ChipFamily < CHIP_FAMILY_RV770) { EREG(ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | (28 << TD_FIFO_CREDIT_shift))); @@ -619,6 +661,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) (4 << DEPTH_CACHELINE_FREE_shift) | 0)); } + END_BATCH(); // SQ sq_conf.ps_prio = 0; @@ -744,6 +787,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) sq_setup(pScrn, ib, &sq_conf); + BEGIN_BATCH(59); EREG(ib, SQ_VTX_BASE_VTX_LOC, 0); EREG(ib, SQ_VTX_START_INST_LOC, 0); @@ -790,28 +834,29 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) E32(ib, 0x00000000); E32(ib, 0x00000000); E32(ib, 0x00000000); + END_BATCH(); if (info->ChipFamily < CHIP_FAMILY_RV770) { + BEGIN_BATCH(11); PACK0(ib, CB_FOG_RED, 3); E32(ib, 0x00000000); E32(ib, 0x00000000); E32(ib, 0x00000000); + PACK0(ib, CB_CLEAR_RED, 4); + EFLOAT(ib, 1.0); /* WTF? */ + EFLOAT(ib, 0.0); + EFLOAT(ib, 1.0); + EFLOAT(ib, 1.0); + END_BATCH(); } + BEGIN_BATCH(18); PACK0(ib, CB_CLRCMP_CONTROL, 4); E32(ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC E32(ib, 0); // CB_CLRCMP_SRC E32(ib, 0); // CB_CLRCMP_DST E32(ib, 0); // CB_CLRCMP_MSK - - if (info->ChipFamily < CHIP_FAMILY_RV770) { - PACK0(ib, CB_CLEAR_RED, 4); - EFLOAT(ib, 1.0); /* WTF? */ - EFLOAT(ib, 0.0); - EFLOAT(ib, 1.0); - EFLOAT(ib, 1.0); - } EREG(ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift)); EREG(ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); @@ -821,23 +866,29 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) (0 << WINDOW_Y_OFFSET_shift))); EREG(ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); + END_BATCH(); /* clip boolean is set to always visible -> doesn't matter */ for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192); + BEGIN_BATCH(3); if (info->ChipFamily < CHIP_FAMILY_RV770) EREG(ib, R7xx_PA_SC_EDGERULE, 0x00000000); else EREG(ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); + END_BATCH(); for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) { set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192); + BEGIN_BATCH(4); PACK0(ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2); EFLOAT(ib, 0.0); EFLOAT(ib, 1.0); + END_BATCH(); } + BEGIN_BATCH(15); if (info->ChipFamily < CHIP_FAMILY_RV770) EREG(ib, PA_SC_MODE_CNTL, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); else @@ -852,13 +903,17 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) EREG(ib, PA_SC_LINE_CNTL, 0); EREG(ib, PA_SC_AA_CONFIG, 0); EREG(ib, PA_SC_AA_MASK, 0xFFFFFFFF); + END_BATCH(); //XXX: double check this if (info->ChipFamily > CHIP_FAMILY_R600) { + BEGIN_BATCH(6); EREG(ib, PA_SC_AA_SAMPLE_LOCS_MCTX, 0); EREG(ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M, 0); + END_BATCH(); } + BEGIN_BATCH(83); EREG(ib, PA_SC_LINE_STIPPLE, 0); EREG(ib, PA_SC_MPASS_PS_CNTL, 0); @@ -908,17 +963,18 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) EREG(ib, SPI_FOG_CNTL, 0); EREG(ib, SPI_FOG_FUNC_SCALE, 0); EREG(ib, SPI_FOG_FUNC_BIAS, 0); + END_BATCH(); // clear FS fs_setup(pScrn, ib, &fs_conf); // VGT + BEGIN_BATCH(75); EREG(ib, VGT_MAX_VTX_INDX, 2048); /* XXX set to a reasonably large number of indices */ EREG(ib, VGT_MIN_VTX_INDX, 0); EREG(ib, VGT_INDX_OFFSET, 0); EREG(ib, VGT_INSTANCE_STEP_RATE_0, 0); EREG(ib, VGT_INSTANCE_STEP_RATE_1, 0); - EREG(ib, VGT_MULTI_PRIM_IB_RESET_INDX, 0); EREG(ib, VGT_OUTPUT_PATH_CNTL, 0); EREG(ib, VGT_GS_MODE, 0); @@ -939,7 +995,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) EREG(ib, VGT_REUSE_OFF, 0); EREG(ib, VGT_VTX_CNT_EN, 0); EREG(ib, VGT_STRMOUT_BUFFER_EN, 0); - + END_BATCH(); } @@ -952,12 +1008,6 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i { uint32_t i, count; - EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); - PACK3(ib, IT_INDEX_TYPE, 1); - E32(ib, draw_conf->index_type); - PACK3(ib, IT_NUM_INSTANCES, 1); - E32(ib, draw_conf->num_instances); - // calculate num of packets count = 2; if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) @@ -965,6 +1015,13 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i else count += draw_conf->num_indices; + BEGIN_BATCH(8 + count); + EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); + PACK3(ib, IT_INDEX_TYPE, 1); + E32(ib, draw_conf->index_type); + PACK3(ib, IT_NUM_INSTANCES, 1); + E32(ib, draw_conf->num_instances); + PACK3(ib, IT_DRAW_INDEX_IMMD, count); E32(ib, draw_conf->num_indices); E32(ib, draw_conf->vgt_draw_initiator); @@ -980,12 +1037,13 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i for (i = 0; i < draw_conf->num_indices; i++) E32(ib, indices[i]); } + END_BATCH(); } void draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) { - + BEGIN_BATCH(10); EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); PACK3(ib, IT_INDEX_TYPE, 1); E32(ib, draw_conf->index_type); @@ -994,6 +1052,7 @@ draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) PACK3(ib, IT_DRAW_INDEX_AUTO, 2); E32(ib, draw_conf->num_indices); E32(ib, draw_conf->vgt_draw_initiator); + END_BATCH(); } void -- cgit v1.2.3 From 69ec7a35e2a0a3d802ec093a6aab2d7ed2cc88be Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Aug 2009 16:14:02 -0400 Subject: r6xx/r7xx: first pass at kms accel support Adapted from various patches from Dave and Jerome. --- src/r600_exa.c | 531 +++++++++++++++++++++++++++++++---------- src/r600_state.h | 50 +++- src/r600_textured_videofuncs.c | 78 ++++-- src/r6xx_accel.c | 189 +++++++++++++-- src/radeon.h | 5 + src/radeon_dri2.c | 4 +- src/radeon_exa.c | 8 +- 7 files changed, 688 insertions(+), 177 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index 555748b..03d3d8c 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -133,7 +133,15 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) if (!R600CheckBPP(pPix->drawable.bitsPerPixel)) RADEON_FALLBACK(("R600CheckDatatype failed\n")); - accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->dst_bo = radeon_get_pixmap_bo(pPix); + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + } else +#endif + accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height; accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); @@ -156,10 +164,15 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix)); #endif - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + RADEON_FALLBACK(("Can't get VB\n")); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); @@ -168,10 +181,18 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->solid_vs_offset; - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->solid_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->solid_vs_offset; + accel_state->ps_mc_addr = accel_state->solid_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->solid_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->solid_ps_offset; + } accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -179,16 +200,19 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 1; @@ -196,6 +220,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); /* Render setup */ @@ -216,6 +241,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) cb_conf.w = accel_state->dst_pitch; cb_conf.h = pPix->drawable.height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; if (pPix->drawable.bitsPerPixel == 8) { cb_conf.format = COLOR_8; @@ -296,8 +322,9 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) if (((accel_state->vb_index + 3) * 8) > accel_state->vb_total) { R600DoneSolid(pPix); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*8); @@ -342,10 +369,12 @@ R600DoneSolid(PixmapPtr pPix) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -353,6 +382,7 @@ R600DoneSolid(PixmapPtr pPix) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); /* Draw */ @@ -368,15 +398,22 @@ R600DoneSolid(PixmapPtr pPix) /* sync dst surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); + + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = NULL; } static void R600DoPrepareCopy(ScrnInfoPtr pScrn, - int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp, - int dst_pitch, int dst_width, int dst_height, uint32_t dst_offset, int dst_bpp, + int src_pitch, int src_width, int src_height, + uint32_t src_offset, struct radeon_bo *src_bo, int src_bpp, + int dst_pitch, int dst_width, int dst_height, + uint32_t dst_offset, struct radeon_bo *dst_bo, int dst_bpp, int rop, Pixel planemask) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -393,8 +430,25 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, CLEAR (vs_conf); CLEAR (ps_conf); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); + accel_state->src_mc_addr[0] = src_offset; + accel_state->src_pitch[0] = src_pitch; + accel_state->src_width[0] = src_width; + accel_state->src_height[0] = src_height; + accel_state->src_bpp[0] = src_bpp; + accel_state->src_bo[0] = src_bo; + accel_state->src_bo[1] = NULL; + + accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); + accel_state->dst_mc_addr = dst_offset; + accel_state->dst_pitch = dst_pitch; + accel_state->dst_height = dst_height; + accel_state->dst_bpp = dst_bpp; + accel_state->dst_bo = dst_bo; + + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; /* Init */ start_3d(pScrn, accel_state->ib); @@ -405,10 +459,18 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, set_screen_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); set_window_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->copy_vs_offset; - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->copy_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->copy_vs_offset; + accel_state->ps_mc_addr = accel_state->copy_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->copy_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->copy_ps_offset; + } accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -416,16 +478,19 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 1; @@ -433,18 +498,13 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); - accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); - accel_state->src_mc_addr[0] = src_offset; - accel_state->src_pitch[0] = src_pitch; - accel_state->src_width[0] = src_width; - accel_state->src_height[0] = src_height; - accel_state->src_bpp[0] = src_bpp; - /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->src_size[0], accel_state->src_mc_addr[0]); + accel_state->src_size[0], accel_state->src_mc_addr[0], + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); /* Texture */ tex_res.id = 0; @@ -455,6 +515,8 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; + tex_res.bo = accel_state->src_bo[0]; + tex_res.mip_bo = accel_state->src_bo[0]; if (src_bpp == 8) { tex_res.format = FMT_8; tex_res.dst_sel_x = SQ_SEL_1; /* R */ @@ -506,16 +568,11 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]); END_BATCH(); - accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); - accel_state->dst_mc_addr = dst_offset; - accel_state->dst_pitch = dst_pitch; - accel_state->dst_height = dst_height; - accel_state->dst_bpp = dst_bpp; - cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; cb_conf.h = dst_height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; if (dst_bpp == 8) { cb_conf.format = COLOR_8; cb_conf.comp_swap = 3; /* A */ @@ -575,10 +632,12 @@ R600DoCopy(ScrnInfoPtr pScrn) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -586,6 +645,7 @@ R600DoCopy(ScrnInfoPtr pScrn) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; @@ -600,7 +660,8 @@ R600DoCopy(ScrnInfoPtr pScrn) /* sync dst surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -617,8 +678,9 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoCopy(pScrn); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); @@ -663,8 +725,19 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); - accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; - accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->src_mc_addr[0] = 0; + accel_state->dst_mc_addr = 0; + accel_state->src_bo[0] = radeon_get_pixmap_bo(pSrc); + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = radeon_get_pixmap_bo(pDst); + } else +#endif + { + accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; + accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + } accel_state->src_width[0] = pSrc->drawable.width; accel_state->src_height[0] = pSrc->drawable.height; @@ -701,19 +774,46 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8; accel_state->same_surface = TRUE; - if (accel_state->copy_area) { - exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); - accel_state->copy_area = NULL; +#if defined(XF86DRM_MODE) + if (info->cs) { + if (accel_state->copy_area_bo) { + radeon_bo_unref(accel_state->copy_area_bo); + accel_state->copy_area_bo = NULL; + } + accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, + 4096, + RADEON_GEM_DOMAIN_VRAM, + 0); + if (accel_state->copy_area_bo == NULL) { + R600IBDiscard(pScrn, accel_state->ib); + return FALSE; + } + radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + if (radeon_cs_space_check(info->cs)) { + radeon_bo_unref(accel_state->copy_area_bo); + accel_state->copy_area_bo = NULL; + R600IBDiscard(pScrn, accel_state->ib); + return FALSE; + } + accel_state->copy_area = (void*)accel_state->copy_area_bo; + } else +#endif + { + if (accel_state->copy_area) { + exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); + accel_state->copy_area = NULL; + } + accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); } - accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); } else { accel_state->same_surface = FALSE; R600DoPrepareCopy(pScrn, accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height, - accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel, + accel_state->src_mc_addr[0], accel_state->src_bo[0], pSrc->drawable.bitsPerPixel, accel_state->dst_pitch, pDst->drawable.width, pDst->drawable.height, - accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel, + accel_state->dst_mc_addr, accel_state->dst_bo, pDst->drawable.bitsPerPixel, rop, planemask); } @@ -745,6 +845,17 @@ R600OverlapCopy(PixmapPtr pDst, uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; int i, hchunk, vchunk; + struct radeon_bo *dst_bo = NULL; + +#if defined(XF86DRM_MODE) + if (info->cs) { + dst_offset = 0; + dst_bo = radeon_get_pixmap_bo(pDst); + radeon_cs_space_add_persistent_bo(info->cs, dst_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } +#endif if (is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { @@ -760,8 +871,10 @@ R600OverlapCopy(PixmapPtr pDst, if ((w / hchunk) <= (h / vchunk)) { /* reduce to horizontal */ if (srcY > dstY ) { /* diagonal up */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk); R600DoCopy(pScrn); @@ -770,8 +883,10 @@ R600OverlapCopy(PixmapPtr pDst, dstY = dstY + vchunk; } else { /* diagonal down */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk); R600DoCopy(pScrn); @@ -781,8 +896,10 @@ R600OverlapCopy(PixmapPtr pDst, } else { /* reduce to vertical */ if (srcX > dstX ) { /* diagonal left */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h); R600DoCopy(pScrn); @@ -791,8 +908,10 @@ R600OverlapCopy(PixmapPtr pDst, dstX = dstX + hchunk; } else { /* diagonal right */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h); R600DoCopy(pScrn); @@ -807,8 +926,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy right to left */ for (i = w; i > 0; i -= hchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h); R600DoCopy(pScrn); @@ -817,8 +938,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy left to right */ for (i = 0; i < w; i += hchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h); @@ -830,8 +953,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy top to bottom */ for (i = 0; i < h; i += vchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); if (vchunk > h - i) vchunk = h - i; @@ -842,8 +967,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy bottom to top */ for (i = h; i > 0; i -= vchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); if (vchunk > i) vchunk = i; @@ -854,8 +981,10 @@ R600OverlapCopy(PixmapPtr pDst, } } else { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); @@ -872,27 +1001,45 @@ R600Copy(PixmapPtr pDst, ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_bo *bo = NULL; if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) return; - if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { +#if defined(XF86DRM_MODE) + if (info->cs) + bo = radeon_get_pixmap_bo(pDst); +#endif + + if (accel_state->same_surface && + is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { if (accel_state->copy_area) { uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); uint32_t orig_offset, tmp_offset; - tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; - orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; - +#if defined(XF86DRM_MODE) + if (info->cs) { + tmp_offset = 0; + orig_offset = 0; + } else +#endif + { + tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; + orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + } R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + orig_offset, bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + tmp_offset, accel_state->copy_area_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); R600DoCopy(pScrn); R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + tmp_offset, accel_state->copy_area_bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + orig_offset, bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); R600DoCopy(pScrn); @@ -900,11 +1047,20 @@ R600Copy(PixmapPtr pDst, R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h); } else if (accel_state->same_surface) { uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); - uint32_t offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + uint32_t offset; + +#if defined(XF86DRM_MODE) + if (info->cs) + offset = 0; + else +#endif + offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + offset, bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + offset, bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); R600DoCopy(pScrn); @@ -925,10 +1081,13 @@ R600DoneCopy(PixmapPtr pDst) R600DoCopy(pScrn); if (accel_state->copy_area) { - exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); + if (!info->cs) + exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); accel_state->copy_area = NULL; } - + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = NULL; } @@ -1103,7 +1262,16 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, CLEAR (tex_res); CLEAR (tex_samp); - accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->src_mc_addr[unit] = 0; + accel_state->src_bo[unit] = radeon_get_pixmap_bo(pPix); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[unit], + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } else +#endif + accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * pPix->drawable.height; @@ -1122,7 +1290,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->src_size[unit], accel_state->src_mc_addr[unit]); + accel_state->src_size[unit], accel_state->src_mc_addr[unit], + accel_state->src_bo[unit], RADEON_GEM_DOMAIN_VRAM, 0); /* Texture */ tex_res.id = unit; @@ -1134,6 +1303,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, tex_res.base = accel_state->src_mc_addr[unit]; tex_res.mip_base = accel_state->src_mc_addr[unit]; tex_res.format = R600TexFormats[i].card_fmt; + tex_res.bo = accel_state->src_bo[unit]; + tex_res.mip_bo = accel_state->src_bo[unit]; tex_res.request_size = 1; /* component swizzles */ @@ -1406,6 +1577,9 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, /* return FALSE; */ + if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8) + return FALSE; + if (pMask) { accel_state->msk_pic = pMaskPicture; if (pMaskPicture->componentAlpha) { @@ -1424,7 +1598,13 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, accel_state->src_alpha = FALSE; } - accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->dst_bo = radeon_get_pixmap_bo(pDst); + } else +#endif + accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height; @@ -1441,8 +1621,9 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, CLEAR (vs_conf); CLEAR (ps_conf); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + RADEON_FALLBACK(("Can't get VB\n")); /* Init */ start_3d(pScrn, accel_state->ib); @@ -1470,16 +1651,31 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, if (pMask) { set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0)); - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->comp_mask_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->ps_mc_addr = accel_state->comp_mask_ps_offset; + else +#endif + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_mask_ps_offset; } else { set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0)); - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->ps_mc_addr = accel_state->comp_ps_offset; + else +#endif + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + accel_state->comp_ps_offset; } - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->comp_vs_offset; +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->vs_mc_addr = accel_state->comp_vs_offset; + else +#endif + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_vs_offset; accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -1488,16 +1684,19 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 3; vs_conf.stack_size = 1; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 3; @@ -1505,6 +1704,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); BEGIN_BATCH(12); @@ -1529,6 +1729,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_conf.h = pDst->drawable.height; cb_conf.base = accel_state->dst_mc_addr; cb_conf.format = dst_format; + cb_conf.bo = accel_state->dst_bo; switch (pDstPicture->format) { case PICT_a8r8g8b8: @@ -1611,8 +1812,9 @@ static void R600Composite(PixmapPtr pDst, if (((accel_state->vb_index + 3) * 24) > accel_state->vb_total) { R600DoneComposite(pDst); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*24); @@ -1650,8 +1852,9 @@ static void R600Composite(PixmapPtr pDst, } else { if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoneComposite(pDst); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); @@ -1701,6 +1904,7 @@ static void R600DoneComposite(PixmapPtr pDst) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; } else { accel_state->vb_size = accel_state->vb_index * 16; vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -1708,6 +1912,7 @@ static void R600DoneComposite(PixmapPtr pDst) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; } /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || @@ -1716,12 +1921,14 @@ static void R600DoneComposite(PixmapPtr pDst) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); - set_vtx_resource (pScrn, accel_state->ib, &vtx_res); + set_vtx_resource(pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; @@ -1734,9 +1941,14 @@ static void R600DoneComposite(PixmapPtr pDst) wait_3d_idle_clean(pScrn, accel_state->ib); cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); + + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = NULL; } Bool @@ -1753,6 +1965,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn, int scratch_offset = 0, hpass, temph; char *dst; drmBufPtr scratch; + struct radeon_bo *bo = NULL; if (dst_pitch & 7) return FALSE; @@ -1795,8 +2008,10 @@ R600CopyToVRAM(ScrnInfoPtr pScrn, } /* blit from scratch to vram */ R600DoPrepareCopy(pScrn, - scratch_pitch, w, oldhpass, offset, bpp, - dst_pitch, dst_width, dst_height, dst_mc_addr, bpp, + scratch_pitch, w, oldhpass, + offset, bo, bpp, + dst_pitch, dst_width, dst_height, + dst_mc_addr, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass); R600DoCopy(pScrn); @@ -1842,6 +2057,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); int wpass = w * (bpp/8); drmBufPtr scratch; + struct radeon_bo *bo = NULL; /* RV740 seems to be particularly problematic with small xfers */ if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32)) @@ -1859,8 +2075,10 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, /* blit from vram to scratch */ R600DoPrepareCopy(pScrn, - src_pitch, src_width, src_height, src_mc_addr, bpp, - scratch_pitch, src_width, hpass, scratch_mc_addr, bpp, + src_pitch, src_width, src_height, + src_mc_addr, bo, bpp, + scratch_pitch, src_width, hpass, + scratch_mc_addr, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); R600DoCopy(pScrn); @@ -1876,8 +2094,10 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, scratch_offset = scratch->total/2 - scratch_offset; /* blit from vram to scratch */ R600DoPrepareCopy(pScrn, - src_pitch, src_width, src_height, src_mc_addr, bpp, - scratch_pitch, src_width, hpass, scratch_mc_addr + scratch_offset, bpp, + src_pitch, src_width, src_height, + src_mc_addr, bo, bpp, + scratch_pitch, src_width, hpass, + scratch_mc_addr + scratch_offset, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); R600DoCopy(pScrn); @@ -1919,7 +2139,12 @@ R600Sync(ScreenPtr pScreen, int marker) struct radeon_accel_state *accel_state = info->accel_state; if (accel_state->exaMarkerSynced != marker) { - RADEONWaitForIdleCP(pScrn); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (!info->cs) +#endif +#endif + RADEONWaitForIdleCP(pScrn); accel_state->exaMarkerSynced = marker; } @@ -1936,11 +2161,27 @@ R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) accel_state->shaders = NULL; - accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, - TRUE, NULL, NULL); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 4096, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->shaders_bo == NULL) { + ErrorF("Allocating shader failed\n"); + return FALSE; + } + return TRUE; + } else +#endif +#endif + { + accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, + TRUE, NULL, NULL); + + if (accel_state->shaders == NULL) + return FALSE; + } - if (accel_state->shaders == NULL) - return FALSE; return TRUE; } @@ -1951,8 +2192,21 @@ R600LoadShaders(ScrnInfoPtr pScrn) struct radeon_accel_state *accel_state = info->accel_state; RADEONChipFamily ChipSet = info->ChipFamily; uint32_t *shader; - - shader = (pointer)((char *)info->FB + accel_state->shaders->offset); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + int ret; + + if (info->cs) { + ret = radeon_bo_map(accel_state->shaders_bo, 1); + if (ret) { + FatalError("failed to map shader %d\n", ret); + return FALSE; + } + shader = accel_state->shaders_bo->ptr; + } else +#endif +#endif + shader = (pointer)((char *)info->FB + accel_state->shaders->offset); /* solid vs --------------------------------------- */ accel_state->solid_vs_offset = 0; @@ -1990,6 +2244,14 @@ R600LoadShaders(ScrnInfoPtr pScrn) accel_state->xv_ps_offset = 4096; R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + radeon_bo_unmap(accel_state->shaders_bo); + } +#endif +#endif + return TRUE; } @@ -2018,7 +2280,6 @@ R600FinishAccess(PixmapPtr pPix, int index) } - Bool R600DrawInit(ScreenPtr pScreen) { @@ -2044,13 +2305,28 @@ R600DrawInit(ScreenPtr pScreen) info->accel_state->exa->MarkSync = R600MarkSync; info->accel_state->exa->WaitMarker = R600Sync; - info->accel_state->exa->PrepareAccess = R600PrepareAccess; - info->accel_state->exa->FinishAccess = R600FinishAccess; - - /* AGP seems to have problems with gart transfers */ - if (info->accelDFS) { - info->accel_state->exa->UploadToScreen = R600UploadToScreen; - info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap; + info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap; + info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen; + info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS; + info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS; + info->accel_state->exa->UploadToScreen = NULL; + info->accel_state->exa->DownloadFromScreen = NULL; + } else +#endif +#endif + { + info->accel_state->exa->PrepareAccess = R600PrepareAccess; + info->accel_state->exa->FinishAccess = R600FinishAccess; + + /* AGP seems to have problems with gart transfers */ + if (info->accelDFS) { + info->accel_state->exa->UploadToScreen = R600UploadToScreen; + info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; + } } info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; @@ -2083,8 +2359,13 @@ R600DrawInit(ScreenPtr pScreen) return FALSE; } - if (!info->gartLocation) - return FALSE; +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (!info->cs) +#endif +#endif + if (!info->gartLocation) + return FALSE; info->accel_state->XInited3D = FALSE; info->accel_state->copy_area = NULL; diff --git a/src/r600_state.h b/src/r600_state.h index 10b1022..6ca88cf 100644 --- a/src/r600_state.h +++ b/src/r600_state.h @@ -50,6 +50,7 @@ typedef struct { int round_mode; int tile_compact; int source_format; + struct radeon_bo *bo; } cb_config_t; /* Depth buffer */ @@ -63,6 +64,7 @@ typedef struct { int tile_surface_en; int tile_compact; int zrange_precision; + struct radeon_bo *bo; } db_config_t; /* Shader */ @@ -79,6 +81,7 @@ typedef struct { int clamp_consts; int export_mode; int uncached_first_inst; + struct radeon_bo *bo; } shader_config_t; /* Vertex buffer / vtx resource */ @@ -94,6 +97,7 @@ typedef struct { int srf_mode_all; int endian; int mem_req_size; + struct radeon_bo *bo; } vtx_resource_t; /* Texture resource */ @@ -129,6 +133,8 @@ typedef struct { int mpeg_clamp; int perf_modulation; int interlaced; + struct radeon_bo *bo; + struct radeon_bo *mip_bo; } tex_resource_t; /* Texture sampler */ @@ -170,15 +176,43 @@ typedef struct { uint32_t num_indices; } draw_config_t; +#if defined(XF86DRM_MODE) +#define BEGIN_BATCH(n) \ +do { \ + if (info->cs) \ + radeon_ddx_cs_start(pScrn, (n), __FILE__, __func__, __LINE__); \ +} while(0) +#define END_BATCH() \ +do { \ + if (info->cs) \ + radeon_cs_end(info->cs, __FILE__, __func__, __LINE__); \ +} while(0) +#define RELOC_BATCH(bo, rd, wd) \ +do { \ + if (info->cs) \ + OUT_RING_RELOC((bo), (rd), (wd)); \ +} while(0) +#define E32(ib, dword) \ +do { \ + if (info->cs) \ + radeon_cs_write_dword(info->cs, (dword)); \ + else { \ + uint32_t *ib_head = (pointer)(char*)(ib)->address; \ + ib_head[(ib)->used >> 2] = (dword); \ + (ib)->used += 4; \ + } \ +} while (0) +#else #define BEGIN_BATCH(n) do {} while(0) #define END_BATCH() do {} while(0) - +#define RELOC_BATCH(bo, wd, rd) do {} while(0) #define E32(ib, dword) \ do { \ uint32_t *ib_head = (pointer)(char*)(ib)->address; \ ib_head[(ib)->used >> 2] = (dword); \ (ib)->used += 4; \ } while (0) +#endif #define EFLOAT(ib, val) \ do { \ @@ -246,7 +280,8 @@ start_3d(ScrnInfoPtr pScrn, drmBufPtr ib); void set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf); void -cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr); +cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr, + struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain); void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, int crtc, int start, int stop); void @@ -282,9 +317,18 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i void draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf); -void +Bool r600_vb_get(ScrnInfoPtr pScrn); void r600_vb_discard(ScrnInfoPtr pScrn); +int +r600_cp_start(ScrnInfoPtr pScrn); + +extern Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index); +extern void RADEONFinishAccess_CS(PixmapPtr pPix, int index); +extern void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align); +extern void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv); +extern struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix); +extern Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix); #endif diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 6739616..9cbfea4 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -80,10 +80,12 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -91,6 +93,7 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; @@ -105,9 +108,13 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) /* sync destination surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); R600CPFlushIndirect(pScrn, accel_state->ib); + accel_state->dst_bo = NULL; + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; } void @@ -216,6 +223,19 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) CLEAR (vs_conf); CLEAR (ps_conf); +#if defined(ACCEL_CP) && defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->src_mc_addr[0] = 0; + accel_state->src_bo[0] = pPriv->src_bo; + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = radeon_get_pixmap_bo(pPixmap); + } else +#endif + { + accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; + accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; + } accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); accel_state->src_pitch[0] = pPriv->src_pitch; @@ -233,8 +253,9 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) dstyoff = 0; #endif - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; /* Init */ start_3d(pScrn, accel_state->ib); @@ -245,11 +266,18 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->xv_vs_offset; - - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->xv_ps_offset; +#if defined(ACCEL_CP) && defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->xv_vs_offset; + accel_state->ps_mc_addr = accel_state->xv_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->xv_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->xv_ps_offset; + } /* PS bool constant */ switch(pPriv->id) { @@ -271,16 +299,19 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 3; @@ -288,6 +319,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); /* PS alu constants */ @@ -298,12 +330,12 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) switch(pPriv->id) { case FOURCC_YV12: case FOURCC_I420: - accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], - accel_state->src_mc_addr[0]); + accel_state->src_mc_addr[0], + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); /* Y texture */ tex_res.id = 0; @@ -314,6 +346,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; + tex_res.bo = accel_state->src_bo[0]; + tex_res.mip_bo = accel_state->src_bo[0]; tex_res.format = FMT_8; tex_res.dst_sel_x = SQ_SEL_X; /* Y */ @@ -345,7 +379,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* U or V texture */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0] / 4, - accel_state->src_mc_addr[0] + pPriv->planev_offset); + accel_state->src_mc_addr[0] + pPriv->planev_offset, + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); tex_res.id = 1; tex_res.format = FMT_8; @@ -369,7 +404,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* U or V texture */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0] / 4, - accel_state->src_mc_addr[0] + pPriv->planeu_offset); + accel_state->src_mc_addr[0] + pPriv->planeu_offset, + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); tex_res.id = 2; tex_res.format = FMT_8; @@ -393,12 +429,12 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) case FOURCC_UYVY: case FOURCC_YUY2: default: - accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], - accel_state->src_mc_addr[0]); + accel_state->src_mc_addr[0], + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); /* Y texture */ tex_res.id = 0; @@ -409,6 +445,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; + tex_res.bo = accel_state->src_bo[0]; + tex_res.mip_bo = accel_state->src_bo[0]; tex_res.format = FMT_8_8; if (pPriv->id == FOURCC_UYVY) @@ -474,12 +512,10 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) END_BATCH(); cb_conf.id = 0; - - accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; - cb_conf.w = accel_state->dst_pitch; cb_conf.h = pPixmap->drawable.height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; switch (pPixmap->drawable.bitsPerPixel) { case 16: diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index 059c3cc..4aa4650 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -39,6 +39,21 @@ #include "radeon_drm.h" +void r600_cs_flush_indirect(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + int ret; + + if (!info->cs->cdw) + return; + radeon_cs_emit(info->cs); + radeon_cs_erase(info->cs); + + ret = radeon_cs_space_check(info->cs); + if (ret) + ErrorF("space check failed in flush\n"); +} + /* Flush the indirect buffer to the kernel for submission to the card */ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) { @@ -47,13 +62,20 @@ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) int start = 0; drm_radeon_indirect_t indirect; +#if defined(XF86DRM_MODE) + if (info->cs) { + r600_cs_flush_indirect(pScrn); + return; + } +#endif + if (!buffer) return; //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", // buffer->idx); while (buffer->used & 0x3c){ - BEGIN_BATCH(); + BEGIN_BATCH(1); E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */ END_BATCH(); } @@ -72,6 +94,20 @@ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) { +#if defined(XF86DRM_MODE) + int ret; + RADEONInfoPtr info = RADEONPTR(pScrn); + if (info->cs) { + if (CS_FULL(info->cs)) { + r600_cs_flush_indirect(pScrn); + return; + } + radeon_cs_erase(info->cs); + ret = radeon_cs_space_check(info->cs); + if (ret) + ErrorF("space check failed in flush\n"); + } +#endif if (!ib) return; ib->used = 0; @@ -81,6 +117,7 @@ void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) void wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) { + RADEONInfoPtr info = RADEONPTR(pScrn); //flush caches, don't generate timestamp BEGIN_BATCH(5); @@ -95,6 +132,8 @@ wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) void wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(3); EREG(ib, WAIT_UNTIL, WAIT_3D_IDLE_bit); END_BATCH(); @@ -212,19 +251,19 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) h = (cb_conf->h + 7) & ~7; slice = ((cb_conf->w * h) / 64) - 1; - if ((info->ChipFamily > CHIP_FAMILY_R600) && - (info->ChipFamily < CHIP_FAMILY_RV770)) - BEGIN_BATCH(23); - else - BEGIN_BATCH(21); + BEGIN_BATCH(3 + 2); EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); + RELOC_BATCH(cb_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); // rv6xx workaround if ((info->ChipFamily > CHIP_FAMILY_R600) && (info->ChipFamily < CHIP_FAMILY_RV770)) { + BEGIN_BATCH(20); PACK3(ib, IT_SURFACE_BASE_UPDATE, 1); E32(ib, (2 << cb_conf->id)); - } + } else + BEGIN_BATCH(18); // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | @@ -240,20 +279,23 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) } void -cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr) +cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr, + struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t cp_coher_size; if (size == 0xffffffff) cp_coher_size = 0xffffffff; else cp_coher_size = ((size + 255) >> 8); - BEGIN_BATCH(5); + BEGIN_BATCH(5 + 2); PACK3(ib, IT_SURFACE_SYNC, 4); E32(ib, sync_type); E32(ib, cp_coher_size); E32(ib, (mc_addr >> 8)); E32(ib, 10); /* poll interval */ + RELOC_BATCH(bo, rdomains, wdomain); END_BATCH(); } @@ -266,6 +308,12 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, uint32_t offset; RADEONCrtcPrivatePtr radeon_crtc; + //XXX FIXME +#if defined(XF86DRM_MODE) + if (info->cs) + return; +#endif + if ((crtc < 0) || (crtc > 1)) return; @@ -314,6 +362,7 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, void fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | @@ -322,8 +371,12 @@ fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) if (fs_conf->dx10_clamp) sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; - BEGIN_BATCH(9); + BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8); + RELOC_BATCH(fs_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(6); EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources); EREG(ib, SQ_PGM_CF_OFFSET_FS, 0); END_BATCH(); @@ -332,6 +385,7 @@ fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) void vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | @@ -344,8 +398,12 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) if (vs_conf->uncached_first_inst) sq_pgm_resources |= UNCACHED_FIRST_INST_bit; - BEGIN_BATCH(9); + BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); + RELOC_BATCH(vs_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(6); EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources); EREG(ib, SQ_PGM_CF_OFFSET_VS, 0); END_BATCH(); @@ -354,6 +412,7 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) void ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | @@ -368,8 +427,12 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) if (ps_conf->clamp_consts) sq_pgm_resources |= CLAMP_CONSTS_bit; - BEGIN_BATCH(12); + BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); + RELOC_BATCH(ps_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(9); EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources); EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode); EREG(ib, SQ_PGM_CF_OFFSET_PS, 0); @@ -379,10 +442,11 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) void set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf) { + RADEONInfoPtr info = RADEONPTR(pScrn); int i; const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); - BEGIN_BATCH(2 + count_reg); + BEGIN_BATCH(2 + countreg); PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); for (i = 0; i < countreg; i++) EFLOAT(ib, const_buf[i]); @@ -392,6 +456,7 @@ set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *co void set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) { + RADEONInfoPtr info = RADEONPTR(pScrn); /* bool register order is: ps, vs, gs; one register each * 1 bits per bool; 32 bools each for ps, vs, gs. */ @@ -403,6 +468,7 @@ set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) void set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_vtx_constant_word2; sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | @@ -419,7 +485,7 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) if (res->srf_mode_all) sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; - BEGIN_BATCH(9); + BEGIN_BATCH(9 + 2); PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); E32(ib, res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS E32(ib, (res->vtx_num_entries << 2) - 1); // 1: SIZE @@ -428,12 +494,14 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) E32(ib, 0); // 4: n/a E32(ib, 0); // 5: n/a E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE + RELOC_BATCH(res->bo, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); } void set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; uint32_t sq_tex_resource_word5, sq_tex_resource_word6; @@ -483,7 +551,7 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) if (tex_res->interlaced) sq_tex_resource_word6 |= INTERLACED_bit; - BEGIN_BATCH(9); + BEGIN_BATCH(9 + 4); PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); E32(ib, sq_tex_resource_word0); E32(ib, sq_tex_resource_word1); @@ -492,12 +560,15 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) E32(ib, sq_tex_resource_word4); E32(ib, sq_tex_resource_word5); E32(ib, sq_tex_resource_word6); + RELOC_BATCH(tex_res->bo, RADEON_GEM_DOMAIN_VRAM, 0); + RELOC_BATCH(tex_res->mip_bo, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); } void set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | @@ -549,6 +620,8 @@ set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) void set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_SCREEN_SCISSOR_TL, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); @@ -560,6 +633,7 @@ set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int void set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); BEGIN_BATCH(6); EREG(ib, PA_SC_VPORT_SCISSOR_0_TL + @@ -575,6 +649,7 @@ set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x void set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); BEGIN_BATCH(6); EREG(ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | @@ -588,6 +663,8 @@ set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int void set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_WINDOW_SCISSOR_TL, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | @@ -600,6 +677,8 @@ set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int void set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | @@ -966,6 +1045,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) END_BATCH(); // clear FS + fs_conf.bo = accel_state->shaders_bo; fs_setup(pScrn, ib, &fs_conf); // VGT @@ -1006,6 +1086,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) void draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t i, count; // calculate num of packets @@ -1043,6 +1124,8 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i void draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(10); EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); PACK3(ib, IT_INDEX_TYPE, 1); @@ -1055,22 +1138,82 @@ draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) END_BATCH(); } -void +Bool r600_vb_get(ScrnInfoPtr pScrn) { RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; - - accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + - (accel_state->ib->idx * accel_state->ib->total) + - (accel_state->ib->total / 2); - accel_state->vb_total = (accel_state->ib->total / 2); - accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address + - (accel_state->ib->total / 2)); +#if defined(XF86DRM_MODE) + int ret; + if (info->cs) { + if (accel_state->vb_bo == NULL) { + accel_state->vb_mc_addr = 0; + accel_state->vb_bo = radeon_bo_open(info->bufmgr, 0, 16 * 1024, + 4096, RADEON_GEM_DOMAIN_GTT, 0); + if (accel_state->vb_bo == NULL) + return FALSE; + ret = radeon_bo_map(accel_state->vb_bo, 1); + if (ret) { + FatalError("failed to vb %d\n", ret); + return FALSE; + } + accel_state->vb_total = 16 * 1024; + accel_state->vb_ptr = accel_state->vb_bo->ptr; + } + } else +#endif + { + accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + + (accel_state->ib->idx*accel_state->ib->total)+ + (accel_state->ib->total / 2); + accel_state->vb_total = (accel_state->ib->total / 2); + accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address + + (accel_state->ib->total / 2)); + } accel_state->vb_index = 0; + return TRUE; } void r600_vb_discard(ScrnInfoPtr pScrn) { } + +int +r600_cp_start(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + +#if defined(XF86DRM_MODE) + if (info->cs) { + if (!r600_vb_get(pScrn)) + return FALSE; + radeon_cs_space_reset_bos(info->cs); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->src_bo[0]) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[0], + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->src_bo[1]) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[1], + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->vb_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (accel_state->copy_area_bo) + radeon_cs_space_add_persistent_bo(info->cs, + accel_state->copy_area_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } else +#endif + { + accel_state->ib = RADEONCPGetBuffer(pScrn); + if (!r600_vb_get(pScrn)) { + return -1; + } + } + return 0; +} diff --git a/src/radeon.h b/src/radeon.h index 3a3631e..7fdd8f5 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -696,9 +696,11 @@ struct radeon_accel_state { int vb_total; void *vb_ptr; uint32_t vb_size; + struct radeon_bo *vb_bo; // shader storage ExaOffscreenArea *shaders; + struct radeon_bo *shaders_bo; uint32_t solid_vs_offset; uint32_t solid_ps_offset; uint32_t copy_vs_offset; @@ -710,12 +712,14 @@ struct radeon_accel_state { uint32_t xv_ps_offset; //size/addr stuff + struct radeon_bo *src_bo[2]; uint32_t src_size[2]; uint64_t src_mc_addr[2]; uint32_t src_pitch[2]; uint32_t src_width[2]; uint32_t src_height[2]; uint32_t src_bpp[2]; + struct radeon_bo *dst_bo; uint32_t dst_size; uint64_t dst_mc_addr; uint32_t dst_pitch; @@ -731,6 +735,7 @@ struct radeon_accel_state { // copy ExaOffscreenArea *copy_area; + struct radeon_bo *copy_area_bo; Bool same_surface; int rop; uint32_t planemask; diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c index b52f965..efc6bde 100644 --- a/src/radeon_dri2.c +++ b/src/radeon_dri2.c @@ -333,7 +333,9 @@ radeon_dri2_screen_init(ScreenPtr pScreen) return FALSE; } - if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) { + if ( (info->ChipFamily >= CHIP_FAMILY_R600) ) { + dri2_info.driverName = R600_DRIVER_NAME; + } else if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) { dri2_info.driverName = R300_DRIVER_NAME; } else if ( info->ChipFamily >= CHIP_FAMILY_R200 ) { dri2_info.driverName = R200_DRIVER_NAME; diff --git a/src/radeon_exa.c b/src/radeon_exa.c index 3f3c9ba..56e87a9 100644 --- a/src/radeon_exa.c +++ b/src/radeon_exa.c @@ -336,7 +336,7 @@ static void RADEONFinishAccess_BE(PixmapPtr pPix, int index) #endif /* X_BYTE_ORDER == X_BIG_ENDIAN */ #ifdef XF86DRM_MODE -static Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) +Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) { ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; struct radeon_exa_pixmap_priv *driver_priv; @@ -364,7 +364,7 @@ static Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) return TRUE; } -static void RADEONFinishAccess_CS(PixmapPtr pPix, int index) +void RADEONFinishAccess_CS(PixmapPtr pPix, int index) { struct radeon_exa_pixmap_priv *driver_priv; @@ -456,7 +456,7 @@ void *RADEONEXACreatePixmap2(ScreenPtr pScreen, int width, int height, return new_priv; } -static void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv) +void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv) { struct radeon_exa_pixmap_priv *driver_priv = driverPriv; @@ -489,7 +489,7 @@ void radeon_set_pixmap_bo(PixmapPtr pPix, struct radeon_bo *bo) } } -static Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix) +Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix) { struct radeon_exa_pixmap_priv *driver_priv; -- cgit v1.2.3 From 65852de027989c105246fa4e4eed432f29525a22 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Aug 2009 17:29:42 -0400 Subject: r6xx/r7xx EXA: WIP --- src/r600_exa.c | 5 +++++ src/radeon_kms.c | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index 03d3d8c..b6a1a15 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -128,6 +128,8 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) uint32_t a, r, g, b; float ps_alu_consts[4]; + //return FALSE; + if (pPix->drawable.bitsPerPixel == 24) RADEON_FALLBACK(("24bpp unsupported\n")); if (!R600CheckBPP(pPix->drawable.bitsPerPixel)) @@ -713,6 +715,8 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; + return FALSE; + if (pSrc->drawable.bitsPerPixel == 24) RADEON_FALLBACK(("24bpp unsupported\n")); if (pDst->drawable.bitsPerPixel == 24) @@ -1575,6 +1579,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_config_t cb_conf; shader_config_t vs_conf, ps_conf; + return FALSE; /* return FALSE; */ if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8) diff --git a/src/radeon_kms.c b/src/radeon_kms.c index cd398c6..faa0cfd 100644 --- a/src/radeon_kms.c +++ b/src/radeon_kms.c @@ -180,7 +180,7 @@ static Bool RADEONPreInitAccel_KMS(ScrnInfoPtr pScrn) xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to allocate accel_state rec!\n"); return FALSE; } - +#if 0 if (info->ChipFamily >= CHIP_FAMILY_R600) { xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Using shadowfb for KMS on R600+\n"); @@ -189,7 +189,7 @@ static Bool RADEONPreInitAccel_KMS(ScrnInfoPtr pScrn) info->r600_shadow_fb = FALSE; return TRUE; } - +#endif if ((info->ChipFamily == CHIP_FAMILY_RS100) || (info->ChipFamily == CHIP_FAMILY_RS200) || -- cgit v1.2.3 From 2e83cca8d7efaf1a6836cfb9ea5893fd9d70175f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Aug 2009 18:05:43 -0400 Subject: r6xx/r7xx: more cs exa wip --- src/r600_exa.c | 22 ++++++++-------------- src/r600_textured_videofuncs.c | 5 +---- src/r6xx_accel.c | 12 +++++++----- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index b6a1a15..4d09d6e 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -167,8 +167,6 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) #endif r600_cp_start(pScrn); - if (!r600_vb_get(pScrn)) - RADEON_FALLBACK(("Can't get VB\n")); /* Init */ #if defined(XF86DRM_MODE) @@ -325,8 +323,6 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) if (((accel_state->vb_index + 3) * 8) > accel_state->vb_total) { R600DoneSolid(pPix); r600_cp_start(pScrn); - if (!r600_vb_get(pScrn)) - return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*8); @@ -408,6 +404,7 @@ R600DoneSolid(PixmapPtr pPix) accel_state->src_bo[0] = NULL; accel_state->src_bo[1] = NULL; accel_state->dst_bo = NULL; + accel_state->vb_bo = NULL; } static void @@ -449,8 +446,6 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, accel_state->dst_bo = dst_bo; r600_cp_start(pScrn); - if (!r600_vb_get(pScrn)) - return; /* Init */ start_3d(pScrn, accel_state->ib); @@ -681,8 +676,6 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoCopy(pScrn); r600_cp_start(pScrn); - if (!r600_vb_get(pScrn)) - return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); @@ -1092,6 +1085,7 @@ R600DoneCopy(PixmapPtr pDst) accel_state->src_bo[0] = NULL; accel_state->src_bo[1] = NULL; accel_state->dst_bo = NULL; + accel_state->vb_bo = NULL; } @@ -1627,8 +1621,6 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, CLEAR (ps_conf); r600_cp_start(pScrn); - if (!r600_vb_get(pScrn)) - RADEON_FALLBACK(("Can't get VB\n")); /* Init */ start_3d(pScrn, accel_state->ib); @@ -1818,8 +1810,6 @@ static void R600Composite(PixmapPtr pDst, if (((accel_state->vb_index + 3) * 24) > accel_state->vb_total) { R600DoneComposite(pDst); r600_cp_start(pScrn); - if (!r600_vb_get(pScrn)) - return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*24); @@ -1858,8 +1848,6 @@ static void R600Composite(PixmapPtr pDst, if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoneComposite(pDst); r600_cp_start(pScrn); - if (!r600_vb_get(pScrn)) - return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); @@ -1954,6 +1942,7 @@ static void R600DoneComposite(PixmapPtr pDst) accel_state->src_bo[0] = NULL; accel_state->src_bo[1] = NULL; accel_state->dst_bo = NULL; + accel_state->vb_bo = NULL; } Bool @@ -2374,6 +2363,11 @@ R600DrawInit(ScreenPtr pScreen) info->accel_state->XInited3D = FALSE; info->accel_state->copy_area = NULL; + info->accel_state->src_bo[0] = NULL; + info->accel_state->src_bo[1] = NULL; + info->accel_state->dst_bo = NULL; + info->accel_state->copy_area_bo = NULL; + info->accel_state->vb_bo = NULL; if (!R600AllocShaders(pScrn, pScreen)) return FALSE; diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 9cbfea4..631a40c 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -254,8 +254,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) #endif r600_cp_start(pScrn); - if (!r600_vb_get(pScrn)) - return; /* Init */ start_3d(pScrn, accel_state->ib); @@ -587,8 +585,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoneTexturedVideo(pScrn); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index 4aa4650..6e4c8ea 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -1188,7 +1188,7 @@ r600_cp_start(ScrnInfoPtr pScrn) #if defined(XF86DRM_MODE) if (info->cs) { if (!r600_vb_get(pScrn)) - return FALSE; + return -1; radeon_cs_space_reset_bos(info->cs); radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); @@ -1198,10 +1198,12 @@ r600_cp_start(ScrnInfoPtr pScrn) if (accel_state->src_bo[1]) radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[1], RADEON_GEM_DOMAIN_VRAM, 0); - radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_bo, - RADEON_GEM_DOMAIN_VRAM, 0); - radeon_cs_space_add_persistent_bo(info->cs, accel_state->vb_bo, - RADEON_GEM_DOMAIN_GTT, 0); + if (accel_state->dst_bo) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->vb_bo) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->vb_bo, + RADEON_GEM_DOMAIN_GTT, 0); if (accel_state->copy_area_bo) radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, -- cgit v1.2.3 From 599adfc1f5e6d708be7ad30f4871de3046775727 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Aug 2009 18:13:14 -0400 Subject: r6xx/r7xx: fix flipped domains --- src/r600_exa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index 4d09d6e..1d7802a 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -658,7 +658,7 @@ R600DoCopy(ScrnInfoPtr pScrn) /* sync dst surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), accel_state->dst_size, accel_state->dst_mc_addr, - accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); } -- cgit v1.2.3 From bba51187055932ecd466f5f817428d6c773747b9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Aug 2009 18:37:15 -0400 Subject: R6xx/r7xx: unmap vb bo when done --- src/r600_exa.c | 15 +++++++++++++++ src/r600_state.h | 9 ++++++--- src/r600_textured_videofuncs.c | 5 +++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index 1d7802a..09eb6ee 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -352,6 +352,11 @@ R600DoneSolid(PixmapPtr pPix) CLEAR (draw_conf); CLEAR (vtx_res); +#ifdef XF86DRM_MODE + if (info->cs) + radeon_bo_unmap(accel_state->vb_bo); +#endif + if (accel_state->vb_index == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); @@ -614,6 +619,11 @@ R600DoCopy(ScrnInfoPtr pScrn) CLEAR (draw_conf); CLEAR (vtx_res); +#ifdef XF86DRM_MODE + if (info->cs) + radeon_bo_unmap(accel_state->vb_bo); +#endif + if (accel_state->vb_index == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); @@ -1883,6 +1893,11 @@ static void R600DoneComposite(PixmapPtr pDst) CLEAR (draw_conf); CLEAR (vtx_res); +#ifdef XF86DRM_MODE + if (info->cs) + radeon_bo_unmap(accel_state->vb_bo); +#endif + if (accel_state->vb_index == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); diff --git a/src/r600_state.h b/src/r600_state.h index 6ca88cf..cb039d4 100644 --- a/src/r600_state.h +++ b/src/r600_state.h @@ -188,9 +188,12 @@ do { \ radeon_cs_end(info->cs, __FILE__, __func__, __LINE__); \ } while(0) #define RELOC_BATCH(bo, rd, wd) \ -do { \ - if (info->cs) \ - OUT_RING_RELOC((bo), (rd), (wd)); \ +do { \ + if (info->cs) { \ + int _ret; \ + _ret = radeon_cs_write_reloc(info->cs, (bo), (rd), (wd), 0); \ + if (_ret) ErrorF("reloc emit failure %d (%s %d)\n", _ret, __func__, __LINE__); \ + } \ } while(0) #define E32(ib, dword) \ do { \ diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 631a40c..10d6f4f 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -65,6 +65,11 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) CLEAR (draw_conf); CLEAR (vtx_res); +#ifdef XF86DRM_MODE + if (info->cs) + radeon_bo_unmap(accel_state->vb_bo); +#endif + if (accel_state->vb_index == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); -- cgit v1.2.3 From 9aa214e125b7927d62b9fe124a851d0373c24d7e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Aug 2009 18:45:49 -0400 Subject: r6xx/r7xx: fix reloc for vtx buffer --- src/r6xx_accel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index 6e4c8ea..8a2b1ae 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -494,7 +494,7 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) E32(ib, 0); // 4: n/a E32(ib, 0); // 5: n/a E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE - RELOC_BATCH(res->bo, RADEON_GEM_DOMAIN_VRAM, 0); + RELOC_BATCH(res->bo, RADEON_GEM_DOMAIN_GTT, 0); END_BATCH(); } -- cgit v1.2.3 From 5a08e68cc254fb255e631b456e331c32456ef0e7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Aug 2009 19:24:41 -0400 Subject: r6xx/r7xx: fix some define problems in Xv code --- src/r600_exa.c | 2 +- src/r600_textured_videofuncs.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index 09eb6ee..462bbb8 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -128,7 +128,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) uint32_t a, r, g, b; float ps_alu_consts[4]; - //return FALSE; + return FALSE; if (pPix->drawable.bitsPerPixel == 24) RADEON_FALLBACK(("24bpp unsupported\n")); diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 10d6f4f..a6e2559 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -228,7 +228,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) CLEAR (vs_conf); CLEAR (ps_conf); -#if defined(ACCEL_CP) && defined(XF86DRM_MODE) +#if defined(XF86DRM_MODE) if (info->cs) { accel_state->dst_mc_addr = 0; accel_state->src_mc_addr[0] = 0; @@ -269,7 +269,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); -#if defined(ACCEL_CP) && defined(XF86DRM_MODE) +#if defined(XF86DRM_MODE) if (info->cs) { accel_state->vs_mc_addr = accel_state->xv_vs_offset; accel_state->ps_mc_addr = accel_state->xv_ps_offset; -- cgit v1.2.3 From 3212c26b90c0f6f1a7248b4da3ed985a9c2e9381 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Aug 2009 01:42:10 -0400 Subject: r6xx/r7xx: more WIP --- src/r600_exa.c | 41 ++++++++++++----------------------------- src/r600_textured_videofuncs.c | 12 ++++-------- src/r6xx_accel.c | 10 ++++++++++ 3 files changed, 26 insertions(+), 37 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index 462bbb8..f2136ae 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -352,11 +352,6 @@ R600DoneSolid(PixmapPtr pPix) CLEAR (draw_conf); CLEAR (vtx_res); -#ifdef XF86DRM_MODE - if (info->cs) - radeon_bo_unmap(accel_state->vb_bo); -#endif - if (accel_state->vb_index == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); @@ -405,11 +400,6 @@ R600DoneSolid(PixmapPtr pPix) accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); - - accel_state->src_bo[0] = NULL; - accel_state->src_bo[1] = NULL; - accel_state->dst_bo = NULL; - accel_state->vb_bo = NULL; } static void @@ -453,6 +443,10 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, r600_cp_start(pScrn); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); @@ -619,11 +613,6 @@ R600DoCopy(ScrnInfoPtr pScrn) CLEAR (draw_conf); CLEAR (vtx_res); -#ifdef XF86DRM_MODE - if (info->cs) - radeon_bo_unmap(accel_state->vb_bo); -#endif - if (accel_state->vb_index == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); @@ -1092,10 +1081,7 @@ R600DoneCopy(PixmapPtr pDst) exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); accel_state->copy_area = NULL; } - accel_state->src_bo[0] = NULL; - accel_state->src_bo[1] = NULL; - accel_state->dst_bo = NULL; - accel_state->vb_bo = NULL; + } @@ -1611,6 +1597,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, if (info->cs) { accel_state->dst_mc_addr = 0; accel_state->dst_bo = radeon_get_pixmap_bo(pDst); + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; } else #endif accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; @@ -1633,6 +1621,10 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, r600_cp_start(pScrn); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); @@ -1714,7 +1706,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); - BEGIN_BATCH(12); + BEGIN_BATCH(9); EREG(accel_state->ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift)); blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); @@ -1893,11 +1885,6 @@ static void R600DoneComposite(PixmapPtr pDst) CLEAR (draw_conf); CLEAR (vtx_res); -#ifdef XF86DRM_MODE - if (info->cs) - radeon_bo_unmap(accel_state->vb_bo); -#endif - if (accel_state->vb_index == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); @@ -1954,10 +1941,6 @@ static void R600DoneComposite(PixmapPtr pDst) R600CPFlushIndirect(pScrn, accel_state->ib); - accel_state->src_bo[0] = NULL; - accel_state->src_bo[1] = NULL; - accel_state->dst_bo = NULL; - accel_state->vb_bo = NULL; } Bool diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index a6e2559..b6f7b39 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -65,11 +65,6 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) CLEAR (draw_conf); CLEAR (vtx_res); -#ifdef XF86DRM_MODE - if (info->cs) - radeon_bo_unmap(accel_state->vb_bo); -#endif - if (accel_state->vb_index == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); @@ -117,9 +112,6 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); R600CPFlushIndirect(pScrn, accel_state->ib); - accel_state->dst_bo = NULL; - accel_state->src_bo[0] = NULL; - accel_state->src_bo[1] = NULL; } void @@ -261,6 +253,10 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) r600_cp_start(pScrn); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index 8a2b1ae..7c7f469 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -39,6 +39,7 @@ #include "radeon_drm.h" +#if defined(XF86DRM_MODE) void r600_cs_flush_indirect(ScrnInfoPtr pScrn) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -46,6 +47,11 @@ void r600_cs_flush_indirect(ScrnInfoPtr pScrn) if (!info->cs->cdw) return; + + if (info->accel_state->vb_bo) + radeon_bo_unmap(info->accel_state->vb_bo); + info->accel_state->vb_bo = NULL; + radeon_cs_emit(info->cs); radeon_cs_erase(info->cs); @@ -53,6 +59,7 @@ void r600_cs_flush_indirect(ScrnInfoPtr pScrn) if (ret) ErrorF("space check failed in flush\n"); } +#endif /* Flush the indirect buffer to the kernel for submission to the card */ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) @@ -98,6 +105,9 @@ void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) int ret; RADEONInfoPtr info = RADEONPTR(pScrn); if (info->cs) { + if (info->accel_state->vb_bo) + radeon_bo_unmap(info->accel_state->vb_bo); + info->accel_state->vb_bo = NULL; if (CS_FULL(info->cs)) { r600_cs_flush_indirect(pScrn); return; -- cgit v1.2.3 From e87f0f50f31a59ca1f60d4582d4a57ed00854fb7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Aug 2009 02:13:38 -0400 Subject: r6xx/r7xx: set EXA_HANDLES_PIXMAPS --- src/r600_exa.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index f2136ae..cfe041f 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -128,7 +128,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) uint32_t a, r, g, b; float ps_alu_consts[4]; - return FALSE; + //return FALSE; if (pPix->drawable.bitsPerPixel == 24) RADEON_FALLBACK(("24bpp unsupported\n")); @@ -707,7 +707,7 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; - return FALSE; + //return FALSE; if (pSrc->drawable.bitsPerPixel == 24) RADEON_FALLBACK(("24bpp unsupported\n")); @@ -1569,7 +1569,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_config_t cb_conf; shader_config_t vs_conf, ps_conf; - return FALSE; + //return FALSE; /* return FALSE; */ if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8) @@ -2324,6 +2324,17 @@ R600DrawInit(ScreenPtr pScreen) info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; #ifdef EXA_SUPPORTS_PREPARE_AUX info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX; +#endif + +#ifdef XF86DRM_MODE +#ifdef EXA_HANDLES_PIXMAPS + if (info->cs) { + info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS; +//#ifdef EXA_MIXED_PIXMAPS +// info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS; +//#endif + } +#endif #endif info->accel_state->exa->pixmapOffsetAlign = 256; info->accel_state->exa->pixmapPitchAlign = 256; -- cgit v1.2.3 From 8f4196e88855f10762254fca9e0a0988e7b5562f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 31 Aug 2009 19:41:59 -0400 Subject: r6xx/r7xx: various CS fixes from Dave --- src/r600_exa.c | 21 +++++++++++---------- src/r6xx_accel.c | 6 ++++-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index cfe041f..db4c0b1 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -720,6 +720,7 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); + accel_state->same_surface = FALSE; #if defined(XF86DRM_MODE) if (info->cs) { @@ -728,11 +729,15 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, accel_state->src_bo[0] = radeon_get_pixmap_bo(pSrc); accel_state->src_bo[1] = NULL; accel_state->dst_bo = radeon_get_pixmap_bo(pDst); + if (accel_state->dst_bo == accel_state->src_bo[0]) + accel_state->same_surface = TRUE; } else #endif { accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) + accel_state->same_surface = TRUE; } accel_state->src_width[0] = pSrc->drawable.width; @@ -766,9 +771,8 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, accel_state->rop = rop; accel_state->planemask = planemask; - if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) { + if (accel_state->same_surface == TRUE) { unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8; - accel_state->same_surface = TRUE; #if defined(XF86DRM_MODE) if (info->cs) { @@ -802,9 +806,7 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, } accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); } - } else { - accel_state->same_surface = FALSE; - + } else R600DoPrepareCopy(pScrn, accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height, accel_state->src_mc_addr[0], accel_state->src_bo[0], pSrc->drawable.bitsPerPixel, @@ -812,8 +814,6 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, accel_state->dst_mc_addr, accel_state->dst_bo, pDst->drawable.bitsPerPixel, rop, planemask); - } - return TRUE; } @@ -853,8 +853,8 @@ R600OverlapCopy(PixmapPtr pDst, } #endif - if (is_overlap(srcX, srcX + w, srcY, srcY + h, - dstX, dstX + w, dstY, dstY + h)) { + if (is_overlap(srcX, srcX + (w - 1), srcY, srcY + (h - 1), + dstX, dstX + (w - 1), dstY, dstY + (h - 1))) { /* Calculate height/width of non-overlapping area */ hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX); vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY); @@ -1008,7 +1008,8 @@ R600Copy(PixmapPtr pDst, #endif if (accel_state->same_surface && - is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { + is_overlap(srcX, srcX + (w - 1), srcY, srcY + (h - 1), + dstX, dstX + (w - 1), dstY, dstY + (h - 1))) { if (accel_state->copy_area) { uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); uint32_t orig_offset, tmp_offset; diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index 7c7f469..6346e52 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -48,9 +48,11 @@ void r600_cs_flush_indirect(ScrnInfoPtr pScrn) if (!info->cs->cdw) return; - if (info->accel_state->vb_bo) + if (info->accel_state->vb_bo) { radeon_bo_unmap(info->accel_state->vb_bo); - info->accel_state->vb_bo = NULL; + radeon_bo_ref(info->accel_state->vb_bo); + info->accel_state->vb_bo = NULL; + } radeon_cs_emit(info->cs); radeon_cs_erase(info->cs); -- cgit v1.2.3 From 853f4c3d1ea8f975ab2855f18d3ae336a4095091 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 8 Sep 2009 11:25:39 +1000 Subject: r600: more alignment fixups + vb map/unmap I'm not so sure the vb map/unmap is a good idea, I think it pretty much locksteps the cpu/gpu, so we should really work out if we really need to flush this often, since mesa doesn't have to and we are just doing 3D ops. --- src/r600_exa.c | 7 +++---- src/r6xx_accel.c | 9 +++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index db4c0b1..c143b69 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -780,8 +780,7 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, radeon_bo_unref(accel_state->copy_area_bo); accel_state->copy_area_bo = NULL; } - accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, - 4096, + accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); if (accel_state->copy_area_bo == NULL) { @@ -789,7 +788,7 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, return FALSE; } radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, - RADEON_GEM_DOMAIN_VRAM, 0); + 0, RADEON_GEM_DOMAIN_VRAM); if (radeon_cs_space_check(info->cs)) { radeon_bo_unref(accel_state->copy_area_bo); accel_state->copy_area_bo = NULL; @@ -2157,7 +2156,7 @@ R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) #ifdef XF86DRM_MODE #if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) if (info->cs) { - accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 4096, + accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); if (accel_state->shaders_bo == NULL) { ErrorF("Allocating shader failed\n"); diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index 6346e52..985595e 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -50,8 +50,7 @@ void r600_cs_flush_indirect(ScrnInfoPtr pScrn) if (info->accel_state->vb_bo) { radeon_bo_unmap(info->accel_state->vb_bo); - radeon_bo_ref(info->accel_state->vb_bo); - info->accel_state->vb_bo = NULL; + info->accel_state->vb_ptr = NULL; } radeon_cs_emit(info->cs); @@ -1161,15 +1160,17 @@ r600_vb_get(ScrnInfoPtr pScrn) if (accel_state->vb_bo == NULL) { accel_state->vb_mc_addr = 0; accel_state->vb_bo = radeon_bo_open(info->bufmgr, 0, 16 * 1024, - 4096, RADEON_GEM_DOMAIN_GTT, 0); + 0, RADEON_GEM_DOMAIN_GTT, 0); if (accel_state->vb_bo == NULL) return FALSE; + accel_state->vb_total = 16 * 1024; + } + if (!accel_state->vb_ptr) { ret = radeon_bo_map(accel_state->vb_bo, 1); if (ret) { FatalError("failed to vb %d\n", ret); return FALSE; } - accel_state->vb_total = 16 * 1024; accel_state->vb_ptr = accel_state->vb_bo->ptr; } } else -- cgit v1.2.3