From 9460ea864b12ec1fbd11c5d9a20bb5a4279d9d3d Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Sat, 3 Oct 2009 16:33:33 +0200 Subject: Fix KMS on big endian machines. Requires at least xserver 1.7 to work properly. Also make sure the front buffer is and stays tiled if colour tiling is enabled. --- src/drmmode_display.c | 17 +++++++++ src/radeon.h | 1 + src/radeon_drm.h | 7 ++-- src/radeon_exa.c | 13 ++++++- src/radeon_exa_funcs.c | 91 +++++++++++++++++++++++++++++++++++++-------- src/radeon_kms.c | 23 +++++++++++- src/radeon_textured_video.c | 8 +++- src/radeon_video.c | 87 +++++++++++++++++++++++-------------------- 8 files changed, 185 insertions(+), 62 deletions(-) diff --git a/src/drmmode_display.c b/src/drmmode_display.c index c9037b4..ecfc629 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -915,6 +915,7 @@ drmmode_xf86crtc_resize (ScrnInfoPtr scrn, int width, int height) int screen_size; int cpp = info->CurrentLayout.pixel_bytes; struct radeon_bo *front_bo; + uint32_t tiling_flags = 0; if (scrn->virtualX == width && scrn->virtualY == height) return TRUE; @@ -948,6 +949,22 @@ drmmode_xf86crtc_resize (ScrnInfoPtr scrn, int width, int height) if (!info->front_bo) goto fail; + if (info->allowColorTiling) + tiling_flags |= RADEON_TILING_MACRO; +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch (cpp) { + case 4: + tiling_flags |= RADEON_TILING_SWAP_32BIT; + break; + case 2: + tiling_flags |= RADEON_TILING_SWAP_16BIT; + break; + } +#endif + if (tiling_flags) + radeon_bo_set_tiling(info->front_bo, + tiling_flags | RADEON_TILING_SURFACE, pitch * cpp); + ret = drmModeAddFB(drmmode->fd, width, height, scrn->depth, scrn->bitsPerPixel, pitch * cpp, info->front_bo->handle, diff --git a/src/radeon.h b/src/radeon.h index 0322bf0..9d283bb 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -463,6 +463,7 @@ typedef struct _atomBiosHandle *atomBiosHandlePtr; struct radeon_exa_pixmap_priv { struct radeon_bo *bo; int flags; + Bool bo_mapped; }; typedef struct { diff --git a/src/radeon_drm.h b/src/radeon_drm.h index f974e19..49a5f81 100644 --- a/src/radeon_drm.h +++ b/src/radeon_drm.h @@ -802,9 +802,10 @@ struct drm_radeon_gem_create { #define RADEON_TILING_MACRO 0x1 #define RADEON_TILING_MICRO 0x2 -#define RADEON_TILING_SWAP 0x4 -#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface - * when mapped - i.e. front buffer */ +#define RADEON_TILING_SWAP_16BIT 0x4 +#define RADEON_TILING_SWAP_32BIT 0x8 +#define RADEON_TILING_SURFACE 0x10 /* this object requires a surface + * when mapped - i.e. front buffer */ struct drm_radeon_gem_set_tiling { uint32_t handle; diff --git a/src/radeon_exa.c b/src/radeon_exa.c index 6cf9598..99a93a4 100644 --- a/src/radeon_exa.c +++ b/src/radeon_exa.c @@ -339,10 +339,18 @@ static void RADEONFinishAccess_BE(PixmapPtr pPix, int index) #ifdef XF86DRM_MODE Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) { - ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + ScreenPtr pScreen = pPix->drawable.pScreen; + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; struct radeon_exa_pixmap_priv *driver_priv; int ret; +#if X_BYTE_ORDER == X_BIG_ENDIAN + /* May need to handle byte swapping in DownloadFrom/UploadToScreen */ + if (pPix->drawable.bitsPerPixel > 8 && + pPix != pScreen->GetScreenPixmap(pScreen)) + return FALSE; +#endif + driver_priv = exaGetPixmapDriverPrivate(pPix); if (!driver_priv) return FALSE; @@ -357,6 +365,7 @@ Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) FatalError("failed to map pixmap %d\n", ret); return FALSE; } + driver_priv->bo_mapped = TRUE; pPix->devPrivate.ptr = driver_priv->bo->ptr; @@ -368,7 +377,7 @@ void RADEONFinishAccess_CS(PixmapPtr pPix, int index) struct radeon_exa_pixmap_priv *driver_priv; driver_priv = exaGetPixmapDriverPrivate(pPix); - if (!driver_priv) + if (!driver_priv || !driver_priv->bo_mapped) return; radeon_bo_unmap(driver_priv->bo); diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c index 98aca93..d5a3103 100644 --- a/src/radeon_exa_funcs.c +++ b/src/radeon_exa_funcs.c @@ -450,15 +450,18 @@ static Bool RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h, char *src, int src_pitch) { - RINFO_FROM_SCREEN(pDst->drawable.pScreen); + ScreenPtr pScreen = pDst->drawable.pScreen; + RINFO_FROM_SCREEN(pScreen); struct radeon_exa_pixmap_priv *driver_priv; struct radeon_bo *scratch; + unsigned char *dst; unsigned size; uint32_t datatype = 0; uint32_t dst_domain; uint32_t dst_pitch_offset; unsigned bpp = pDst->drawable.bitsPerPixel; uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63; + uint32_t swap = RADEON_HOST_DATA_SWAP_NONE; Bool r; int i; @@ -466,11 +469,34 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h, return FALSE; driver_priv = exaGetPixmapDriverPrivate(pDst); + if (!driver_priv || !driver_priv->bo) + return FALSE; + +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch (bpp) { + case 32: + swap = RADEON_HOST_DATA_SWAP_32BIT; + break; + case 16: + swap = RADEON_HOST_DATA_SWAP_16BIT; + break; + } +#endif /* If we know the BO won't be busy, don't bother */ if (driver_priv->bo->cref == 1 && - !radeon_bo_is_busy(driver_priv->bo, &dst_domain)) + !radeon_bo_is_busy(driver_priv->bo, &dst_domain)) { +#if X_BYTE_ORDER == X_BIG_ENDIAN + /* Can't return FALSE here if we need to swap bytes */ + if (swap != RADEON_HOST_DATA_SWAP_NONE && + driver_priv->bo != info->front_bo) { + scratch = driver_priv->bo; + scratch_pitch = pDst->devKind; + goto copy; + } +#endif return FALSE; + } size = scratch_pitch * h; scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0); @@ -486,6 +512,7 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h, goto out; } +copy: r = radeon_bo_map(scratch, 0); if (r) { r = FALSE; @@ -493,22 +520,28 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h, } r = TRUE; size = w * bpp / 8; + dst = scratch->ptr; + if (scratch == driver_priv->bo) + dst += y * scratch_pitch + x * bpp / 8; for (i = 0; i < h; i++) { - memcpy(scratch->ptr + i * scratch_pitch, src, size); + RADEONCopySwap(dst + i * scratch_pitch, (uint8_t*)src, size, swap); src += src_pitch; } radeon_bo_unmap(scratch); - RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype); - RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset); - ACCEL_PREAMBLE(); - RADEON_SWITCH_TO_2D(); - RADEONBlitChunk(pScrn, scratch, driver_priv->bo, datatype, scratch_pitch << 16, - dst_pitch_offset, 0, 0, x, y, w, h, - RADEON_GEM_DOMAIN_GTT, RADEON_GEM_DOMAIN_VRAM); + if (scratch != driver_priv->bo) { + RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype); + RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset); + ACCEL_PREAMBLE(); + RADEON_SWITCH_TO_2D(); + RADEONBlitChunk(pScrn, scratch, driver_priv->bo, datatype, scratch_pitch << 16, + dst_pitch_offset, 0, 0, x, y, w, h, + RADEON_GEM_DOMAIN_GTT, RADEON_GEM_DOMAIN_VRAM); + } out: - radeon_bo_unref(scratch); + if (scratch != driver_priv->bo) + radeon_bo_unref(scratch); return r; } @@ -525,12 +558,26 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, uint32_t src_pitch_offset; unsigned bpp = pSrc->drawable.bitsPerPixel; uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63; + uint32_t swap = RADEON_HOST_DATA_SWAP_NONE; Bool r; if (bpp < 8) return FALSE; driver_priv = exaGetPixmapDriverPrivate(pSrc); + if (!driver_priv || !driver_priv->bo) + return FALSE; + +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch (bpp) { + case 32: + swap = RADEON_HOST_DATA_SWAP_32BIT; + break; + case 16: + swap = RADEON_HOST_DATA_SWAP_16BIT; + break; + } +#endif /* If we know the BO won't end up in VRAM anyway, don't bother */ if (driver_priv->bo->cref > 1) { @@ -546,8 +593,17 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, if (!src_domain) radeon_bo_is_busy(driver_priv->bo, &src_domain); - if (src_domain != RADEON_GEM_DOMAIN_VRAM) + if (src_domain != RADEON_GEM_DOMAIN_VRAM) { +#if X_BYTE_ORDER == X_BIG_ENDIAN + /* Can't return FALSE here if we need to swap bytes */ + if (swap != RADEON_HOST_DATA_SWAP_NONE) { + scratch = driver_priv->bo; + scratch_pitch = pSrc->devKind; + goto copy; + } +#endif return FALSE; + } size = scratch_pitch * h; scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0); @@ -572,6 +628,7 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, RADEON_GEM_DOMAIN_GTT); FLUSH_RING(); +copy: r = radeon_bo_map(scratch, 0); if (r) { r = FALSE; @@ -579,15 +636,19 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, } r = TRUE; w *= bpp / 8; - size = 0; + if (scratch == driver_priv->bo) + size = y * scratch_pitch + x * bpp / 8; + else + size = 0; while (h--) { - memcpy(dst, scratch->ptr + size, w); + RADEONCopySwap((uint8_t*)dst, scratch->ptr + size, w, swap); size += scratch_pitch; dst += dst_pitch; } radeon_bo_unmap(scratch); out: - radeon_bo_unref(scratch); + if (scratch != driver_priv->bo) + radeon_bo_unref(scratch); return r; } #endif diff --git a/src/radeon_kms.c b/src/radeon_kms.c index bd6020b..432cee7 100644 --- a/src/radeon_kms.c +++ b/src/radeon_kms.c @@ -887,6 +887,11 @@ static Bool radeon_setup_kernel_mem(ScreenPtr pScreen) return FALSE; } +#if X_BYTE_ORDER == X_BIG_ENDIAN + radeon_bo_set_tiling(info->cursor_bo[c], RADEON_TILING_SWAP_32BIT | + RADEON_TILING_SURFACE, stride); +#endif + if (radeon_bo_map(info->cursor_bo[c], 1)) { ErrorF("Failed to map cursor buffer memory\n"); } @@ -907,6 +912,8 @@ static Bool radeon_setup_kernel_mem(ScreenPtr pScreen) info->dri->textureSize = 0; if (info->front_bo == NULL) { + uint32_t tiling_flags = 0; + info->front_bo = radeon_bo_open(info->bufmgr, 0, screen_size, 0, RADEON_GEM_DOMAIN_VRAM, 0); if (info->r600_shadow_fb == TRUE) { @@ -915,8 +922,22 @@ static Bool radeon_setup_kernel_mem(ScreenPtr pScreen) } } if (info->allowColorTiling) { - radeon_bo_set_tiling(info->front_bo, RADEON_TILING_MACRO, stride); + tiling_flags |= RADEON_TILING_MACRO; } +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch (cpp) { + case 4: + tiling_flags |= RADEON_TILING_SWAP_32BIT; + break; + case 2: + tiling_flags |= RADEON_TILING_SWAP_16BIT; + break; + } +#endif + if (tiling_flags) { + radeon_bo_set_tiling(info->front_bo, + tiling_flags | RADEON_TILING_SURFACE, stride); + } } xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Front buffer size: %dK\n", info->front_bo->size/1024); diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c index 04a2401..d6b221f 100644 --- a/src/radeon_textured_video.c +++ b/src/radeon_textured_video.c @@ -709,7 +709,13 @@ Bool radeon_load_bicubic_texture(ScrnInfoPtr pScrn) } else bicubic_addr = (uint8_t *)(info->FB + info->bicubic_offset); - RADEONCopyData(pScrn, (uint8_t *)bicubic_tex_512, bicubic_addr, 1024, 1024, 1, 512, 2); + RADEONCopySwap(bicubic_addr, (uint8_t *)bicubic_tex_512, 1024, +#if X_BYTE_ORDER == X_BIG_ENDIAN + RADEON_HOST_DATA_SWAP_16BIT +#else + RADEON_HOST_DATA_SWAP_NONE +#endif +); if (info->cs) radeon_bo_unmap(info->bicubic_bo); } diff --git a/src/radeon_video.c b/src/radeon_video.c index f1fe72b..7aaa266 100644 --- a/src/radeon_video.c +++ b/src/radeon_video.c @@ -2198,36 +2198,37 @@ RADEONCopyData( else #endif /* XF86DRI */ { -#if X_BYTE_ORDER == X_BIG_ENDIAN - unsigned char *RADEONMMIO = info->MMIO; - unsigned int swapper = info->ModeReg->surface_cntl & - ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP | - RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP); + int swap = RADEON_HOST_DATA_SWAP_NONE; - switch(bpp) { - case 2: - swapper |= RADEON_NONSURF_AP0_SWP_16BPP - | RADEON_NONSURF_AP1_SWP_16BPP; - break; - case 4: - swapper |= RADEON_NONSURF_AP0_SWP_32BPP - | RADEON_NONSURF_AP1_SWP_32BPP; - break; +#if X_BYTE_ORDER == X_BIG_ENDIAN + if (info->kms_enabled) { + switch(bpp) { + case 2: + swap = RADEON_HOST_DATA_SWAP_16BIT; + break; + case 4: + swap = RADEON_HOST_DATA_SWAP_32BIT; + break; + } + } else if (bpp != pScrn->bitsPerPixel) { + if (bpp == 8) + swap = RADEON_HOST_DATA_SWAP_32BIT; + else + swap = RADEON_HOST_DATA_SWAP_HDW; } - OUTREG(RADEON_SURFACE_CNTL, swapper); #endif + w *= bpp; - while (h--) { - memcpy(dst, src, w); - src += srcPitch; - dst += dstPitch; + if (dstPitch == w && dstPitch == srcPitch) + RADEONCopySwap(dst, src, h * dstPitch, swap); + else { + while (h--) { + RADEONCopySwap(dst, src, w, swap); + src += srcPitch; + dst += dstPitch; + } } - -#if X_BYTE_ORDER == X_BIG_ENDIAN - /* restore byte swapping */ - OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl); -#endif } } @@ -2282,9 +2283,10 @@ RADEONCopyRGB24Data( { #if X_BYTE_ORDER == X_BIG_ENDIAN unsigned char *RADEONMMIO = info->MMIO; - OUTREG(RADEON_SURFACE_CNTL, (info->ModeReg->surface_cntl - | RADEON_NONSURF_AP0_SWP_32BPP) - & ~RADEON_NONSURF_AP0_SWP_16BPP); + + if (!info->kms_enabled) + OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl & + ~(RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP0_SWP_32BPP)); #endif for (j = 0; j < h; j++) { @@ -2292,13 +2294,15 @@ RADEONCopyRGB24Data( sptr = src + j * srcPitch; for (i = 0; i < w; i++, sptr += 3) { - dptr[i] = (sptr[2] << 16) | (sptr[1] << 8) | sptr[0]; + dptr[i] = cpu_to_le32((sptr[2] << 16) | (sptr[1] << 8) | sptr[0]); } } #if X_BYTE_ORDER == X_BIG_ENDIAN - /* restore byte swapping */ - OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl); + if (!info->kms_enabled) { + /* restore byte swapping */ + OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl); + } #endif } } @@ -2377,9 +2381,10 @@ RADEONCopyMungedData( #if X_BYTE_ORDER == X_BIG_ENDIAN unsigned char *RADEONMMIO = info->MMIO; - OUTREG(RADEON_SURFACE_CNTL, (info->ModeReg->surface_cntl - | RADEON_NONSURF_AP0_SWP_32BPP) - & ~RADEON_NONSURF_AP0_SWP_16BPP); + + if (!info->kms_enabled) + OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl & + ~(RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP0_SWP_32BPP)); #endif w /= 2; @@ -2391,16 +2396,16 @@ RADEONCopyMungedData( i = w; while( i > 4 ) { - dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24); - dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24); - dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24); - dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24); + dst[0] = cpu_to_le32(s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24)); + dst[1] = cpu_to_le32(s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24)); + dst[2] = cpu_to_le32(s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24)); + dst[3] = cpu_to_le32(s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24)); dst += 4; s2 += 4; s3 += 4; s1 += 8; i -= 4; } while( i-- ) { - dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24); + dst[0] = cpu_to_le32(s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24)); dst++; s2++; s3++; s1 += 2; } @@ -2414,8 +2419,10 @@ RADEONCopyMungedData( } } #if X_BYTE_ORDER == X_BIG_ENDIAN - /* restore byte swapping */ - OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl); + if (!info->kms_enabled) { + /* restore byte swapping */ + OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl); + } #endif } } -- cgit v1.2.3