summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2011-07-01 14:55:44 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2011-07-01 21:41:23 +0100
commit120c98ac10435c8e848a8337c1f544f81a05cd3a (patch)
tree2c69846fad530f2d45d29ebfcae2783955b3872a
parentf6c8c3bb6fd75bca6c7704b7d5869a5d44ce3832 (diff)
sna: Downsample sources 2x too large to fit in the 3D pipeline
This is quite trivial to hit given the 2k limits on gen2/gen3. We compromise on image quality by pre-downscaling the source by a fixed factor to make it fit into the pipeline in preference to performing the entire operation on the CPU. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--src/sna/Makefile.am1
-rw-r--r--src/sna/kgem.c58
-rw-r--r--src/sna/kgem.h6
-rw-r--r--src/sna/kgem_debug.c4
-rw-r--r--src/sna/kgem_debug.h3
-rw-r--r--src/sna/kgem_debug_gen3.c5
-rw-r--r--src/sna/sna_render.c211
7 files changed, 283 insertions, 5 deletions
diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
index dfd8a57f..d76480da 100644
--- a/src/sna/Makefile.am
+++ b/src/sna/Makefile.am
@@ -90,6 +90,7 @@ if DEBUG
libsna_la_SOURCES += \
kgem_debug.c \
kgem_debug.h \
+ kgem_debug_gen2.c \
kgem_debug_gen3.c \
kgem_debug_gen4.c \
kgem_debug_gen5.c \
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 88b1d837..6fe6e936 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1819,6 +1819,64 @@ struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
return bo;
}
+/**
+ * kgem_upload_source_image_halved - upload a half-size copy of a CPU image
+ * @kgem: device state handed to kgem_create_buffer()
+ * @format: pixman format used for both the source and the reduced image
+ * @data: first pixel of the source surface; the caller in sna_render.c
+ *        passes the pixmap base pointer, so (@x, @y) are offsets from it
+ * @x, @y: origin, in source pixels, of the region to sample
+ * @width, @height: size, in source pixels, of the region to sample
+ * @stride: byte pitch of @data
+ * @bpp: bits per pixel of @data
+ *
+ * Uses pixman to bilinearly scale the width x height region at (@x, @y)
+ * down by a factor of two in each direction, writing the result straight
+ * into a buffer obtained from kgem_create_buffer().
+ *
+ * Returns the buffer, with its pitch set to the stride of the reduced
+ * image, or NULL if the buffer or either pixman image could not be
+ * created.
+ */
+struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem,
+						pixman_format_code_t format,
+						const void *data,
+						int x, int y,
+						int width, int height,
+						int stride, int bpp)
+{
+	/* Row pitch of the half-width image, rounded up to 32 bits. */
+	int dst_stride = ALIGN(width * bpp / 2, 32) >> 3;
+	int size = dst_stride * height / 2;
+	struct kgem_bo *bo;
+	pixman_image_t *src_image, *dst_image;
+	pixman_transform_t t;
+	void *dst;
+
+	DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
+	     __FUNCTION__, x, y, width, height, stride, bpp));
+
+	bo = kgem_create_buffer(kgem, size, KGEM_BUFFER_WRITE, &dst);
+	if (bo == NULL)
+		return NULL;
+
+	dst_image = pixman_image_create_bits(format, width/2, height/2,
+					     dst, dst_stride);
+	if (dst_image == NULL)
+		goto cleanup_bo;
+
+	/*
+	 * @data addresses pixel (0, 0) of the surface, not pixel (x, y), so
+	 * the image handed to pixman has to extend as far as the bottom-right
+	 * corner of the sampled region; declaring it only width x height
+	 * would place part of the region outside the image whenever x or y
+	 * is non-zero.
+	 */
+	src_image = pixman_image_create_bits(format, x + width, y + height,
+					     (uint32_t*)data, stride);
+	if (src_image == NULL)
+		goto cleanup_dst;
+
+	/*
+	 * Map destination pixel i to source coordinate 2*i + origin.  The
+	 * origin must live in the translation column of the matrix: pixman
+	 * applies the transform to the src_x/src_y composite offsets as
+	 * well, so passing (x, y) there would sample from (2*x, 2*y).
+	 * x and y are expected to be non-negative pixmap coordinates.
+	 */
+	memset(&t, 0, sizeof(t));
+	t.matrix[0][0] = 2 << 16;
+	t.matrix[0][2] = x << 16;
+	t.matrix[1][1] = 2 << 16;
+	t.matrix[1][2] = y << 16;
+	t.matrix[2][2] = 1 << 16;
+	pixman_image_set_transform(src_image, &t);
+	pixman_image_set_filter(src_image, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
+	pixman_image_composite(PIXMAN_OP_SRC,
+			       src_image, NULL, dst_image,
+			       0, 0,
+			       0, 0,
+			       0, 0,
+			       width/2, height/2);
+
+	pixman_image_unref(src_image);
+	pixman_image_unref(dst_image);
+
+	bo->pitch = dst_stride;
+	return bo;
+
+cleanup_dst:
+	pixman_image_unref(dst_image);
+cleanup_bo:
+	kgem_bo_destroy(kgem, bo);
+	return NULL;
+}
+
+
void kgem_buffer_sync(struct kgem *kgem, struct kgem_bo *_bo)
{
struct kgem_partial_bo *bo;
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index fac30afa..013809c8 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -141,6 +141,12 @@ struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
int x, int y,
int width, int height,
int stride, int bpp);
+struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem,
+ pixman_format_code_t format,
+ const void *data,
+ int x, int y,
+ int width, int height,
+ int stride, int bpp);
int kgem_choose_tiling(struct kgem *kgem,
int tiling, int width, int height, int bpp);
diff --git a/src/sna/kgem_debug.c b/src/sna/kgem_debug.c
index 0dcd7065..20fe8a2e 100644
--- a/src/sna/kgem_debug.c
+++ b/src/sna/kgem_debug.c
@@ -352,6 +352,8 @@ static int (*decode_3d(int gen))(struct kgem*, uint32_t)
return kgem_gen4_decode_3d;
} else if (gen >= 30) {
return kgem_gen3_decode_3d;
+ } else if (gen >= 20) {
+ return kgem_gen2_decode_3d;
}
assert(0);
}
@@ -366,6 +368,8 @@ static void (*finish_state(int gen))(struct kgem*)
return kgem_gen4_finish_state;
} else if (gen >= 30) {
return kgem_gen3_finish_state;
+ } else if (gen >= 20) {
+ return kgem_gen2_finish_state;
}
assert(0);
}
diff --git a/src/sna/kgem_debug.h b/src/sna/kgem_debug.h
index f9a931df..9211dcb3 100644
--- a/src/sna/kgem_debug.h
+++ b/src/sna/kgem_debug.h
@@ -25,4 +25,7 @@ void kgem_gen4_finish_state(struct kgem *kgem);
int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset);
void kgem_gen3_finish_state(struct kgem *kgem);
+int kgem_gen2_decode_3d(struct kgem *kgem, uint32_t offset);
+void kgem_gen2_finish_state(struct kgem *kgem);
+
#endif
diff --git a/src/sna/kgem_debug_gen3.c b/src/sna/kgem_debug_gen3.c
index da1d9fc9..6709a8ec 100644
--- a/src/sna/kgem_debug_gen3.c
+++ b/src/sna/kgem_debug_gen3.c
@@ -1552,9 +1552,6 @@ out:
int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset)
{
- uint32_t opcode;
- unsigned int idx;
-
struct {
uint32_t opcode;
int min_len;
@@ -1572,6 +1569,8 @@ int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset)
{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
};
uint32_t *data = kgem->batch + offset;
+ uint32_t opcode;
+ unsigned int idx;
opcode = (data[0] & 0x1f000000) >> 24;
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 72a3c1e2..baf51c32 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -427,6 +427,212 @@ sna_render_pixmap_bo(struct sna *sna,
return bo != NULL;
}
+/*
+ * sna_render_picture_downsample - use a half-size copy of an oversized source
+ *
+ * Called from sna_render_picture_extract() when the sampled region exceeds
+ * sna->render.max_3d_size.  The region of @picture needed to cover the
+ * (@x, @y) + (@w x @h) operation is reduced by a factor of two in each
+ * direction, either on the CPU via kgem_upload_source_image_halved() or by
+ * compositing 2x2 tiles into a scratch pixmap, and @channel is pointed at
+ * the reduced copy.
+ *
+ * Returns 1 when @channel has been set up, 0 when the CPU upload failed
+ * (the debug message describes this as "using clear"), and otherwise
+ * whatever sna_render_picture_fixup() returns for the original extents.
+ */
+static int sna_render_picture_downsample(struct sna *sna,
+					 PicturePtr picture,
+					 struct sna_composite_channel *channel,
+					 int16_t x, int16_t y,
+					 int16_t w, int16_t h,
+					 int16_t dst_x, int16_t dst_y)
+{
+	struct kgem_bo *bo = NULL;
+	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);
+	int16_t ox, oy, ow, oh;
+	BoxRec box;
+
+	assert(w && h);
+
+	DBG(("%s (%d, %d)x(%d, %d) [dst=(%d, %d)]\n",
+	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+	/* w and h are reused for the sample box below; keep the originals. */
+	ow = w;
+	oh = h;
+
+	ox = box.x1 = x;
+	oy = box.y1 = y;
+	box.x2 = x + w;
+	box.y2 = y + h;
+	if (channel->transform) {
+		pixman_vector_t v;
+
+		pixman_transform_bounds(channel->transform, &box);
+
+		/* Track where the operation's origin lands in the source. */
+		v.vector[0] = ox << 16;
+		v.vector[1] = oy << 16;
+		v.vector[2] = 1 << 16;
+		pixman_transform_point(channel->transform, &v);
+		ox = v.vector[0] / v.vector[2];
+		oy = v.vector[1] / v.vector[2];
+	}
+
+	if (channel->repeat != RepeatNone) {
+		if (box.x1 < 0 ||
+		    box.y1 < 0 ||
+		    box.x2 > pixmap->drawable.width ||
+		    box.y2 > pixmap->drawable.height) {
+			/* XXX tiled repeats? */
+			box.x1 = box.y1 = 0;
+			box.x2 = pixmap->drawable.width;
+			box.y2 = pixmap->drawable.height;
+
+			if (!channel->is_affine) {
+				DBG(("%s: fallback -- repeating project transform too large for texture\n",
+				     __FUNCTION__));
+				return sna_render_picture_fixup(sna,
+								picture,
+								channel,
+								x, y, ow, oh,
+								dst_x, dst_y);
+			}
+		}
+	} else {
+		/* Without repeat, only the part inside the pixmap is sampled. */
+		if (box.x1 < 0)
+			box.x1 = 0;
+		if (box.y1 < 0)
+			box.y1 = 0;
+		if (box.x2 > pixmap->drawable.width)
+			box.x2 = pixmap->drawable.width;
+		if (box.y2 > pixmap->drawable.height)
+			box.y2 = pixmap->drawable.height;
+	}
+
+	w = box.x2 - box.x1;
+	h = box.y2 - box.y1;
+	assert(w && h);
+	/* Halving only helps for sources up to twice the pipeline limit. */
+	if (w > 2*sna->render.max_3d_size || h > 2*sna->render.max_3d_size)
+		goto fixup;
+
+	if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) {
+		bo = kgem_upload_source_image_halved(&sna->kgem,
+						     picture->format,
+						     pixmap->devPrivate.ptr,
+						     box.x1, box.y1, w, h,
+						     pixmap->devKind,
+						     pixmap->drawable.bitsPerPixel);
+		if (!bo) {
+			DBG(("%s: failed to upload source image, using clear\n",
+			     __FUNCTION__));
+			return 0;
+		}
+	} else {
+		ScreenPtr screen = pixmap->drawable.pScreen;
+		PicturePtr tmp_src, tmp_dst;
+		PictFormatPtr format;
+		struct sna_pixmap *priv;
+		pixman_transform_t t;
+		PixmapPtr tmp;
+		int error, i, j, ww, hh;
+
+		if (!sna_pixmap_force_to_gpu(pixmap))
+			goto fixup;
+
+		tmp = screen->CreatePixmap(screen,
+					   w/2, h/2, pixmap->drawable.depth,
+					   CREATE_PIXMAP_USAGE_SCRATCH);
+		if (!tmp)
+			goto fixup;
+
+		priv = sna_pixmap(tmp);
+		if (!priv) {
+			screen->DestroyPixmap(tmp);
+			goto fixup;
+		}
+
+		format = PictureMatchFormat(screen,
+					    pixmap->drawable.depth,
+					    picture->format);
+
+		/* Both pictures are dereferenced below; bail out if either
+		 * could not be created. */
+		tmp_dst = CreatePicture(0, &tmp->drawable, format, 0, NULL,
+					serverClient, &error);
+		if (!tmp_dst) {
+			screen->DestroyPixmap(tmp);
+			goto fixup;
+		}
+
+		tmp_src = CreatePicture(0, &pixmap->drawable, format, 0, NULL,
+					serverClient, &error);
+		if (!tmp_src) {
+			FreePicture(tmp_dst, 0);
+			screen->DestroyPixmap(tmp);
+			goto fixup;
+		}
+
+		tmp_src->filter = PictFilterBilinear;
+		memset(&t, 0, sizeof(t));
+		t.matrix[0][0] = 2 << 16;
+		t.matrix[1][1] = 2 << 16;
+		t.matrix[2][2] = 1 << 16;
+		/* t lives on the stack: it is detached again before every
+		 * FreePicture(tmp_src) below. */
+		tmp_src->transform = &t;
+
+		ValidatePicture(tmp_dst);
+		ValidatePicture(tmp_src);
+
+		/* Render the half-size copy as a 2x2 grid of tiles. */
+		ww = w/4; hh = h/4;
+
+		DBG(("%s downsampling using %dx%d GPU tiles\n",
+		     __FUNCTION__, ww, hh));
+
+		for (i = 0; i < 2; i++) {
+			for (j = 0; j < 2; j++) {
+				struct sna_composite_op op;
+				BoxRec b;
+
+				/*
+				 * NOTE(review): the source offset passed here
+				 * is presumably subject to the 2x transform
+				 * set above, which would sample from twice
+				 * box.x1/box.y1 when the box origin is
+				 * non-zero -- confirm against the composite
+				 * implementation.
+				 */
+				memset(&op, 0, sizeof(op));
+				if (!sna->render.composite(sna,
+							   PictOpSrc,
+							   tmp_src, NULL, tmp_dst,
+							   box.x1 + ww*j, box.y1 + hh*i,
+							   0, 0,
+							   ww*j, hh*i,
+							   ww, hh,
+							   &op)) {
+					tmp_src->transform = NULL;
+					FreePicture(tmp_src, 0);
+					FreePicture(tmp_dst, 0);
+					screen->DestroyPixmap(tmp);
+					goto fixup;
+				}
+
+				b.x1 = ww*j;
+				b.y1 = hh*i;
+				b.x2 = b.x1 + ww;
+				b.y2 = b.y1 + hh;
+
+				op.boxes(sna, &op, &b, 1);
+				op.done(sna, &op);
+			}
+		}
+
+		/* Take our own reference on the scratch pixmap's bo before
+		 * the pixmap itself is destroyed. */
+		bo = kgem_bo_reference(priv->gpu_bo);
+
+		tmp_src->transform = NULL;
+		FreePicture(tmp_src, 0);
+		FreePicture(tmp_dst, 0);
+		screen->DestroyPixmap(tmp);
+	}
+
+	/* Re-express the operation's origin relative to the extracted box. */
+	if (ox == x && oy == y) {
+		x = y = 0;
+	} else if (channel->transform) {
+		pixman_vector_t v;
+		pixman_transform_t m;
+
+		v.vector[0] = (ox - box.x1) << 16;
+		v.vector[1] = (oy - box.y1) << 16;
+		v.vector[2] = 1 << 16;
+		pixman_transform_invert(&m, channel->transform);
+		pixman_transform_point(&m, &v);
+		x = v.vector[0] / v.vector[2];
+		y = v.vector[1] / v.vector[2];
+	} else {
+		x = ox - box.x1;
+		y = oy - box.y1;
+	}
+
+	/* The scale is derived from the full-size box while width/height
+	 * record the halved dimensions of the bo. */
+	channel->offset[0] = x - dst_x;
+	channel->offset[1] = y - dst_y;
+	channel->scale[0] = 1./w;
+	channel->scale[1] = 1./h;
+	channel->width  = w / 2;
+	channel->height = h / 2;
+	channel->bo = bo;
+	return 1;
+
+fixup:
+	/* w and h now describe the source sample box; the fallback wants the
+	 * extents of the operation as originally requested, exactly as the
+	 * early fallback above passes them. */
+	return sna_render_picture_fixup(sna, picture, channel,
+					x, y, ow, oh,
+					dst_x, dst_y);
+}
+
+
int
sna_render_picture_extract(struct sna *sna,
PicturePtr picture,
@@ -517,8 +723,9 @@ sna_render_picture_extract(struct sna *sna,
if (w > sna->render.max_3d_size || h > sna->render.max_3d_size) {
DBG(("%s: fallback -- sample too large for texture (%d, %d)x(%d, %d)\n",
__FUNCTION__, box.x1, box.y1, w, h));
- return sna_render_picture_fixup(sna, picture, channel,
- x, y, ow, oh, dst_x, dst_y);
+ return sna_render_picture_downsample(sna, picture, channel,
+ x, y, ow, oh,
+ dst_x, dst_y);
}
if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) {