summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Benjamin Herrenschmidt <benh@kernel.crashing.org> 2006-12-07 22:21:52 +1100
committer Benjamin Herrenschmidt <benh@kernel.crashing.org> 2006-12-07 22:21:52 +1100
commit 262cbe9e5fa12f8b591756903511d0e15a96185e (patch)
tree 1ab9a4dec807eb7c626d6a6b4d8fba43286c97fa
parent 4d75f099fbb83303f67792fbdcd742cac6c9b3e1 (diff)
Add some basic altivec support
This adds AltiVec versions of the argb32_over_argb32 and argb32_source_argb32 operations, and a bit of AltiVec optimisation to the fbdev put span code as well. It could probably be optimised further, and with some patience one could implement the full set of primitives for AltiVec instead of just those two, but this is at least a beginning. I haven't properly measured the difference in performance yet; it is definitely faster on my PowerBook though. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- Makefile.am 1
-rw-r--r-- libtwin/twin.h 10
-rw-r--r-- libtwin/twin_draw.c 14
-rw-r--r-- libtwin/twin_fbdev.c 54
-rw-r--r-- libtwin/twin_primitive.c 137
-rw-r--r-- libtwin/twin_screen.c 32
-rw-r--r-- libtwin/twinint.h 11
-rw-r--r-- twin_demos/ftwin.c 2
-rw-r--r-- twin_demos/xtwin.c 6
9 files changed, 254 insertions, 13 deletions
diff --git a/Makefile.am b/Makefile.am
index 6b5cbfa..44a0604 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -14,6 +14,7 @@ libtwin_libtwin_la_SOURCES = \
libtwin/twin_cursor.c \
libtwin/twin_dispatch.c \
libtwin/twin_draw.c \
+ libtwin/twin_feature.c \
libtwin/twin_hull.c \
libtwin/twin_icon.c \
libtwin/twin_file.c \
diff --git a/libtwin/twin.h b/libtwin/twin.h
index 8755da6..065cdc9 100644
--- a/libtwin/twin.h
+++ b/libtwin/twin.h
@@ -48,6 +48,16 @@ typedef enum { TWIN_A8, TWIN_RGB16, TWIN_ARGB32 } twin_format_t;
#define twin_bytes_per_pixel(format) (1 << (twin_coord_t) (format))
/*
+ * Features
+ */
+
+#define TWIN_FEATURE_ALTIVEC 0x00000001
+
+void twin_feature_init(void);
+int twin_has_feature(unsigned int feature);
+
+
+/*
* Angles
*/
typedef int16_t twin_angle_t; /* -2048 .. 2048 for -180 .. 180 */
diff --git a/libtwin/twin_draw.c b/libtwin/twin_draw.c
index 48a9da6..1c117a0 100644
--- a/libtwin/twin_draw.c
+++ b/libtwin/twin_draw.c
@@ -694,3 +694,17 @@ twin_fill (twin_pixmap_t *dst,
(*op) (twin_pixmap_pointer (dst, left, iy), src, right - left);
twin_pixmap_damage (dst, left, top, right, bottom);
}
+
+/*
+ * Install the AltiVec span operators into the comp2 table.
+ *
+ * When built with HAVE_ALTIVEC and twin_has_feature() reports
+ * TWIN_FEATURE_ALTIVEC, the OVER and SOURCE entries for the
+ * ARGB32/ARGB32 format pair are replaced by their _twin_vec_*
+ * counterparts.  In every other case comp2 is left untouched.
+ */
+void
+_twin_draw_set_features(void)
+{
+#ifdef HAVE_ALTIVEC
+    if (!twin_has_feature(TWIN_FEATURE_ALTIVEC))
+        return;
+
+    comp2[TWIN_OVER][TWIN_ARGB32][TWIN_ARGB32] =
+        _twin_vec_argb32_over_argb32;
+    comp2[TWIN_SOURCE][TWIN_ARGB32][TWIN_ARGB32] =
+        _twin_vec_argb32_source_argb32;
+#endif /* HAVE_ALTIVEC */
+}
+
diff --git a/libtwin/twin_fbdev.c b/libtwin/twin_fbdev.c
index a81a658..032cd28 100644
--- a/libtwin/twin_fbdev.c
+++ b/libtwin/twin_fbdev.c
@@ -41,6 +41,11 @@
#include <linux/input.h>
#include "twin_fbdev.h"
+#include "twinint.h"
+
+#ifdef HAVE_ALTIVEC
+#include <altivec.h>
+#endif
#define _IMMEDIATE_REFRESH
@@ -76,6 +81,44 @@ static void _twin_fbdev_put_span (twin_coord_t left,
*(dest++) = *(pixels++);
}
+#ifdef HAVE_ALTIVEC
+/*
+ * AltiVec variant of the fbdev put_span callback.
+ *
+ * Copies one span, columns [left, right) of row `top`, from `pixels`
+ * into the framebuffer of the twin_fbdev_t passed as `closure`.
+ * Nothing is written while the device is inactive or its framebuffer
+ * mapping failed.  An empty or negative-width span copies nothing.
+ *
+ * The copy runs in three phases: scalar stores until the destination
+ * is 16-byte aligned, 4-pixel vector stores, then scalar stores for
+ * whatever is left.  The source may be unaligned; it is realigned with
+ * vec_lvsl/vec_perm.
+ */
+static void _twin_fbdev_vec_put_span (twin_coord_t left,
+                                      twin_coord_t top,
+                                      twin_coord_t right,
+                                      twin_argb32_t *pixels,
+                                      void *closure)
+{
+    twin_fbdev_t *tf = closure;
+    twin_coord_t width = right - left;
+    unsigned int *dest;
+    vector unsigned char edgeperm;
+    vector unsigned char src0v, src1v, srcv;
+
+    if (!tf->active || tf->fb_base == MAP_FAILED)
+        return;
+
+    dest = (unsigned int *)(tf->fb_ptr + top * tf->fb_fix.line_length);
+    dest += left;
+
+    /*
+     * Check the remaining count before consuming it.  A span that ends
+     * before the destination becomes aligned must leave width at 0;
+     * letting it drop to -1 would make the tail loop below run away
+     * and write far past the end of the span.
+     */
+    while (width > 0 && (((unsigned long)dest) & 0xf)) {
+        *(dest++) = *(pixels++);
+        width--;
+    }
+
+    /* Only touch the source with vector loads if a full vector remains */
+    if (width >= 4) {
+        edgeperm = vec_lvsl (0, pixels);
+        src0v = vec_ld (0, pixels);
+        do {
+            /*
+             * NOTE(review): this loads the quadword after the one being
+             * consumed, also on the last iteration, so it can read up to
+             * 16 bytes beyond the pixels actually copied - confirm the
+             * span buffer tolerates that.
+             */
+            src1v = vec_ld (16, pixels);
+            srcv = vec_perm (src0v, src1v, edgeperm);
+            vec_st ((vector unsigned int)srcv, 0, dest);
+            src0v = src1v;
+            dest += 4;
+            pixels += 4;
+            width -= 4;
+        } while (width >= 4);
+    }
+
+    /* scalar tail, at most three pixels after the vector phase */
+    while (width > 0) {
+        *(dest++) = *(pixels++);
+        width--;
+    }
+}
+#endif /* HAVE_ALTIVEC */
+
static twin_bool_t twin_fbdev_apply_config(twin_fbdev_t *tf)
{
off_t off, pgsize = getpagesize();
@@ -426,10 +469,17 @@ static twin_bool_t twin_fbdev_init_fb(twin_fbdev_t *tf)
static twin_bool_t twin_fbdev_init_screen(twin_fbdev_t *tf)
{
+ twin_put_span_t span;
+
+ span = _twin_fbdev_put_span;
+#ifdef HAVE_ALTIVEC
+ if (twin_has_feature(TWIN_FEATURE_ALTIVEC))
+ span = _twin_fbdev_vec_put_span;
+#endif
+
tf->screen = twin_screen_create(tf->fb_var.xres,
tf->fb_var.yres,
- NULL,
- _twin_fbdev_put_span, tf);
+ NULL, span, tf);
if (tf->screen == NULL) {
IERROR("can't create twin screen");
return 0;
diff --git a/libtwin/twin_primitive.c b/libtwin/twin_primitive.c
index 362cbf1..28684cb 100644
--- a/libtwin/twin_primitive.c
+++ b/libtwin/twin_primitive.c
@@ -263,5 +263,140 @@ make_twin_op_srcs(op,argb32) \
make_twin_op_srcs(op,rgb16) \
make_twin_op_srcs(op,a8)
-make_twin_op_dsts_srcs(over)
+make_twin_op_dsts_srcs(over);
make_twin_op_dsts_srcs(source)
+
+#ifdef HAVE_ALTIVEC
+
+#include <altivec.h>
+
+#define VUNALIGNED(p) (((unsigned long)(p)) & 0xf)
+
+
+/*
+ * Altivec over function, some bits inspired by SDL.
+ *
+ * Composites four 32-bit pixels at once.  Byte 0 of each pixel in `src`
+ * is taken as its alpha, and every byte of the result is
+ *
+ *     src + dst * (255 - alpha) / 255
+ *
+ * with the division rounded and the final addition saturating.
+ * Returns the four composited pixels.
+ */
+static inline vector unsigned int over_v (vector unsigned char dst,
+                                          vector unsigned char src)
+{
+    /* { 0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12 }: each pixel's alpha byte */
+    const vector unsigned char alphasplit =
+        vec_and (vec_lvsl (0, (int *)NULL), vec_splat_u8(0x0c));
+    /* { 0,16, 2,18, ... 14,30 }: interleave high bytes of even/odd products */
+    const vector unsigned char merge =
+        vec_add(vec_lvsl(0, (int *)NULL),
+                (vector unsigned char)vec_splat_u16(0x0f));
+    /* 0x0080 in every halfword, the rounding term */
+    const vector unsigned short v80 = vec_sl(vec_splat_u16(1), vec_splat_u16(7));
+    /* Shift count of 8 per halfword; built as a halfword vector so that
+     * no implicit conversion between vector types is needed.
+     */
+    const vector unsigned short v8 = vec_splat_u16(8);
+    vector unsigned char alpha, alphainv;
+    vector unsigned short dmule, dmulo;
+
+    /* get source alpha values all over the vector */
+    alpha = vec_perm(src, src, alphasplit);
+
+    /* invert alpha */
+    alphainv = vec_nor(alpha, alpha);
+
+    /* multiply destination values with inverse alpha into 2 u16 vectors */
+    dmule = vec_mule(dst, alphainv);
+    dmulo = vec_mulo(dst, alphainv);
+
+    /* round: (x + 0x80 + ((x + 0x80) >> 8)) >> 8 approximates x / 255 */
+    dmule = vec_add(dmule, v80);
+    dmulo = vec_add(dmulo, v80);
+    dmule = vec_add(dmule, vec_sr(dmule, v8));
+    dmulo = vec_add(dmulo, vec_sr(dmulo, v8));
+
+    /* merge back: keep the high byte of every product, in pixel order */
+    dst = (vector unsigned char)vec_perm(dmule, dmulo, merge);
+
+    /* return added value */
+    return (vector unsigned int)vec_adds(dst, src);
+}
+/*
+ * AltiVec version of the ARGB32-over-ARGB32 span operator.
+ *
+ * Composites `width` pixels read through src.p.argb32 onto the pixels
+ * at dst.argb32.  Spans shorter than 8 pixels are handed to the scalar
+ * _twin_argb32_over_argb32.  Longer spans are processed in three
+ * phases: scalar until the destination is 16-byte aligned, four pixels
+ * per iteration through over_v, then scalar for the remaining pixels.
+ */
+void _twin_vec_argb32_over_argb32 (twin_pointer_t dst,
+ twin_source_u src,
+ int width)
+{
+ twin_argb32_t dst32;
+ twin_argb32_t src32;
+ vector unsigned char edgeperm;
+ vector unsigned char src0v, src1v, srcv, dstv;
+
+
+ /* Go scalar for small amounts as I can't be bothered.
+  * With at least 8 pixels left, the alignment loop below cannot use
+  * up width before the destination is aligned (assuming dst is at
+  * least 4-byte aligned, so it needs at most three iterations).
+  */
+ if (width < 8) {
+ _twin_argb32_over_argb32(dst, src, width);
+ return;
+ }
+
+ /* first run scalar until destination is aligned
+  * (VUNALIGNED tests the low four address bits, i.e. 16 bytes)
+  */
+ while (VUNALIGNED(dst.v) && width--) {
+ dst32 = dst_argb32_get;
+ src32 = src_argb32;
+ dst32 = over (dst32, src32);
+ dst_argb32_set (dst32);
+ }
+
+ /* maybe we should have a special "aligned" version to avoid those
+ * permutations...
+ *
+ * The source may be unaligned: src0v holds the aligned quadword
+ * containing the current source pointer, src1v the one after it, and
+ * edgeperm (from vec_lvsl) picks the 16 bytes starting at the real
+ * source address out of that pair.  src1v is carried over as the next
+ * iteration's src0v.
+ *
+ * NOTE(review): the vec_ld at offset 16 also runs on the last
+ * iteration, so up to 16 bytes beyond the consumed source pixels are
+ * read - confirm the source buffers tolerate that.
+ */
+ edgeperm = vec_lvsl (0, src.p.argb32);
+ src0v = vec_ld (0, src.p.argb32);
+ while(width >= 4) {
+ dstv = vec_ld (0, dst.argb32);
+ src1v = vec_ld (16, src.p.argb32);
+ srcv = vec_perm (src0v, src1v, edgeperm);
+ dstv = over_v (dstv, srcv);
+ vec_st ((vector unsigned int)dstv, 0, dst.argb32);
+ src.p.argb32 += 4;
+ dst.argb32 += 4;
+ src0v = src1v;
+ width -= 4;
+ }
+
+ /* then run scalar again for remaining bits (0 to 3 pixels here) */
+ while (width--) {
+ dst32 = dst_argb32_get;
+ src32 = src_argb32;
+ dst32 = over (dst32, src32);
+ dst_argb32_set (dst32);
+ }
+}
+
+/*
+ * AltiVec version of the ARGB32-source-ARGB32 span operator.
+ *
+ * Copies `width` pixels read through src.p.argb32 to dst.argb32.
+ * A width of zero or less copies nothing.
+ *
+ * The copy runs in three phases: scalar until the destination is
+ * 16-byte aligned, four pixels per vector store, then scalar for the
+ * remaining pixels.  The source may be unaligned; it is realigned with
+ * vec_lvsl/vec_perm.
+ */
+void _twin_vec_argb32_source_argb32 (twin_pointer_t dst,
+                                     twin_source_u src,
+                                     int width)
+{
+    twin_argb32_t src32;
+    vector unsigned char edgeperm;
+    vector unsigned char src0v, src1v, srcv;
+
+    /*
+     * First run scalar until destination is aligned.  The count is
+     * checked before it is consumed: a span that ends before the
+     * destination becomes aligned must leave width at 0, since a
+     * negative width would make the tail loop below run away.
+     */
+    while (width > 0 && VUNALIGNED(dst.v)) {
+        src32 = src_argb32;
+        dst_argb32_set (src32);
+        width--;
+    }
+
+    /* maybe we should have a special "aligned" version to avoid those
+     * permutations...
+     *
+     * The source is only touched with vector loads when at least one
+     * full vector remains to be copied.
+     */
+    if (width >= 4) {
+        edgeperm = vec_lvsl (0, src.p.argb32);
+        src0v = vec_ld (0, src.p.argb32);
+        do {
+            /*
+             * NOTE(review): this loads the quadword after the one being
+             * consumed, also on the last iteration, so it can read up to
+             * 16 bytes beyond the pixels actually copied - confirm the
+             * source buffers tolerate that.
+             */
+            src1v = vec_ld (16, src.p.argb32);
+            srcv = vec_perm (src0v, src1v, edgeperm);
+            vec_st ((vector unsigned int)srcv, 0, dst.argb32);
+            src.p.argb32 += 4;
+            dst.argb32 += 4;
+            src0v = src1v;
+            width -= 4;
+        } while (width >= 4);
+    }
+
+    /* then run scalar again for remaining bits */
+    while (width > 0) {
+        src32 = src_argb32;
+        dst_argb32_set (src32);
+        width--;
+    }
+}
+
+#endif /* HAVE_ALTIVEC */
diff --git a/libtwin/twin_screen.c b/libtwin/twin_screen.c
index 9ea46c7..26fddee 100644
--- a/libtwin/twin_screen.c
+++ b/libtwin/twin_screen.c
@@ -143,11 +143,12 @@ twin_screen_damaged (twin_screen_t *screen)
static void
twin_screen_span_pixmap(twin_screen_t *screen, twin_argb32_t *span,
twin_pixmap_t *p, twin_coord_t y,
- twin_coord_t left, twin_coord_t right)
+ twin_coord_t left, twin_coord_t right,
+ twin_src_op op16, twin_src_op op32)
{
twin_pointer_t dst;
- twin_source_u src;
- twin_coord_t p_left, p_right;
+ twin_source_u src;
+ twin_coord_t p_left, p_right;
/* bounds check in y */
if (y < p->y)
@@ -166,9 +167,9 @@ twin_screen_span_pixmap(twin_screen_t *screen, twin_argb32_t *span,
dst.argb32 = span + (p_left - left);
src.p = twin_pixmap_pointer (p, p_left - p->x, y - p->y);
if (p->format == TWIN_RGB16)
- _twin_rgb16_source_argb32 (dst, src, p_right - p_left);
+ op16 (dst, src, p_right - p_left);
else
- _twin_argb32_over_argb32 (dst, src, p_right - p_left);
+ op32 (dst, src, p_right - p_left);
}
void
@@ -178,7 +179,19 @@ twin_screen_update (twin_screen_t *screen)
twin_coord_t top = screen->damage.top;
twin_coord_t right = screen->damage.right;
twin_coord_t bottom = screen->damage.bottom;
-
+ twin_src_op pop16, pop32, bop32;
+
+ pop16 = _twin_rgb16_source_argb32;
+ pop32 = _twin_argb32_over_argb32;
+ bop32 = _twin_argb32_source_argb32;
+
+#ifdef HAVE_ALTIVEC
+ if (twin_has_feature(TWIN_FEATURE_ALTIVEC)) {
+ pop32 = _twin_vec_argb32_over_argb32;
+ bop32 = _twin_vec_argb32_source_argb32;
+ }
+#endif
+
if (right > screen->width)
right = screen->width;
if (bottom > screen->height)
@@ -221,18 +234,19 @@ twin_screen_update (twin_screen_t *screen)
p_this = right - p_left;
src.p = twin_pixmap_pointer (screen->background,
m_left, p_y);
- _twin_argb32_source_argb32 (dst, src, p_this);
+ bop32 (dst, src, p_this);
}
}
else
memset (span, 0xff, width * sizeof (twin_argb32_t));
for (p = screen->bottom; p; p = p->up)
- twin_screen_span_pixmap(screen, span, p, y, left, right);
+ twin_screen_span_pixmap(screen, span, p, y, left, right,
+ pop16, pop32);
if (screen->cursor)
twin_screen_span_pixmap(screen, span, screen->cursor,
- y, left, right);
+ y, left, right, pop16, pop32);
(*screen->put_span) (left, y, right, span, screen->closure);
}
diff --git a/libtwin/twinint.h b/libtwin/twinint.h
index aa46686..a54d244 100644
--- a/libtwin/twinint.h
+++ b/libtwin/twinint.h
@@ -23,6 +23,7 @@
#define _TWININT_H_
#include "twin.h"
+#include "twin_def.h"
#include <string.h>
/*
@@ -275,6 +276,9 @@ twin_op_func _twin_rgb16_source_a8;
twin_op_func _twin_a8_source_a8;
twin_op_func _twin_c_source_a8;
+twin_op_func _twin_vec_argb32_over_argb32;
+twin_op_func _twin_vec_argb32_source_argb32;
+
twin_argb32_t *
_twin_fetch_rgb16 (twin_pixmap_t *pixmap, int x, int y, int w, twin_argb32_t *span);
@@ -362,6 +366,13 @@ void
_twin_path_sfinish (twin_path_t *path);
/*
+ * Draw stuff
+ */
+
+void
+_twin_draw_set_features(void);
+
+/*
* Glyph stuff. Coordinates are stored in 2.6 fixed point format
*/
diff --git a/twin_demos/ftwin.c b/twin_demos/ftwin.c
index 1d5c487..7a19dad 100644
--- a/twin_demos/ftwin.c
+++ b/twin_demos/ftwin.c
@@ -60,6 +60,8 @@ int main (int argc, char **argv)
int hx, hy;
twin_pixmap_t *cur;
+ twin_feature_init();
+
tf = twin_fbdev_create(-1, SIGUSR1);
if (tf == NULL)
return 1;
diff --git a/twin_demos/xtwin.c b/twin_demos/xtwin.c
index 82381db..898ddfd 100644
--- a/twin_demos/xtwin.c
+++ b/twin_demos/xtwin.c
@@ -41,7 +41,11 @@ int
main (int argc, char **argv)
{
Display *dpy = XOpenDisplay (0);
- twin_x11_t *x11 = twin_x11_create (dpy, WIDTH, HEIGHT);
+ twin_x11_t *x11;
+
+ twin_feature_init();
+
+ x11 = twin_x11_create (dpy, WIDTH, HEIGHT);
twin_screen_set_background (x11->screen, twin_make_pattern ());
#if 0