add binfolder path relative filters, fix gbrp color model, vwdw timebar tweaks, title...
[goodguy/history.git] / cinelerra-5.1 / cinelerra / affine.C
index f6bedb2e17b557631889ca79b20e49789d37c569..a59d6e3ee9997372f5b9c4eede18f44ac0d96a17 100644 (file)
@@ -25,6 +25,7 @@
 #endif
 
 #include "affine.h"
+#include "interp.h"
 #include "clip.h"
 #include "vframe.h"
 
@@ -73,22 +74,16 @@ void AffineMatrix::scale(double x, double y)
 
 void AffineMatrix::multiply(AffineMatrix *dst)
 {
-       int i, j;
        AffineMatrix tmp;
-       double t1, t2, t3;
-
-       for (i = 0; i < 3; i++)
-    {
-       t1 = values[i][0];
-       t2 = values[i][1];
-       t3 = values[i][2];
-       for (j = 0; j < 3; j++)
-               {
+
+       for( int i=0; i<3; ++i ) {
+               double t1 = values[i][0], t2 = values[i][1], t3 = values[i][2];
+               for( int j=0; j<3; ++j ) {
                        tmp.values[i][j]  = t1 * dst->values[0][j];
                        tmp.values[i][j] += t2 * dst->values[1][j];
                        tmp.values[i][j] += t3 * dst->values[2][j];
                }
-    }
+       }
        dst->copy_from(&tmp);
 }
 
@@ -97,11 +92,11 @@ double AffineMatrix::determinant()
        double determinant;
 
        determinant  =
-        values[0][0] * (values[1][1] * values[2][2] - values[1][2] * values[2][1]);
+               values[0][0] * (values[1][1] * values[2][2] - values[1][2] * values[2][1]);
        determinant -=
-        values[1][0] * (values[0][1] * values[2][2] - values[0][2] * values[2][1]);
+               values[1][0] * (values[0][1] * values[2][2] - values[0][2] * values[2][1]);
        determinant +=
-        values[2][0] * (values[0][1] * values[1][2] - values[0][2] * values[1][1]);
+               values[2][0] * (values[0][1] * values[1][2] - values[0][2] * values[1][1]);
 
        return determinant;
 }
@@ -113,36 +108,36 @@ void AffineMatrix::invert(AffineMatrix *dst)
        det_1 = determinant();
 
        if(det_1 == 0.0)
-       return;
+       return;
 
        det_1 = 1.0 / det_1;
 
        dst->values[0][0] =
-      (values[1][1] * values[2][2] - values[1][2] * values[2][1]) * det_1;
+               (values[1][1] * values[2][2] - values[1][2] * values[2][1]) * det_1;
 
        dst->values[1][0] =
-      - (values[1][0] * values[2][2] - values[1][2] * values[2][0]) * det_1;
+               - (values[1][0] * values[2][2] - values[1][2] * values[2][0]) * det_1;
 
        dst->values[2][0] =
-      (values[1][0] * values[2][1] - values[1][1] * values[2][0]) * det_1;
+               (values[1][0] * values[2][1] - values[1][1] * values[2][0]) * det_1;
 
        dst->values[0][1] =
-      - (values[0][1] * values[2][2] - values[0][2] * values[2][1] ) * det_1;
+               - (values[0][1] * values[2][2] - values[0][2] * values[2][1] ) * det_1;
 
        dst->values[1][1] =
-      (values[0][0] * values[2][2] - values[0][2] * values[2][0]) * det_1;
+               (values[0][0] * values[2][2] - values[0][2] * values[2][0]) * det_1;
 
        dst->values[2][1] =
-      - (values[0][0] * values[2][1] - values[0][1] * values[2][0]) * det_1;
+               - (values[0][0] * values[2][1] - values[0][1] * values[2][0]) * det_1;
 
        dst->values[0][2] =
-      (values[0][1] * values[1][2] - values[0][2] * values[1][1]) * det_1;
+               (values[0][1] * values[1][2] - values[0][2] * values[1][1]) * det_1;
 
        dst->values[1][2] =
-      - (values[0][0] * values[1][2] - values[0][2] * values[1][0]) * det_1;
+               - (values[0][0] * values[1][2] - values[0][2] * values[1][0]) * det_1;
 
        dst->values[2][2] =
-      (values[0][0] * values[1][1] - values[0][1] * values[1][0]) * det_1;
+               (values[0][0] * values[1][1] - values[0][1] * values[1][0]) * det_1;
 }
 
 void AffineMatrix::copy_from(AffineMatrix *src)
@@ -150,6 +145,38 @@ void AffineMatrix::copy_from(AffineMatrix *src)
        memcpy(&values[0][0], &src->values[0][0], sizeof(values));
 }
 
+void AffineMatrix::set_matrix(
+       double in_x1, double in_y1, double in_x2, double in_y2,
+       double out_x1, double out_y1, double out_x2, double out_y2,
+       double out_x3, double out_y3, double out_x4, double out_y4)
+{
+// edge deltas of the output quad, taken relative to corner 4
+       const double ex1 = out_x2 - out_x4, ey1 = out_y2 - out_y4;
+       const double ex2 = out_x3 - out_x4, ey2 = out_y3 - out_y4;
+       const double ex3 = out_x1 - out_x2 + out_x4 - out_x3;
+       const double ey3 = out_y1 - out_y2 + out_y4 - out_y3;
+       const double denom = ex1 * ey2 - ey1 * ex2;
+// zero denominator: the perspective terms cannot be solved, use identity
+       if( denom == 0 ) { identity();  return; }
+// quad matrix: the bottom row holds the perspective terms, solved first
+       AffineMatrix quad;
+       quad.values[2][0] = (ex3 * ey2 - ey3 * ex2) / denom;
+       quad.values[2][1] = (ex1 * ey3 - ey1 * ex3) / denom;
+       quad.values[2][2] = 1.0;
+       quad.values[0][0] = out_x2 - out_x1 + quad.values[2][0] * out_x2;
+       quad.values[1][0] = out_y2 - out_y1 + quad.values[2][0] * out_y2;
+       quad.values[0][1] = out_x3 - out_x1 + quad.values[2][1] * out_x3;
+       quad.values[1][1] = out_y3 - out_y1 + quad.values[2][1] * out_y3;
+       quad.values[0][2] = out_x1;
+       quad.values[1][2] = out_y1;
+// shift by the input origin, scale by the reciprocal input extent
+// (a non-positive extent keeps scale 1), then left-multiply by quad
+       identity();  translate(-in_x1, -in_y1);
+       scale(in_x2 > in_x1 ? 1./(in_x2 - in_x1) : 1.0,
+               in_y2 > in_y1 ? 1./(in_y2 - in_y1) : 1.0);
+       quad.multiply(this);
+}
+
 void AffineMatrix::transform_point(float x,
        float y,
        float *newx,
@@ -158,11 +185,7 @@ void AffineMatrix::transform_point(float x,
        double w;
 
        w = values[2][0] * x + values[2][1] * y + values[2][2];
-
-       if (w == 0.0)
-       w = 1.0;
-       else
-       w = 1.0 / w;
+       w = !w ? 1 : 1/w;
 
        *newx = (values[0][0] * x + values[0][1] * y + values[0][2]) * w;
        *newy = (values[1][0] * x + values[1][1] * y + values[1][2]) * w;
@@ -195,120 +218,22 @@ AffineUnit::AffineUnit(AffineEngine *server)
 }
 
 
-
-
-
-
-
-
-
-void AffineUnit::calculate_matrix(
-       double in_x1,
-       double in_y1,
-       double in_x2,
-       double in_y2,
-       double out_x1,
-       double out_y1,
-       double out_x2,
-       double out_y2,
-       double out_x3,
-       double out_y3,
-       double out_x4,
-       double out_y4,
-       AffineMatrix *result)
+static inline float transform_cubic(float dx,
+               float p0, float p1, float p2, float p3)
 {
-       AffineMatrix matrix;
-       double scalex;
-       double scaley;
-
-       scalex = scaley = 1.0;
-
-       if((in_x2 - in_x1) > 0)
-       scalex = 1.0 / (double)(in_x2 - in_x1);
-
-       if((in_y2 - in_y1) > 0)
-       scaley = 1.0 / (double)(in_y2 - in_y1);
-
-/* Determine the perspective transform that maps from
- * the unit cube to the transformed coordinates
- */
-    double dx1, dx2, dx3, dy1, dy2, dy3;
-    double det1, det2;
-
-    dx1 = out_x2 - out_x4;
-    dx2 = out_x3 - out_x4;
-    dx3 = out_x1 - out_x2 + out_x4 - out_x3;
-
-    dy1 = out_y2 - out_y4;
-    dy2 = out_y3 - out_y4;
-    dy3 = out_y1 - out_y2 + out_y4 - out_y3;
-// printf("AffineUnit::calculate_matrix %f %f %f %f %f %f\n",
-// dx1,
-// dx2,
-// dx3,
-// dy1,
-// dy2,
-// dy3
-// );
-
-/*  Is the mapping affine?  */
-    if((dx3 == 0.0) && (dy3 == 0.0))
-    {
-        matrix.values[0][0] = out_x2 - out_x1;
-        matrix.values[0][1] = out_x4 - out_x2;
-        matrix.values[0][2] = out_x1;
-        matrix.values[1][0] = out_y2 - out_y1;
-        matrix.values[1][1] = out_y4 - out_y2;
-        matrix.values[1][2] = out_y1;
-        matrix.values[2][0] = 0.0;
-        matrix.values[2][1] = 0.0;
-    }
-    else
-    {
-        det1 = dx3 * dy2 - dy3 * dx2;
-        det2 = dx1 * dy2 - dy1 * dx2;
-        matrix.values[2][0] = det1 / det2;
-        det1 = dx1 * dy3 - dy1 * dx3;
-        det2 = dx1 * dy2 - dy1 * dx2;
-        matrix.values[2][1] = det1 / det2;
-
-        matrix.values[0][0] = out_x2 - out_x1 + matrix.values[2][0] * out_x2;
-        matrix.values[0][1] = out_x3 - out_x1 + matrix.values[2][1] * out_x3;
-        matrix.values[0][2] = out_x1;
-
-        matrix.values[1][0] = out_y2 - out_y1 + matrix.values[2][0] * out_y2;
-        matrix.values[1][1] = out_y3 - out_y1 + matrix.values[2][1] * out_y3;
-        matrix.values[1][2] = out_y1;
-    }
-
-    matrix.values[2][2] = 1.0;
-
-// printf("AffineUnit::calculate_matrix 1 %f %f\n", dx3, dy3);
-// matrix.dump();
-
-       result->identity();
-       result->translate(-in_x1, -in_y1);
-       result->scale(scalex, scaley);
-       matrix.multiply(result);
-// double test[3][3] = { { 0.0896, 0.0, 0.0 },
-//                               { 0.0, 0.0896, 0.0 },
-//                               { -0.00126, 0.0, 1.0 } };
-// memcpy(&result->values[0][0], test, sizeof(test));
-// printf("AffineUnit::calculate_matrix 4 %p\n", result);
-// result->dump();
-
-
+/* Catmull-Rom - not bad */
+       float result = ((( (- p0 + 3*p1 - 3*p2 + p3) * dx +
+                        ( 2*p0 - 5*p1 + 4*p2 - p3 ) ) * dx +
+                        ( - p0 + p2 ) ) * dx + (p1 + p1) ) / 2;
+// printf("%f %f %f %f %f\n", result, p0, p1, p2, p3);
+       return result;
 }
 
-static inline float transform_cubic(float dx,
-               float jm1, float j, float jp1, float jp2)
+static inline float transform_linear(float dx,
+               float p1, float p2)
 {
-/* Catmull-Rom - not bad */
-       float result = ((( ( - jm1 + 3.0 * j - 3.0 * jp1 + jp2 ) * dx +
-                      ( 2.0 * jm1 - 5.0 * j + 4.0 * jp1 - jp2 ) ) * dx +
-                      ( - jm1 + jp1 ) ) * dx + (j + j) ) / 2.0;
-// printf("%f %f %f %f %f\n", result, jm1, j, jp1, jp2);
-       return result;
+       float result = p1 * (1-dx) + p2 * dx;
+       return result;
 }
 
 
@@ -339,11 +264,10 @@ void AffineUnit::process_package(LoadPackage *package)
 
 // Calculate real coords
        float out_x1, out_y1, out_x2, out_y2, out_x3, out_y3, out_x4, out_y4;
-       if(server->mode == AffineEngine::STRETCH ||
-               server->mode == AffineEngine::PERSPECTIVE ||
-               server->mode == AffineEngine::ROTATE ||
-               server->mode == AffineEngine::TRANSFORM)
-       {
+       if( server->mode == AffineEngine::STRETCH ||
+           server->mode == AffineEngine::PERSPECTIVE ||
+           server->mode == AffineEngine::ROTATE ||
+           server->mode == AffineEngine::TRANSFORM ) {
                out_x1 = (float)server->in_x + (float)server->x1 * server->in_w / 100;
                out_y1 = (float)server->in_y + (float)server->y1 * server->in_h / 100;
                out_x2 = (float)server->in_x + (float)server->x2 * server->in_w / 100;
@@ -353,8 +277,7 @@ void AffineUnit::process_package(LoadPackage *package)
                out_x4 = (float)server->in_x + (float)server->x4 * server->in_w / 100;
                out_y4 = (float)server->in_y + (float)server->y4 * server->in_h / 100;
        }
-       else
-       {
+       else {
                out_x1 = (float)server->in_x + (float)server->x1 * server->in_w / 100;
                out_y1 = server->in_y;
                out_x2 = out_x1 + server->in_w;
@@ -368,10 +291,14 @@ void AffineUnit::process_package(LoadPackage *package)
 
 
 // Rotation with OpenGL uses a simple quad.
-       if(server->mode == AffineEngine::ROTATE &&
-               server->use_opengl)
-       {
+       if( server->mode == AffineEngine::ROTATE &&
+           server->use_opengl ) {
 #ifdef HAVE_GL
+               out_x1 -= pivot_offset_x;  out_y1 -= pivot_offset_y;
+               out_x2 -= pivot_offset_x;  out_y2 -= pivot_offset_y;
+               out_x3 -= pivot_offset_x;  out_y3 -= pivot_offset_y;
+               out_x4 -= pivot_offset_x;  out_y4 -= pivot_offset_y;
+
                server->output->to_texture();
                server->output->enable_opengl();
                server->output->init_screen();
@@ -411,11 +338,10 @@ void AffineUnit::process_package(LoadPackage *package)
 #endif
        }
        else
-       if(server->mode == AffineEngine::PERSPECTIVE ||
-               server->mode == AffineEngine::SHEER ||
-               server->mode == AffineEngine::ROTATE ||
-               server->mode == AffineEngine::TRANSFORM)
-       {
+       if( server->mode == AffineEngine::PERSPECTIVE ||
+           server->mode == AffineEngine::SHEER ||
+           server->mode == AffineEngine::ROTATE ||
+           server->mode == AffineEngine::TRANSFORM ) {
                AffineMatrix matrix;
                float temp;
 // swap points 3 & 4
@@ -430,64 +356,37 @@ void AffineUnit::process_package(LoadPackage *package)
 
 
 
-               if(server->mode != AffineEngine::TRANSFORM)
-               {
-                       calculate_matrix(
-                               server->in_x,
-                               server->in_y,
+               if( server->mode != AffineEngine::TRANSFORM ) {
+                       matrix.set_matrix(server->in_x, server->in_y,
                                server->in_x + server->in_w,
                                server->in_y + server->in_h,
-                               out_x1,
-                               out_y1,
-                               out_x2,
-                               out_y2,
-                               out_x3,
-                               out_y3,
-                               out_x4,
-                               out_y4,
-                               &matrix);
+                               out_x1, out_y1, out_x2, out_y2,
+                               out_x3, out_y3, out_x4, out_y4);
                }
-               else
-               {
+               else {
                        matrix.copy_from(&server->matrix);
                }
 
-// printf("AffineUnit::process_package %d\n%f %f %f\n%f %f %f\n%f %f %f\n",
-// __LINE__,
-// matrix.values[0][0],
-// matrix.values[0][1],
-// matrix.values[0][2],
-// matrix.values[1][0],
-// matrix.values[1][1],
-// matrix.values[1][2],
-// matrix.values[2][0],
-// matrix.values[2][1],
-// matrix.values[2][2]);
-               int interpolate = 1;
+//printf("AffineUnit::process_package %d\n%f %f %f\n%f %f %f\n%f %f %f\n", __LINE__,
+// matrix.values[0][0], matrix.values[0][1], matrix.values[0][2],
+// matrix.values[1][0], matrix.values[1][1], matrix.values[1][2],
+// matrix.values[2][0], matrix.values[2][1], matrix.values[2][2]);
                int reverse = !server->forward;
                float tx, ty, tw;
                float xinc, yinc, winc;
                AffineMatrix m, im;
                float ttx = 0, tty = 0;
-               int itx = 0, ity = 0;
                int tx1 = 0, ty1 = 0, tx2 = 0, ty2 = 0;
 
-               if(reverse)
-               {
+               if(reverse) {
                        m.copy_from(&matrix);
                        m.invert(&im);
                        matrix.copy_from(&im);
                }
-               else
-               {
+               else {
                        matrix.invert(&m);
                }
 
-
-
-
-
-
                float dx1 = 0, dy1 = 0;
                float dx2 = 0, dy2 = 0;
                float dx3 = 0, dy3 = 0;
@@ -502,12 +401,8 @@ void AffineUnit::process_package(LoadPackage *package)
 // printf("AffineUnit::process_package %d use_opengl=%d\n",
 // __LINE__, server->use_opengl);
 
-
-
-
-
-               if(server->use_opengl)
-               {
+               if( server->use_opengl &&
+                   server->interpolation == AffineEngine::AF_DEFAULT ) {
 #ifdef HAVE_GL
                        static const char *affine_frag =
                                "uniform sampler2D tex;\n"
@@ -542,17 +437,12 @@ void AffineUnit::process_package(LoadPackage *package)
 
                        server->output->to_texture();
                        server->output->enable_opengl();
-                       unsigned int frag_shader = VFrame::make_shader(0,
-                                       affine_frag,
-                                       0);
-                       if(frag_shader > 0)
-                       {
+                       unsigned int frag_shader = VFrame::make_shader(0, affine_frag, 0);
+                       if( frag_shader > 0 ) {
                                glUseProgram(frag_shader);
                                glUniform1i(glGetUniformLocation(frag_shader, "tex"), 0);
                                glUniformMatrix3fv(glGetUniformLocation(frag_shader, "affine_matrix"),
-                                       1,
-                                       0,
-                                       affine_matrix);
+                                       1, 0, affine_matrix);
                                glUniform2f(glGetUniformLocation(frag_shader, "texture_extents"),
                                        (GLfloat)server->output->get_texture_w(),
                                        (GLfloat)server->output->get_texture_h());
@@ -560,21 +450,19 @@ void AffineUnit::process_package(LoadPackage *package)
                                        (GLfloat)server->output->get_w() / server->output->get_texture_w(),
                                        (GLfloat)server->output->get_h() / server->output->get_texture_h());
                                float border_color[] = { 0, 0, 0, 0 };
-                               if(BC_CModels::is_yuv(server->output->get_color_model()))
-                               {
+                               if(BC_CModels::is_yuv(server->output->get_color_model())) {
                                        border_color[1] = 0.5;
                                        border_color[2] = 0.5;
                                }
-                               if(!BC_CModels::has_alpha(server->output->get_color_model()))
-                               {
+                               if(!BC_CModels::has_alpha(server->output->get_color_model())) {
                                        border_color[3] = 1.0;
                                }
 
                                glUniform4fv(glGetUniformLocation(frag_shader, "border_color"),
-                                       1,
-                                       (GLfloat*)border_color);
+                                       1, (GLfloat*)border_color);
                                server->output->init_screen();
                                server->output->bind_texture(0);
+                               glHint(GL_POINT_SMOOTH_HINT, GL_NICEST);
                                glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, border_color);
                                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
                                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
@@ -586,527 +474,121 @@ void AffineUnit::process_package(LoadPackage *package)
 #endif // HAVE_GL
                }
 
-
-
-
-
-
 #define ROUND(x) ((int)((x > 0) ? (x) + 0.5 : (x) - 0.5))
 #define MIN4(a,b,c,d) MIN(MIN(MIN(a,b),c),d)
 #define MAX4(a,b,c,d) MAX(MAX(MAX(a,b),c),d)
 
-       tx1 = ROUND(MIN4(dx1 - pivot_offset_x, dx2 - pivot_offset_x, dx3 - pivot_offset_x, dx4 - pivot_offset_x));
-       ty1 = ROUND(MIN4(dy1 - pivot_offset_y, dy2 - pivot_offset_y, dy3 - pivot_offset_y, dy4 - pivot_offset_y));
+       tx1 = ROUND(MIN4(dx1 - pivot_offset_x, dx2 - pivot_offset_x, dx3 - pivot_offset_x, dx4 - pivot_offset_x));
+       ty1 = ROUND(MIN4(dy1 - pivot_offset_y, dy2 - pivot_offset_y, dy3 - pivot_offset_y, dy4 - pivot_offset_y));
 
-       tx2 = ROUND(MAX4(dx1 - pivot_offset_x, dx2 - pivot_offset_x, dx3 - pivot_offset_x, dx4 - pivot_offset_x));
-       ty2 = ROUND(MAX4(dy1 - pivot_offset_y, dy2 - pivot_offset_y, dy3 - pivot_offset_y, dy4 - pivot_offset_y));
+       tx2 = ROUND(MAX4(dx1 - pivot_offset_x, dx2 - pivot_offset_x, dx3 - pivot_offset_x, dx4 - pivot_offset_x));
+       ty2 = ROUND(MAX4(dy1 - pivot_offset_y, dy2 - pivot_offset_y, dy3 - pivot_offset_y, dy4 - pivot_offset_y));
 
-               CLAMP(ty1, pkg->y1, pkg->y2);
-               CLAMP(ty2, pkg->y1, pkg->y2);
-               CLAMP(tx1, server->out_x, server->out_x + server->out_w);
-               CLAMP(tx2, server->out_x, server->out_x + server->out_w);
+       CLAMP(ty1, pkg->y1, pkg->y2);
+       CLAMP(ty2, pkg->y1, pkg->y2);
+       CLAMP(tx1, server->out_x, server->out_x + server->out_w);
+       CLAMP(tx2, server->out_x, server->out_x + server->out_w);
 
 
-               xinc = m.values[0][0];
-               yinc = m.values[1][0];
-               winc = m.values[2][0];
+       xinc = m.values[0][0];
+       yinc = m.values[1][0];
+       winc = m.values[2][0];
 
-//printf("AffineUnit::process_package 2 tx1=%d ty1=%d tx2=%d ty2=%d %f %f\n", tx1, ty1, tx2, ty2, out_x4, out_y4);
+//printf("AffineUnit::process_package 2 tx1=%d ty1=%d tx2=%d ty2=%d %f %f\n",
+// tx1, ty1, tx2, ty2, out_x4, out_y4);
 //printf("AffineUnit::process_package %d %d %d %d %d\n",
-//__LINE__,
-//min_in_x,
-//max_in_x,
-//min_in_y,
-//max_in_y);
-
-#define CUBIC_ROW(in_row, chroma_offset) \
-       transform_cubic(dx, \
-               in_row[col1_offset] - chroma_offset, \
-               in_row[col2_offset] - chroma_offset, \
-               in_row[col3_offset] - chroma_offset, \
-               in_row[col4_offset] - chroma_offset)
-
-
-#define TRANSFORM(components, type, temp_type, chroma_offset, max) \
-{ \
-       type **in_rows = (type**)server->input->get_rows(); \
-       float round_factor = 0.0; \
-       if(sizeof(type) < 4) round_factor = 0.5; \
-       for(int y = ty1; y < ty2; y++) \
-       { \
-               type *out_row = (type*)server->output->get_rows()[y]; \
+// __LINE__, min_in_x, max_in_x, min_in_y, max_in_y);
+
+#define DO_INTERP(tag, interp, components, type, temp_type, chroma, max) \
+case tag: { \
+    type **inp_rows = (type**)server->input->get_rows(); \
+    type **out_rows = (type**)server->output->get_rows(); \
+    float round_factor = sizeof(type) < 4 ? 0.5 : 0; \
+    INTERP_SETUP(inp_rows, max, min_in_x,min_in_y, max_in_x,max_in_y); \
  \
-               if(!interpolate) \
-               { \
-               tx = xinc * (tx1 + 0.5) + \
-                               m.values[0][1] * (y + pivot_offset_y + 0.5) + \
-                               m.values[0][2] + \
-                               pivot_offset_x * xinc; \
-               ty = yinc * (tx1 + 0.5) + \
-                               m.values[1][1] * (y + pivot_offset_y + 0.5) + \
-                               m.values[1][2] + \
-                               pivot_offset_x * yinc; \
-               tw = winc * (tx1 + 0.5) + \
-                               m.values[2][1] * (y + pivot_offset_y + 0.5) + \
-                               m.values[2][2] + \
-                               pivot_offset_x * winc; \
-               } \
-       else \
-        { \
-               tx = xinc * tx1 + \
-                               m.values[0][1] * (y + pivot_offset_y) + \
-                               m.values[0][2] + \
-                               pivot_offset_x * xinc; \
-               ty = yinc * tx1 + \
-                               m.values[1][1] * (y + pivot_offset_y) + \
-                               m.values[1][2] + \
-                               pivot_offset_x * yinc; \
-               tw = winc * tx1 + \
-                               m.values[2][1] * (y + pivot_offset_y) + \
-                               m.values[2][2] + \
-                               pivot_offset_x * winc; \
-        } \
+    for( int y=ty1; y<ty2; ++y ) { \
+        type *out_row = (type*)out_rows[y]; \
+        int x1 = tx1, x2 = tx2; \
+        if( x1 < min_out_x ) x1 = min_out_x; \
+        if( x2 > max_out_x ) x2 = max_out_x; \
+        tx = xinc * x1 + m.values[0][1] * (y + pivot_offset_y) + m.values[0][2] \
+            + pivot_offset_x * xinc; \
+        ty = yinc * x1 + m.values[1][1] * (y + pivot_offset_y) + m.values[1][2] \
+            + pivot_offset_x * yinc; \
+        tw = winc * x1 + m.values[2][1] * (y + pivot_offset_y) + m.values[2][2] \
+            + pivot_offset_x * winc; \
+        type *out = out_row + x1 * components; \
  \
- \
-               out_row += tx1 * components; \
-               for(int x = tx1; x < tx2; x++) \
-               { \
+        for( int x=x1; x<x2; ++x ) { \
 /* Normalize homogeneous coords */ \
-                       if(tw == 0.0) \
-                       { \
-                               ttx = 0.0; \
-                               tty = 0.0; \
-                       } \
-                       else \
-                       if(tw != 1.0) \
-                       { \
-                               ttx = tx / tw; \
-                               tty = ty / tw; \
-                       } \
-                       else \
-                       { \
-                               ttx = tx; \
-                               tty = ty; \
-                       } \
-                       itx = (int)ttx; \
-                       ity = (int)tty; \
- \
-                       int row1 = ity - 1; \
-                       int row2 = ity; \
-                       int row3 = ity + 1; \
-                       int row4 = ity + 2; \
-                       CLAMP(row1, min_in_y, max_in_y); \
-                       CLAMP(row2, min_in_y, max_in_y); \
-                       CLAMP(row3, min_in_y, max_in_y); \
-                       CLAMP(row4, min_in_y, max_in_y); \
- \
-/* Set destination pixels if in clipping region */ \
-                       if(!interpolate && \
-                               x >= min_out_x && \
-                               x < max_out_x) \
-                       { \
-                               if(itx >= min_in_x && \
-                                       itx <= max_in_x && \
-                                       ity >= min_in_y && \
-                                       ity <= max_in_y) \
-                               { \
-                                       type *src = in_rows[ity] + itx * components; \
-                                       *out_row++ = *src++; \
-                                       *out_row++ = *src++; \
-                                       *out_row++ = *src++; \
-                                       if(components == 4) *out_row++ = *src; \
-                               } \
-                               else \
-/* Fill with chroma */ \
-                               { \
-                                       *out_row++ = 0; \
-                                       *out_row++ = chroma_offset; \
-                                       *out_row++ = chroma_offset; \
-                                       if(components == 4) *out_row++ = 0; \
-                               } \
-                       } \
-                       else \
-/* Bicubic algorithm */ \
-                       if(interpolate &&  \
-                               x >= min_out_x &&  \
-                               x < max_out_x) \
-                       { \
-/* clipping region */ \
-                               if ((itx + 2) >= min_in_x && \
-                                       (itx - 1) <= max_in_x && \
-                       (ity + 2) >= min_in_y && \
-                                       (ity - 1) <= max_in_y) \
-                { \
-                       float dx, dy; \
- \
-/* the fractional error */ \
-                       dx = ttx - itx; \
-                       dy = tty - ity; \
- \
-/* Row and column offsets in cubic block */ \
-                                       int col1 = itx - 1; \
-                                       int col2 = itx; \
-                                       int col3 = itx + 1; \
-                                       int col4 = itx + 2; \
-                                       CLAMP(col1, min_in_x, max_in_x); \
-                                       CLAMP(col2, min_in_x, max_in_x); \
-                                       CLAMP(col3, min_in_x, max_in_x); \
-                                       CLAMP(col4, min_in_x, max_in_x); \
-                                       int col1_offset = col1 * components; \
-                                       int col2_offset = col2 * components; \
-                                       int col3_offset = col3 * components; \
-                                       int col4_offset = col4 * components; \
- \
-                                       type *row1_ptr = in_rows[row1]; \
-                                       type *row2_ptr = in_rows[row2]; \
-                                       type *row3_ptr = in_rows[row3]; \
-                                       type *row4_ptr = in_rows[row4]; \
-                                       temp_type r, g, b, a; \
- \
-                                       r = (temp_type)(transform_cubic(dy, \
-                       CUBIC_ROW(row1_ptr, 0x0), \
-                       CUBIC_ROW(row2_ptr, 0x0), \
-                       CUBIC_ROW(row3_ptr, 0x0), \
-                       CUBIC_ROW(row4_ptr, 0x0)) + \
-                                               round_factor); \
- \
-                                       row1_ptr++; \
-                                       row2_ptr++; \
-                                       row3_ptr++; \
-                                       row4_ptr++; \
-                                       g = (temp_type)(transform_cubic(dy, \
-                       CUBIC_ROW(row1_ptr, chroma_offset), \
-                       CUBIC_ROW(row2_ptr, chroma_offset), \
-                       CUBIC_ROW(row3_ptr, chroma_offset), \
-                       CUBIC_ROW(row4_ptr, chroma_offset)) + \
-                                               round_factor); \
-                                       g += chroma_offset; \
- \
-                                       row1_ptr++; \
-                                       row2_ptr++; \
-                                       row3_ptr++; \
-                                       row4_ptr++; \
-                                       b = (temp_type)(transform_cubic(dy, \
-                       CUBIC_ROW(row1_ptr, chroma_offset), \
-                       CUBIC_ROW(row2_ptr, chroma_offset), \
-                       CUBIC_ROW(row3_ptr, chroma_offset), \
-                       CUBIC_ROW(row4_ptr, chroma_offset)) + \
-                                               round_factor); \
-                                       b += chroma_offset; \
- \
-                                       if(components == 4) \
-                                       { \
-                                               row1_ptr++; \
-                                               row2_ptr++; \
-                                               row3_ptr++; \
-                                               row4_ptr++; \
-                                               a = (temp_type)(transform_cubic(dy, \
-                               CUBIC_ROW(row1_ptr, 0x0), \
-                               CUBIC_ROW(row2_ptr, 0x0), \
-                               CUBIC_ROW(row3_ptr, 0x0), \
-                               CUBIC_ROW(row4_ptr, 0x0)) +  \
-                                                       round_factor); \
-                                       } \
- \
-                                       if(sizeof(type) < 4) \
-                                       { \
-                                               *out_row++ = CLIP(r, 0, max); \
-                                               *out_row++ = CLIP(g, 0, max); \
-                                               *out_row++ = CLIP(b, 0, max); \
-                                               if(components == 4) *out_row++ = CLIP(a, 0, max); \
-                                       } \
-                                       else \
-                                       { \
-                                               *out_row++ = r; \
-                                               *out_row++ = g; \
-                                               *out_row++ = b; \
-                                               if(components == 4) *out_row++ = a; \
-                                       } \
-                } \
-                               else \
-/* Fill with chroma */ \
-                               { \
-                                       *out_row++ = 0; \
-                                       *out_row++ = chroma_offset; \
-                                       *out_row++ = chroma_offset; \
-                                       if(components == 4) *out_row++ = 0; \
-                               } \
-                       } \
-                       else \
-                       { \
-                               out_row += components; \
-                       } \
+            if( tw == 0.0 ) { ttx = 0.0; tty = 0.0; } \
+            else { ttx = tx / tw; tty = ty / tw; } \
+            interp##_SETUP(type, components, ttx, tty); \
+            *out++ = ((temp_type)interp##_interp(0, 0) + round_factor); \
+            interp##_next(); \
+            *out++ = ((temp_type)interp##_interp(chroma, chroma) + round_factor); \
+            interp##_next(); \
+            *out++ = ((temp_type)interp##_interp(chroma, chroma) + round_factor); \
+            if( components == 4 ) { \
+                interp##_next(); \
+                *out++ = ((temp_type)interp##_interp(0, 0) + round_factor); \
+            } \
  \
 /*  increment the transformed coordinates  */ \
-                       tx += xinc; \
-                       ty += yinc; \
-                       tw += winc; \
-               } \
-       } \
-}
-
-
-
+            tx += xinc;  ty += yinc;  tw += winc; \
+        } \
+    } \
+} break
 
 // printf("AffineUnit::process_package %d tx1=%d ty1=%d tx2=%d ty2=%d\n",
 // __LINE__, tx1, ty1, tx2, ty2);
-               switch(server->input->get_color_model())
-               {
-                       case BC_RGB_FLOAT:
-                               TRANSFORM(3, float, float, 0x0, 1.0)
-                               break;
-                       case BC_RGB888:
-                               TRANSFORM(3, unsigned char, int, 0x0, 0xff)
-                               break;
-                       case BC_RGBA_FLOAT:
-                               TRANSFORM(4, float, float, 0x0, 1.0)
-                               break;
-                       case BC_RGBA8888:
-                               TRANSFORM(4, unsigned char, int, 0x0, 0xff)
-                               break;
-                       case BC_YUV888:
-// DEBUG
-//                             TRANSFORM(3, unsigned char, int, 0x80, 0xff)
-{
 
-       unsigned char **in_rows = (unsigned char**)server->input->get_rows();
-       float round_factor = 0.0;
-       if(sizeof(unsigned char) < 4) round_factor = 0.5;
-
-       for(int y = ty1; y < ty2; y++)
-       {
-//printf("AffineUnit::process_package %d y=%d tx1=%d tx2=%d ty1=%d ty2=%d\n", 
-//__LINE__, y, tx1, tx2, ty1, ty2);
-               unsigned char *out_row = (unsigned char*)server->output->get_rows()[y];
-
-               if(!interpolate)
-               {
-               tx = xinc * (tx1 + 0.5) +
-                               m.values[0][1] * (y + pivot_offset_y + 0.5) +
-                               m.values[0][2] +
-                               pivot_offset_x * xinc;
-               ty = yinc * (tx1 + 0.5) +
-                               m.values[1][1] * (y + pivot_offset_y + 0.5) +
-                               m.values[1][2] +
-                               pivot_offset_x * yinc;
-               tw = winc * (tx1 + 0.5) +
-                               m.values[2][1] * (y + pivot_offset_y + 0.5) +
-                               m.values[2][2] +
-                               pivot_offset_x * winc;
-               }
-       else
-        {
-               tx = xinc * tx1 +
-                               m.values[0][1] * (y + pivot_offset_y) +
-                               m.values[0][2] +
-                               pivot_offset_x * xinc;
-               ty = yinc * tx1 +
-                               m.values[1][1] * (y + pivot_offset_y) +
-                               m.values[1][2] +
-                               pivot_offset_x * yinc;
-               tw = winc * tx1 +
-                               m.values[2][1] * (y + pivot_offset_y) +
-                               m.values[2][2] +
-                               pivot_offset_x * winc;
-        }
-
-
-               out_row += tx1 * 3;
-               for(int x = tx1; x < tx2; x++)
-               {
-/* Normalize homogeneous coords */
-                       if(tw == 0.0)
-                       {
-                               ttx = 0.0;
-                               tty = 0.0;
-                       }
-                       else
-                       if(tw != 1.0)
-                       {
-                               ttx = tx / tw;
-                               tty = ty / tw;
-                       }
-                       else
-                       {
-                               ttx = tx;
-                               tty = ty;
-                       }
-                       itx = (int)ttx;
-                       ity = (int)tty;
-
-                       int row1 = ity - 1;
-                       int row2 = ity;
-                       int row3 = ity + 1;
-                       int row4 = ity + 2;
-                       CLAMP(row1, min_in_y, max_in_y);
-                       CLAMP(row2, min_in_y, max_in_y);
-                       CLAMP(row3, min_in_y, max_in_y);
-                       CLAMP(row4, min_in_y, max_in_y);
-
-/* Set destination pixels if in clipping region */
-                       if(!interpolate &&
-                               x >= min_out_x &&
-                               x < max_out_x)
-                       {
-                               if(itx >= min_in_x &&
-                                       itx <= max_in_x &&
-                                       ity >= min_in_y &&
-                                       ity <= max_in_y)
-                               {
-                                       unsigned char *src = in_rows[ity] + itx * 3;
-                                       *out_row++ = *src++;
-                                       *out_row++ = *src++;
-                                       *out_row++ = *src++;
-                                       if(3 == 4) *out_row++ = *src;
-                               }
-                               else
-/* Fill with chroma */
-                               {
-                                       *out_row++ = 0;
-                                       *out_row++ = 0x80;
-                                       *out_row++ = 0x80;
-                                       if(3 == 4) *out_row++ = 0;
-                               }
+               switch( server->interpolation ) {
+               case AffineEngine::AF_NEAREST:
+                       switch( server->input->get_color_model() ) {
+                       DO_INTERP( BC_RGB_FLOAT, nearest, 3, float, float, 0x0, 1.0);
+                       DO_INTERP( BC_RGB888, nearest, 3, unsigned char, int, 0x0, 0xff);
+                       DO_INTERP( BC_RGBA_FLOAT, nearest, 4, float, float, 0x0, 1.0);
+                       DO_INTERP( BC_RGBA8888, nearest, 4, unsigned char, int, 0x0, 0xff);
+                       DO_INTERP( BC_YUV888, nearest, 3, unsigned char, int, 0x80, 0xff);
+                       DO_INTERP( BC_YUVA8888, nearest, 4, unsigned char, int, 0x80, 0xff);
+                       DO_INTERP( BC_RGB161616, nearest, 3, uint16_t, int, 0x0, 0xffff);
+                       DO_INTERP( BC_RGBA16161616, nearest, 4, uint16_t, int, 0x0, 0xffff);
+                       DO_INTERP( BC_YUV161616, nearest, 3, uint16_t, int, 0x8000, 0xffff);
+                       DO_INTERP( BC_YUVA16161616, nearest, 4, uint16_t, int, 0x8000, 0xffff);
                        }
-                       else
-/* Bicubic algorithm */
-                       if(interpolate &&
-                               x >= min_out_x &&
-                               x < max_out_x)
-                       {
-/* clipping region */
-                               if ((itx + 2) >= min_in_x &&
-                                       (itx - 1) <= max_in_x &&
-                       (ity + 2) >= min_in_y &&
-                                       (ity - 1) <= max_in_y)
-                {
-                       float dx, dy;
-
-/* the fractional error */
-                       dx = ttx - itx;
-                       dy = tty - ity;
-
-/* Row and column offsets in cubic block */
-                                       int col1 = itx - 1;
-                                       int col2 = itx;
-                                       int col3 = itx + 1;
-                                       int col4 = itx + 2;
-                                       CLAMP(col1, min_in_x, max_in_x);
-                                       CLAMP(col2, min_in_x, max_in_x);
-                                       CLAMP(col3, min_in_x, max_in_x);
-                                       CLAMP(col4, min_in_x, max_in_x);
-                                       int col1_offset = col1 * 3;
-                                       int col2_offset = col2 * 3;
-                                       int col3_offset = col3 * 3;
-                                       int col4_offset = col4 * 3;
-
-                                       unsigned char *row1_ptr = in_rows[row1];
-                                       unsigned char *row2_ptr = in_rows[row2];
-                                       unsigned char *row3_ptr = in_rows[row3];
-                                       unsigned char *row4_ptr = in_rows[row4];
-                                       int r, g, b, a;
-
-                                       r = (int)(transform_cubic(dy,
-                       CUBIC_ROW(row1_ptr, 0x0),
-                       CUBIC_ROW(row2_ptr, 0x0),
-                       CUBIC_ROW(row3_ptr, 0x0),
-                       CUBIC_ROW(row4_ptr, 0x0)) +
-                                               round_factor);
-
-                                       row1_ptr++;
-                                       row2_ptr++;
-                                       row3_ptr++;
-                                       row4_ptr++;
-                                       g = (int)(transform_cubic(dy,
-                       CUBIC_ROW(row1_ptr, 0x80),
-                       CUBIC_ROW(row2_ptr, 0x80),
-                       CUBIC_ROW(row3_ptr, 0x80),
-                       CUBIC_ROW(row4_ptr, 0x80)) +
-                                               round_factor);
-                                       g += 0x80;
-
-                                       row1_ptr++;
-                                       row2_ptr++;
-                                       row3_ptr++;
-                                       row4_ptr++;
-                                       b = (int)(transform_cubic(dy,
-                       CUBIC_ROW(row1_ptr, 0x80),
-                       CUBIC_ROW(row2_ptr, 0x80),
-                       CUBIC_ROW(row3_ptr, 0x80),
-                       CUBIC_ROW(row4_ptr, 0x80)) +
-                                               round_factor);
-                                       b += 0x80;
-
-                                       if(3 == 4)
-                                       {
-                                               row1_ptr++;
-                                               row2_ptr++;
-                                               row3_ptr++;
-                                               row4_ptr++;
-                                               a = (int)(transform_cubic(dy,
-                               CUBIC_ROW(row1_ptr, 0x0),
-                               CUBIC_ROW(row2_ptr, 0x0),
-                               CUBIC_ROW(row3_ptr, 0x0),
-                               CUBIC_ROW(row4_ptr, 0x0)) +
-                                                       round_factor);
-                                       }
-
-                                       if(sizeof(unsigned char) < 4)
-                                       {
-                                               *out_row++ = CLIP(r, 0, 0xff);
-                                               *out_row++ = CLIP(g, 0, 0xff);
-                                               *out_row++ = CLIP(b, 0, 0xff);
-                                               if(3 == 4) *out_row++ = CLIP(a, 0, 0xff);
-                                       }
-                                       else
-                                       {
-                                               *out_row++ = r;
-                                               *out_row++ = g;
-                                               *out_row++ = b;
-                                               if(3 == 4) *out_row++ = a;
-                                       }
-                }
-                               else
-/* Fill with chroma */
-                               {
-                                       *out_row++ = 0;
-                                       *out_row++ = 0x80;
-                                       *out_row++ = 0x80;
-                                       if(3 == 4) *out_row++ = 0;
-                               }
+                       break;
+               case AffineEngine::AF_LINEAR:
+                       switch( server->input->get_color_model() ) {
+                       DO_INTERP( BC_RGB_FLOAT, bi_linear, 3, float, float, 0x0, 1.0);
+                       DO_INTERP( BC_RGB888, bi_linear, 3, unsigned char, int, 0x0, 0xff);
+                       DO_INTERP( BC_RGBA_FLOAT, bi_linear, 4, float, float, 0x0, 1.0);
+                       DO_INTERP( BC_RGBA8888, bi_linear, 4, unsigned char, int, 0x0, 0xff);
+                       DO_INTERP( BC_YUV888, bi_linear, 3, unsigned char, int, 0x80, 0xff);
+                       DO_INTERP( BC_YUVA8888, bi_linear, 4, unsigned char, int, 0x80, 0xff);
+                       DO_INTERP( BC_RGB161616, bi_linear, 3, uint16_t, int, 0x0, 0xffff);
+                       DO_INTERP( BC_RGBA16161616, bi_linear, 4, uint16_t, int, 0x0, 0xffff);
+                       DO_INTERP( BC_YUV161616, bi_linear, 3, uint16_t, int, 0x8000, 0xffff);
+                       DO_INTERP( BC_YUVA16161616, bi_linear, 4, uint16_t, int, 0x8000, 0xffff);
                        }
-                       else
-                       {
-                               out_row += 3;
+                       break;
+               default:
+               case AffineEngine::AF_CUBIC:
+                       switch( server->input->get_color_model() ) {
+                       DO_INTERP( BC_RGB_FLOAT, bi_cubic, 3, float, float, 0x0, 1.0);
+                       DO_INTERP( BC_RGB888, bi_cubic, 3, unsigned char, int, 0x0, 0xff);
+                       DO_INTERP( BC_RGBA_FLOAT, bi_cubic, 4, float, float, 0x0, 1.0);
+                       DO_INTERP( BC_RGBA8888, bi_cubic, 4, unsigned char, int, 0x0, 0xff);
+                       DO_INTERP( BC_YUV888, bi_cubic, 3, unsigned char, int, 0x80, 0xff);
+                       DO_INTERP( BC_YUVA8888, bi_cubic, 4, unsigned char, int, 0x80, 0xff);
+                       DO_INTERP( BC_RGB161616, bi_cubic, 3, uint16_t, int, 0x0, 0xffff);
+                       DO_INTERP( BC_RGBA16161616, bi_cubic, 4, uint16_t, int, 0x0, 0xffff);
+                       DO_INTERP( BC_YUV161616, bi_cubic, 3, uint16_t, int, 0x8000, 0xffff);
+                       DO_INTERP( BC_YUVA16161616, bi_cubic, 4, uint16_t, int, 0x8000, 0xffff);
                        }
-
-/*  increment the transformed coordinates  */
-                       tx += xinc;
-                       ty += yinc;
-                       tw += winc;
+                       break;
                }
        }
-}
-
-                               break;
-                       case BC_YUVA8888:
-                               TRANSFORM(4, unsigned char, int, 0x80, 0xff)
-                               break;
-                       case BC_RGB161616:
-                               TRANSFORM(3, uint16_t, int, 0x0, 0xffff)
-                               break;
-                       case BC_RGBA16161616:
-                               TRANSFORM(4, uint16_t, int, 0x0, 0xffff)
-                               break;
-                       case BC_YUV161616:
-                               TRANSFORM(3, uint16_t, int, 0x8000, 0xffff)
-                               break;
-                       case BC_YUVA16161616:
-                               TRANSFORM(4, uint16_t, int, 0x8000, 0xffff)
-                               break;
-               }
-
-       }
        else
        {
                int min_x = server->in_x * AFFINE_OVERSAMPLE;
@@ -1135,18 +617,13 @@ void AffineUnit::process_package(LoadPackage *package)
                float h_f = server->in_h;
                float w_f = server->in_w;
 
-
-
-               if(server->use_opengl)
-               {
+               if(server->use_opengl) {
                        return;
                }
 
-
-
 // Projection
-#define DO_STRETCH(type, components) \
-{ \
+#define DO_STRETCH(tag, type, components) \
+case tag: { \
        type **in_rows = (type**)server->input->get_rows(); \
        type **out_rows = (type**)server->temp->get_rows(); \
  \
@@ -1177,59 +654,26 @@ void AffineUnit::process_package(LoadPackage *package)
                        if(components == 4) dst[3] = src[3]; \
                } \
        } \
-}
-
-               switch(server->input->get_color_model())
-               {
-                       case BC_RGB_FLOAT:
-                               DO_STRETCH(float, 3)
-                               break;
-                       case BC_RGB888:
-                               DO_STRETCH(unsigned char, 3)
-                               break;
-                       case BC_RGBA_FLOAT:
-                               DO_STRETCH(float, 4)
-                               break;
-                       case BC_RGBA8888:
-                               DO_STRETCH(unsigned char, 4)
-                               break;
-                       case BC_YUV888:
-                               DO_STRETCH(unsigned char, 3)
-                               break;
-                       case BC_YUVA8888:
-                               DO_STRETCH(unsigned char, 4)
-                               break;
-                       case BC_RGB161616:
-                               DO_STRETCH(uint16_t, 3)
-                               break;
-                       case BC_RGBA16161616:
-                               DO_STRETCH(uint16_t, 4)
-                               break;
-                       case BC_YUV161616:
-                               DO_STRETCH(uint16_t, 3)
-                               break;
-                       case BC_YUVA16161616:
-                               DO_STRETCH(uint16_t, 4)
-                               break;
+} break
+
+               switch( server->input->get_color_model() ) {
+               DO_STRETCH( BC_RGB_FLOAT, float, 3 );
+               DO_STRETCH( BC_RGB888, unsigned char, 3 );
+               DO_STRETCH( BC_RGBA_FLOAT, float, 4 );
+               DO_STRETCH( BC_RGBA8888, unsigned char, 4 );
+               DO_STRETCH( BC_YUV888, unsigned char, 3 );
+               DO_STRETCH( BC_YUVA8888, unsigned char, 4 );
+               DO_STRETCH( BC_RGB161616, uint16_t, 3 );
+               DO_STRETCH( BC_RGBA16161616, uint16_t, 4 );
+               DO_STRETCH( BC_YUV161616, uint16_t, 3 );
+               DO_STRETCH( BC_YUVA16161616, uint16_t, 4 );
                }
        }
-
-
-
-
 }
 
 
-
-
-
-
-AffineEngine::AffineEngine(int total_clients,
-       int total_packages)
- : LoadServer(
-//1, 1
-total_clients, total_packages
-)
+AffineEngine::AffineEngine(int total_clients, int total_packages)
+ : LoadServer(total_clients, total_packages) //(1, 1)
 {
        user_in_viewport = 0;
        user_in_pivot = 0;
@@ -1240,16 +684,17 @@ total_clients, total_packages
        out_x = out_y = out_w = out_h = 0;
        in_pivot_x = in_pivot_y = 0;
        out_pivot_x = out_pivot_y = 0;
+       interpolation = AF_DEFAULT;
        this->total_packages = total_packages;
 }
 
 void AffineEngine::init_packages()
 {
-       for(int i = 0; i < get_total_packages(); i++)
-       {
+       int y1 = 0, npkgs = get_total_packages();
+       for( int i=0; i<npkgs; ) {
                AffinePackage *package = (AffinePackage*)get_package(i);
-               package->y1 = out_y + (out_h * i / get_total_packages());
-               package->y2 = out_y + (out_h * (i + 1) / get_total_packages());
+               int y2 = out_y + (out_h * ++i / npkgs);
+               package->y1 = y1;  package->y2 = y2;  y1 = y2;
        }
 }
 
@@ -1263,96 +708,67 @@ LoadPackage* AffineEngine::new_package()
        return new AffinePackage;
 }
 
-void AffineEngine::process(VFrame *output,
-       VFrame *input,
-       VFrame *temp,
-       int mode,
-       float x1,
-       float y1,
-       float x2,
-       float y2,
-       float x3,
-       float y3,
-       float x4,
-       float y4,
+void AffineEngine::process(VFrame *output, VFrame *input, VFrame *temp, int mode,
+       float x1, float y1, float x2, float y2, float x3, float y3, float x4, float y4, // four x/y pairs, copied as-is into this->x1 .. this->y4
        int forward)
 {
 
 
 // printf("AffineEngine::process %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f\n",
-// __LINE__,
-// x1,
-// y1,
-// x2,
-// y2,
-// x3,
-// y3,
-// x4,
-// y4);
+// __LINE__, x1, y1, x2, y2, x3, y3, x4, y4);
 //
 // printf("AffineEngine::process %d %d %d %d %d\n",
-// __LINE__,
-// in_x, in_y, in_w, in_h);
+// __LINE__, in_x, in_y, in_w, in_h);
 //
 // printf("AffineEngine::process %d %d %d %d %d\n",
-// __LINE__,
-// out_x, out_y, out_w, out_h);
+// __LINE__, out_x, out_y, out_w, out_h);
 //
 // printf("AffineEngine::process %d %d %d %d %d\n",
-// __LINE__,
-// in_pivot_x, in_pivot_y, out_pivot_x, out_pivot_y);
+// __LINE__, in_pivot_x, in_pivot_y, out_pivot_x, out_pivot_y);
 //
 // printf("AffineEngine::process %d %d %d %d %d\n",
-// __LINE__,
-// user_in_pivot,
-// user_out_pivot,
-// user_in_viewport,
-// user_out_viewport);
+// __LINE__, user_in_pivot, user_out_pivot, user_in_viewport, user_out_viewport);
 
        this->output = output;
        this->input = input;
        this->temp = temp;
        this->mode = mode;
-       this->x1 = x1;
-       this->y1 = y1;
-       this->x2 = x2;
-       this->y2 = y2;
-       this->x3 = x3;
-       this->y3 = y3;
-       this->x4 = x4;
-       this->y4 = y4;
+       this->x1 = x1;  this->y1 = y1;
+       this->x2 = x2;  this->y2 = y2;
+       this->x3 = x3;  this->y3 = y3;
+       this->x4 = x4;  this->y4 = y4;
        this->forward = forward;
 
-
-       if(!user_in_viewport)
-       {
-               in_x = 0;
-               in_y = 0;
+       if(!user_in_viewport) { // no input viewport was set: cover the whole input frame
+               in_x = 0;  in_y = 0;
                in_w = input->get_w();
                in_h = input->get_h();
        }
 
-       if(!user_out_viewport)
-       {
-               out_x = 0;
-               out_y = 0;
+       if(!user_out_viewport) { // no output viewport was set: cover the whole output frame
+               out_x = 0;  out_y = 0;
                out_w = output->get_w();
                out_h = output->get_h();
        }
 
-       if(use_opengl)
-       {
+       if(use_opengl) { // one package, run through process_single()
                set_package_count(1);
                process_single();
        }
-       else
-       {
+       else { // total_packages packages, run through process_packages()
                set_package_count(total_packages);
                process_packages();
        }
 }
 
-
+void AffineEngine::set_matrix(
+       double in_x1, double in_y1, double in_x2, double in_y2,
+       double out_x1, double out_y1, double out_x2, double out_y2,
+       double out_x3, double out_y3, double out_x4, double out_y4)
+{ // thin wrapper: hands all twelve coordinates, in the same order, to the engine's matrix member
+       matrix.set_matrix(in_x1, in_y1, in_x2, in_y2,
+               out_x1, out_y1, out_x2, out_y2, out_x3, out_y3, out_x4, out_y4);
+}
 
 
 void AffineEngine::rotate(VFrame *output,
@@ -1365,10 +781,8 @@ void AffineEngine::rotate(VFrame *output,
        this->mode = ROTATE;
        this->forward = 1;
 
-       if(!user_in_viewport)
-       {
-               in_x = 0;
-               in_y = 0;
+       if( !user_in_viewport ) {
+               in_x = 0;  in_y = 0;
                in_w = input->get_w();
                in_h = input->get_h();
 // DEBUG
@@ -1379,22 +793,18 @@ void AffineEngine::rotate(VFrame *output,
 // printf("AffineEngine::rotate %d %d %d %d %d\n", __LINE__, in_x, in_w, in_y, in_h);
        }
 
-       if(!user_in_pivot)
-       {
+       if( !user_in_pivot ) {
                in_pivot_x = in_x + in_w / 2;
                in_pivot_y = in_y + in_h / 2;
        }
 
-       if(!user_out_viewport)
-       {
-               out_x = 0;
-               out_y = 0;
+       if( !user_out_viewport ) {
+               out_x = 0;  out_y = 0;
                out_w = output->get_w();
                out_h = output->get_h();
        }
 
-       if(!user_out_pivot)
-       {
+       if( !user_out_pivot ) {
                out_pivot_x = out_x + out_w / 2;
                out_pivot_y = out_y + out_h / 2;
        }
@@ -1424,65 +834,39 @@ void AffineEngine::rotate(VFrame *output,
 
 //
 // printf("    angle1=%f angle2=%f angle3=%f angle4=%f\n",
-// angle1 * 360 / 2 / M_PI,
-// angle2 * 360 / 2 / M_PI,
-// angle3 * 360 / 2 / M_PI,
-// angle4 * 360 / 2 / M_PI);
+// angle1 * 360 / 2 / M_PI,  angle2 * 360 / 2 / M_PI,
+// angle3 * 360 / 2 / M_PI,  angle4 * 360 / 2 / M_PI);
 //
 // printf("    radius1=%f radius2=%f radius3=%f radius4=%f\n",
-// radius1,
-// radius2,
-// radius3,
-// radius4);
+// radius1, radius2, radius3, radius4);
 //
 // printf("    x1=%f y1=%f x2=%f y2=%f x3=%f y3=%f x4=%f y4=%f\n",
-// x1 * w / 100,
-// y1 * h / 100,
-// x2 * w / 100,
-// y2 * h / 100,
-// x3 * w / 100,
-// y3 * h / 100,
-// x4 * w / 100,
-// y4 * h / 100);
-
-       if(use_opengl)
-       {
+// x1 * w / 100, y1 * h / 100,
+// x2 * w / 100, y2 * h / 100,
+// x3 * w / 100, y3 * h / 100,
+// x4 * w / 100, y4 * h / 100);
+
+       if(use_opengl) {
                set_package_count(1);
                process_single();
        }
-       else
-       {
+       else {
                set_package_count(total_packages);
                process_packages();
        }
 }
 
-void AffineEngine::set_matrix(AffineMatrix *matrix)
-{
-       for(int i = 0; i < 3; i++)
-       {
-               for(int j = 0; j < 3; j++)
-               {
-                       this->matrix.values[i][j] = matrix->values[i][j];
-               }
-       }
-}
-
 void AffineEngine::set_in_viewport(int x, int y, int w, int h)
 {
-       this->in_x = x;
-       this->in_y = y;
-       this->in_w = w;
-       this->in_h = h;
+       this->in_x = x;  this->in_y = y;
+       this->in_w = w;  this->in_h = h; // the flag set below stops process()/rotate() from resetting these to the full input frame
        this->user_in_viewport = 1;
 }
 
 void AffineEngine::set_out_viewport(int x, int y, int w, int h)
 {
-       this->out_x = x;
-       this->out_y = y;
-       this->out_w = w;
-       this->out_h = h;
+       this->out_x = x;  this->out_y = y;
+       this->out_w = w;  this->out_h = h; // the flag set below stops process()/rotate() from resetting these to the full output frame
        this->user_out_viewport = 1;
 }
 
@@ -1530,3 +914,7 @@ void AffineEngine::unset_viewport()
 }
 
 
+void AffineEngine::set_interpolation(int type)
+{
+       interpolation = type; // process_package switches on this: AF_NEAREST, AF_LINEAR, anything else takes the AF_CUBIC path
+}