X-Git-Url: http://git.cinelerra-gg.org/git/?a=blobdiff_plain;f=cinelerra-5.0%2Fcinelerra%2Foverlayframe.C;fp=cinelerra-5.0%2Fcinelerra%2Foverlayframe.C;h=0000000000000000000000000000000000000000;hb=30bdb85eb33a8ee7ba675038a86c6be59c43d7bd;hp=c660dbbb745539b86ab930dad1a9bedc76cf9e07;hpb=52fcc46226f9df46f9ce9d0566dc568455a7db0b;p=goodguy%2Fhistory.git diff --git a/cinelerra-5.0/cinelerra/overlayframe.C b/cinelerra-5.0/cinelerra/overlayframe.C deleted file mode 100644 index c660dbbb..00000000 --- a/cinelerra-5.0/cinelerra/overlayframe.C +++ /dev/null @@ -1,1215 +0,0 @@ - -/* - * CINELERRA - * Copyright (C) 2008 Adam Williams - * Copyright (C) 2012 Monty - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include -#include -#include -#include -#include - -#include "clip.h" -#include "edl.inc" -#include "mutex.h" -#include "overlayframe.h" -#include "units.h" -#include "vframe.h" - -static inline int mabs(int32_t v) { return abs(v); } -static inline int mabs(int64_t v) { return llabs(v); } -static inline float mabs(float v) { return fabsf(v); } - -static inline int32_t aclip(int32_t v, int mx) { - return v < 0 ? 0 : v > mx ? mx : v; -} -static inline int64_t aclip(int64_t v, int mx) { - return v < 0 ? 0 : v > mx ? mx : v; -} -static inline float aclip(float v, float mx) { - return v < 0 ? 0 : v > mx ? mx : v; -} -static inline float aclip(float v, int mx) { - return v < 0 ? 0 : v > mx ? mx : v; -} -static inline int aclip(int v, float mx) { - return v < 0 ? 0 : v > mx ? mx : v; -} -static inline int32_t cclip(int32_t v, int mx) { - return v > (mx/=2) ? mx : v < (mx=(-mx-1)) ? mx : v; -} -static inline int64_t cclip(int64_t v, int mx) { - return v > (mx/=2) ? mx : v < (mx=(-mx-1)) ? mx : v; -} -static inline float cclip(float v, float mx) { - return v > (mx/=2) ? mx : v < (mx=(-mx)) ? mx : v; -} -static inline float cclip(float v, int mx) { - return v > (mx/=2) ? mx : v < (mx=(-mx-1)) ? mx : v; -} -static inline int cclip(int v, float mx) { - return v > (mx/=2) ? mx : v < (mx=(-mx-1)) ? mx : v; -} - -/* - * New resampler code; replace the original somehwat blurry engine - * with a fairly standard kernel resampling core. This could be used - * for full affine transformation but only implements scale/translate. - * Mostly reuses the old blending macro code. - * - * Pixel convention: - * - * 1) Pixels are points, not areas or squares. - * - * 2) To maintain the usual edge and scaling conventions, pixels are - * set inward from the image edge, eg, the left edge of an image is - * at pixel location x=-.5, not x=0. Although pixels are not - * squares, the usual way of stating this is 'the pixel is located - * at the center of its square'. - * - * 3) Because of 1 and 2, we must truncate and weight the kernel - * convolution at the edge of the input area. Otherwise, all - * resampled areas would be bordered by a transparency halo. E.g. - * in the old engine, upsampling HDV to 1920x1080 results in the - * left and right edges being partially transparent and underlying - * layers shining through. - * - * 4) The contribution of fractional pixels at the edges of input - * ranges are weighted according to the fraction. Note that the - * kernel weighting is adjusted, not the opacity. This is one - * exception to 'pixels have no area'. - * - * 5) The opacity of fractional pixels at the edges of the output - * range is adjusted according to the fraction. This is the other - * exception to 'pixels have no area'. - * - * Fractional alpha blending has been modified across the board from: - * output_alpha = input_alpha > output_alpha ? input_alpha : output_alpha; - * to: - * output_alpha = output_alpha + ((max - output_alpha) * input_alpha) / max; - */ - -#define TRANSFORM_SPP (4096) /* number of data pts per unit x in lookup table */ -#define INDEX_FRACTION (8) /* bits of fraction past TRANSFORM_SPP on kernel - index accumulation */ -#define TRANSFORM_MIN (.5 / TRANSFORM_SPP) - -/* Sinc needed for Lanczos kernel */ -static float sinc(const float x) -{ - float y = x * M_PI; - - if(fabsf(x) < TRANSFORM_MIN) - return 1.0f; - - return sinf(y) / y; -} - -/* - * All resampling (except Nearest Neighbor) is performed via - * transformed 2D resampling kernels bult from 1D lookups. - */ -OverlayKernel::OverlayKernel(int interpolation_type) -{ - int i; - this->type = interpolation_type; - - switch(interpolation_type) - { - case BILINEAR: - width = 1.f; - lookup = new float[(n = TRANSFORM_SPP) + 1]; - for (i = 0; i <= TRANSFORM_SPP; i++) - lookup[i] = (float)(TRANSFORM_SPP - i) / TRANSFORM_SPP; - break; - - /* Use a Catmull-Rom filter (not b-spline) */ - case BICUBIC: - width = 2.; - lookup = new float[(n = 2 * TRANSFORM_SPP) + 1]; - for(i = 0; i <= TRANSFORM_SPP; i++) { - float x = i / (float)TRANSFORM_SPP; - lookup[i] = 1.f - 2.5f * x * x + 1.5f * x * x * x; - } - for(; i <= 2 * TRANSFORM_SPP; i++) { - float x = i / (float)TRANSFORM_SPP; - lookup[i] = 2.f - 4.f * x + 2.5f * x * x - .5f * x * x * x; - } - break; - - case LANCZOS: - width = 3.; - lookup = new float[(n = 3 * TRANSFORM_SPP) + 1]; - for (i = 0; i <= 3 * TRANSFORM_SPP; i++) - lookup[i] = sinc((float)i / TRANSFORM_SPP) * - sinc((float)i / TRANSFORM_SPP / 3.0f); - break; - - default: - width = 0.; - lookup = 0; - n = 0; - break; - } -} - -OverlayKernel::~OverlayKernel() -{ - if(lookup) delete [] lookup; -} - -OverlayFrame::OverlayFrame(int cpus) -{ - direct_engine = 0; - nn_engine = 0; - sample_engine = 0; - temp_frame = 0; - memset(kernel, 0, sizeof(kernel)); - this->cpus = cpus; -} - -OverlayFrame::~OverlayFrame() -{ - if(temp_frame) delete temp_frame; - - if(direct_engine) delete direct_engine; - if(nn_engine) delete nn_engine; - if(sample_engine) delete sample_engine; - - if(kernel[NEAREST_NEIGHBOR]) delete kernel[NEAREST_NEIGHBOR]; - if(kernel[BILINEAR]) delete kernel[BILINEAR]; - if(kernel[BICUBIC]) delete kernel[BICUBIC]; - if(kernel[LANCZOS]) delete kernel[LANCZOS]; -} - -static float epsilon_snap(float f) -{ - return rintf(f * 1024) / 1024.; -} - -int OverlayFrame::overlay(VFrame *output, VFrame *input, - float in_x1, float in_y1, float in_x2, float in_y2, - float out_x1, float out_y1, float out_x2, float out_y2, - float alpha, int mode, int interpolation_type) -{ - in_x1 = epsilon_snap(in_x1); - in_x2 = epsilon_snap(in_x2); - in_y1 = epsilon_snap(in_y1); - in_y2 = epsilon_snap(in_y2); - out_x1 = epsilon_snap(out_x1); - out_x2 = epsilon_snap(out_x2); - out_y1 = epsilon_snap(out_y1); - out_y2 = epsilon_snap(out_y2); - - if (isnan(in_x1) || isnan(in_x2) || - isnan(in_y1) || isnan(in_y2) || - isnan(out_x1) || isnan(out_x2) || - isnan(out_y1) || isnan(out_y2)) return 1; - - if(in_x1 < 0) in_x1 = 0; - if(in_y1 < 0) in_y1 = 0; - if(in_x2 > input->get_w()) in_x2 = input->get_w(); - if(in_y2 > input->get_h()) in_y2 = input->get_h(); - if(out_x1 < 0) out_x1 = 0; - if(out_y1 < 0) out_y1 = 0; - if(out_x2 > output->get_w()) out_x2 = output->get_w(); - if(out_y2 > output->get_h()) out_y2 = output->get_h(); - - float xscale = (out_x2 - out_x1) / (in_x2 - in_x1); - float yscale = (out_y2 - out_y1) / (in_y2 - in_y1); - - /* don't interpolate integer translations, or scale no-ops */ - if(xscale == 1. && yscale == 1. && - (int)in_x1 == in_x1 && (int)in_x2 == in_x2 && - (int)in_y1 == in_y1 && (int)in_y2 == in_y2 && - (int)out_x1 == out_x1 && (int)out_x2 == out_x2 && - (int)out_y1 == out_y1 && (int)out_y2 == out_y2) { - if(!direct_engine) direct_engine = new DirectEngine(cpus); - - direct_engine->output = output; direct_engine->input = input; - direct_engine->in_x1 = in_x1; direct_engine->in_y1 = in_y1; - direct_engine->out_x1 = out_x1; direct_engine->out_x2 = out_x2; - direct_engine->out_y1 = out_y1; direct_engine->out_y2 = out_y2; - direct_engine->alpha = alpha; direct_engine->mode = mode; - direct_engine->process_packages(); - } - else if(interpolation_type == NEAREST_NEIGHBOR) { - if(!nn_engine) nn_engine = new NNEngine(cpus); - nn_engine->output = output; nn_engine->input = input; - nn_engine->in_x1 = in_x1; nn_engine->in_x2 = in_x2; - nn_engine->in_y1 = in_y1; nn_engine->in_y2 = in_y2; - nn_engine->out_x1 = out_x1; nn_engine->out_x2 = out_x2; - nn_engine->out_y1 = out_y1; nn_engine->out_y2 = out_y2; - nn_engine->alpha = alpha; nn_engine->mode = mode; - nn_engine->process_packages(); - } - else { - int xtype = BILINEAR; - int ytype = BILINEAR; - - switch(interpolation_type) - { - case CUBIC_CUBIC: // Bicubic enlargement and reduction - xtype = ytype = BICUBIC; - break; - case CUBIC_LINEAR: // Bicubic enlargement and bilinear reduction - xtype = xscale > 1. ? BICUBIC : BILINEAR; - ytype = yscale > 1. ? BICUBIC : BILINEAR; - break; - case LINEAR_LINEAR: // Bilinear enlargement and bilinear reduction - xtype = ytype = BILINEAR; - break; - case LANCZOS_LANCZOS: // Because we can - xtype = ytype = LANCZOS; - break; - } - - if(xscale == 1. && (int)in_x1 == in_x1 && (int)in_x2 == in_x2 && - (int)out_x1 == out_x1 && (int)out_x2 == out_x2) - xtype = DIRECT_COPY; - - if(yscale == 1. && (int)in_y1 == in_y1 && (int)in_y2 == in_y2 && - (int)out_y1 == out_y1 && (int)out_y2 == out_y2) - ytype = DIRECT_COPY; - - if(!kernel[xtype]) - kernel[xtype] = new OverlayKernel(xtype); - if(!kernel[ytype]) - kernel[ytype] = new OverlayKernel(ytype); - -/* - * horizontal and vertical are separately resampled. First we - * resample the input along X into a transposed, temporary frame, - * then resample/transpose the temporary space along X into the - * output. Fractional pixels along the edge are handled in the X - * direction of each step - */ - // resampled dimension matches the transposed output space - float temp_y1 = out_x1 - floor(out_x1); - float temp_y2 = temp_y1 + (out_x2 - out_x1); - int temp_h = ceil(temp_y2); - - // non-resampled dimension merely cropped - float temp_x1 = in_y1 - floor(in_y1); - float temp_x2 = temp_x1 + (in_y2 - in_y1); - int temp_w = ceil(temp_x2); - - if( temp_frame && - (temp_frame->get_color_model() != input->get_color_model() || - temp_frame->get_w() != temp_w || temp_frame->get_h() != temp_h) ) { - delete temp_frame; - temp_frame = 0; - } - - if(!temp_frame) { - temp_frame = new VFrame(0, -1, temp_w, temp_h, - input->get_color_model(), -1); - } - - temp_frame->clear_frame(); - - if(!sample_engine) sample_engine = new SampleEngine(cpus); - - sample_engine->output = temp_frame; - sample_engine->input = input; - sample_engine->kernel = kernel[xtype]; - sample_engine->col_out1 = 0; - sample_engine->col_out2 = temp_w; - sample_engine->row_in = floor(in_y1); - - sample_engine->in1 = in_x1; - sample_engine->in2 = in_x2; - sample_engine->out1 = temp_y1; - sample_engine->out2 = temp_y2; - sample_engine->alpha = 1.; - sample_engine->mode = TRANSFER_REPLACE; - sample_engine->process_packages(); - - sample_engine->output = output; - sample_engine->input = temp_frame; - sample_engine->kernel = kernel[ytype]; - sample_engine->col_out1 = floor(out_x1); - sample_engine->col_out2 = ceil(out_x2); - sample_engine->row_in = 0; - - sample_engine->in1 = temp_x1; - sample_engine->in2 = temp_x2; - sample_engine->out1 = out_y1; - sample_engine->out2 = out_y2; - sample_engine->alpha = alpha; - sample_engine->mode = mode; - sample_engine->process_packages(); - } - return 0; -} - -// NORMAL [Sa * Sa + Da * (1 - Sa), Sc * Sa + Dc * (1 - Sa)]) -#define ALPHA_NORMAL(mx, Sa, Da) ((Sa * Sa + Da * (mx - Sa)) / mx) -#define COLOR_NORMAL(mx, Sc, Sa, Dc, Da) ((Sc * Sa + Dc * (mx - Sa)) / mx) -#define CHROMA_NORMAL COLOR_NORMAL - -// ADDITION [(Sa + Da), (Sc + Dc)] -#define ALPHA_ADDITION(mx, Sa, Da) (Sa + Da) -#define COLOR_ADDITION(mx, Sc, Sa, Dc, Da) (Sc + Dc) -#define CHROMA_ADDITION(mx, Sc, Sa, Dc, Da) (Sc + Dc) - -// SUBTRACT [(Sa - Da), (Sc - Dc)] -#define ALPHA_SUBTRACT(mx, Sa, Da) (Sa - Da) -#define COLOR_SUBTRACT(mx, Sc, Sa, Dc, Da) (Sc - Dc) -#define CHROMA_SUBTRACT(mx, Sc, Sa, Dc, Da) (Sc - Dc) - -// MULTIPLY [(Sa * Da), Sc * Dc] -#define ALPHA_MULTIPLY(mx, Sa, Da) ((Sa * Da) / mx) -#define COLOR_MULTIPLY(mx, Sc, Sa, Dc, Da) ((Sc * Dc) / mx) -#define CHROMA_MULTIPLY(mx, Sc, Sa, Dc, Da) ((Sc * Dc) / mx) - -// DIVIDE [(Sa / Da), (Sc / Dc)] -#define ALPHA_DIVIDE(mx, Sa, Da) (Da ? ((Sa * mx) / Da) : mx) -#define COLOR_DIVIDE(mx, Sc, Sa, Dc, Da) (Dc ? ((Sc * mx) / Dc) : mx) -#define CHROMA_DIVIDE(mx, Sc, Sa, Dc, Da) (Dc ? ((Sc * mx) / Dc) : mx) - -// REPLACE [Sa, Sc] (fade = 1) -#define ALPHA_REPLACE(mx, Sa, Da) Sa -#define COLOR_REPLACE(mx, Sc, Sa, Dc, Da) Sc -#define CHROMA_REPLACE COLOR_REPLACE - -// MAX [max(Sa, Da), MAX(Sc, Dc)] -#define ALPHA_MAX(mx, Sa, Da) (Sa > Da ? Sa : Da) -#define COLOR_MAX(mx, Sc, Sa, Dc, Da) (Sc > Dc ? Sc : Dc) -#define CHROMA_MAX(mx, Sc, Sa, Dc, Da) (mabs(Sc) > mabs(Dc) ? Sc : Dc) - -// MIN [min(Sa, Da), MIN(Sc, Dc)] -#define ALPHA_MIN(mx, Sa, Da) (Sa < Da ? Sa : Da) -#define COLOR_MIN(mx, Sc, Sa, Dc, Da) (Sc < Dc ? Sc : Dc) -#define CHROMA_MIN(mx, Sc, Sa, Dc, Da) (mabs(Sc) < mabs(Dc) ? Sc : Dc) - -// AVERAGE [(Sa + Da) * 0.5, (Sc + Dc) * 0.5] -#define ALPHA_AVERAGE(mx, Sa, Da) ((Sa + Da) / 2) -#define COLOR_AVERAGE(mx, Sc, Sa, Dc, Da) ((Sc + Dc) / 2) -#define CHROMA_AVERAGE COLOR_AVERAGE - -// DARKEN [Sa + Da - Sa*Da, Sc*(1 - Da) + Dc*(1 - Sa) + min(Sc, Dc)] -#define ALPHA_DARKEN(mx, Sa, Da) (Sa + Da - (Sa * Da) / mx) -#define COLOR_DARKEN(mx, Sc, Sa, Dc, Da) ((Sc * (mx - Da) + Dc * (mx - Sa)) / mx + (Sc < Dc ? Sc : Dc)) -#define CHROMA_DARKEN(mx, Sc, Sa, Dc, Da) ((Sc * (mx - Da) + Dc * (mx - Sa)) / mx + (mabs(Sc) < mabs(Dc) ? Sc : Dc)) - -// LIGHTEN [Sa + Da - Sa*Da, Sc*(1 - Da) + Dc*(1 - Sa) + max(Sc, Dc)] -#define ALPHA_LIGHTEN(mx, Sa, Da) (Sa + Da - Sa * Da / mx) -#define COLOR_LIGHTEN(mx, Sc, Sa, Dc, Da) ((Sc * (mx - Da) + Dc * (mx - Sa)) / mx + (Sc > Dc ? Sc : Dc)) -#define CHROMA_LIGHTEN(mx, Sc, Sa, Dc, Da) ((Sc * (mx - Da) + Dc * (mx - Sa)) / mx + (mabs(Sc) > mabs(Dc) ? Sc : Dc)) - -// DST [Da, Dc] -#define ALPHA_DST(mx, Sa, Da) Da -#define COLOR_DST(mx, Sc, Sa, Dc, Da) Dc -#define CHROMA_DST COLOR_DST - -// DST_ATOP [Sa, Sc * (1 - Da) + Dc * Sa] -#define ALPHA_DST_ATOP(mx, Sa, Da) Sa -#define COLOR_DST_ATOP(mx, Sc, Sa, Dc, Da) ((Sc * (mx - Da) + Dc * Sa) / mx) -#define CHROMA_DST_ATOP COLOR_DST_ATOP - -// DST_IN [Da * Sa, Dc * Sa] -#define ALPHA_DST_IN(mx, Sa, Da) ((Da * Sa) / mx) -#define COLOR_DST_IN(mx, Sc, Sa, Dc, Da) ((Dc * Sa) / mx) -#define CHROMA_DST_IN COLOR_DST_IN - -// DST_OUT [Da * (1 - Sa), Dc * (1 - Sa)] -#define ALPHA_DST_OUT(mx, Sa, Da) (Da * (mx - Sa) / mx) -#define COLOR_DST_OUT(mx, Sc, Sa, Dc, Da) (Dc * (mx - Sa) / mx) -#define CHROMA_DST_OUT COLOR_DST_OUT - -// DST_OVER [Sa * (1 - Da) + Da, Sc * (1 - Da) + Dc] -#define ALPHA_DST_OVER(mx, Sa, Da) ((Sa * (mx - Da)) / mx + Da) -#define COLOR_DST_OVER(mx, Sc, Sa, Dc, Da) (Sc * (mx - Da)/ mx + Dc) -#define CHROMA_DST_OVER COLOR_DST_OVER - -// SRC [Sa, Sc] -#define ALPHA_SRC(mx, Sa, Da) Sa -#define COLOR_SRC(mx, Sc, Sa, Dc, Da) Sc -#define CHROMA_SRC COLOR_SRC - -// SRC_ATOP [Da, Sc * Da + Dc * (1 - Sa)] -#define ALPHA_SRC_ATOP(mx, Sa, Da) Da -#define COLOR_SRC_ATOP(mx, Sc, Sa, Dc, Da) ((Sc * Da + Dc * (mx - Sa)) / mx) -#define CHROMA_SRC_ATOP COLOR_SRC_ATOP - -// SRC_IN [Sa * Da, Sc * Da] -#define ALPHA_SRC_IN(mx, Sa, Da) ((Sa * Da) / mx) -#define COLOR_SRC_IN(mx, Sc, Sa, Dc, Da) (Sc * Da / mx) -#define CHROMA_SRC_IN COLOR_SRC_IN - -// SRC_OUT [Sa * (1 - Da), Sc * (1 - Da)] -#define ALPHA_SRC_OUT(mx, Sa, Da) (Sa * (mx - Da) / mx) -#define COLOR_SRC_OUT(mx, Sc, Sa, Dc, Da) (Sc * (mx - Da) / mx) -#define CHROMA_SRC_OUT COLOR_SRC_OUT - -// SRC_OVER [Sa + Da * (1 - Sa), Sc + (1 - Sa) * Dc] -#define ALPHA_SRC_OVER(mx, Sa, Da) (Sa + Da * (mx - Sa) / mx) -#define COLOR_SRC_OVER(mx, Sc, Sa, Dc, Da) (Sc + Dc * (mx - Sa) / mx) -#define CHROMA_SRC_OVER COLOR_SRC_OVER - -// OR [Sa + Da - Sa * Da, Sc + Dc - Sc * Dc] -#define ALPHA_OR(mx, Sa, Da) (Sa + Da - (Sa * Da) / mx) -#define COLOR_OR(mx, Sc, Sa, Dc, Da) (Sc + Dc - (Sc * Dc) / mx) -#define CHROMA_OR COLOR_OR - -// XOR [Sa * (1 - Da) + Da * (1 - Sa), Sc * (1 - Da) + Dc * (1 - Sa)] -#define ALPHA_XOR(mx, Sa, Da) ((Sa * (mx - Da) + Da * (mx - Sa)) / mx) -#define COLOR_XOR(mx, Sc, Sa, Dc, Da) ((Sc * (mx - Da) + Dc * (mx - Sa)) / mx) -#define CHROMA_XOR COLOR_XOR - -#define ZTYP(ty) typedef ty z_##ty __attribute__ ((__unused__)) -ZTYP(int8_t); ZTYP(uint8_t); -ZTYP(int16_t); ZTYP(uint16_t); -ZTYP(int32_t); ZTYP(uint32_t); -ZTYP(int64_t); ZTYP(uint64_t); -ZTYP(float); ZTYP(double); - -#define ALPHA3_BLEND(FN, typ, inp, out, mx, ofs, rnd) \ - typ inp0 = (typ)inp[0], inp1 = (typ)inp[1] - ofs; \ - typ inp2 = (typ)inp[2] - ofs, inp3 = fade * mx + rnd; \ - typ out0 = (typ)out[0], out1 = (typ)out[1] - ofs; \ - typ out2 = (typ)out[2] - ofs, out3 = mx; \ - r = COLOR_##FN(mx, inp0, inp3, out0, out3); \ - if( ofs ) { \ - g = CHROMA_##FN(mx, inp1, inp3, out1, out3); \ - b = CHROMA_##FN(mx, inp2, inp3, out2, out3); \ - } \ - else { \ - g = COLOR_##FN(mx, inp1, inp3, out1, out3); \ - b = COLOR_##FN(mx, inp2, inp3, out2, out3); \ - } - -#define ALPHA4_BLEND(FN, typ, inp, out, mx, ofs, rnd) \ - typ inp0 = (typ)inp[0], inp1 = (typ)inp[1] - ofs; \ - typ inp2 = (typ)inp[2] - ofs, inp3 = (typ)inp[3] * fade + rnd; \ - typ out0 = (typ)out[0], out1 = (typ)out[1] - ofs; \ - typ out2 = (typ)out[2] - ofs, out3 = out[3]; \ - r = COLOR_##FN(mx, inp0, inp3, out0, out3); \ - if( ofs ) { \ - g = CHROMA_##FN(mx, inp1, inp3, out1, out3); \ - b = CHROMA_##FN(mx, inp2, inp3, out2, out3); \ - } \ - else { \ - g = COLOR_##FN(mx, inp1, inp3, out1, out3); \ - b = COLOR_##FN(mx, inp2, inp3, out2, out3); \ - } \ - a = ALPHA_##FN(mx, inp3, out3) - -#define ALPHA_STORE(out, ofs, mx) \ - out[0] = r; \ - out[1] = g + ofs; \ - out[2] = b + ofs - -#define ALPHA3_STORE(out, ofs, mx) \ - r = aclip(r, mx); \ - g = ofs ? cclip(g, mx) : aclip(g, mx); \ - b = ofs ? cclip(b, mx) : aclip(b, mx); \ - if( trnsp ) { \ - r = (r * opcty + out0 * trnsp) / mx; \ - g = (g * opcty + out1 * trnsp) / mx; \ - b = (b * opcty + out2 * trnsp) / mx; \ - } \ - ALPHA_STORE(out, ofs, mx) - -#define ALPHA4_STORE(out, ofs, mx) \ - r = aclip(r, mx); \ - g = ofs ? cclip(g, mx) : aclip(g, mx); \ - b = ofs ? cclip(b, mx) : aclip(b, mx); \ - if( trnsp ) { \ - r = (r * opcty + out0 * trnsp) / mx; \ - g = (g * opcty + out1 * trnsp) / mx; \ - b = (b * opcty + out2 * trnsp) / mx; \ - a = (a * opcty + out3 * trnsp) / mx; \ - } \ - ALPHA_STORE(out, ofs, mx); \ - out[3] = aclip(a, mx) - -#define XBLEND(FN, temp_type, type, max, components, chroma_offset, round) { \ - temp_type opcty = alpha * max + round, trnsp = max - opcty; \ - type** output_rows = (type**)output->get_rows(); \ - type** input_rows = (type**)input->get_rows(); \ - ix *= components; ox *= components; \ - \ - for(int i = pkg->out_row1; i < pkg->out_row2; i++) { \ - type* in_row = input_rows[i + iy] + ix; \ - type* output = output_rows[i] + ox; \ - for(int j = 0; j < ow; j++) { \ - if( components == 4 ) { \ - temp_type r, g, b, a; \ - ALPHA4_BLEND(FN, temp_type, in_row, output, max, chroma_offset, round); \ - ALPHA4_STORE(output, chroma_offset, max); \ - } \ - else { \ - temp_type r, g, b; \ - ALPHA3_BLEND(FN, temp_type, in_row, output, max, chroma_offset, round); \ - ALPHA3_STORE(output, chroma_offset, max); \ - } \ - in_row += components; output += components; \ - } \ - } \ - break; \ -} - -#define XBLEND_ONLY(FN) { \ - switch(input->get_color_model()) { \ - case BC_RGB_FLOAT: XBLEND(FN, z_float, z_float, 1.f, 3, 0, 0.f); \ - case BC_RGBA_FLOAT: XBLEND(FN, z_float, z_float, 1.f, 4, 0, 0.f); \ - case BC_RGB888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \ - case BC_YUV888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \ - case BC_RGBA8888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \ - case BC_YUVA8888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \ - case BC_RGB161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \ - case BC_YUV161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \ - case BC_RGBA16161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \ - case BC_YUVA16161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \ - } \ - break; \ -} - -/* Direct translate / blend **********************************************/ - -DirectPackage::DirectPackage() -{ -} - -DirectUnit::DirectUnit(DirectEngine *server) - : LoadClient(server) -{ - this->engine = server; -} - -DirectUnit::~DirectUnit() -{ -} - -void DirectUnit::process_package(LoadPackage *package) -{ - DirectPackage *pkg = (DirectPackage*)package; - - VFrame *output = engine->output; - VFrame *input = engine->input; - int mode = engine->mode; - float fade = engine->alpha; - float alpha = - BC_CModels::has_alpha(input->get_color_model()) && - mode == TRANSFER_REPLACE ? 1.f : engine->alpha; - - int ix = engine->in_x1; - int ox = engine->out_x1; - int ow = engine->out_x2 - ox; - int iy = engine->in_y1 - engine->out_y1; - - switch( mode ) { - case TRANSFER_NORMAL: XBLEND_ONLY(NORMAL); - case TRANSFER_ADDITION: XBLEND_ONLY(ADDITION); - case TRANSFER_SUBTRACT: XBLEND_ONLY(SUBTRACT); - case TRANSFER_MULTIPLY: XBLEND_ONLY(MULTIPLY); - case TRANSFER_DIVIDE: XBLEND_ONLY(DIVIDE); - case TRANSFER_REPLACE: XBLEND_ONLY(REPLACE); - case TRANSFER_MAX: XBLEND_ONLY(MAX); - case TRANSFER_MIN: XBLEND_ONLY(MIN); - case TRANSFER_AVERAGE: XBLEND_ONLY(AVERAGE); - case TRANSFER_DARKEN: XBLEND_ONLY(DARKEN); - case TRANSFER_LIGHTEN: XBLEND_ONLY(LIGHTEN); - case TRANSFER_DST: XBLEND_ONLY(DST); - case TRANSFER_DST_ATOP: XBLEND_ONLY(DST_ATOP); - case TRANSFER_DST_IN: XBLEND_ONLY(DST_IN); - case TRANSFER_DST_OUT: XBLEND_ONLY(DST_OUT); - case TRANSFER_DST_OVER: XBLEND_ONLY(DST_OVER); - case TRANSFER_SRC: XBLEND_ONLY(SRC); - case TRANSFER_SRC_ATOP: XBLEND_ONLY(SRC_ATOP); - case TRANSFER_SRC_IN: XBLEND_ONLY(SRC_IN); - case TRANSFER_SRC_OUT: XBLEND_ONLY(SRC_OUT); - case TRANSFER_SRC_OVER: XBLEND_ONLY(SRC_OVER); - case TRANSFER_OR: XBLEND_ONLY(OR); - case TRANSFER_XOR: XBLEND_ONLY(XOR); - } -} - -DirectEngine::DirectEngine(int cpus) - : LoadServer(cpus, cpus) -{ -} - -DirectEngine::~DirectEngine() -{ -} - -void DirectEngine::init_packages() -{ - if(in_x1 < 0) { out_x1 -= in_x1; in_x1 = 0; } - if(in_y1 < 0) { out_y1 -= in_y1; in_y1 = 0; } - if(out_x1 < 0) { in_x1 -= out_x1; out_x1 = 0; } - if(out_y1 < 0) { in_y1 -= out_y1; out_y1 = 0; } - if(out_x2 > output->get_w()) out_x2 = output->get_w(); - if(out_y2 > output->get_h()) out_y2 = output->get_h(); - int out_w = out_x2 - out_x1; - int out_h = out_y2 - out_y1; - if( !out_w || !out_h ) return; - - int rows = out_h; - int pkgs = get_total_packages(); - int row1 = out_y1, row2 = row1; - for(int i = 0; i < pkgs; row1=row2 ) { - DirectPackage *package = (DirectPackage*)get_package(i); - row2 = ++i * rows / pkgs + out_y1; - package->out_row1 = row1; - package->out_row2 = row2; - } -} - -LoadClient* DirectEngine::new_client() -{ - return new DirectUnit(this); -} - -LoadPackage* DirectEngine::new_package() -{ - return new DirectPackage; -} - -/* Nearest Neighbor scale / translate / blend ********************/ - -#define XBLEND_3NN(FN, temp_type, type, max, components, chroma_offset, round) { \ - temp_type opcty = alpha * max + round, trnsp = max - opcty; \ - type** output_rows = (type**)output->get_rows(); \ - type** input_rows = (type**)input->get_rows(); \ - ox *= components; \ - \ - for(int i = pkg->out_row1; i < pkg->out_row2; i++) { \ - int *lx = engine->in_lookup_x; \ - type* in_row = input_rows[*ly++]; \ - type* output = output_rows[i] + ox; \ - for(int j = 0; j < ow; j++) { \ - in_row += *lx++; \ - if( components == 4 ) { \ - temp_type r, g, b, a; \ - ALPHA4_BLEND(FN, temp_type, in_row, output, max, chroma_offset, round); \ - ALPHA4_STORE(output, chroma_offset, max); \ - } \ - else { \ - temp_type r, g, b; \ - ALPHA3_BLEND(FN, temp_type, in_row, output, max, chroma_offset, round); \ - ALPHA3_STORE(output, chroma_offset, max); \ - } \ - output += components; \ - } \ - } \ - break; \ -} - -#define XBLEND_NN(FN) { \ - switch(input->get_color_model()) { \ - case BC_RGB_FLOAT: XBLEND_3NN(FN, z_float, z_float, 1.f, 3, 0, 0.f); \ - case BC_RGBA_FLOAT: XBLEND_3NN(FN, z_float, z_float, 1.f, 4, 0, 0.f); \ - case BC_RGB888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \ - case BC_YUV888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \ - case BC_RGBA8888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \ - case BC_YUVA8888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \ - case BC_RGB161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \ - case BC_YUV161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \ - case BC_RGBA16161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \ - case BC_YUVA16161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \ - } \ - break; \ -} - -NNPackage::NNPackage() -{ -} - -NNUnit::NNUnit(NNEngine *server) - : LoadClient(server) -{ - this->engine = server; -} - -NNUnit::~NNUnit() -{ -} - -void NNUnit::process_package(LoadPackage *package) -{ - NNPackage *pkg = (NNPackage*)package; - VFrame *output = engine->output; - VFrame *input = engine->input; - int mode = engine->mode; - float fade = engine->alpha; - float alpha = - BC_CModels::has_alpha(input->get_color_model()) && - mode == TRANSFER_REPLACE ? 1.f : engine->alpha; - - int ox = engine->out_x1i; - int ow = engine->out_x2i - ox; - int *ly = engine->in_lookup_y + pkg->out_row1; - - switch( mode ) { - case TRANSFER_NORMAL: XBLEND_NN(NORMAL); - case TRANSFER_ADDITION: XBLEND_NN(ADDITION); - case TRANSFER_SUBTRACT: XBLEND_NN(SUBTRACT); - case TRANSFER_MULTIPLY: XBLEND_NN(MULTIPLY); - case TRANSFER_DIVIDE: XBLEND_NN(DIVIDE); - case TRANSFER_REPLACE: XBLEND_NN(REPLACE); - case TRANSFER_MAX: XBLEND_NN(MAX); - case TRANSFER_MIN: XBLEND_NN(MIN); - case TRANSFER_AVERAGE: XBLEND_NN(AVERAGE); - case TRANSFER_DARKEN: XBLEND_NN(DARKEN); - case TRANSFER_LIGHTEN: XBLEND_NN(LIGHTEN); - case TRANSFER_DST: XBLEND_NN(DST); - case TRANSFER_DST_ATOP: XBLEND_NN(DST_ATOP); - case TRANSFER_DST_IN: XBLEND_NN(DST_IN); - case TRANSFER_DST_OUT: XBLEND_NN(DST_OUT); - case TRANSFER_DST_OVER: XBLEND_NN(DST_OVER); - case TRANSFER_SRC: XBLEND_NN(SRC); - case TRANSFER_SRC_ATOP: XBLEND_NN(SRC_ATOP); - case TRANSFER_SRC_IN: XBLEND_NN(SRC_IN); - case TRANSFER_SRC_OUT: XBLEND_NN(SRC_OUT); - case TRANSFER_SRC_OVER: XBLEND_NN(SRC_OVER); - case TRANSFER_OR: XBLEND_NN(OR); - case TRANSFER_XOR: XBLEND_NN(XOR); - } -} - -NNEngine::NNEngine(int cpus) - : LoadServer(cpus, cpus) -{ - in_lookup_x = 0; - in_lookup_y = 0; -} - -NNEngine::~NNEngine() -{ - if(in_lookup_x) - delete[] in_lookup_x; - if(in_lookup_y) - delete[] in_lookup_y; -} - -void NNEngine::init_packages() -{ - int in_w = input->get_w(); - int in_h = input->get_h(); - int out_w = output->get_w(); - int out_h = output->get_h(); - - float in_subw = in_x2 - in_x1; - float in_subh = in_y2 - in_y1; - float out_subw = out_x2 - out_x1; - float out_subh = out_y2 - out_y1; - int first, last, count, i; - int components = 3; - - out_x1i = rint(out_x1); - out_x2i = rint(out_x2); - if(out_x1i < 0) out_x1i = 0; - if(out_x1i > out_w) out_x1i = out_w; - if(out_x2i < 0) out_x2i = 0; - if(out_x2i > out_w) out_x2i = out_w; - int out_wi = out_x2i - out_x1i; - if( !out_wi ) return; - - delete[] in_lookup_x; - in_lookup_x = new int[out_wi]; - delete[] in_lookup_y; - in_lookup_y = new int[out_h]; - - switch(input->get_color_model()) { - case BC_RGBA_FLOAT: - case BC_RGBA8888: - case BC_YUVA8888: - case BC_RGBA16161616: - components = 4; - break; - } - - first = count = 0; - - for(i = out_x1i; i < out_x2i; i++) { - int in = (i - out_x1 + .5) * in_subw / out_subw + in_x1; - if(in < in_x1) - in = in_x1; - if(in > in_x2) - in = in_x2; - - if(in >= 0 && in < in_w && in >= in_x1 && i >= 0 && i < out_w) { - if(count == 0) { - first = i; - in_lookup_x[0] = in * components; - } - else { - in_lookup_x[count] = (in-last)*components; - } - last = in; - count++; - } - else if(count) - break; - } - out_x1i = first; - out_x2i = first + count; - first = count = 0; - - for(i = out_y1; i < out_y2; i++) { - int in = (i - out_y1+.5) * in_subh / out_subh + in_y1; - if(in < in_y1) in = in_y1; - if(in > in_y2) in = in_y2; - if(in >= 0 && in < in_h && i >= 0 && i < out_h) { - if(count == 0) first = i; - in_lookup_y[i] = in; - count++; - } - else if(count) - break; - } - out_y1 = first; - out_y2 = first + count; - - int rows = count; - int pkgs = get_total_packages(); - int row1 = out_y1, row2 = row1; - for(int i = 0; i < pkgs; row1=row2 ) { - NNPackage *package = (NNPackage*)get_package(i); - row2 = ++i * rows / pkgs + out_y1; - package->out_row1 = row1; - package->out_row2 = row2; - } -} - -LoadClient* NNEngine::new_client() -{ - return new NNUnit(this); -} - -LoadPackage* NNEngine::new_package() -{ - return new NNPackage; -} - -/* Fully resampled scale / translate / blend ******************************/ -/* resample into a temporary row vector, then blend */ - -#define XSAMPLE(FN, temp_type, type, max, components, chroma_offset, round) { \ - float temp[oh*components]; \ - temp_type opcty = alpha * max + round, trnsp = max - opcty; \ - type **output_rows = (type**)voutput->get_rows() + o1i; \ - type **input_rows = (type**)vinput->get_rows(); \ - \ - for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \ - type *input = input_rows[i - engine->col_out1 + engine->row_in]; \ - float *tempp = temp; \ - if( !k ) { /* direct copy case */ \ - type *ip = input + i1i * components; \ - for(int j = 0; j < oh; j++) { \ - *tempp++ = *ip++; \ - *tempp++ = *ip++ - chroma_offset; \ - *tempp++ = *ip++ - chroma_offset; \ - if( components == 4 ) *tempp++ = *ip++; \ - } \ - } \ - else { /* resample */ \ - for(int j = 0; j < oh; j++) { \ - float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \ - int ki = lookup_sk[j], x = lookup_sx0[j]; \ - type *ip = input + x * components; \ - float wacc = 0, awacc = 0; \ - while(x++ < lookup_sx1[j]) { \ - float kv = k[abs(ki >> INDEX_FRACTION)]; \ - /* handle fractional pixels on edges of input */ \ - if(x == i1i) kv *= i1f; \ - if(x + 1 == i2i) kv *= i2f; \ - if( components == 4 ) { awacc += kv; kv *= ip[3]; } \ - wacc += kv; \ - racc += kv * *ip++; \ - gacc += kv * (*ip++ - chroma_offset); \ - bacc += kv * (*ip++ - chroma_offset); \ - if( components == 4 ) { aacc += kv; ++ip; } \ - ki += kd; \ - } \ - if(wacc > 0.) wacc = 1. / wacc; \ - *tempp++ = racc * wacc; \ - *tempp++ = gacc * wacc; \ - *tempp++ = bacc * wacc; \ - if( components == 4 ) { \ - if(awacc > 0.) awacc = 1. / awacc; \ - *tempp++ = aacc * awacc; \ - } \ - } \ - } \ - \ - /* handle fractional pixels on edges of output */ \ - temp[0] *= o1f; temp[1] *= o1f; temp[2] *= o1f; \ - if( components == 4 ) temp[3] *= o1f; \ - tempp = temp + (oh-1)*components; \ - tempp[0] *= o2f; tempp[1] *= o2f; tempp[2] *= o2f; \ - if( components == 4 ) tempp[3] *= o2f; \ - tempp = temp; \ - /* blend output */ \ - for(int j = 0; j < oh; j++) { \ - type *output = output_rows[j] + i * components; \ - if( components == 4 ) { \ - temp_type r, g, b, a; \ - ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, round); \ - ALPHA4_STORE(output, chroma_offset, max); \ - } \ - else { \ - temp_type r, g, b; \ - ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, round); \ - ALPHA3_STORE(output, chroma_offset, max); \ - } \ - tempp += components; \ - } \ - } \ - break; \ -} - -#define XBLEND_SAMPLE(FN) { \ - switch(vinput->get_color_model()) { \ - case BC_RGB_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 3, 0.f, 0.f); \ - case BC_RGBA_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 4, 0.f, 0.f); \ - case BC_RGB888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \ - case BC_YUV888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \ - case BC_RGBA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \ - case BC_YUVA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \ - case BC_RGB161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \ - case BC_YUV161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \ - case BC_RGBA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \ - case BC_YUVA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \ - } \ - break; \ -} - - -SamplePackage::SamplePackage() -{ -} - -SampleUnit::SampleUnit(SampleEngine *server) - : LoadClient(server) -{ - this->engine = server; -} - -SampleUnit::~SampleUnit() -{ -} - -void SampleUnit::process_package(LoadPackage *package) -{ - SamplePackage *pkg = (SamplePackage*)package; - - float i1 = engine->in1; - float i2 = engine->in2; - float o1 = engine->out1; - float o2 = engine->out2; - - if(i2 - i1 <= 0 || o2 - o1 <= 0) - return; - - VFrame *voutput = engine->output; - VFrame *vinput = engine->input; - int mode = engine->mode; - float fade = engine->alpha; - float alpha = - BC_CModels::has_alpha(vinput->get_color_model()) && - mode == TRANSFER_REPLACE ? 1.f : engine->alpha; - - //int iw = vinput->get_w(); - int i1i = floor(i1); - int i2i = ceil(i2); - float i1f = 1.f - i1 + i1i; - float i2f = 1.f - i2i + i2; - - int o1i = floor(o1); - int o2i = ceil(o2); - float o1f = 1.f - o1 + o1i; - float o2f = 1.f - o2i + o2; - int oh = o2i - o1i; - - float *k = engine->kernel->lookup; - //float kw = engine->kernel->width; - //int kn = engine->kernel->n; - int kd = engine->kd; - - int *lookup_sx0 = engine->lookup_sx0; - int *lookup_sx1 = engine->lookup_sx1; - int *lookup_sk = engine->lookup_sk; - //float *lookup_wacc = engine->lookup_wacc; - - switch( mode ) { - case TRANSFER_NORMAL: XBLEND_SAMPLE(NORMAL); - case TRANSFER_ADDITION: XBLEND_SAMPLE(ADDITION); - case TRANSFER_SUBTRACT: XBLEND_SAMPLE(SUBTRACT); - case TRANSFER_MULTIPLY: XBLEND_SAMPLE(MULTIPLY); - case TRANSFER_DIVIDE: XBLEND_SAMPLE(DIVIDE); - case TRANSFER_REPLACE: XBLEND_SAMPLE(REPLACE); - case TRANSFER_MAX: XBLEND_SAMPLE(MAX); - case TRANSFER_MIN: XBLEND_SAMPLE(MIN); - case TRANSFER_AVERAGE: XBLEND_SAMPLE(AVERAGE); - case TRANSFER_DARKEN: XBLEND_SAMPLE(DARKEN); - case TRANSFER_LIGHTEN: XBLEND_SAMPLE(LIGHTEN); - case TRANSFER_DST: XBLEND_SAMPLE(DST); - case TRANSFER_DST_ATOP: XBLEND_SAMPLE(DST_ATOP); - case TRANSFER_DST_IN: XBLEND_SAMPLE(DST_IN); - case TRANSFER_DST_OUT: XBLEND_SAMPLE(DST_OUT); - case TRANSFER_DST_OVER: XBLEND_SAMPLE(DST_OVER); - case TRANSFER_SRC: XBLEND_SAMPLE(SRC); - case TRANSFER_SRC_ATOP: XBLEND_SAMPLE(SRC_ATOP); - case TRANSFER_SRC_IN: XBLEND_SAMPLE(SRC_IN); - case TRANSFER_SRC_OUT: XBLEND_SAMPLE(SRC_OUT); - case TRANSFER_SRC_OVER: XBLEND_SAMPLE(SRC_OVER); - case TRANSFER_OR: XBLEND_SAMPLE(OR); - case TRANSFER_XOR: XBLEND_SAMPLE(XOR); - } -} - - -SampleEngine::SampleEngine(int cpus) - : LoadServer(cpus, cpus) -{ - lookup_sx0 = 0; - lookup_sx1 = 0; - lookup_sk = 0; - lookup_wacc = 0; - kd = 0; -} - -SampleEngine::~SampleEngine() -{ - if(lookup_sx0) delete [] lookup_sx0; - if(lookup_sx1) delete [] lookup_sx1; - if(lookup_sk) delete [] lookup_sk; - if(lookup_wacc) delete [] lookup_wacc; -} - -/* - * unlike the Direct and NN engines, the Sample engine works across - * output columns (it makes for more economical memory addressing - * during convolution) - */ -void SampleEngine::init_packages() -{ - int iw = input->get_w(); - int i1i = floor(in1); - int i2i = ceil(in2); - float i1f = 1.f - in1 + i1i; - float i2f = 1.f - i2i + in2; - - int oy = floor(out1); - float oyf = out1 - oy; - int oh = ceil(out2) - oy; - - float *k = kernel->lookup; - float kw = kernel->width; - int kn = kernel->n; - - if(in2 - in1 <= 0 || out2 - out1 <= 0) - return; - - /* determine kernel spatial coverage */ - float scale = (out2 - out1) / (in2 - in1); - float iscale = (in2 - in1) / (out2 - out1); - float coverage = fabs(1.f / scale); - float bound = (coverage < 1.f ? kw : kw * coverage) - (.5f / TRANSFORM_SPP); - float coeff = (coverage < 1.f ? 1.f : scale) * TRANSFORM_SPP; - - delete [] lookup_sx0; - delete [] lookup_sx1; - delete [] lookup_sk; - delete [] lookup_wacc; - - lookup_sx0 = new int[oh]; - lookup_sx1 = new int[oh]; - lookup_sk = new int[oh]; - lookup_wacc = new float[oh]; - - kd = (double)coeff * (1 << INDEX_FRACTION) + .5; - - /* precompute kernel values and weight sums */ - for(int i = 0; i < oh; i++) { - /* map destination back to source */ - double sx = (i - oyf + .5) * iscale + in1 - .5; - - /* - * clip iteration to source area but not source plane. Points - * outside the source plane count as transparent. Points outside - * the source area don't count at all. The actual convolution - * later will be clipped to both, but we need to compute - * weights. - */ - int sx0 = MAX((int)floor(sx - bound) + 1, i1i); - int sx1 = MIN((int)ceil(sx + bound), i2i); - int ki = (double)(sx0 - sx) * coeff * (1 << INDEX_FRACTION) - + (1 << (INDEX_FRACTION - 1)) + .5; - float wacc=0.; - - lookup_sx0[i] = -1; - lookup_sx1[i] = -1; - - for(int j= sx0; j < sx1; j++) { - int kv = (ki >> INDEX_FRACTION); - if(kv > kn) break; - if(kv >= -kn) { - /* - * the contribution of the first and last input pixel (if - * fractional) are linearly weighted by the fraction - */ - if(j == i1i) - wacc += k[abs(kv)] * i1f; - else if(j + 1 == i2i) - wacc += k[abs(kv)] * i2f; - else - wacc += k[abs(kv)]; - - /* this is where we clip the kernel convolution to the source plane */ - if(j >= 0 && j < iw) { - if(lookup_sx0[i] == -1) { - lookup_sx0[i] = j; - lookup_sk[i] = ki; - } - lookup_sx1[i] = j + 1; - } - } - ki += kd; - } - lookup_wacc[i] = wacc > 0. ? 1. / wacc : 0.; - } - - int cols = col_out2 - col_out1; - int pkgs = get_total_packages(); - int col1 = col_out1, col2 = col1; - for(int i = 0; i < pkgs; col1=col2 ) { - SamplePackage *package = (SamplePackage*)get_package(i); - col2 = ++i * cols / pkgs + col_out1; - package->out_col1 = col1; - package->out_col2 = col2; - } -} - -LoadClient* SampleEngine::new_client() -{ - return new SampleUnit(this); -} - -LoadPackage* SampleEngine::new_package() -{ - return new SamplePackage; -}