4 * Copyright (C) 2008 Adam Williams <broadcast at earthling dot net>
5 * Copyright (C) 2012 Monty <monty@xiph.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 #include "overlayframe.h"
/*
 * Magnitude helpers, one overload per accumulator type, so the chroma
 * MIN/MAX/DARKEN/LIGHTEN blend macros can call mabs() on whatever
 * temporary type the pixel format uses.
 */
static inline int     mabs(int32_t v) { return abs(v); }
/* Returns the full 64-bit magnitude; an int return type would truncate
 * the llabs() result for values outside the int range. */
static inline int64_t mabs(int64_t v) { return llabs(v); }
static inline float   mabs(float v)   { return fabsf(v); }
/*
 * aclip: clamp v into the range [0, mx].
 * One overload per (value type, limit type) pair used by the store macros.
 * The result is computed in the common type of the two arguments and then
 * converted to the return type, exactly as a mixed-type ?: would do.
 */
static inline int32_t aclip(int32_t v, int mx) {
	if( v < 0 ) return 0;
	return v > mx ? mx : v;
}

static inline int64_t aclip(int64_t v, int mx) {
	if( v < 0 ) return 0;
	return v > mx ? (int64_t)mx : v;
}

static inline float aclip(float v, float mx) {
	if( v < 0 ) return 0;
	return v > mx ? mx : v;
}

static inline float aclip(float v, int mx) {
	if( v < 0 ) return 0;
	return v > mx ? (float)mx : v;
}

static inline int aclip(int v, float mx) {
	if( v < 0 ) return 0;
	/* compare and select in float, then truncate back to int */
	const float fv = v;
	return (int)(fv > mx ? mx : fv);
}
/*
 * cclip: clamp a zero-centered (chroma) value.
 * With an integer limit the range is [-(mx/2) - 1, mx/2], e.g. [-128, 127]
 * for mx = 0xff. With a float limit and float value it is [-mx/2, mx/2].
 * The bounds are derived from mx without modifying the parameter.
 */
static inline int32_t cclip(int32_t v, int mx) {
	const int hi = mx / 2, lo = -hi - 1;
	return v > hi ? hi : v < lo ? lo : v;
}

static inline int64_t cclip(int64_t v, int mx) {
	const int hi = mx / 2, lo = -hi - 1;
	return v > hi ? (int64_t)hi : v < lo ? (int64_t)lo : v;
}

static inline float cclip(float v, float mx) {
	const float hi = mx / 2, lo = -hi;
	return v > hi ? hi : v < lo ? lo : v;
}

static inline float cclip(float v, int mx) {
	const int hi = mx / 2, lo = -hi - 1;
	return v > hi ? (float)hi : v < lo ? (float)lo : v;
}

static inline int cclip(int v, float mx) {
	const float hi = mx / 2, lo = -hi - 1;
	/* compare and select in float, then truncate back to int */
	const float fv = v;
	return (int)(fv > hi ? hi : fv < lo ? lo : fv);
}
 * New resampler code; replace the original somewhat blurry engine
74 * with a fairly standard kernel resampling core. This could be used
75 * for full affine transformation but only implements scale/translate.
76 * Mostly reuses the old blending macro code.
80 * 1) Pixels are points, not areas or squares.
82 * 2) To maintain the usual edge and scaling conventions, pixels are
83 * set inward from the image edge, eg, the left edge of an image is
84 * at pixel location x=-.5, not x=0. Although pixels are not
85 * squares, the usual way of stating this is 'the pixel is located
86 * at the center of its square'.
88 * 3) Because of 1 and 2, we must truncate and weight the kernel
89 * convolution at the edge of the input area. Otherwise, all
90 * resampled areas would be bordered by a transparency halo. E.g.
91 * in the old engine, upsampling HDV to 1920x1080 results in the
92 * left and right edges being partially transparent and underlying
93 * layers shining through.
95 * 4) The contribution of fractional pixels at the edges of input
96 * ranges are weighted according to the fraction. Note that the
97 * kernel weighting is adjusted, not the opacity. This is one
98 * exception to 'pixels have no area'.
100 * 5) The opacity of fractional pixels at the edges of the output
101 * range is adjusted according to the fraction. This is the other
102 * exception to 'pixels have no area'.
 * Fractional alpha blending has been modified across the board from:
 *    output_alpha = input_alpha > output_alpha ? input_alpha : output_alpha;
 * to:
 *    output_alpha = output_alpha + ((max - output_alpha) * input_alpha) / max;
#define TRANSFORM_SPP    (4096)    /* number of data pts per unit x in lookup table */
#define INDEX_FRACTION   (8)       /* bits of fraction past TRANSFORM_SPP on kernel
                                      index accumulation */
#define TRANSFORM_MIN    (.5 / TRANSFORM_SPP)

/*
 * Sinc needed for Lanczos kernel.
 * Normalized form: sin(pi * x) / (pi * x), so it is zero at every non-zero
 * integer x. Arguments closer to zero than TRANSFORM_MIN (half a lookup-table
 * step) return the limit value 1 instead of dividing by a vanishing number.
 */
static float sinc(const float x)
{
	if(fabsf(x) < TRANSFORM_MIN)
		return 1.0f;

	/* multiply in double, then narrow, so pi is not pre-rounded to float */
	const float y = (float)(x * 3.14159265358979323846);
	return sinf(y) / y;
}
127 * All resampling (except Nearest Neighbor) is performed via
 * transformed 2D resampling kernels built from 1D lookups.
/*
 * Builds the 1-D lookup table for the requested interpolation type and
 * records the type. Each visible branch allocates m * TRANSFORM_SPP + 1
 * floats (m = 1, 2 or 3), stores m * TRANSFORM_SPP in n, and fills entry i
 * with the kernel value at x = i / TRANSFORM_SPP, starting at x = 0.
 * NOTE(review): the case labels, any other per-case assignments and the
 * default branch are not visible in this view.
 */
130 OverlayKernel::OverlayKernel(int interpolation_type)
133 this->type = interpolation_type;
135 switch(interpolation_type)
/* linear ramp: 1 at x = 0 falling to 0 at x = 1 */
139 lookup = new float[(n = TRANSFORM_SPP) + 1];
140 for (i = 0; i <= TRANSFORM_SPP; i++)
141 lookup[i] = (float)(TRANSFORM_SPP - i) / TRANSFORM_SPP;
144 /* Use a Catmull-Rom filter (not b-spline) */
147 lookup = new float[(n = 2 * TRANSFORM_SPP) + 1];
/* first cubic segment, 0 <= x <= 1 */
148 for(i = 0; i <= TRANSFORM_SPP; i++) {
149 float x = i / (float)TRANSFORM_SPP;
150 lookup[i] = 1.f - 2.5f * x * x + 1.5f * x * x * x;
/* second cubic segment, continuing from the previous i up to x = 2 */
152 for(; i <= 2 * TRANSFORM_SPP; i++) {
153 float x = i / (float)TRANSFORM_SPP;
154 lookup[i] = 2.f - 4.f * x + 2.5f * x * x - .5f * x * x * x;
/* product sinc(x) * sinc(x / 3) sampled over 0 <= x <= 3 */
160 lookup = new float[(n = 3 * TRANSFORM_SPP) + 1];
161 for (i = 0; i <= 3 * TRANSFORM_SPP; i++)
162 lookup[i] = sinc((float)i / TRANSFORM_SPP) *
163 sinc((float)i / TRANSFORM_SPP / 3.0f);
174 OverlayKernel::~OverlayKernel()
176 if(lookup) delete [] lookup;
/*
 * Constructor. The visible statement zero-fills the kernel table so every
 * entry starts out null; overlay() later stores OverlayKernel objects in it.
 * NOTE(review): the rest of the body (including how cpus is used) is not
 * visible in this view.
 */
179 OverlayFrame::OverlayFrame(int cpus)
185 memset(kernel, 0, sizeof(kernel));
189 OverlayFrame::~OverlayFrame()
191 if(temp_frame) delete temp_frame;
193 if(direct_engine) delete direct_engine;
194 if(nn_engine) delete nn_engine;
195 if(sample_engine) delete sample_engine;
197 if(kernel[NEAREST_NEIGHBOR]) delete kernel[NEAREST_NEIGHBOR];
198 if(kernel[BILINEAR]) delete kernel[BILINEAR];
199 if(kernel[BICUBIC]) delete kernel[BICUBIC];
200 if(kernel[LANCZOS]) delete kernel[LANCZOS];
/*
 * Rounds f to the nearest multiple of 1/1024 (rintf uses the current
 * rounding mode). NaN passes through unchanged as NaN.
 */
static float epsilon_snap(float f)
{
	return rintf(f * 1024) / 1024.;
}
/*
 * Composites a rectangle of input onto a rectangle of output.
 *   in_x1, in_y1, in_x2, in_y2      source rectangle, input pixel coordinates
 *   out_x1, out_y1, out_x2, out_y2  destination rectangle, output pixel coordinates
 *   alpha, mode                     handed to the selected engine
 *   interpolation_type              NEAREST_NEIGHBOR or one of the kernel pairs below
 * Returns 1 when a coordinate is NaN or when either rectangle is empty,
 * before or after clipping.
 * NOTE(review): the success return value and several guard conditions and
 * braces are not visible in this view.
 */
208 int OverlayFrame::overlay(VFrame *output, VFrame *input,
209 float in_x1, float in_y1, float in_x2, float in_y2,
210 float out_x1, float out_y1, float out_x2, float out_y2,
211 float alpha, int mode, int interpolation_type)
// snap every coordinate to a multiple of 1/1024
213 in_x1 = epsilon_snap(in_x1);
214 in_x2 = epsilon_snap(in_x2);
215 in_y1 = epsilon_snap(in_y1);
216 in_y2 = epsilon_snap(in_y2);
217 out_x1 = epsilon_snap(out_x1);
218 out_x2 = epsilon_snap(out_x2);
219 out_y1 = epsilon_snap(out_y1);
220 out_y2 = epsilon_snap(out_y2);
// reject NaN coordinates
222 if (isnan(in_x1) || isnan(in_x2) ||
223 isnan(in_y1) || isnan(in_y2) ||
224 isnan(out_x1) || isnan(out_x2) ||
225 isnan(out_y1) || isnan(out_y2)) return 1;
// reject empty or inverted rectangles
227 if( in_x2 <= in_x1 || in_y2 <= in_y1 ) return 1;
228 if( out_x2 <= out_x1 || out_y2 <= out_y1 ) return 1;
// output pixels per input pixel on each axis
230 float xscale = (out_x2 - out_x1) / (in_x2 - in_x1);
231 float yscale = (out_y2 - out_y1) / (in_y2 - in_y1);
232 int in_w = input->get_w(), in_h = input->get_h();
233 int out_w = output->get_w(), out_h = output->get_h();
// Each adjustment below moves an output edge by the scaled amount its
// input edge lies outside the input frame.
// NOTE(review): the guarding conditions are not visible here
// (presumably in_x1 < 0, in_x2 > in_w, in_y1 < 0, in_y2 > in_h).
236 out_x1 -= in_x1 * xscale;
240 out_x2 -= (in_x2 - in_w) * xscale;
244 out_y1 -= in_y1 * yscale;
248 out_y2 -= (in_y2 - in_h) * yscale;
// Conversely, move input edges by the amount the output edges lie outside
// the output frame (the guards for the x1/y1 cases are not visible).
253 in_x1 -= out_x1 / xscale;
256 if( out_x2 > out_w ) {
257 in_x2 -= (out_x2 - out_w) / xscale;
261 in_y1 -= out_y1 / yscale;
264 if( out_y2 > out_h ) {
265 in_y2 -= (out_y2 - out_h) / yscale;
// final clamp of both rectangles to their frames
269 if( in_x1 < 0) in_x1 = 0;
270 if( in_y1 < 0) in_y1 = 0;
271 if( in_x2 > in_w ) in_x2 = in_w;
272 if( in_y2 > in_h ) in_y2 = in_h;
273 if( out_x1 < 0) out_x1 = 0;
274 if( out_y1 < 0) out_y1 = 0;
275 if( out_x2 > out_w ) out_x2 = out_w;
276 if( out_y2 > out_h ) out_y2 = out_h;
// clipping may have emptied a rectangle; otherwise recompute the scale
// from the clipped rectangles
278 if( in_x2 <= in_x1 || in_y2 <= in_y1 ) return 1;
279 if( out_x2 <= out_x1 || out_y2 <= out_y1 ) return 1;
280 xscale = (out_x2 - out_x1) / (in_x2 - in_x1);
281 yscale = (out_y2 - out_y1) / (in_y2 - in_y1);
283 /* don't interpolate integer translations, or scale no-ops */
284 if(xscale == 1. && yscale == 1. &&
285 (int)in_x1 == in_x1 && (int)in_x2 == in_x2 &&
286 (int)in_y1 == in_y1 && (int)in_y2 == in_y2 &&
287 (int)out_x1 == out_x1 && (int)out_x2 == out_x2 &&
288 (int)out_y1 == out_y1 && (int)out_y2 == out_y2) {
// engines are created on first use and then reused
289 if(!direct_engine) direct_engine = new DirectEngine(cpus);
291 direct_engine->output = output; direct_engine->input = input;
292 direct_engine->in_x1 = in_x1; direct_engine->in_y1 = in_y1;
293 direct_engine->out_x1 = out_x1; direct_engine->out_x2 = out_x2;
294 direct_engine->out_y1 = out_y1; direct_engine->out_y2 = out_y2;
295 direct_engine->alpha = alpha; direct_engine->mode = mode;
296 direct_engine->process_packages();
// nearest neighbor requested: no kernel, handled by NNEngine
298 else if(interpolation_type == NEAREST_NEIGHBOR) {
299 if(!nn_engine) nn_engine = new NNEngine(cpus);
300 nn_engine->output = output; nn_engine->input = input;
301 nn_engine->in_x1 = in_x1; nn_engine->in_x2 = in_x2;
302 nn_engine->in_y1 = in_y1; nn_engine->in_y2 = in_y2;
303 nn_engine->out_x1 = out_x1; nn_engine->out_x2 = out_x2;
304 nn_engine->out_y1 = out_y1; nn_engine->out_y2 = out_y2;
305 nn_engine->alpha = alpha; nn_engine->mode = mode;
306 nn_engine->process_packages();
// kernel resampling: choose a kernel type per axis, bilinear by default
309 int xtype = BILINEAR;
310 int ytype = BILINEAR;
312 switch(interpolation_type)
314 case CUBIC_CUBIC: // Bicubic enlargement and reduction
315 xtype = ytype = BICUBIC;
317 case CUBIC_LINEAR: // Bicubic enlargement and bilinear reduction
318 xtype = xscale > 1. ? BICUBIC : BILINEAR;
319 ytype = yscale > 1. ? BICUBIC : BILINEAR;
321 case LINEAR_LINEAR: // Bilinear enlargement and bilinear reduction
322 xtype = ytype = BILINEAR;
324 case LANCZOS_LANCZOS: // Because we can
325 xtype = ytype = LANCZOS;
// The next two tests detect an axis with unit scale and integer edges.
// NOTE(review): the statements they control are not visible here.
329 if(xscale == 1. && (int)in_x1 == in_x1 && (int)in_x2 == in_x2 &&
330 (int)out_x1 == out_x1 && (int)out_x2 == out_x2)
333 if(yscale == 1. && (int)in_y1 == in_y1 && (int)in_y2 == in_y2 &&
334 (int)out_y1 == out_y1 && (int)out_y2 == out_y2)
// build the kernels for both axes
// NOTE(review): any guards around these allocations are not visible here
338 kernel[xtype] = new OverlayKernel(xtype);
340 kernel[ytype] = new OverlayKernel(ytype);
343 * horizontal and vertical are separately resampled. First we
344 * resample the input along X into a transposed, temporary frame,
345 * then resample/transpose the temporary space along X into the
346 * output. Fractional pixels along the edge are handled in the X
347 * direction of each step
349 // resampled dimension matches the transposed output space
350 float temp_y1 = out_x1 - floor(out_x1);
351 float temp_y2 = temp_y1 + (out_x2 - out_x1);
352 int temp_h = ceil(temp_y2);
354 // non-resampled dimension merely cropped
355 float temp_x1 = in_y1 - floor(in_y1);
356 float temp_x2 = temp_x1 + (in_y2 - in_y1);
357 int temp_w = ceil(temp_x2);
// a temp frame with a different color model or size cannot be reused
360 (temp_frame->get_color_model() != input->get_color_model() ||
361 temp_frame->get_w() != temp_w || temp_frame->get_h() != temp_h) ) {
367 temp_frame = new VFrame(0, -1, temp_w, temp_h,
368 input->get_color_model(), -1);
371 temp_frame->clear_frame();
373 if(!sample_engine) sample_engine = new SampleEngine(cpus);
// pass 1: input -> temp_frame using the x kernel, full opacity, REPLACE mode
375 sample_engine->output = temp_frame;
376 sample_engine->input = input;
377 sample_engine->kernel = kernel[xtype];
378 sample_engine->col_out1 = 0;
379 sample_engine->col_out2 = temp_w;
380 sample_engine->row_in = floor(in_y1);
382 sample_engine->in1 = in_x1;
383 sample_engine->in2 = in_x2;
384 sample_engine->out1 = temp_y1;
385 sample_engine->out2 = temp_y2;
386 sample_engine->alpha = 1.;
387 sample_engine->mode = TRANSFER_REPLACE;
388 sample_engine->process_packages();
// pass 2: temp_frame -> output using the y kernel, applying the caller's
// alpha and transfer mode
390 sample_engine->output = output;
391 sample_engine->input = temp_frame;
392 sample_engine->kernel = kernel[ytype];
393 sample_engine->col_out1 = floor(out_x1);
394 sample_engine->col_out2 = ceil(out_x2);
395 sample_engine->row_in = 0;
397 sample_engine->in1 = temp_x1;
398 sample_engine->in2 = temp_x2;
399 sample_engine->out1 = out_y1;
400 sample_engine->out2 = out_y2;
401 sample_engine->alpha = alpha;
402 sample_engine->mode = mode;
403 sample_engine->process_packages();
/*
 * Per-mode blend formulas. For every mode FN there are three macros:
 *   ALPHA_FN(mx, Sa, Da)           resulting alpha
 *   COLOR_FN(mx, Sc, Sa, Dc, Da)   resulting color component
 *   CHROMA_FN(mx, Sc, Sa, Dc, Da)  resulting zero-centered chroma component
 * mx is the maximum channel value, Sa/Sc the source alpha/color and Da/Dc the
 * destination alpha/color. The bracketed comments give the formula with
 * values normalized to 0..1; the macros scale by mx instead.
 * Every argument is parenthesized so expression arguments bind correctly.
 */

// NORMAL [Sa + Da * (1 - Sa), Sc * Sa + Dc * (1 - Sa)]
#define ALPHA_NORMAL(mx, Sa, Da) ((Sa) + ((Da) * ((mx) - (Sa))) / (mx))
#define COLOR_NORMAL(mx, Sc, Sa, Dc, Da) (((Sc) * (Sa) + (Dc) * ((mx) - (Sa))) / (mx))
#define CHROMA_NORMAL COLOR_NORMAL

// ADDITION [(Sa + Da), (Sc + Dc)]
#define ALPHA_ADDITION(mx, Sa, Da) ((Sa) + (Da))
#define COLOR_ADDITION(mx, Sc, Sa, Dc, Da) ((Sc) + (Dc))
#define CHROMA_ADDITION(mx, Sc, Sa, Dc, Da) ((Sc) + (Dc))

// SUBTRACT [(Sa - Da), (Sc - Dc)]
#define ALPHA_SUBTRACT(mx, Sa, Da) ((Sa) - (Da))
#define COLOR_SUBTRACT(mx, Sc, Sa, Dc, Da) ((Sc) - (Dc))
#define CHROMA_SUBTRACT(mx, Sc, Sa, Dc, Da) ((Sc) - (Dc))

// MULTIPLY [(Sa * Da), Sc * Dc]
#define ALPHA_MULTIPLY(mx, Sa, Da) (((Sa) * (Da)) / (mx))
#define COLOR_MULTIPLY(mx, Sc, Sa, Dc, Da) (((Sc) * (Dc)) / (mx))
#define CHROMA_MULTIPLY(mx, Sc, Sa, Dc, Da) (((Sc) * (Dc)) / (mx))

// DIVIDE [(Sa / Da), (Sc / Dc)]; a zero divisor yields mx
#define ALPHA_DIVIDE(mx, Sa, Da) ((Da) ? (((Sa) * (mx)) / (Da)) : (mx))
#define COLOR_DIVIDE(mx, Sc, Sa, Dc, Da) ((Dc) ? (((Sc) * (mx)) / (Dc)) : (mx))
#define CHROMA_DIVIDE(mx, Sc, Sa, Dc, Da) ((Dc) ? (((Sc) * (mx)) / (Dc)) : (mx))

// REPLACE [Sa, Sc] (fade = 1)
#define ALPHA_REPLACE(mx, Sa, Da) (Sa)
#define COLOR_REPLACE(mx, Sc, Sa, Dc, Da) (Sc)
#define CHROMA_REPLACE COLOR_REPLACE

// MAX [max(Sa, Da), MAX(Sc, Dc)]; chroma compares magnitudes
#define ALPHA_MAX(mx, Sa, Da) ((Sa) > (Da) ? (Sa) : (Da))
#define COLOR_MAX(mx, Sc, Sa, Dc, Da) ((Sc) > (Dc) ? (Sc) : (Dc))
#define CHROMA_MAX(mx, Sc, Sa, Dc, Da) (mabs(Sc) > mabs(Dc) ? (Sc) : (Dc))

// MIN [min(Sa, Da), MIN(Sc, Dc)]; chroma compares magnitudes
#define ALPHA_MIN(mx, Sa, Da) ((Sa) < (Da) ? (Sa) : (Da))
#define COLOR_MIN(mx, Sc, Sa, Dc, Da) ((Sc) < (Dc) ? (Sc) : (Dc))
#define CHROMA_MIN(mx, Sc, Sa, Dc, Da) (mabs(Sc) < mabs(Dc) ? (Sc) : (Dc))

// AVERAGE [(Sa + Da) * 0.5, (Sc + Dc) * 0.5]
#define ALPHA_AVERAGE(mx, Sa, Da) (((Sa) + (Da)) / 2)
#define COLOR_AVERAGE(mx, Sc, Sa, Dc, Da) (((Sc) + (Dc)) / 2)
#define CHROMA_AVERAGE COLOR_AVERAGE

// DARKEN [Sa + Da - Sa*Da, Sc*(1 - Da) + Dc*(1 - Sa) + min(Sc, Dc)]
#define ALPHA_DARKEN(mx, Sa, Da) ((Sa) + (Da) - ((Sa) * (Da)) / (mx))
#define COLOR_DARKEN(mx, Sc, Sa, Dc, Da) (((Sc) * ((mx) - (Da)) + (Dc) * ((mx) - (Sa))) / (mx) + ((Sc) < (Dc) ? (Sc) : (Dc)))
#define CHROMA_DARKEN(mx, Sc, Sa, Dc, Da) (((Sc) * ((mx) - (Da)) + (Dc) * ((mx) - (Sa))) / (mx) + (mabs(Sc) < mabs(Dc) ? (Sc) : (Dc)))

// LIGHTEN [Sa + Da - Sa*Da, Sc*(1 - Da) + Dc*(1 - Sa) + max(Sc, Dc)]
#define ALPHA_LIGHTEN(mx, Sa, Da) ((Sa) + (Da) - (Sa) * (Da) / (mx))
#define COLOR_LIGHTEN(mx, Sc, Sa, Dc, Da) (((Sc) * ((mx) - (Da)) + (Dc) * ((mx) - (Sa))) / (mx) + ((Sc) > (Dc) ? (Sc) : (Dc)))
#define CHROMA_LIGHTEN(mx, Sc, Sa, Dc, Da) (((Sc) * ((mx) - (Da)) + (Dc) * ((mx) - (Sa))) / (mx) + (mabs(Sc) > mabs(Dc) ? (Sc) : (Dc)))

// DST [Da, Dc]
#define ALPHA_DST(mx, Sa, Da) (Da)
#define COLOR_DST(mx, Sc, Sa, Dc, Da) (Dc)
#define CHROMA_DST COLOR_DST

// DST_ATOP [Sa, Sc * (1 - Da) + Dc * Sa]
#define ALPHA_DST_ATOP(mx, Sa, Da) (Sa)
#define COLOR_DST_ATOP(mx, Sc, Sa, Dc, Da) (((Sc) * ((mx) - (Da)) + (Dc) * (Sa)) / (mx))
#define CHROMA_DST_ATOP COLOR_DST_ATOP

// DST_IN [Da * Sa, Dc * Sa]
#define ALPHA_DST_IN(mx, Sa, Da) (((Da) * (Sa)) / (mx))
#define COLOR_DST_IN(mx, Sc, Sa, Dc, Da) (((Dc) * (Sa)) / (mx))
#define CHROMA_DST_IN COLOR_DST_IN

// DST_OUT [Da * (1 - Sa), Dc * (1 - Sa)]
#define ALPHA_DST_OUT(mx, Sa, Da) ((Da) * ((mx) - (Sa)) / (mx))
#define COLOR_DST_OUT(mx, Sc, Sa, Dc, Da) ((Dc) * ((mx) - (Sa)) / (mx))
#define CHROMA_DST_OUT COLOR_DST_OUT

// DST_OVER [Sa * (1 - Da) + Da, Sc * (1 - Da) + Dc]
#define ALPHA_DST_OVER(mx, Sa, Da) (((Sa) * ((mx) - (Da))) / (mx) + (Da))
#define COLOR_DST_OVER(mx, Sc, Sa, Dc, Da) ((Sc) * ((mx) - (Da)) / (mx) + (Dc))
#define CHROMA_DST_OVER COLOR_DST_OVER

// SRC [Sa, Sc]
#define ALPHA_SRC(mx, Sa, Da) (Sa)
#define COLOR_SRC(mx, Sc, Sa, Dc, Da) (Sc)
#define CHROMA_SRC COLOR_SRC

// SRC_ATOP [Da, Sc * Da + Dc * (1 - Sa)]
#define ALPHA_SRC_ATOP(mx, Sa, Da) (Da)
#define COLOR_SRC_ATOP(mx, Sc, Sa, Dc, Da) (((Sc) * (Da) + (Dc) * ((mx) - (Sa))) / (mx))
#define CHROMA_SRC_ATOP COLOR_SRC_ATOP

// SRC_IN [Sa * Da, Sc * Da]
#define ALPHA_SRC_IN(mx, Sa, Da) (((Sa) * (Da)) / (mx))
#define COLOR_SRC_IN(mx, Sc, Sa, Dc, Da) ((Sc) * (Da) / (mx))
#define CHROMA_SRC_IN COLOR_SRC_IN

// SRC_OUT [Sa * (1 - Da), Sc * (1 - Da)]
#define ALPHA_SRC_OUT(mx, Sa, Da) ((Sa) * ((mx) - (Da)) / (mx))
#define COLOR_SRC_OUT(mx, Sc, Sa, Dc, Da) ((Sc) * ((mx) - (Da)) / (mx))
#define CHROMA_SRC_OUT COLOR_SRC_OUT

// SRC_OVER [Sa + Da * (1 - Sa), Sc + (1 - Sa) * Dc]
#define ALPHA_SRC_OVER(mx, Sa, Da) ((Sa) + (Da) * ((mx) - (Sa)) / (mx))
#define COLOR_SRC_OVER(mx, Sc, Sa, Dc, Da) ((Sc) + (Dc) * ((mx) - (Sa)) / (mx))
#define CHROMA_SRC_OVER COLOR_SRC_OVER

// OR [Sa + Da - Sa * Da, Sc + Dc - Sc * Dc]
#define ALPHA_OR(mx, Sa, Da) ((Sa) + (Da) - ((Sa) * (Da)) / (mx))
#define COLOR_OR(mx, Sc, Sa, Dc, Da) ((Sc) + (Dc) - ((Sc) * (Dc)) / (mx))
#define CHROMA_OR COLOR_OR

// XOR [Sa * (1 - Da) + Da * (1 - Sa), Sc * (1 - Da) + Dc * (1 - Sa)]
#define ALPHA_XOR(mx, Sa, Da) (((Sa) * ((mx) - (Da)) + (Da) * ((mx) - (Sa))) / (mx))
#define COLOR_XOR(mx, Sc, Sa, Dc, Da) (((Sc) * ((mx) - (Da)) + (Dc) * ((mx) - (Sa))) / (mx))
#define CHROMA_XOR COLOR_XOR
/*
 * ZTYP(ty) declares z_<ty> as an alias of the scalar type ty. The color-model
 * dispatch tables in this file name their accumulator and storage types
 * through these aliases. The unused attribute silences warnings for aliases
 * that a given build never references.
 */
#define ZTYP(ty) typedef ty z_##ty __attribute__ ((__unused__))
ZTYP(int8_t);	ZTYP(uint8_t);
ZTYP(int16_t);	ZTYP(uint16_t);
ZTYP(int32_t);	ZTYP(uint32_t);
ZTYP(int64_t);	ZTYP(uint64_t);
ZTYP(float);	ZTYP(double);
/*
 * Blend step for 3-component pixels (no stored alpha). Loads the three
 * components of inp and out as typ, subtracting ofs from the 2nd and 3rd,
 * and uses mx as the alpha of both pixels. r is produced with COLOR_<FN>;
 * g and b are produced with either CHROMA_<FN> or COLOR_<FN>.
 * Expects r, g, b to be declared in the expansion scope.
 * NOTE(review): the lines selecting between the CHROMA and COLOR pair are
 * not visible here (presumably a test on ofs). rnd is unused in the visible lines.
 */
530 #define ALPHA3_BLEND(FN, typ, inp, out, mx, ofs, rnd) \
531 typ inp0 = (typ)inp[0], inp1 = (typ)inp[1] - ofs; \
532 typ inp2 = (typ)inp[2] - ofs, inp3 = mx; \
533 typ out0 = (typ)out[0], out1 = (typ)out[1] - ofs; \
534 typ out2 = (typ)out[2] - ofs, out3 = mx; \
535 r = COLOR_##FN(mx, inp0, inp3, out0, out3); \
537 g = CHROMA_##FN(mx, inp1, inp3, out1, out3); \
538 b = CHROMA_##FN(mx, inp2, inp3, out2, out3); \
541 g = COLOR_##FN(mx, inp1, inp3, out1, out3); \
542 b = COLOR_##FN(mx, inp2, inp3, out2, out3); \
/*
 * Blend step for 4-component pixels. Same as the 3-component form except that
 * the alpha of each pixel is read from component [3], and the resulting alpha
 * a is produced with ALPHA_<FN>.
 * Expects r, g, b, a to be declared in the expansion scope.
 * NOTE(review): the lines selecting between the CHROMA and COLOR pair are
 * not visible here (presumably a test on ofs). rnd is unused in the visible lines.
 */
545 #define ALPHA4_BLEND(FN, typ, inp, out, mx, ofs, rnd) \
546 typ inp0 = (typ)inp[0], inp1 = (typ)inp[1] - ofs; \
547 typ inp2 = (typ)inp[2] - ofs, inp3 = inp[3]; \
548 typ out0 = (typ)out[0], out1 = (typ)out[1] - ofs; \
549 typ out2 = (typ)out[2] - ofs, out3 = out[3]; \
550 r = COLOR_##FN(mx, inp0, inp3, out0, out3); \
552 g = CHROMA_##FN(mx, inp1, inp3, out1, out3); \
553 b = CHROMA_##FN(mx, inp2, inp3, out2, out3); \
556 g = COLOR_##FN(mx, inp1, inp3, out1, out3); \
557 b = COLOR_##FN(mx, inp2, inp3, out2, out3); \
559 a = ALPHA_##FN(mx, inp3, out3)
561 #define ALPHA_STORE(out, ofs, mx) \
/*
 * Store step for 3-component pixels. Clamps g and b (centered clamp cclip
 * when ofs is non-zero, otherwise aclip to [0, mx]), cross-fades each blended
 * component with the original output component using opcty and trnsp, then
 * hands off to ALPHA_STORE.
 * Expects r, g, b, out0..out2, opcty and trnsp in the expansion scope.
 * NOTE(review): the clamp of r and any condition around the cross-fade are
 * not visible here.
 */
566 #define ALPHA3_STORE(out, ofs, mx) \
568 g = ofs ? cclip(g, mx) : aclip(g, mx); \
569 b = ofs ? cclip(b, mx) : aclip(b, mx); \
571 r = (r * opcty + out0 * trnsp) / mx; \
572 g = (g * opcty + out1 * trnsp) / mx; \
573 b = (b * opcty + out2 * trnsp) / mx; \
575 ALPHA_STORE(out, ofs, mx)
/*
 * Store step for 4-component pixels. Same as the 3-component form, plus the
 * alpha channel: a is cross-faded with out3 and written to out[3] clamped
 * to [0, mx].
 * Expects r, g, b, a, out0..out3, opcty and trnsp in the expansion scope.
 * NOTE(review): the clamp of r and any condition around the cross-fade are
 * not visible here.
 */
577 #define ALPHA4_STORE(out, ofs, mx) \
579 g = ofs ? cclip(g, mx) : aclip(g, mx); \
580 b = ofs ? cclip(b, mx) : aclip(b, mx); \
582 r = (r * opcty + out0 * trnsp) / mx; \
583 g = (g * opcty + out1 * trnsp) / mx; \
584 b = (b * opcty + out2 * trnsp) / mx; \
585 a = (a * opcty + out3 * trnsp) / mx; \
587 ALPHA_STORE(out, ofs, mx); \
588 out[3] = aclip(a, mx)
/*
 * Row loop for the direct (unscaled) path, for one pixel format.
 * opcty is fade scaled by max plus the rounding term, trnsp its complement.
 * ix and ox are converted from pixels to component offsets. For each output
 * row i of the package, the input row i + iy (starting at ix) is blended into
 * output row i (starting at ox) for ow pixels, using the 4- or 3-component
 * blend/store pair.
 * Expects fade, input, output, pkg, ix, iy, ox and ow in the expansion scope.
 * NOTE(review): the else keyword and closing braces are not visible here.
 */
590 #define XBLEND(FN, temp_type, type, max, components, chroma_offset, round) { \
591 temp_type opcty = fade * max + round, trnsp = max - opcty; \
592 type** output_rows = (type**)output->get_rows(); \
593 type** input_rows = (type**)input->get_rows(); \
594 ix *= components; ox *= components; \
596 for(int i = pkg->out_row1; i < pkg->out_row2; i++) { \
597 type* in_row = input_rows[i + iy] + ix; \
598 type* output = output_rows[i] + ox; \
599 for(int j = 0; j < ow; j++) { \
600 if( components == 4 ) { \
601 temp_type r, g, b, a; \
602 ALPHA4_BLEND(FN, temp_type, in_row, output, max, chroma_offset, round); \
603 ALPHA4_STORE(output, chroma_offset, max); \
607 ALPHA3_BLEND(FN, temp_type, in_row, output, max, chroma_offset, round); \
608 ALPHA3_STORE(output, chroma_offset, max); \
610 in_row += components; output += components; \
/*
 * Expands XBLEND for the color model of input. Argument columns are:
 * accumulator type, storage type, maximum value, component count,
 * chroma offset (non-zero for the YUV models) and rounding term.
 * NOTE(review): no break follows the visible cases; how XBLEND leaves the
 * switch is decided by lines that are not visible here.
 */
616 #define XBLEND_ONLY(FN) { \
617 switch(input->get_color_model()) { \
618 case BC_RGB_FLOAT: XBLEND(FN, z_float, z_float, 1.f, 3, 0, 0.f); \
619 case BC_RGBA_FLOAT: XBLEND(FN, z_float, z_float, 1.f, 4, 0, 0.f); \
620 case BC_RGB888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \
621 case BC_YUV888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \
622 case BC_RGBA8888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \
623 case BC_YUVA8888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \
624 case BC_RGB161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \
625 case BC_YUV161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
626 case BC_RGBA16161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \
627 case BC_YUVA16161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
632 /* Direct translate / blend **********************************************/
634 DirectPackage::DirectPackage()
638 DirectUnit::DirectUnit(DirectEngine *server)
641 this->engine = server;
644 DirectUnit::~DirectUnit()
/*
 * Worker entry point for the direct path: blends the rows assigned to this
 * package by expanding XBLEND_ONLY for the engine transfer mode.
 * NOTE(review): the fade declaration line, the switch header and the braces
 * are not visible in this view.
 */
648 void DirectUnit::process_package(LoadPackage *package)
650 DirectPackage *pkg = (DirectPackage*)package;
652 VFrame *output = engine->output;
653 VFrame *input = engine->input;
654 int mode = engine->mode;
// Value of the fade used by XBLEND: && binds tighter than ?:, so this is
// 1.f when the input has alpha AND the mode is TRANSFER_REPLACE,
// otherwise engine->alpha.
656 BC_CModels::has_alpha(input->get_color_model()) &&
657 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
// engine coordinates truncated to int: input and output start columns,
// pixel count per row, and the input-minus-output row offset
659 int ix = engine->in_x1;
660 int ox = engine->out_x1;
661 int ow = engine->out_x2 - ox;
662 int iy = engine->in_y1 - engine->out_y1;
// one blend expansion per transfer mode
665 case TRANSFER_NORMAL: XBLEND_ONLY(NORMAL);
666 case TRANSFER_ADDITION: XBLEND_ONLY(ADDITION);
667 case TRANSFER_SUBTRACT: XBLEND_ONLY(SUBTRACT);
668 case TRANSFER_MULTIPLY: XBLEND_ONLY(MULTIPLY);
669 case TRANSFER_DIVIDE: XBLEND_ONLY(DIVIDE);
670 case TRANSFER_REPLACE: XBLEND_ONLY(REPLACE);
671 case TRANSFER_MAX: XBLEND_ONLY(MAX);
672 case TRANSFER_MIN: XBLEND_ONLY(MIN);
673 case TRANSFER_AVERAGE: XBLEND_ONLY(AVERAGE);
674 case TRANSFER_DARKEN: XBLEND_ONLY(DARKEN);
675 case TRANSFER_LIGHTEN: XBLEND_ONLY(LIGHTEN);
676 case TRANSFER_DST: XBLEND_ONLY(DST);
677 case TRANSFER_DST_ATOP: XBLEND_ONLY(DST_ATOP);
678 case TRANSFER_DST_IN: XBLEND_ONLY(DST_IN);
679 case TRANSFER_DST_OUT: XBLEND_ONLY(DST_OUT);
680 case TRANSFER_DST_OVER: XBLEND_ONLY(DST_OVER);
681 case TRANSFER_SRC: XBLEND_ONLY(SRC);
682 case TRANSFER_SRC_ATOP: XBLEND_ONLY(SRC_ATOP);
683 case TRANSFER_SRC_IN: XBLEND_ONLY(SRC_IN);
684 case TRANSFER_SRC_OUT: XBLEND_ONLY(SRC_OUT);
685 case TRANSFER_SRC_OVER: XBLEND_ONLY(SRC_OVER);
686 case TRANSFER_OR: XBLEND_ONLY(OR);
687 case TRANSFER_XOR: XBLEND_ONLY(XOR);
691 DirectEngine::DirectEngine(int cpus)
692 : LoadServer(cpus, cpus)
696 DirectEngine::~DirectEngine()
/*
 * Clips the copy rectangle and divides its rows among the packages.
 * Negative input origins are shifted onto the output origin and vice versa,
 * the output extent is limited to the output frame, and nothing is scheduled
 * when the clipped width or height is zero.
 * NOTE(review): the definition of rows and the closing braces are not
 * visible in this view.
 */
700 void DirectEngine::init_packages()
702 if(in_x1 < 0) { out_x1 -= in_x1; in_x1 = 0; }
703 if(in_y1 < 0) { out_y1 -= in_y1; in_y1 = 0; }
704 if(out_x1 < 0) { in_x1 -= out_x1; out_x1 = 0; }
705 if(out_y1 < 0) { in_y1 -= out_y1; out_y1 = 0; }
706 if(out_x2 > output->get_w()) out_x2 = output->get_w();
707 if(out_y2 > output->get_h()) out_y2 = output->get_h();
708 int out_w = out_x2 - out_x1;
709 int out_h = out_y2 - out_y1;
710 if( !out_w || !out_h ) return;
// Package k covers rows [k * rows / pkgs, (k + 1) * rows / pkgs) offset by
// out_y1; each package starts where the previous one ended.
713 int pkgs = get_total_packages();
714 int row1 = out_y1, row2 = row1;
715 for(int i = 0; i < pkgs; row1=row2 ) {
716 DirectPackage *package = (DirectPackage*)get_package(i);
717 row2 = ++i * rows / pkgs + out_y1;
718 package->out_row1 = row1;
719 package->out_row2 = row2;
723 LoadClient* DirectEngine::new_client()
725 return new DirectUnit(this);
728 LoadPackage* DirectEngine::new_package()
730 return new DirectPackage;
733 /* Nearest Neighbor scale / translate / blend ********************/
/*
 * Row loop for the nearest-neighbor path, for one pixel format.
 * For each output row of the package the input row is taken from the next
 * entry of the ly table, lx is reset to the start of engine->in_lookup_x,
 * and ow output pixels starting at ox are blended with the 4- or 3-component
 * blend/store pair.
 * Expects fade, input, output, pkg, engine, ly, ox and ow in the expansion scope.
 * NOTE(review): the line that advances in_row through lx, the else keyword
 * and the closing braces are not visible here.
 */
735 #define XBLEND_3NN(FN, temp_type, type, max, components, chroma_offset, round) { \
736 temp_type opcty = fade * max + round, trnsp = max - opcty; \
737 type** output_rows = (type**)output->get_rows(); \
738 type** input_rows = (type**)input->get_rows(); \
741 for(int i = pkg->out_row1; i < pkg->out_row2; i++) { \
742 int *lx = engine->in_lookup_x; \
743 type* in_row = input_rows[*ly++]; \
744 type* output = output_rows[i] + ox; \
745 for(int j = 0; j < ow; j++) { \
747 if( components == 4 ) { \
748 temp_type r, g, b, a; \
749 ALPHA4_BLEND(FN, temp_type, in_row, output, max, chroma_offset, round); \
750 ALPHA4_STORE(output, chroma_offset, max); \
754 ALPHA3_BLEND(FN, temp_type, in_row, output, max, chroma_offset, round); \
755 ALPHA3_STORE(output, chroma_offset, max); \
757 output += components; \
/*
 * Expands XBLEND_3NN for the color model of input. Argument columns are:
 * accumulator type, storage type, maximum value, component count,
 * chroma offset (non-zero for the YUV models) and rounding term.
 * NOTE(review): no break follows the visible cases; how XBLEND_3NN leaves
 * the switch is decided by lines that are not visible here.
 */
763 #define XBLEND_NN(FN) { \
764 switch(input->get_color_model()) { \
765 case BC_RGB_FLOAT: XBLEND_3NN(FN, z_float, z_float, 1.f, 3, 0, 0.f); \
766 case BC_RGBA_FLOAT: XBLEND_3NN(FN, z_float, z_float, 1.f, 4, 0, 0.f); \
767 case BC_RGB888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \
768 case BC_YUV888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \
769 case BC_RGBA8888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \
770 case BC_YUVA8888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \
771 case BC_RGB161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \
772 case BC_YUV161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
773 case BC_RGBA16161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \
774 case BC_YUVA16161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
779 NNPackage::NNPackage()
783 NNUnit::NNUnit(NNEngine *server)
786 this->engine = server;
/*
 * Worker entry point for the nearest-neighbor path: blends the rows assigned
 * to this package by expanding XBLEND_NN for the engine transfer mode.
 * NOTE(review): the fade declaration line, the switch header and the braces
 * are not visible in this view.
 */
793 void NNUnit::process_package(LoadPackage *package)
795 NNPackage *pkg = (NNPackage*)package;
796 VFrame *output = engine->output;
797 VFrame *input = engine->input;
798 int mode = engine->mode;
// Value of the fade used by XBLEND_3NN: && binds tighter than ?:, so this
// is 1.f when the input has alpha AND the mode is TRANSFER_REPLACE,
// otherwise engine->alpha.
800 BC_CModels::has_alpha(input->get_color_model()) &&
801 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
// integer output column range, and the row-lookup cursor positioned at the
// first row of this package
803 int ox = engine->out_x1i;
804 int ow = engine->out_x2i - ox;
805 int *ly = engine->in_lookup_y + pkg->out_row1;
// one blend expansion per transfer mode
808 case TRANSFER_NORMAL: XBLEND_NN(NORMAL);
809 case TRANSFER_ADDITION: XBLEND_NN(ADDITION);
810 case TRANSFER_SUBTRACT: XBLEND_NN(SUBTRACT);
811 case TRANSFER_MULTIPLY: XBLEND_NN(MULTIPLY);
812 case TRANSFER_DIVIDE: XBLEND_NN(DIVIDE);
813 case TRANSFER_REPLACE: XBLEND_NN(REPLACE);
814 case TRANSFER_MAX: XBLEND_NN(MAX);
815 case TRANSFER_MIN: XBLEND_NN(MIN);
816 case TRANSFER_AVERAGE: XBLEND_NN(AVERAGE);
817 case TRANSFER_DARKEN: XBLEND_NN(DARKEN);
818 case TRANSFER_LIGHTEN: XBLEND_NN(LIGHTEN);
819 case TRANSFER_DST: XBLEND_NN(DST);
820 case TRANSFER_DST_ATOP: XBLEND_NN(DST_ATOP);
821 case TRANSFER_DST_IN: XBLEND_NN(DST_IN);
822 case TRANSFER_DST_OUT: XBLEND_NN(DST_OUT);
823 case TRANSFER_DST_OVER: XBLEND_NN(DST_OVER);
824 case TRANSFER_SRC: XBLEND_NN(SRC);
825 case TRANSFER_SRC_ATOP: XBLEND_NN(SRC_ATOP);
826 case TRANSFER_SRC_IN: XBLEND_NN(SRC_IN);
827 case TRANSFER_SRC_OUT: XBLEND_NN(SRC_OUT);
828 case TRANSFER_SRC_OVER: XBLEND_NN(SRC_OVER);
829 case TRANSFER_OR: XBLEND_NN(OR);
830 case TRANSFER_XOR: XBLEND_NN(XOR);
834 NNEngine::NNEngine(int cpus)
835 : LoadServer(cpus, cpus)
/*
 * Releases the two lookup tables that init_packages() allocates with new[].
 * NOTE(review): any guards around the deletes are not visible in this view.
 */
841 NNEngine::~NNEngine()
844 delete[] in_lookup_x;
846 delete[] in_lookup_y;
/*
 * Prepares a nearest-neighbor pass: computes the integer output column
 * range, rebuilds the column and row lookup tables, and divides the output
 * rows among the packages. Nothing is scheduled when the clamped column
 * range is empty.
 * NOTE(review): the components assignments, the count/first/last updates,
 * the in_lookup_y stores, the definition of rows and the braces are not
 * visible in this view.
 */
849 void NNEngine::init_packages()
851 int in_w = input->get_w();
852 int in_h = input->get_h();
853 int out_w = output->get_w();
854 int out_h = output->get_h();
856 float in_subw = in_x2 - in_x1;
857 float in_subh = in_y2 - in_y1;
858 float out_subw = out_x2 - out_x1;
859 float out_subh = out_y2 - out_y1;
860 int first, last, count, i;
// round the output column range to integers and clamp it to [0, out_w]
863 out_x1i = rint(out_x1);
864 out_x2i = rint(out_x2);
865 if(out_x1i < 0) out_x1i = 0;
866 if(out_x1i > out_w) out_x1i = out_w;
867 if(out_x2i < 0) out_x2i = 0;
868 if(out_x2i > out_w) out_x2i = out_w;
869 int out_wi = out_x2i - out_x1i;
870 if( !out_wi ) return;
// tables are rebuilt on every call: one entry per output column in range,
// and one entry per output frame row
872 delete[] in_lookup_x;
873 in_lookup_x = new int[out_wi];
874 delete[] in_lookup_y;
875 in_lookup_y = new int[out_h];
877 switch(input->get_color_model()) {
881 case BC_RGBA16161616:
// Column table: map the center of output column i to an input column.
// The first stored entry is an absolute component offset; later entries
// are component deltas from the previously stored input column.
888 for(i = out_x1i; i < out_x2i; i++) {
889 int in = (i - out_x1 + .5) * in_subw / out_subw + in_x1;
895 if(in >= 0 && in < in_w && in >= in_x1 && i >= 0 && i < out_w) {
898 in_lookup_x[0] = in * components;
901 in_lookup_x[count] = (in-last)*components;
// shrink the column range to the columns that received an entry
910 out_x2i = first + count;
// Row table: map the center of output row i to an input row, limited to
// [in_y1, in_y2] and accepted only when both rows are inside their frames.
913 for(i = out_y1; i < out_y2; i++) {
914 int in = (i - out_y1+.5) * in_subh / out_subh + in_y1;
915 if(in < in_y1) in = in_y1;
916 if(in > in_y2) in = in_y2;
917 if(in >= 0 && in < in_h && i >= 0 && i < out_h) {
918 if(count == 0) first = i;
926 out_y2 = first + count;
// each package starts where the previous one ended
929 int pkgs = get_total_packages();
930 int row1 = out_y1, row2 = row1;
931 for(int i = 0; i < pkgs; row1=row2 ) {
932 NNPackage *package = (NNPackage*)get_package(i);
933 row2 = ++i * rows / pkgs + out_y1;
934 package->out_row1 = row1;
935 package->out_row2 = row2;
939 LoadClient* NNEngine::new_client()
941 return new NNUnit(this);
944 LoadPackage* NNEngine::new_package()
946 return new NNPackage;
949 /* Fully resampled scale / translate / blend ******************************/
950 /* resample into a temporary row vector, then blend */
/*
 * Kernel resampling loop for one pixel format. For every line i in the
 * package range, oh samples are produced into the float scratch row temp and
 * then blended into the output.
 *  - When k is null the input samples are copied, with chroma_offset removed
 *    from the 2nd and 3rd components.
 *  - Otherwise sample j accumulates input pixels starting at lookup_sx0[j]
 *    while x is below lookup_sx1[j], each weighted by k[abs(ki >> INDEX_FRACTION)].
 *    The weight is scaled by i1f / i2f at the fractional input edges and,
 *    for 4 components, multiplied by the pixel alpha (awacc keeps the
 *    unmultiplied weight sum). Sums are then normalized by the reciprocal
 *    of the accumulated weights.
 *  - The first and last scratch samples are scaled by o1f and o2f.
 *  - Each sample is blended with chroma offset 0 (it was already removed)
 *    and stored with the real chroma_offset. The destination is
 *    output_rows[j] + i * components, so j selects the output row and i the
 *    column.
 * Expects fade, oh, o1i, i1i, i2i, i1f, i2f, o1f, o2f, k, lookup_sk,
 * lookup_sx0, lookup_sx1, voutput, vinput, pkg and engine in the expansion scope.
 * NOTE(review): the stepping of ki, the wacc accumulation, the reset of
 * tempp before blending, and several braces are not visible here.
 */
952 #define XSAMPLE(FN, temp_type, type, max, components, chroma_offset, round) { \
953 float temp[oh*components]; \
954 temp_type opcty = fade * max + round, trnsp = max - opcty; \
955 type **output_rows = (type**)voutput->get_rows() + o1i; \
956 type **input_rows = (type**)vinput->get_rows(); \
958 for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \
959 type *input = input_rows[i - engine->col_out1 + engine->row_in]; \
960 float *tempp = temp; \
961 if( !k ) { /* direct copy case */ \
962 type *ip = input + i1i * components; \
963 for(int j = 0; j < oh; j++) { \
965 *tempp++ = *ip++ - chroma_offset; \
966 *tempp++ = *ip++ - chroma_offset; \
967 if( components == 4 ) *tempp++ = *ip++; \
970 else { /* resample */ \
971 for(int j = 0; j < oh; j++) { \
972 float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \
973 int ki = lookup_sk[j], x = lookup_sx0[j]; \
974 type *ip = input + x * components; \
975 float wacc = 0, awacc = 0; \
976 while(x++ < lookup_sx1[j]) { \
977 float kv = k[abs(ki >> INDEX_FRACTION)]; \
978 /* handle fractional pixels on edges of input */ \
979 if(x == i1i) kv *= i1f; \
980 if(x + 1 == i2i) kv *= i2f; \
981 if( components == 4 ) { awacc += kv; kv *= ip[3]; } \
983 racc += kv * *ip++; \
984 gacc += kv * (*ip++ - chroma_offset); \
985 bacc += kv * (*ip++ - chroma_offset); \
986 if( components == 4 ) { aacc += kv; ++ip; } \
989 if(wacc > 0.) wacc = 1. / wacc; \
990 *tempp++ = racc * wacc; \
991 *tempp++ = gacc * wacc; \
992 *tempp++ = bacc * wacc; \
993 if( components == 4 ) { \
994 if(awacc > 0.) awacc = 1. / awacc; \
995 *tempp++ = aacc * awacc; \
1000 /* handle fractional pixels on edges of output */ \
1001 temp[0] *= o1f; temp[1] *= o1f; temp[2] *= o1f; \
1002 if( components == 4 ) temp[3] *= o1f; \
1003 tempp = temp + (oh-1)*components; \
1004 tempp[0] *= o2f; tempp[1] *= o2f; tempp[2] *= o2f; \
1005 if( components == 4 ) tempp[3] *= o2f; \
1007 /* blend output */ \
1008 for(int j = 0; j < oh; j++) { \
1009 type *output = output_rows[j] + i * components; \
1010 if( components == 4 ) { \
1011 temp_type r, g, b, a; \
1012 ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, round); \
1013 ALPHA4_STORE(output, chroma_offset, max); \
1016 temp_type r, g, b; \
1017 ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, round); \
1018 ALPHA3_STORE(output, chroma_offset, max); \
1020 tempp += components; \
/*
 * Expands XSAMPLE for the color model of vinput. Argument columns are:
 * accumulator type, storage type, maximum value, component count,
 * chroma offset (non-zero for the YUV models) and rounding term.
 * NOTE(review): no break follows the visible cases; how XSAMPLE leaves the
 * switch is decided by lines that are not visible here.
 */
1026 #define XBLEND_SAMPLE(FN) { \
1027 switch(vinput->get_color_model()) { \
1028 case BC_RGB_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 3, 0.f, 0.f); \
1029 case BC_RGBA_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 4, 0.f, 0.f); \
1030 case BC_RGB888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \
1031 case BC_YUV888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \
1032 case BC_RGBA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \
1033 case BC_YUVA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \
1034 case BC_RGB161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \
1035 case BC_YUV161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
1036 case BC_RGBA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \
1037 case BC_YUVA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
1043 SamplePackage::SamplePackage()
1047 SampleUnit::SampleUnit(SampleEngine *server)
1048 : LoadClient(server)
1050 this->engine = server;
1053 SampleUnit::~SampleUnit()
/*
 * Worker entry point for the kernel resampling path: gathers the engine
 * parameters into locals and expands XBLEND_SAMPLE for the engine transfer
 * mode.
 * NOTE(review): the statement controlled by the empty-range test, the fade
 * declaration line, the definitions of i2i, o2i and oh, the switch header
 * and the braces are not visible in this view.
 */
1057 void SampleUnit::process_package(LoadPackage *package)
1059 SamplePackage *pkg = (SamplePackage*)package;
1061 float i1 = engine->in1;
1062 float i2 = engine->in2;
1063 float o1 = engine->out1;
1064 float o2 = engine->out2;
// empty input or output range
1066 if(i2 - i1 <= 0 || o2 - o1 <= 0)
1069 VFrame *voutput = engine->output;
1070 VFrame *vinput = engine->input;
1071 int mode = engine->mode;
// Value of the fade used by XSAMPLE: && binds tighter than ?:, so this is
// 1.f when the input has alpha AND the mode is TRANSFER_REPLACE,
// otherwise engine->alpha.
1073 BC_CModels::has_alpha(vinput->get_color_model()) &&
1074 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
1076 //int iw = vinput->get_w();
// integer start of the input range, and the fractional weights applied to
// the first and last input pixels
1077 int i1i = floor(i1);
1079 float i1f = 1.f - i1 + i1i;
1080 float i2f = 1.f - i2i + i2;
// the same for the output range
1082 int o1i = floor(o1);
1084 float o1f = 1.f - o1 + o1i;
1085 float o2f = 1.f - o2i + o2;
// kernel table and the per-output-sample lookup tables held by the engine
1088 float *k = engine->kernel->lookup;
1089 //float kw = engine->kernel->width;
1090 //int kn = engine->kernel->n;
1091 int kd = engine->kd;
1093 int *lookup_sx0 = engine->lookup_sx0;
1094 int *lookup_sx1 = engine->lookup_sx1;
1095 int *lookup_sk = engine->lookup_sk;
1096 //float *lookup_wacc = engine->lookup_wacc;
// one resample-and-blend expansion per transfer mode
1099 case TRANSFER_NORMAL: XBLEND_SAMPLE(NORMAL);
1100 case TRANSFER_ADDITION: XBLEND_SAMPLE(ADDITION);
1101 case TRANSFER_SUBTRACT: XBLEND_SAMPLE(SUBTRACT);
1102 case TRANSFER_MULTIPLY: XBLEND_SAMPLE(MULTIPLY);
1103 case TRANSFER_DIVIDE: XBLEND_SAMPLE(DIVIDE);
1104 case TRANSFER_REPLACE: XBLEND_SAMPLE(REPLACE);
1105 case TRANSFER_MAX: XBLEND_SAMPLE(MAX);
1106 case TRANSFER_MIN: XBLEND_SAMPLE(MIN);
1107 case TRANSFER_AVERAGE: XBLEND_SAMPLE(AVERAGE);
1108 case TRANSFER_DARKEN: XBLEND_SAMPLE(DARKEN);
1109 case TRANSFER_LIGHTEN: XBLEND_SAMPLE(LIGHTEN);
1110 case TRANSFER_DST: XBLEND_SAMPLE(DST);
1111 case TRANSFER_DST_ATOP: XBLEND_SAMPLE(DST_ATOP);
1112 case TRANSFER_DST_IN: XBLEND_SAMPLE(DST_IN);
1113 case TRANSFER_DST_OUT: XBLEND_SAMPLE(DST_OUT);
1114 case TRANSFER_DST_OVER: XBLEND_SAMPLE(DST_OVER);
1115 case TRANSFER_SRC: XBLEND_SAMPLE(SRC);
1116 case TRANSFER_SRC_ATOP: XBLEND_SAMPLE(SRC_ATOP);
1117 case TRANSFER_SRC_IN: XBLEND_SAMPLE(SRC_IN);
1118 case TRANSFER_SRC_OUT: XBLEND_SAMPLE(SRC_OUT);
1119 case TRANSFER_SRC_OVER: XBLEND_SAMPLE(SRC_OVER);
1120 case TRANSFER_OR: XBLEND_SAMPLE(OR);
1121 case TRANSFER_XOR: XBLEND_SAMPLE(XOR);
/*
 * Construct the sample engine on top of LoadServer, passing cpus for both
 * LoadServer constructor arguments.
 * NOTE(review): the constructor body is not visible in this view.
 * init_packages() applies delete [] to the lookup_* members before it
 * allocates them, so the body presumably starts them out as null -
 * confirm.
 */
1126 SampleEngine::SampleEngine(int cpus)
1127 : LoadServer(cpus, cpus)
1136 SampleEngine::~SampleEngine()
1138 if(lookup_sx0) delete [] lookup_sx0;
1139 if(lookup_sx1) delete [] lookup_sx1;
1140 if(lookup_sk) delete [] lookup_sk;
1141 if(lookup_wacc) delete [] lookup_wacc;
1145 * unlike the Direct and NN engines, the Sample engine works across
1146 * output columns (it makes for more economical memory addressing
1147 * during convolution)
/*
 * Prepare one resampling pass.
 *
 * Rebuilds the four per-row tables (lookup_sx0, lookup_sx1, lookup_sk,
 * lookup_wacc), each with oh entries, from the current in1/in2 and
 * out1/out2 ranges and the kernel, then divides the column range
 * col_out1..col_out2 among the packages.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view (for example the declaration of wacc, the initial values of the
 * table entries and the per-step advance of ki); comments below describe
 * only what the visible lines establish.
 */
1149 void SampleEngine::init_packages()
1151 int iw = input->get_w();
// Integer bounds of the source range, and the covered share of the first
// (i1f) and last (i2f) source pixel.
1152 int i1i = floor(in1);
1153 int i2i = ceil(in2);
1154 float i1f = 1.f - in1 + i1i;
1155 float i2f = 1.f - i2i + in2;
// First destination row, its fractional offset, and the number of
// destination rows touched.
1157 int oy = floor(out1);
1158 float oyf = out1 - oy;
1159 int oh = ceil(out2) - oy;
1161 float *k = kernel->lookup;
1162 float kw = kernel->width;
// Nothing to do for an empty or inverted range.
// NOTE(review): the guarded statement is not visible; presumably return.
1165 if(in2 - in1 <= 0 || out2 - out1 <= 0)
1168 /* determine kernel spatial coverage */
// scale is destination units per source unit; iscale is its inverse.
1169 float scale = (out2 - out1) / (in2 - in1);
1170 float iscale = (in2 - in1) / (out2 - out1);
1171 float coverage = fabs(1.f / scale);
// When reducing (coverage >= 1) the kernel reach is widened by coverage
// and the kernel index step is multiplied by scale; otherwise the kernel
// is used at its own width.
1172 float bound = (coverage < 1.f ? kw : kw * coverage) - (.5f / TRANSFORM_SPP);
1173 float coeff = (coverage < 1.f ? 1.f : scale) * TRANSFORM_SPP;
// Drop tables from any previous pass before allocating new ones.
1175 delete [] lookup_sx0;
1176 delete [] lookup_sx1;
1177 delete [] lookup_sk;
1178 delete [] lookup_wacc;
1180 lookup_sx0 = new int[oh];
1181 lookup_sx1 = new int[oh];
1182 lookup_sk = new int[oh];
1183 lookup_wacc = new float[oh];
// Kernel index step per source pixel, scaled by 2^INDEX_FRACTION with .5
// added before the conversion so that it rounds.
1185 kd = (double)coeff * (1 << INDEX_FRACTION) + .5;
1187 /* precompute kernel values and weight sums */
1188 for(int i = 0; i < oh; i++) {
1189 /* map destination back to source */
// Pixel centers are used on both sides, hence the +.5 and -.5 terms.
1190 double sx = (i - oyf + .5) * iscale + in1 - .5;
1193 * clip iteration to source area but not source plane. Points
1194 * outside the source plane count as transparent. Points outside
1195 * the source area don't count at all. The actual convolution
1196 * later will be clipped to both, but we need to compute
1199 int sx0 = MAX((int)floor(sx - bound) + 1, i1i);
1200 int sx1 = MIN((int)ceil(sx + bound), i2i);
1201 int ki = (double)(sx0 - sx) * coeff * (1 << INDEX_FRACTION)
1202 + (1 << (INDEX_FRACTION - 1)) + .5;
// Walk the source pixels sx0..sx1-1 that fall under the kernel.
1208 for(int j= sx0; j < sx1; j++) {
// Integer kernel table index; the table is addressed with abs(kv) below.
1209 int kv = (ki >> INDEX_FRACTION);
1213 * the contribution of the first and last input pixel (if
1214 * fractional) are linearly weighted by the fraction
// NOTE(review): the condition selecting the i1f branch is not visible.
1217 wacc += k[abs(kv)] * i1f;
1218 else if(j + 1 == i2i)
1219 wacc += k[abs(kv)] * i2f;
1223 /* this is where we clip the kernel convolution to the source plane */
1224 if(j >= 0 && j < iw) {
// NOTE(review): -1 looks like a marker for an entry that has not been
// set yet; the line that stores it is not visible - confirm.
1225 if(lookup_sx0[i] == -1) {
// One past the last in-plane source pixel seen so far for this row.
1229 lookup_sx1[i] = j + 1;
// Store the reciprocal of the weight sum, or 0 when no weight was
// accumulated, which avoids a division by zero.
1234 lookup_wacc[i] = wacc > 0. ? 1. / wacc : 0.;
// Split the output columns into pkgs consecutive slices. Boundaries are
// computed cumulatively, so slices share edges and the last one ends at
// col_out1 + cols.
1237 int cols = col_out2 - col_out1;
1238 int pkgs = get_total_packages();
1239 int col1 = col_out1, col2 = col1;
1240 for(int i = 0; i < pkgs; col1=col2 ) {
1241 SamplePackage *package = (SamplePackage*)get_package(i);
// i is advanced here, not in the loop header.
1242 col2 = ++i * cols / pkgs + col_out1;
1243 package->out_col1 = col1;
1244 package->out_col2 = col2;
1248 LoadClient* SampleEngine::new_client()
1250 return new SampleUnit(this);
1253 LoadPackage* SampleEngine::new_package()
1255 return new SamplePackage;