translation fixes, pref wdw min width, xfer mode ops
[goodguy/history.git] / cinelerra-5.1 / cinelerra / overlayframe.C.clamp
1 #include <math.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <stdint.h>
5
6 #include "clip.h"
7 #include "edl.inc"
8 #include "mutex.h"
9 #include "overlayframe.h"
10 #include "vframe.h"
11
12 OverlayFrame::OverlayFrame(int cpus)
13 {
14         temp_frame = 0;
15         blend_engine = 0;
16         scale_engine = 0;
17         scaletranslate_engine = 0;
18         translate_engine = 0;
19         this->cpus = cpus;
20 }
21
22 OverlayFrame::~OverlayFrame()
23 {
24 //printf("OverlayFrame::~OverlayFrame 1\n");
25         if(temp_frame) delete temp_frame;
26         if(scale_engine) delete scale_engine;
27         if(translate_engine) delete translate_engine;
28         if(blend_engine) delete blend_engine;
29         if(scaletranslate_engine) delete scaletranslate_engine;
30 //printf("OverlayFrame::~OverlayFrame 2\n");
31 }
32
33
34
35
36
37
38
39
40 // Verification: 
41
42 // (255 * 255 + 0 * 0) / 255 = 255
43 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
44
45 // (65535 * 65535 + 0 * 0) / 65535 = 65535
46 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
47
48
49 // Branch prediction 4 U
50
// Blend one 3 component pixel into output[0..2].
//
// Expanded where these names are in scope:
//   output                    pointer to the destination pixel, read and written
//   input1, input2, input3    source components
//   opacity, transparency     weights of the source and of the destination
//   mode                      one of the TRANSFER_* constants
// max is the largest component value and type is the component type.
//
// Every product is carried out in 64 bits.  output[] is promoted to int, so a
// plain output[n] * transparency overflows for 16 bit components.
// An unhandled mode leaves the destination pixel as it was.
#define BLEND_3(max, type) \
{ \
        int64_t r = output[0], g = output[1], b = output[2]; \
 \
        switch(mode) \
        { \
                case TRANSFER_DIVIDE: \
/* A zero divisor saturates to max */ \
                        r = output[0] ? (((int64_t)input1 * max) / output[0]) : max; \
                        g = output[1] ? (((int64_t)input2 * max) / output[1]) : max; \
                        b = output[2] ? (((int64_t)input3 * max) / output[2]) : max; \
                        r = (r * opacity + (int64_t)output[0] * transparency) / max; \
                        g = (g * opacity + (int64_t)output[1] * transparency) / max; \
                        b = (b * opacity + (int64_t)output[2] * transparency) / max; \
                        break; \
                case TRANSFER_MULTIPLY: \
                        r = ((int64_t)input1 * output[0]) / max; \
                        g = ((int64_t)input2 * output[1]) / max; \
                        b = ((int64_t)input3 * output[2]) / max; \
                        r = (r * opacity + (int64_t)output[0] * transparency) / max; \
                        g = (g * opacity + (int64_t)output[1] * transparency) / max; \
                        b = (b * opacity + (int64_t)output[2] * transparency) / max; \
                        break; \
                case TRANSFER_SUBTRACT: \
                        r = (((int64_t)input1 - output[0]) * opacity + (int64_t)output[0] * transparency) / max; \
                        g = (((int64_t)input2 - output[1]) * opacity + (int64_t)output[1] * transparency) / max; \
                        b = (((int64_t)input3 - output[2]) * opacity + (int64_t)output[2] * transparency) / max; \
                        break; \
                case TRANSFER_ADDITION: \
                        r = (((int64_t)input1 + output[0]) * opacity + (int64_t)output[0] * transparency) / max; \
                        g = (((int64_t)input2 + output[1]) * opacity + (int64_t)output[1] * transparency) / max; \
                        b = (((int64_t)input3 + output[2]) * opacity + (int64_t)output[2] * transparency) / max; \
                        break; \
                case TRANSFER_REPLACE: \
                        r = input1; \
                        g = input2; \
                        b = input3; \
                        break; \
                case TRANSFER_NORMAL: \
                        r = ((int64_t)input1 * opacity + (int64_t)output[0] * transparency) / max; \
                        g = ((int64_t)input2 * opacity + (int64_t)output[1] * transparency) / max; \
                        b = ((int64_t)input3 * opacity + (int64_t)output[2] * transparency) / max; \
                        break; \
        } \
 \
/* Subtract and addition can leave the 0 - max range */ \
        output[0] = (type)CLIP(r, 0, max); \
        output[1] = (type)CLIP(g, 0, max); \
        output[2] = (type)CLIP(b, 0, max); \
}
100
101
102
103
104
105 // Blending equations are drastically different for 3 and 4 components
// Blend one 4 component pixel into output[0..3].
//
// Expanded where these names are in scope:
//   output                            pointer to the destination pixel, read and written
//   input1, input2, input3, input4    source components, input4 being alpha
//   opacity                           weight of the whole source layer
//   mode                              one of the TRANSFER_* constants
// max is the largest component value and type is the component type.
//
// The source weight is opacity scaled by the source alpha.  The destination
// weight is the remainder scaled by the destination alpha.  Except for
// TRANSFER_REPLACE the resulting alpha is the larger of the two alphas.
//
// opacity * input4 is widened to 64 bits because it overflows int for 16 bit
// components.  An unhandled mode leaves the destination pixel as it was.
#define BLEND_4(max, type) \
{ \
        int64_t r = output[0], g = output[1], b = output[2], a = output[3]; \
        int64_t pixel_opacity, pixel_transparency; \
 \
        pixel_opacity = (int64_t)opacity * input4 / max; \
        pixel_transparency = (max - pixel_opacity) * output[3] / max; \
 \
        switch(mode) \
        { \
                case TRANSFER_DIVIDE: \
/* A zero divisor saturates to max */ \
                        r = output[0] ? (((int64_t)input1 * max) / output[0]) : max; \
                        g = output[1] ? (((int64_t)input2 * max) / output[1]) : max; \
                        b = output[2] ? (((int64_t)input3 * max) / output[2]) : max; \
                        r = (r * pixel_opacity + output[0] * pixel_transparency) / max; \
                        g = (g * pixel_opacity + output[1] * pixel_transparency) / max; \
                        b = (b * pixel_opacity + output[2] * pixel_transparency) / max; \
                        a = input4 > output[3] ? input4 : output[3]; \
                        break; \
                case TRANSFER_MULTIPLY: \
                        r = ((int64_t)input1 * output[0]) / max; \
                        g = ((int64_t)input2 * output[1]) / max; \
                        b = ((int64_t)input3 * output[2]) / max; \
                        r = (r * pixel_opacity + output[0] * pixel_transparency) / max; \
                        g = (g * pixel_opacity + output[1] * pixel_transparency) / max; \
                        b = (b * pixel_opacity + output[2] * pixel_transparency) / max; \
                        a = input4 > output[3] ? input4 : output[3]; \
                        break; \
                case TRANSFER_SUBTRACT: \
                        r = (((int64_t)input1 - output[0]) * pixel_opacity + output[0] * pixel_transparency) / max; \
                        g = (((int64_t)input2 - output[1]) * pixel_opacity + output[1] * pixel_transparency) / max; \
                        b = (((int64_t)input3 - output[2]) * pixel_opacity + output[2] * pixel_transparency) / max; \
                        a = input4 > output[3] ? input4 : output[3]; \
                        break; \
                case TRANSFER_ADDITION: \
                        r = (((int64_t)input1 + output[0]) * pixel_opacity + output[0] * pixel_transparency) / max; \
                        g = (((int64_t)input2 + output[1]) * pixel_opacity + output[1] * pixel_transparency) / max; \
                        b = (((int64_t)input3 + output[2]) * pixel_opacity + output[2] * pixel_transparency) / max; \
                        a = input4 > output[3] ? input4 : output[3]; \
                        break; \
                case TRANSFER_REPLACE: \
                        r = input1; \
                        g = input2; \
                        b = input3; \
                        a = input4; \
                        break; \
                case TRANSFER_NORMAL: \
                        r = ((int64_t)input1 * pixel_opacity + output[0] * pixel_transparency) / max; \
                        g = ((int64_t)input2 * pixel_opacity + output[1] * pixel_transparency) / max; \
                        b = ((int64_t)input3 * pixel_opacity + output[2] * pixel_transparency) / max; \
                        a = input4 > output[3] ? input4 : output[3]; \
                        break; \
        } \
 \
/* Alpha is always one of the incoming alphas, so only the colors are clipped */ \
        output[0] = (type)CLIP(r, 0, max); \
        output[1] = (type)CLIP(g, 0, max); \
        output[2] = (type)CLIP(b, 0, max); \
        output[3] = (type)a; \
}
165
166
167
168
169
170
171
172
173 // Bicubic algorithm using multiprocessors
174 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
175
176 // Nearest neighbor algorithm using multiprocessors for blending
177 // input -> scale + translate -> blend -> output
178
179
180 int OverlayFrame::overlay(VFrame *output, 
181         VFrame *input, 
182         float in_x1, 
183         float in_y1, 
184         float in_x2, 
185         float in_y2, 
186         float out_x1, 
187         float out_y1, 
188         float out_x2, 
189         float out_y2, 
190         float alpha,       // 0 - 1
191         int mode,
192         int interpolation_type)
193 {
194         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
195         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
196
197 //printf("OverlayFrame::overlay 1 %d %f\n", mode, alpha);
198 // Limit values
199         if(in_x1 < 0)
200         {
201                 out_x1 += -in_x1 * w_scale;
202                 in_x1 = 0;
203         }
204         else
205         if(in_x1 >= input->get_w())
206         {
207                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
208                 in_x1 = input->get_w();
209         }
210
211         if(in_y1 < 0)
212         {
213                 out_y1 += -in_y1 * h_scale;
214                 in_y1 = 0;
215         }
216         else
217         if(in_y1 >= input->get_h())
218         {
219                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
220                 in_y1 = input->get_h();
221         }
222
223         if(in_x2 < 0)
224         {
225                 out_x2 += -in_x2 * w_scale;
226                 in_x2 = 0;
227         }
228         else
229         if(in_x2 >= input->get_w())
230         {
231                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
232                 in_x2 = input->get_w();
233         }
234
235         if(in_y2 < 0)
236         {
237                 out_y2 += -in_y2 * h_scale;
238                 in_y2 = 0;
239         }
240         else
241         if(in_y2 >= input->get_h())
242         {
243                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
244                 in_y2 = input->get_h();
245         }
246
247         if(out_x1 < 0)
248         {
249                 in_x1 += -out_x1 / w_scale;
250                 out_x1 = 0;
251         }
252         else
253         if(out_x1 >= output->get_w())
254         {
255                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
256                 out_x1 = output->get_w();
257         }
258
259         if(out_y1 < 0)
260         {
261                 in_y1 += -out_y1 / h_scale;
262                 out_y1 = 0;
263         }
264         else
265         if(out_y1 >= output->get_h())
266         {
267                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
268                 out_y1 = output->get_h();
269         }
270
271         if(out_x2 < 0)
272         {
273                 in_x2 += -out_x2 / w_scale;
274                 out_x2 = 0;
275         }
276         else
277         if(out_x2 >= output->get_w())
278         {
279                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
280                 out_x2 = output->get_w();
281         }
282
283         if(out_y2 < 0)
284         {
285                 in_y2 += -out_y2 / h_scale;
286                 out_y2 = 0;
287         }
288         else
289         if(out_y2 >= output->get_h())
290         {
291                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
292                 out_y2 = output->get_h();
293         }
294
295
296
297
298
299         float in_w = in_x2 - in_x1;
300         float in_h = in_y2 - in_y1;
301         float out_w = out_x2 - out_x1;
302         float out_h = out_y2 - out_y1;
303 // Input for translation operation
304         VFrame *translation_input = input;
305
306
307
308 // printf("OverlayFrame::overlay %f %f %f %f -> %f %f %f %f\n", in_x1,
309 //                      in_y1,
310 //                      in_x2,
311 //                      in_y2,
312 //                      out_x1,
313 //                      out_y1,
314 //                      out_x2,
315 //                      out_y2);
316
317
318
319
320
321 // ****************************************************************************
322 // Transfer to temp buffer by scaling nearest integer boundaries
323 // ****************************************************************************
324         if(interpolation_type != NEAREST_NEIGHBOR &&
325                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
326         {
327 // Create integer boundaries for interpolation
328                 int in_x1_int = (int)in_x1;
329                 int in_y1_int = (int)in_y1;
330                 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
331                 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
332
333 // Dimensions of temp frame.  Integer boundaries scaled.
334                 int temp_w = (int)ceil(w_scale * (in_x2_int - in_x1_int));
335                 int temp_h = (int)ceil(h_scale * (in_y2_int - in_y1_int));
336                 VFrame *scale_output;
337
338
339
340 #define NO_TRANSLATION1 \
341         (EQUIV(in_x1, 0) && \
342         EQUIV(in_y1, 0) && \
343         EQUIV(out_x1, 0) && \
344         EQUIV(out_y1, 0) && \
345         EQUIV(in_x2, in_x2_int) && \
346         EQUIV(in_y2, in_y2_int) && \
347         EQUIV(out_x2, temp_w) && \
348         EQUIV(out_y2, temp_h))
349
350
351 #define NO_BLEND \
352         (EQUIV(alpha, 1) && \
353         (mode == TRANSFER_REPLACE || \
354         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
355
356
357
358
359
360 // Prepare destination for operation
361
362 // No translation and no blending.  The blending operation is built into the
363 // translation unit but not the scaling unit.
364 // input -> output
365                 if(NO_TRANSLATION1 &&
366                         NO_BLEND)
367                 {
368 // printf("OverlayFrame::overlay input -> output\n");
369
370                         scale_output = output;
371                         translation_input = 0;
372                 }
373                 else
374 // If translation or blending
375 // input -> nearest integer boundary temp
376                 {
377                         if(temp_frame && 
378                                 (temp_frame->get_w() != temp_w ||
379                                         temp_frame->get_h() != temp_h))
380                         {
381                                 delete temp_frame;
382                                 temp_frame = 0;
383                         }
384
385                         if(!temp_frame)
386                         {
387                                 temp_frame = new VFrame(0,
388                                         temp_w,
389                                         temp_h,
390                                         input->get_color_model(),
391                                         -1);
392                         }
393 //printf("OverlayFrame::overlay input -> temp\n");
394
395
396                         temp_frame->clear_frame();
397
398 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
399 //      temp_w, temp_h);
400                         scale_output = temp_frame;
401                         translation_input = scale_output;
402
403 // Adjust input coordinates to reflect new scaled coordinates.
404                         in_x1 = (in_x1 - in_x1_int) * w_scale;
405                         in_y1 = (in_y1 - in_y1_int) * h_scale;
406                         in_x2 = (in_x2 - in_x1_int) * w_scale;
407                         in_y2 = (in_y2 - in_y1_int) * h_scale;
408                 }
409
410
411
412 //printf("Overlay 1\n");
413
414 // Scale input -> scale_output
415                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
416                 scale_engine->scale_output = scale_output;
417                 scale_engine->scale_input = input;
418                 scale_engine->w_scale = w_scale;
419                 scale_engine->h_scale = h_scale;
420                 scale_engine->in_x1_int = in_x1_int;
421                 scale_engine->in_y1_int = in_y1_int;
422                 scale_engine->out_w_int = temp_w;
423                 scale_engine->out_h_int = temp_h;
424                 scale_engine->interpolation_type = interpolation_type;
425 //printf("Overlay 2\n");
426
427 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
428                 scale_engine->process_packages();
429 //printf("OverlayFrame::overlay ScaleEngine 2\n");
430
431
432
433         }
434
435 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
436 //      in_x1, 
437 //      in_y1, 
438 //      in_x2, 
439 //      in_y2, 
440 //      out_x1, 
441 //      out_y1, 
442 //      out_x2, 
443 //      out_y2);
444
445
446
447
448
449 #define NO_TRANSLATION2 \
450         (EQUIV(in_x1, 0) && \
451         EQUIV(in_y1, 0) && \
452         EQUIV(in_x2, translation_input->get_w()) && \
453         EQUIV(in_y2, translation_input->get_h()) && \
454         EQUIV(out_x1, 0) && \
455         EQUIV(out_y1, 0) && \
456         EQUIV(out_x2, output->get_w()) && \
457         EQUIV(out_y2, output->get_h())) \
458
459 #define NO_SCALE \
460         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
461         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
462
463         
464
465
466 //printf("OverlayFrame::overlay 4 %d\n", mode);
467
468
469
470
471         if(translation_input)
472         {
473 // Direct copy
474                 if( NO_TRANSLATION2 &&
475                         NO_SCALE &&
476                         NO_BLEND)
477                 {
478 //printf("OverlayFrame::overlay direct copy\n");
479                         output->copy_from(translation_input);
480                 }
481                 else
482 // Blend only
483                 if( NO_TRANSLATION2 &&
484                         NO_SCALE)
485                 {
486                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
487
488
489                         blend_engine->output = output;
490                         blend_engine->input = translation_input;
491                         blend_engine->alpha = alpha;
492                         blend_engine->mode = mode;
493
494                         blend_engine->process_packages();
495                 }
496                 else
497 // Scale and translate using nearest neighbor
498 // Translation is exactly on integer boundaries
499                 if(interpolation_type == NEAREST_NEIGHBOR ||
500                         EQUIV(in_x1, (int)in_x1) &&
501                         EQUIV(in_y1, (int)in_y1) &&
502                         EQUIV(in_x2, (int)in_x2) &&
503                         EQUIV(in_y2, (int)in_y2) &&
504
505                         EQUIV(out_x1, (int)out_x1) &&
506                         EQUIV(out_y1, (int)out_y1) &&
507                         EQUIV(out_x2, (int)out_x2) &&
508                         EQUIV(out_y2, (int)out_y2))
509                 {
510 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
511                         if(!scaletranslate_engine) scaletranslate_engine = new ScaleTranslateEngine(this, cpus);
512
513
514                         scaletranslate_engine->output = output;
515                         scaletranslate_engine->input = translation_input;
516                         scaletranslate_engine->in_x1 = (int)in_x1;
517                         scaletranslate_engine->in_y1 = (int)in_y1;
518                         scaletranslate_engine->in_x2 = (int)in_x2;
519                         scaletranslate_engine->in_y2 = (int)in_y2;
520                         scaletranslate_engine->out_x1 = (int)out_x1;
521                         scaletranslate_engine->out_y1 = (int)out_y1;
522                         scaletranslate_engine->out_x2 = (int)out_x2;
523                         scaletranslate_engine->out_y2 = (int)out_y2;
524                         scaletranslate_engine->alpha = alpha;
525                         scaletranslate_engine->mode = mode;
526
527                         scaletranslate_engine->process_packages();
528                 }
529                 else
530 // Fractional translation
531                 {
532 // Use fractional translation
533 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
534 //      in_x1, 
535 //      in_y1, 
536 //      in_x2, 
537 //      in_y2, 
538 //      out_x1, 
539 //      out_y1, 
540 //      out_x2, 
541 //      out_y2);
542
543 //printf("Overlay 3\n");
544                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
545                         translate_engine->translate_output = output;
546                         translate_engine->translate_input = translation_input;
547                         translate_engine->translate_in_x1 = in_x1;
548                         translate_engine->translate_in_y1 = in_y1;
549                         translate_engine->translate_in_x2 = in_x2;
550                         translate_engine->translate_in_y2 = in_y2;
551                         translate_engine->translate_out_x1 = out_x1;
552                         translate_engine->translate_out_y1 = out_y1;
553                         translate_engine->translate_out_x2 = out_x2;
554                         translate_engine->translate_out_y2 = out_y2;
555                         translate_engine->translate_alpha = alpha;
556                         translate_engine->translate_mode = mode;
557 //printf("Overlay 4\n");
558
559 //printf("OverlayFrame::overlay 5 %d\n", mode);
560                         translate_engine->process_packages();
561
562                 }
563         }
564 //printf("OverlayFrame::overlay 2\n");
565
566         return 0;
567 }
568
569
570
571
572
573
574
575 ScalePackage::ScalePackage()
576 {
577 }
578
579
580
581
582 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
583  : LoadClient(server)
584 {
585         this->overlay = overlay;
586         this->engine = server;
587 }
588
589 ScaleUnit::~ScaleUnit()
590 {
591 }
592
593
594
// Bilinear scaling of the rows pkg->out_row1 to pkg->out_row2 of output.
//
// Expanded where input, output, pkg, scale_w, scale_h, out_w_int, in_x1_int
// and in_y1_int are in scope.  type is the component type and components the
// number of components per pixel.  A 4th component is interpolated only when
// components is 4.  max is not used.
#define BILINEAR(max, type, components) \
{ \
        const float step_y = 1.0 / scale_h; \
        const float step_x = 1.0 / scale_w; \
        type **src_rows = (type**)input->get_rows(); \
        type **dst_rows = (type**)output->get_rows(); \
        const int row_count = pkg->out_row2 - pkg->out_row1; \
        const int src_h = input->get_h(); \
        const int src_w = input->get_w(); \
        const int channels = (components == 4) ? 4 : 3; \
        int *left_x, *right_x, *upper_y, *lower_y; \
        float *weight_x, *antiweight_x, *weight_y, *antiweight_y; \
 \
/* Source columns and weights for every output column */ \
        tabulate_blinear(left_x, right_x, weight_x, antiweight_x, \
                step_x, 0, out_w_int, in_x1_int, src_w); \
/* Source rows and weights for the rows of this package */ \
        tabulate_blinear(upper_y, lower_y, weight_y, antiweight_y, \
                step_y, pkg->out_row1, pkg->out_row2, in_y1_int, src_h); \
 \
        for(int row = 0; row < row_count; row++) \
        { \
                const float wy = weight_y[row]; \
                const float anti_wy = antiweight_y[row]; \
                type *upper = src_rows[upper_y[row]]; \
                type *lower = src_rows[lower_y[row]]; \
                type *dst = dst_rows[row + pkg->out_row1]; \
 \
                for(int col = 0; col < out_w_int; col++) \
                { \
                        const float wx = weight_x[col]; \
                        const float anti_wx = antiweight_x[col]; \
                        type *upper_left = upper + left_x[col] * components; \
                        type *upper_right = upper + right_x[col] * components; \
                        type *lower_left = lower + left_x[col] * components; \
                        type *lower_right = lower + right_x[col] * components; \
 \
                        for(int ch = 0; ch < channels; ch++) \
                        { \
                                const float p1 = upper_left[ch]; \
                                const float p2 = upper_right[ch]; \
                                const float p3 = lower_left[ch]; \
                                const float p4 = lower_right[ch]; \
 \
                                dst[col * components + ch] = \
                                        (type)((anti_wy) * (((anti_wx) * p1) + (wx * p2)) + \
                                        wy * (((anti_wx) * p3) + (wx * p4))); \
                        } \
                } \
        } \
 \
        delete [] left_x; \
        delete [] right_x; \
        delete [] weight_x; \
        delete [] antiweight_x; \
        delete [] upper_y; \
        delete [] lower_y; \
        delete [] weight_y; \
        delete [] antiweight_y; \
}
703
704
// Bicubic scaling of the rows pkg->out_row1 to pkg->out_row2 of output with a
// 4 x 4 kernel.
//
// Expanded where input, output, pkg, scale_w, scale_h, out_w_int, out_h_int,
// in_x1_int and in_y1_int are in scope.  type is the component type and
// components the number of components per pixel.  A 4th component is
// processed only when components is 4.  max is not used.
#define BICUBIC(max, type, components) \
{ \
        const float step_y = 1.0 / scale_h; \
        const float step_x = 1.0 / scale_w; \
        type **src_rows = (type**)input->get_rows(); \
        type **dst_rows = (type**)output->get_rows(); \
        float *weight_x, *weight_y; \
        int *tap_x, *tap_y; \
        const int src_h = input->get_h(); \
        const int src_w = input->get_w(); \
        const int channels = (components == 4) ? 4 : 3; \
 \
/* 4 weights and 4 source coordinates per output column and per output row */ \
        tabulate_bicubic(weight_x, tap_x, step_x, in_x1_int, out_w_int, src_w, -1); \
        tabulate_bicubic(weight_y, tap_y, step_y, in_y1_int, out_h_int, src_h, 1); \
 \
        for(int row = pkg->out_row1; row < pkg->out_row2; row++) \
        { \
                const float *row_weight = weight_y + row * 4; \
                const int *row_tap = tap_y + row * 4; \
                type *dst = dst_rows[row]; \
 \
                for(int col = 0; col < out_w_int; col++) \
                { \
                        const float *col_weight = weight_x + col * 4; \
                        const int *col_tap = tap_x + col * 4; \
                        float sum[4] = { 0, 0, 0, 0 }; \
 \
/* Kernel */ \
                        for(int m = 0; m < 4; m++) \
                        { \
                                const float wy = row_weight[m]; \
                                type *src = src_rows[row_tap[m]]; \
 \
                                for(int n = 0; n < 4; n++) \
                                { \
                                        const float w = wy * col_weight[n]; \
                                        type *pixel = src + col_tap[n] * components; \
 \
                                        for(int ch = 0; ch < channels; ch++) \
                                                sum[ch] += w * pixel[ch]; \
                                } \
                        } \
 \
                        for(int ch = 0; ch < channels; ch++) \
                                dst[col * components + ch] = (type)sum[ch]; \
                } \
        } \
 \
        delete [] weight_x; \
        delete [] weight_y; \
        delete [] tap_x; \
        delete [] tap_y; \
}
784
785
786
787
// Third power by plain multiplication.
// The pow function is not thread safe in Compaq C.
#define CUBE(v) ((v) * (v) * (v))
790
791 float ScaleUnit::cubic_bspline(float x)
792 {
793         float a, b, c, d;
794
795         if((x + 2.0F) <= 0.0F) 
796         {
797         a = 0.0F;
798         }
799         else 
800         {
801         a = CUBE(x + 2.0F);
802         }
803
804
805         if((x + 1.0F) <= 0.0F) 
806         {
807         b = 0.0F;
808         }
809         else 
810         {
811         b = CUBE(x + 1.0F);
812         }    
813
814         if(x <= 0) 
815         {
816         c = 0.0F;
817         }
818         else 
819         {
820         c = CUBE(x);
821         }  
822
823         if((x - 1.0F) <= 0.0F) 
824         {
825         d = 0.0F;
826         }
827         else 
828         {
829         d = CUBE(x - 1.0F);
830         }
831
832
833         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
834 }
835
836
837 void ScaleUnit::tabulate_bicubic(float* &coef_table, 
838         int* &coord_table,
839         float scale,
840         int start, 
841         int pixels,
842         int total_pixels,
843         float coefficient)
844 {
845         coef_table = new float[pixels * 4];
846         coord_table = new int[pixels * 4];
847         for(int i = 0, j = 0; i < pixels; i++)
848         {
849                 float f_x = (float)i * scale;
850                 float a = f_x - floor(f_x);
851                 
852                 for(float m = -1; m < 3; m++)
853                 {
854                         coef_table[j] = cubic_bspline(coefficient * (m - a));
855                         coord_table[j] = start + (int)f_x + m;
856                         CLAMP(coord_table[j], 0, total_pixels - 1);
857                         j++;
858                 }
859                 
860         }
861 }
862
863 void ScaleUnit::tabulate_blinear(int* &table_int1,
864                 int* &table_int2,
865                 float* &table_frac,
866                 float* &table_antifrac,
867                 float scale,
868                 int pixel1,
869                 int pixel2,
870                 int start,
871                 int total_pixels)
872 {
873         table_int1 = new int[pixel2 - pixel1];
874         table_int2 = new int[pixel2 - pixel1];
875         table_frac = new float[pixel2 - pixel1];
876         table_antifrac = new float[pixel2 - pixel1];
877
878         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
879         {
880                 float f_x = (float)i * scale;
881                 int i_x = (int)floor(f_x);
882                 float a = (f_x - floor(f_x));
883
884                 table_int1[j] = i_x + start;
885                 table_int2[j] = i_x + start + 1;
886                 CLAMP(table_int1[j], 0, total_pixels - 1);
887                 CLAMP(table_int2[j], 0, total_pixels - 1);
888                 table_frac[j] = a;
889                 table_antifrac[j] = 1.0F - a;
890 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
891         }
892 }
893
894 void ScaleUnit::process_package(LoadPackage *package)
895 {
896         ScalePackage *pkg = (ScalePackage*)package;
897
898 //printf("ScaleUnit::process_package 1\n");
899 // Arguments for macros
900         VFrame *output = engine->scale_output;
901         VFrame *input = engine->scale_input;
902         float scale_w = engine->w_scale;
903         float scale_h = engine->h_scale;
904         int in_x1_int = engine->in_x1_int;
905         int in_y1_int = engine->in_y1_int;
906         int out_h_int = engine->out_h_int;
907         int out_w_int = engine->out_w_int;
908         int do_yuv = 
909                 (input->get_color_model() == BC_YUV888 ||
910                 input->get_color_model() == BC_YUVA8888 ||
911                 input->get_color_model() == BC_YUV161616 ||
912                 input->get_color_model() == BC_YUVA16161616);
913
914 //printf("ScaleUnit::process_package 2\n");
915         if(engine->interpolation_type == CUBIC_CUBIC || 
916                 (engine->interpolation_type == CUBIC_LINEAR 
917                         && engine->w_scale > 1 && 
918                         engine->h_scale > 1))
919         {
920         
921                 switch(engine->scale_input->get_color_model())
922                 {
923                         case BC_RGB888:
924                         case BC_YUV888:
925                                 BICUBIC(0xff, unsigned char, 3);
926                                 break;
927
928                         case BC_RGBA8888:
929                         case BC_YUVA8888:
930                                 BICUBIC(0xff, unsigned char, 4);
931                                 break;
932
933                         case BC_RGB161616:
934                         case BC_YUV161616:
935                                 BICUBIC(0xffff, uint16_t, 3);
936                                 break;
937
938                         case BC_RGBA16161616:
939                         case BC_YUVA16161616:
940                                 BICUBIC(0xffff, uint16_t, 4);
941                                 break;
942                 }
943         }
944         else
945 // Perform bilinear scaling input -> scale_output
946         {
947                 switch(engine->scale_input->get_color_model())
948                 {
949                         case BC_RGB888:
950                         case BC_YUV888:
951                                 BILINEAR(0xff, unsigned char, 3);
952                                 break;
953
954                         case BC_RGBA8888:
955                         case BC_YUVA8888:
956                                 BILINEAR(0xff, unsigned char, 4);
957                                 break;
958
959                         case BC_RGB161616:
960                         case BC_YUV161616:
961                                 BILINEAR(0xffff, uint16_t, 3);
962                                 break;
963
964                         case BC_RGBA16161616:
965                         case BC_YUVA16161616:
966                                 BILINEAR(0xffff, uint16_t, 4);
967                                 break;
968                 }
969         }
970 //printf("ScaleUnit::process_package 3\n");
971
972 }
973
974
975
976
977
978
979
980
981
982
983
984
985
986 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
987  : LoadServer(cpus, cpus)
988 {
989         this->overlay = overlay;
990 }
991
992 ScaleEngine::~ScaleEngine()
993 {
994 }
995
996 void ScaleEngine::init_packages()
997 {
998         for(int i = 0; i < total_packages; i++)
999         {
1000                 ScalePackage *package = (ScalePackage*)packages[i];
1001                 package->out_row1 = out_h_int / total_packages * i;
1002                 package->out_row2 = package->out_row1 + out_h_int / total_packages;
1003
1004                 if(i >= total_packages - 1)
1005                         package->out_row2 = out_h_int;
1006         }
1007 }
1008
1009 LoadClient* ScaleEngine::new_client()
1010 {
1011         return new ScaleUnit(this, overlay);
1012 }
1013
1014 LoadPackage* ScaleEngine::new_package()
1015 {
1016         return new ScalePackage;
1017 }
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031 TranslatePackage::TranslatePackage()
1032 {
1033 }
1034
1035
1036
1037 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1038  : LoadClient(server)
1039 {
1040         this->overlay = overlay;
1041         this->engine = server;
1042 }
1043
1044 TranslateUnit::~TranslateUnit()
1045 {
1046 }
1047
1048
1049
1050 void TranslateUnit::translation_array(transfer_table* &table, 
1051         float out_x1, 
1052         float out_x2,
1053         float in_x1,
1054         float in_x2,
1055         int in_total, 
1056         int out_total, 
1057         int &out_x1_int,
1058         int &out_x2_int)
1059 {
1060         int out_w_int;
1061         float offset = out_x1 - in_x1;
1062
1063         out_x1_int = (int)out_x1;
1064         out_x2_int = MIN((int)ceil(out_x2), out_total);
1065         out_w_int = out_x2_int - out_x1_int;
1066
1067         table = new transfer_table[out_w_int];
1068         bzero(table, sizeof(transfer_table) * out_w_int);
1069
1070
1071 //printf("OverlayFrame::translation_array 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1072
1073         float in_x = in_x1;
1074         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1075         {
1076                 transfer_table *entry = &table[out_x - out_x1_int];
1077
1078                 entry->in_x1 = (int)in_x;
1079                 entry->in_x2 = (int)in_x + 1;
1080
1081 // Get fraction of output pixel to fill
1082                 entry->output_fraction = 1;
1083
1084                 if(out_x1 > out_x)
1085                 {
1086                         entry->output_fraction -= out_x1 - out_x;
1087                 }
1088
1089                 if(out_x2 < out_x + 1)
1090                 {
1091                         entry->output_fraction = (out_x2 - out_x);
1092                 }
1093
1094 // Advance in_x until out_x_fraction is filled
1095                 float out_x_fraction = entry->output_fraction;
1096                 float in_x_fraction = floor(in_x + 1) - in_x;
1097
1098                 if(out_x_fraction <= in_x_fraction)
1099                 {
1100                         entry->in_fraction1 = out_x_fraction;
1101                         entry->in_fraction2 = 0.0;
1102                         in_x += out_x_fraction;
1103                 }
1104                 else
1105                 {
1106                         entry->in_fraction1 = in_x_fraction;
1107                         in_x += out_x_fraction;
1108                         entry->in_fraction2 = in_x - floor(in_x);
1109                 }
1110
1111 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1112                 if(entry->in_x2 >= in_total)
1113                 {
1114                         entry->in_x2 = in_total - 1;
1115                         entry->in_fraction2 = 0.0;
1116                 }
1117                 
1118                 if(entry->in_x1 >= in_total)
1119                 {
1120                         entry->in_x1 = in_total - 1;
1121                         entry->in_fraction1 = 0.0;
1122                 }
1123 // printf("OverlayFrame::translation_array 2 %d %d %d %f %f %f\n", 
1124 //      out_x, 
1125 //      entry->in_x1, 
1126 //      entry->in_x2, 
1127 //      entry->in_fraction1, 
1128 //      entry->in_fraction2, 
1129 //      entry->output_fraction);
1130         }
1131 }
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
// TRANSLATE(max, type, components, do_yuv)
// Blends a sub-pixel translated copy of the input frame into the output frame
// for output rows [row1, row2) and columns [out_x1_int, out_x2_int).
//   max        - largest channel value (0xff or 0xffff)
//   type       - channel storage type
//   components - channels per pixel (3 or 4)
//   do_yuv     - 1 for YUV color models, 0 for RGB
// Names taken from the expanding scope: input, output, alpha, mode, row1,
// row2, x_table, y_table, out_x1_int, out_x2_int, out_y1_int.
// Every output pixel is a weighted sum of up to four source pixels, using the
// fractions stored in the x and y transfer tables.  The result is passed to
// BLEND_3 or BLEND_4 through the locals input1..input4, opacity,
// transparency and output, so those names must not change.
#define TRANSLATE(max, type, components, do_yuv) \
{ \
        type **in_rows = (type**)input->get_rows(); \
        type **out_rows = (type**)output->get_rows(); \
 \
        unsigned int master_opacity = (int)(alpha * max + 0.5); \
/* Chroma level standing in for the uncovered part of a pixel: */ \
/* half scale for YUV, 0 for RGB */ \
        float zero_chroma = ((max + 1) >> 1) * (do_yuv); \
 \
        for(int i = row1; i < row2; i++) \
        { \
                const transfer_table &y_entry = y_table[i - out_y1_int]; \
                int src_y1 = y_entry.in_x1; \
                int src_y2 = y_entry.in_x2; \
                float y_fraction1 = y_entry.in_fraction1; \
                float y_fraction2 = y_entry.in_fraction2; \
                float y_output_fraction = y_entry.output_fraction; \
                type *in_row1 = in_rows[src_y1]; \
                type *in_row2 = in_rows[src_y2]; \
                type *out_row = out_rows[i]; \
 \
                for(int j = out_x1_int; j < out_x2_int; j++) \
                { \
                        const transfer_table &x_entry = x_table[j - out_x1_int]; \
                        int src_x1 = x_entry.in_x1; \
                        int src_x2 = x_entry.in_x2; \
                        float x_fraction1 = x_entry.in_fraction1; \
                        float x_fraction2 = x_entry.in_fraction2; \
                        float x_output_fraction = x_entry.output_fraction; \
/* From here on "output" is the destination pixel, not the frame */ \
                        type *output = &out_row[j * components]; \
                        int input1, input2, input3, input4; \
 \
/* Weights of the four source pixels */ \
                        float fraction1 = x_fraction1 * y_fraction1; \
                        float fraction2 = x_fraction2 * y_fraction1; \
                        float fraction3 = x_fraction1 * y_fraction2; \
                        float fraction4 = x_fraction2 * y_fraction2; \
 \
/* The four source pixels: upper left, upper right, lower left, lower right */ \
                        const type *tap1 = in_row1 + src_x1 * components; \
                        const type *tap2 = in_row1 + src_x2 * components; \
                        const type *tap3 = in_row2 + src_x1 * components; \
                        const type *tap4 = in_row2 + src_x2 * components; \
 \
                        input1 = (int)(tap1[0] * fraction1 +  \
                                tap2[0] * fraction2 +  \
                                tap3[0] * fraction3 +  \
                                tap4[0] * fraction4 + 0.5); \
 \
/* Add chroma to fractional pixels */ \
                        if(do_yuv) \
                        { \
                                float extra_chroma = (1.0F - \
                                        fraction1 - \
                                        fraction2 - \
                                        fraction3 - \
                                        fraction4) * zero_chroma; \
                                input2 = (int)(tap1[1] * fraction1 +  \
                                        tap2[1] * fraction2 +  \
                                        tap3[1] * fraction3 +  \
                                        tap4[1] * fraction4 +  \
                                        extra_chroma + 0.5); \
                                input3 = (int)(tap1[2] * fraction1 +  \
                                        tap2[2] * fraction2 +  \
                                        tap3[2] * fraction3 +  \
                                        tap4[2] * fraction4 +  \
                                        extra_chroma + 0.5); \
                        } \
                        else \
                        { \
                                input2 = (int)(tap1[1] * fraction1 +  \
                                        tap2[1] * fraction2 +  \
                                        tap3[1] * fraction3 +  \
                                        tap4[1] * fraction4 + 0.5); \
                                input3 = (int)(tap1[2] * fraction1 +  \
                                        tap2[2] * fraction2 +  \
                                        tap3[2] * fraction3 +  \
                                        tap4[2] * fraction4 + 0.5); \
                        } \
 \
/* The fourth channel exists only in 4 component pixels */ \
                        if(components == 4) \
                                input4 = (int)(tap1[3] * fraction1 +  \
                                        tap2[3] * fraction2 +  \
                                        tap3[3] * fraction3 +  \
                                        tap4[3] * fraction4 + 0.5); \
 \
/* Scale the opacity by how much of this output pixel is covered */ \
                        unsigned int opacity = (int)(master_opacity *  \
                                y_output_fraction *  \
                                x_output_fraction + 0.5); \
                        unsigned int transparency = max - opacity; \
 \
                        if(components == 3) \
                        { \
                                BLEND_3(max, type); \
                        } \
                        else \
                        { \
                                BLEND_4(max, type); \
                        } \
                } \
        } \
}
1269
1270 void TranslateUnit::process_package(LoadPackage *package)
1271 {
1272         TranslatePackage *pkg = (TranslatePackage*)package;
1273         int out_y1_int; 
1274         int out_y2_int; 
1275         int out_x1_int; 
1276         int out_x2_int; 
1277
1278
1279 // Variables for TRANSLATE
1280         VFrame *input = engine->translate_input;
1281         VFrame *output = engine->translate_output;
1282         float in_x1 = engine->translate_in_x1;
1283         float in_y1 = engine->translate_in_y1;
1284         float in_x2 = engine->translate_in_x2;
1285         float in_y2 = engine->translate_in_y2;
1286         float out_x1 = engine->translate_out_x1;
1287         float out_y1 = engine->translate_out_y1;
1288         float out_x2 = engine->translate_out_x2;
1289         float out_y2 = engine->translate_out_y2;
1290         float alpha = engine->translate_alpha;
1291         int row1 = pkg->out_row1;
1292         int row2 = pkg->out_row2;
1293         int mode = engine->translate_mode;
1294         int in_total_x = input->get_w();
1295         int in_total_y = input->get_h();
1296         int do_yuv = 
1297                 (engine->translate_input->get_color_model() == BC_YUV888 ||
1298                 engine->translate_input->get_color_model() == BC_YUVA8888 ||
1299                 engine->translate_input->get_color_model() == BC_YUV161616 ||
1300                 engine->translate_input->get_color_model() == BC_YUVA16161616);
1301
1302         transfer_table *x_table; 
1303         transfer_table *y_table; 
1304  
1305         translation_array(x_table,  
1306                 out_x1,  
1307                 out_x2, 
1308                 in_x1, 
1309                 in_x2, 
1310                 in_total_x,  
1311                 output->get_w(),  
1312                 out_x1_int, 
1313                 out_x2_int); 
1314         translation_array(y_table,  
1315                 out_y1,  
1316                 out_y2, 
1317                 in_y1, 
1318                 in_y2, 
1319                 in_total_y,  
1320                 output->get_h(),  
1321                 out_y1_int, 
1322                 out_y2_int); 
1323  
1324         switch(engine->translate_input->get_color_model())
1325         {
1326                 case BC_RGB888:
1327                         TRANSLATE(0xff, unsigned char, 3, 0);
1328                         break;
1329
1330                 case BC_RGBA8888:
1331                         TRANSLATE(0xff, unsigned char, 4, 0);
1332                         break;
1333
1334                 case BC_RGB161616:
1335                         TRANSLATE(0xffff, uint16_t, 3, 0);
1336                         break;
1337
1338                 case BC_RGBA16161616:
1339                         TRANSLATE(0xffff, uint16_t, 4, 0);
1340                         break;
1341
1342                 case BC_YUV888:
1343                         TRANSLATE(0xff, unsigned char, 3, 1);
1344                         break;
1345
1346                 case BC_YUVA8888:
1347                         TRANSLATE(0xff, unsigned char, 4, 1);
1348                         break;
1349
1350                 case BC_YUV161616:
1351                         TRANSLATE(0xffff, uint16_t, 3, 1);
1352                         break;
1353
1354                 case BC_YUVA16161616:
1355                         TRANSLATE(0xffff, uint16_t, 4, 1);
1356                         break;
1357         }
1358  
1359         delete [] x_table; 
1360         delete [] y_table; 
1361 }
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
1373  : LoadServer(cpus, cpus)
1374 {
1375         this->overlay = overlay;
1376 }
1377
1378 TranslateEngine::~TranslateEngine()
1379 {
1380 }
1381
1382 void TranslateEngine::init_packages()
1383 {
1384         int out_y1_int = (int)translate_out_y1;
1385         int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
1386         int out_h = out_y2_int - out_y1_int;
1387
1388         for(int i = 0; i < total_packages; i++)
1389         {
1390                 TranslatePackage *package = (TranslatePackage*)packages[i];
1391                 package->out_row1 = (int)(out_y1_int + out_h / 
1392                         total_packages * 
1393                         i);
1394                 package->out_row2 = (int)((float)package->out_row1 + 
1395                         out_h / 
1396                         total_packages);
1397                 if(i >= total_packages - 1)
1398                         package->out_row2 = out_y2_int;
1399         }
1400 }
1401
1402 LoadClient* TranslateEngine::new_client()
1403 {
1404         return new TranslateUnit(this, overlay);
1405 }
1406
1407 LoadPackage* TranslateEngine::new_package()
1408 {
1409         return new TranslatePackage;
1410 }
1411
1412
1413
1414
1415
1416
1417
1418
// SCALE_TRANSLATE(max, type, components)
// Copies pixels from the input to the output through the x_table / y_table
// lookups, without interpolation, and blends each one with BLEND_3 or
// BLEND_4.
//   max        - largest channel value (0xff or 0xffff)
//   type       - channel storage type
//   components - channels per pixel (3 or 4)
// Names taken from the expanding scope: pkg, alpha, mode, in_rows, out_rows,
// x_table, y_table, in_x1, in_x2, out_x1, out_x2, out_y1.
// The BLEND macros read input1..input4, opacity, transparency and output, so
// those names must not change.
#define SCALE_TRANSLATE(max, type, components) \
{ \
        int64_t opacity = (int)(alpha * max + 0.5); \
        int64_t transparency = max - opacity; \
        int out_w = out_x2 - out_x1; \
/* When both widths match, the source column equals the output column */ \
        int x_is_scaled = (out_w != in_x2 - in_x1); \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                int in_y = y_table[i - out_y1]; \
/* Both row pointers start at the left edge of their rectangle */ \
                type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
                type *out_row = (type*)out_rows[i] + out_x1 * components; \
 \
                if(x_is_scaled) \
                { \
/* X direction is scaled and requires a table lookup */ \
                        for(int j = 0; j < out_w; j++) \
                        { \
                                const type *in_pixel = in_row + x_table[j] * components; \
                                type *output = out_row + j * components; \
                                int input1, input2, input3, input4; \
 \
                                input1 = in_pixel[0]; \
                                input2 = in_pixel[1]; \
                                input3 = in_pixel[2]; \
                                if(components == 4) \
                                        input4 = in_pixel[3]; \
 \
                                if(components == 3) \
                                { \
                                        BLEND_3(max, type); \
                                } \
                                else \
                                { \
                                        BLEND_4(max, type); \
                                } \
                        } \
                } \
                else \
                { \
/* X direction is not scaled */ \
                        for(int j = 0; j < out_w; j++) \
                        { \
                                const type *in_pixel = in_row + j * components; \
                                type *output = out_row + j * components; \
                                int input1, input2, input3, input4; \
 \
                                input1 = in_pixel[0]; \
                                input2 = in_pixel[1]; \
                                input3 = in_pixel[2]; \
                                if(components == 4) \
                                        input4 = in_pixel[3]; \
 \
                                if(components == 3) \
                                { \
                                        BLEND_3(max, type); \
                                } \
                                else \
                                { \
                                        BLEND_4(max, type); \
                                } \
                        } \
                } \
        } \
}
1482
1483
1484
1485 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
1486  : LoadClient(server)
1487 {
1488         this->overlay = overlay;
1489         this->scale_translate = server;
1490 }
1491
1492 ScaleTranslateUnit::~ScaleTranslateUnit()
1493 {
1494 }
1495
1496 void ScaleTranslateUnit::scale_array(int* &table, 
1497         int out_x1, 
1498         int out_x2,
1499         int in_x1,
1500         int in_x2,
1501         int is_x)
1502 {
1503         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
1504
1505         table = new int[out_x2 - out_x1];
1506         
1507         if(!is_x)
1508         {
1509                 for(int i = 0; i < out_x2 - out_x1; i++)
1510                 {
1511                         table[i] = (int)((float)i / scale + in_x1);
1512                 }
1513         }
1514         else
1515         {       
1516                 for(int i = 0; i < out_x2 - out_x1; i++)
1517                 {
1518                         table[i] = (int)((float)i / scale);
1519                 }
1520         }
1521 }
1522
1523
1524 void ScaleTranslateUnit::process_package(LoadPackage *package)
1525 {
1526         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
1527
1528 // Args for NEAREST_NEIGHBOR_MACRO
1529         VFrame *output = scale_translate->output;
1530         VFrame *input = scale_translate->input;
1531         int in_x1 = scale_translate->in_x1;
1532         int in_y1 = scale_translate->in_y1;
1533         int in_x2 = scale_translate->in_x2;
1534         int in_y2 = scale_translate->in_y2;
1535         int out_x1 = scale_translate->out_x1;
1536         int out_y1 = scale_translate->out_y1;
1537         int out_x2 = scale_translate->out_x2;
1538         int out_y2 = scale_translate->out_y2;
1539         float alpha = scale_translate->alpha;
1540         int mode = scale_translate->mode;
1541
1542         int *x_table;
1543         int *y_table;
1544         unsigned char **in_rows = input->get_rows();
1545         unsigned char **out_rows = output->get_rows();
1546
1547         scale_array(x_table, 
1548                 out_x1, 
1549                 out_x2,
1550                 in_x1,
1551                 in_x2,
1552                 1);
1553         scale_array(y_table, 
1554                 out_y1, 
1555                 out_y2,
1556                 in_y1,
1557                 in_y2,
1558                 0);
1559
1560
1561         switch(input->get_color_model())
1562         {
1563                 case BC_RGB888:
1564                 case BC_YUV888:
1565                         SCALE_TRANSLATE(0xff, uint8_t, 3);
1566                         break;
1567
1568                 case BC_RGBA8888:
1569                 case BC_YUVA8888:
1570                         SCALE_TRANSLATE(0xff, uint8_t, 4);
1571                         break;
1572
1573
1574                 case BC_RGB161616:
1575                 case BC_YUV161616:
1576                         SCALE_TRANSLATE(0xffff, uint16_t, 3);
1577                         break;
1578
1579                 case BC_RGBA16161616:
1580                 case BC_YUVA16161616:
1581                         SCALE_TRANSLATE(0xffff, uint16_t, 4);
1582                         break;
1583         }
1584         
1585         delete [] x_table;
1586         delete [] y_table;
1587
1588 };
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
1599  : LoadServer(cpus, cpus)
1600 {
1601         this->overlay = overlay;
1602 }
1603
1604 ScaleTranslateEngine::~ScaleTranslateEngine()
1605 {
1606 }
1607
1608 void ScaleTranslateEngine::init_packages()
1609 {
1610         int out_h = out_y2 - out_y1;
1611
1612         for(int i = 0; i < total_packages; i++)
1613         {
1614                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
1615                 package->out_row1 = (int)(out_y1 + out_h / 
1616                         total_packages * 
1617                         i);
1618                 package->out_row2 = (int)((float)package->out_row1 + 
1619                         out_h / 
1620                         total_packages);
1621                 if(i >= total_packages - 1)
1622                         package->out_row2 = out_y2;
1623         }
1624 }
1625
1626 LoadClient* ScaleTranslateEngine::new_client()
1627 {
1628         return new ScaleTranslateUnit(this, overlay);
1629 }
1630
1631 LoadPackage* ScaleTranslateEngine::new_package()
1632 {
1633         return new ScaleTranslatePackage;
1634 }
1635
1636
1637 ScaleTranslatePackage::ScaleTranslatePackage()
1638 {
1639 }
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
// BLEND_ONLY(type, max, components)
// Blends the input frame into the output frame pixel for pixel, with no
// scaling or translation, for rows [pkg->out_row1, pkg->out_row2) and the
// full input width.
//   type       - channel storage type
//   max        - largest channel value (0xff or 0xffff)
//   components - channels per pixel (3 or 4)
// Names taken from the expanding scope: pkg, input, output, alpha, mode.
// BLEND_3 / BLEND_4 read input1..input4, opacity, transparency and output,
// so those names must not change.
#define BLEND_ONLY(type, max, components) \
{ \
        int64_t opacity = (int)(alpha * max + 0.5); \
        int64_t transparency = max - opacity; \
 \
        type** output_rows = (type**)output->get_rows(); \
        type** input_rows = (type**)input->get_rows(); \
        int w = input->get_w(); \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                type* in_row = input_rows[i]; \
/* From here on "output" is the current destination pixel, not the frame */ \
                type* output = output_rows[i]; \
 \
                for(int j = 0; j < w; j++) \
                { \
                        int input1, input2, input3, input4; \
                        input1 = in_row[j * components]; \
                        input2 = in_row[j * components + 1]; \
                        input3 = in_row[j * components + 2]; \
                        if(components == 4) input4 = in_row[j * components + 3]; \
 \
 \
                        if(components == 3) \
                        { \
                                BLEND_3(max, type); \
                        } \
                        else \
                        { \
                                BLEND_4(max, type); \
                        } \
 \
/* Only the destination pointer advances.  The source is indexed by j, */ \
/* and "input" names the VFrame itself, which must not be incremented. */ \
                        output += components; \
                } \
        } \
}
1706
1707
1708
1709
1710 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
1711  : LoadClient(server)
1712 {
1713         this->overlay = overlay;
1714         this->blend_engine = server;
1715 }
1716
1717 BlendUnit::~BlendUnit()
1718 {
1719 }
1720
1721 void BlendUnit::process_package(LoadPackage *package)
1722 {
1723         BlendPackage *pkg = (BlendPackage*)package;
1724
1725
1726         VFrame *output = blend_engine->output;
1727         VFrame *input = blend_engine->input;
1728         float alpha = blend_engine->alpha;
1729         int mode = blend_engine->mode;
1730
1731         switch(input->get_color_model())
1732         {
1733                 case BC_RGB888:
1734                 case BC_YUV888:
1735                         BLEND_ONLY(unsigned char, 0xff, 3);
1736                         break;
1737                 case BC_RGBA8888:
1738                 case BC_YUVA8888:
1739                         BLEND_ONLY(unsigned char, 0xff, 4);
1740                         break;
1741                 case BC_RGB161616:
1742                 case BC_YUV161616:
1743                         BLEND_ONLY(uint16_t, 0xffff, 3);
1744                         break;
1745                 case BC_RGBA16161616:
1746                 case BC_YUVA16161616:
1747                         BLEND_ONLY(uint16_t, 0xffff, 4);
1748                         break;
1749         }
1750 }
1751
1752
1753
1754 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
1755  : LoadServer(cpus, cpus)
1756 {
1757         this->overlay = overlay;
1758 }
1759
1760 BlendEngine::~BlendEngine()
1761 {
1762 }
1763
1764 void BlendEngine::init_packages()
1765 {
1766         for(int i = 0; i < total_packages; i++)
1767         {
1768                 BlendPackage *package = (BlendPackage*)packages[i];
1769                 package->out_row1 = (int)(input->get_h() / 
1770                         total_packages * 
1771                         i);
1772                 package->out_row2 = (int)((float)package->out_row1 +
1773                         input->get_h() / 
1774                         total_packages);
1775
1776                 if(i >= total_packages - 1)
1777                         package->out_row2 = input->get_h();
1778         }
1779 }
1780
1781 LoadClient* BlendEngine::new_client()
1782 {
1783         return new BlendUnit(this, overlay);
1784 }
1785
1786 LoadPackage* BlendEngine::new_package()
1787 {
1788         return new BlendPackage;
1789 }
1790
1791
1792 BlendPackage::BlendPackage()
1793 {
1794 }
1795
1796