#include #include #include #include #include "clip.h" #include "edl.inc" #include "mutex.h" #include "overlayframe.h" #include "vframe.h" OverlayFrame::OverlayFrame(int cpus) { temp_frame = 0; blend_engine = 0; scale_engine = 0; scaletranslate_engine = 0; translate_engine = 0; this->cpus = cpus; } OverlayFrame::~OverlayFrame() { //printf("OverlayFrame::~OverlayFrame 1\n"); if(temp_frame) delete temp_frame; if(scale_engine) delete scale_engine; if(translate_engine) delete translate_engine; if(blend_engine) delete blend_engine; if(scaletranslate_engine) delete scaletranslate_engine; //printf("OverlayFrame::~OverlayFrame 2\n"); } // Verification: // (255 * 255 + 0 * 0) / 255 = 255 // (255 * 127 + 255 * (255 - 127)) / 255 = 255 // (65535 * 65535 + 0 * 0) / 65535 = 65535 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535 // Branch prediction 4 U #define BLEND_3(max, type) \ { \ int64_t r, g, b; \ \ /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \ switch(mode) \ { \ case TRANSFER_DIVIDE: \ r = output[0] ? (((int64_t)input1 * max) / output[0]) : max; \ g = output[1] ? (((int64_t)input2 * max) / output[1]) : max; \ b = output[2] ? (((int64_t)input3 * max) / output[2]) : max; \ r = (r * opacity + output[0] * transparency) / max; \ g = (g * opacity + output[1] * transparency) / max; \ b = (b * opacity + output[2] * transparency) / max; \ break; \ case TRANSFER_MULTIPLY: \ r = ((int64_t)input1 * output[0]) / max; \ g = ((int64_t)input2 * output[1]) / max; \ b = ((int64_t)input3 * output[2]) / max; \ r = (r * opacity + output[0] * transparency) / max; \ g = (g * opacity + output[1] * transparency) / max; \ b = (b * opacity + output[2] * transparency) / max; \ break; \ case TRANSFER_SUBTRACT: \ r = (((int64_t)input1 - output[0]) * opacity + output[0] * transparency) / max; \ g = (((int64_t)input2 - output[1]) * opacity + output[1] * transparency) / max; \ b = (((int64_t)input3 - output[2]) * opacity + output[2] * transparency) / max; \ break; \ case TRANSFER_ADDITION: \ r = (((int64_t)input1 + output[0]) * opacity + output[0] * transparency) / max; \ g = (((int64_t)input2 + output[1]) * opacity + output[1] * transparency) / max; \ b = (((int64_t)input3 + output[2]) * opacity + output[2] * transparency) / max; \ break; \ case TRANSFER_REPLACE: \ r = input1; \ g = input2; \ b = input3; \ break; \ case TRANSFER_NORMAL: \ r = ((int64_t)input1 * opacity + output[0] * transparency) / max; \ g = ((int64_t)input2 * opacity + output[1] * transparency) / max; \ b = ((int64_t)input3 * opacity + output[2] * transparency) / max; \ break; \ } \ \ output[0] = (type)CLIP(r, 0, max); \ output[1] = (type)CLIP(g, 0, max); \ output[2] = (type)CLIP(b, 0, max); \ } // Blending equations are drastically different for 3 and 4 components #define BLEND_4(max, type) \ { \ int64_t r, g, b, a; \ int64_t pixel_opacity, pixel_transparency; \ \ pixel_opacity = opacity * input4 / max; \ pixel_transparency = (max - pixel_opacity) * output[3] / max; \ \ switch(mode) \ { \ case TRANSFER_DIVIDE: \ r = output[0] ? (((int64_t)input1 * max) / output[0]) : max; \ g = output[1] ? (((int64_t)input2 * max) / output[1]) : max; \ b = output[2] ? (((int64_t)input3 * max) / output[2]) : max; \ r = (r * pixel_opacity + output[0] * pixel_transparency) / max; \ g = (g * pixel_opacity + output[1] * pixel_transparency) / max; \ b = (b * pixel_opacity + output[2] * pixel_transparency) / max; \ a = input4 > output[3] ? input4 : output[3]; \ break; \ case TRANSFER_MULTIPLY: \ r = ((int64_t)input1 * output[0]) / max; \ g = ((int64_t)input2 * output[1]) / max; \ b = ((int64_t)input3 * output[2]) / max; \ r = (r * pixel_opacity + output[0] * pixel_transparency) / max; \ g = (g * pixel_opacity + output[1] * pixel_transparency) / max; \ b = (b * pixel_opacity + output[2] * pixel_transparency) / max; \ a = input4 > output[3] ? input4 : output[3]; \ break; \ case TRANSFER_SUBTRACT: \ r = (((int64_t)input1 - output[0]) * pixel_opacity + output[0] * pixel_transparency) / max; \ g = (((int64_t)input2 - output[1]) * pixel_opacity + output[1] * pixel_transparency) / max; \ b = (((int64_t)input3 - output[2]) * pixel_opacity + output[2] * pixel_transparency) / max; \ a = input4 > output[3] ? input4 : output[3]; \ break; \ case TRANSFER_ADDITION: \ r = (((int64_t)input1 + output[0]) * pixel_opacity + output[0] * pixel_transparency) / max; \ g = (((int64_t)input2 + output[1]) * pixel_opacity + output[1] * pixel_transparency) / max; \ b = (((int64_t)input3 + output[2]) * pixel_opacity + output[2] * pixel_transparency) / max; \ a = input4 > output[3] ? input4 : output[3]; \ break; \ case TRANSFER_REPLACE: \ r = input1; \ g = input2; \ b = input3; \ a = input4; \ break; \ case TRANSFER_NORMAL: \ r = ((int64_t)input1 * pixel_opacity + output[0] * pixel_transparency) / max; \ g = ((int64_t)input2 * pixel_opacity + output[1] * pixel_transparency) / max; \ b = ((int64_t)input3 * pixel_opacity + output[2] * pixel_transparency) / max; \ a = input4 > output[3] ? input4 : output[3]; \ break; \ } \ \ output[0] = (type)CLIP(r, 0, max); \ output[1] = (type)CLIP(g, 0, max); \ output[2] = (type)CLIP(b, 0, max); \ output[3] = (type)a; \ } // Bicubic algorithm using multiprocessors // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output // Nearest neighbor algorithm using multiprocessors for blending // input -> scale + translate -> blend -> output int OverlayFrame::overlay(VFrame *output, VFrame *input, float in_x1, float in_y1, float in_x2, float in_y2, float out_x1, float out_y1, float out_x2, float out_y2, float alpha, // 0 - 1 int mode, int interpolation_type) { float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1); float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1); //printf("OverlayFrame::overlay 1 %d %f\n", mode, alpha); // Limit values if(in_x1 < 0) { out_x1 += -in_x1 * w_scale; in_x1 = 0; } else if(in_x1 >= input->get_w()) { out_x1 -= (in_x1 - input->get_w()) * w_scale; in_x1 = input->get_w(); } if(in_y1 < 0) { out_y1 += -in_y1 * h_scale; in_y1 = 0; } else if(in_y1 >= input->get_h()) { out_y1 -= (in_y1 - input->get_h()) * h_scale; in_y1 = input->get_h(); } if(in_x2 < 0) { out_x2 += -in_x2 * w_scale; in_x2 = 0; } else if(in_x2 >= input->get_w()) { out_x2 -= (in_x2 - input->get_w()) * w_scale; in_x2 = input->get_w(); } if(in_y2 < 0) { out_y2 += -in_y2 * h_scale; in_y2 = 0; } else if(in_y2 >= input->get_h()) { out_y2 -= (in_y2 - input->get_h()) * h_scale; in_y2 = input->get_h(); } if(out_x1 < 0) { in_x1 += -out_x1 / w_scale; out_x1 = 0; } else if(out_x1 >= output->get_w()) { in_x1 -= (out_x1 - output->get_w()) / w_scale; out_x1 = output->get_w(); } if(out_y1 < 0) { in_y1 += -out_y1 / h_scale; out_y1 = 0; } else if(out_y1 >= output->get_h()) { in_y1 -= (out_y1 - output->get_h()) / h_scale; out_y1 = output->get_h(); } if(out_x2 < 0) { in_x2 += -out_x2 / w_scale; out_x2 = 0; } else if(out_x2 >= output->get_w()) { in_x2 -= (out_x2 - output->get_w()) / w_scale; out_x2 = output->get_w(); } if(out_y2 < 0) { in_y2 += -out_y2 / h_scale; out_y2 = 0; } else if(out_y2 >= output->get_h()) { in_y2 -= (out_y2 - output->get_h()) / h_scale; out_y2 = output->get_h(); } float in_w = in_x2 - in_x1; float in_h = in_y2 - in_y1; float out_w = out_x2 - out_x1; float out_h = out_y2 - out_y1; // Input for translation operation VFrame *translation_input = input; // printf("OverlayFrame::overlay %f %f %f %f -> %f %f %f %f\n", in_x1, // in_y1, // in_x2, // in_y2, // out_x1, // out_y1, // out_x2, // out_y2); // **************************************************************************** // Transfer to temp buffer by scaling nearest integer boundaries // **************************************************************************** if(interpolation_type != NEAREST_NEIGHBOR && interpolation_type != LINEAR_LINEAR && (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1))) { // Create integer boundaries for interpolation int in_x1_int = (int)in_x1; int in_y1_int = (int)in_y1; int in_x2_int = MIN((int)ceil(in_x2), input->get_w()); int in_y2_int = MIN((int)ceil(in_y2), input->get_h()); // Dimensions of temp frame. Integer boundaries scaled. int temp_w = (int)ceil(w_scale * (in_x2_int - in_x1_int)); int temp_h = (int)ceil(h_scale * (in_y2_int - in_y1_int)); VFrame *scale_output; #define NO_TRANSLATION1 \ (EQUIV(in_x1, 0) && \ EQUIV(in_y1, 0) && \ EQUIV(out_x1, 0) && \ EQUIV(out_y1, 0) && \ EQUIV(in_x2, in_x2_int) && \ EQUIV(in_y2, in_y2_int) && \ EQUIV(out_x2, temp_w) && \ EQUIV(out_y2, temp_h)) #define NO_BLEND \ (EQUIV(alpha, 1) && \ (mode == TRANSFER_REPLACE || \ (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3))) // Prepare destination for operation // No translation and no blending. The blending operation is built into the // translation unit but not the scaling unit. // input -> output if(NO_TRANSLATION1 && NO_BLEND) { // printf("OverlayFrame::overlay input -> output\n"); scale_output = output; translation_input = 0; } else // If translation or blending // input -> nearest integer boundary temp { if(temp_frame && (temp_frame->get_w() != temp_w || temp_frame->get_h() != temp_h)) { delete temp_frame; temp_frame = 0; } if(!temp_frame) { temp_frame = new VFrame(0, temp_w, temp_h, input->get_color_model(), -1); } //printf("OverlayFrame::overlay input -> temp\n"); temp_frame->clear_frame(); // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n", // temp_w, temp_h); scale_output = temp_frame; translation_input = scale_output; // Adjust input coordinates to reflect new scaled coordinates. in_x1 = (in_x1 - in_x1_int) * w_scale; in_y1 = (in_y1 - in_y1_int) * h_scale; in_x2 = (in_x2 - in_x1_int) * w_scale; in_y2 = (in_y2 - in_y1_int) * h_scale; } //printf("Overlay 1\n"); // Scale input -> scale_output if(!scale_engine) scale_engine = new ScaleEngine(this, cpus); scale_engine->scale_output = scale_output; scale_engine->scale_input = input; scale_engine->w_scale = w_scale; scale_engine->h_scale = h_scale; scale_engine->in_x1_int = in_x1_int; scale_engine->in_y1_int = in_y1_int; scale_engine->out_w_int = temp_w; scale_engine->out_h_int = temp_h; scale_engine->interpolation_type = interpolation_type; //printf("Overlay 2\n"); //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int); scale_engine->process_packages(); //printf("OverlayFrame::overlay ScaleEngine 2\n"); } // printf("OverlayFrame::overlay 1 %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", // in_x1, // in_y1, // in_x2, // in_y2, // out_x1, // out_y1, // out_x2, // out_y2); #define NO_TRANSLATION2 \ (EQUIV(in_x1, 0) && \ EQUIV(in_y1, 0) && \ EQUIV(in_x2, translation_input->get_w()) && \ EQUIV(in_y2, translation_input->get_h()) && \ EQUIV(out_x1, 0) && \ EQUIV(out_y1, 0) && \ EQUIV(out_x2, output->get_w()) && \ EQUIV(out_y2, output->get_h())) \ #define NO_SCALE \ (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \ EQUIV(out_y2 - out_y1, in_y2 - in_y1)) //printf("OverlayFrame::overlay 4 %d\n", mode); if(translation_input) { // Direct copy if( NO_TRANSLATION2 && NO_SCALE && NO_BLEND) { //printf("OverlayFrame::overlay direct copy\n"); output->copy_from(translation_input); } else // Blend only if( NO_TRANSLATION2 && NO_SCALE) { if(!blend_engine) blend_engine = new BlendEngine(this, cpus); blend_engine->output = output; blend_engine->input = translation_input; blend_engine->alpha = alpha; blend_engine->mode = mode; blend_engine->process_packages(); } else // Scale and translate using nearest neighbor // Translation is exactly on integer boundaries if(interpolation_type == NEAREST_NEIGHBOR || EQUIV(in_x1, (int)in_x1) && EQUIV(in_y1, (int)in_y1) && EQUIV(in_x2, (int)in_x2) && EQUIV(in_y2, (int)in_y2) && EQUIV(out_x1, (int)out_x1) && EQUIV(out_y1, (int)out_y1) && EQUIV(out_x2, (int)out_x2) && EQUIV(out_y2, (int)out_y2)) { //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n"); if(!scaletranslate_engine) scaletranslate_engine = new ScaleTranslateEngine(this, cpus); scaletranslate_engine->output = output; scaletranslate_engine->input = translation_input; scaletranslate_engine->in_x1 = (int)in_x1; scaletranslate_engine->in_y1 = (int)in_y1; scaletranslate_engine->in_x2 = (int)in_x2; scaletranslate_engine->in_y2 = (int)in_y2; scaletranslate_engine->out_x1 = (int)out_x1; scaletranslate_engine->out_y1 = (int)out_y1; scaletranslate_engine->out_x2 = (int)out_x2; scaletranslate_engine->out_y2 = (int)out_y2; scaletranslate_engine->alpha = alpha; scaletranslate_engine->mode = mode; scaletranslate_engine->process_packages(); } else // Fractional translation { // Use fractional translation // printf("OverlayFrame::overlay temp -> output %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", // in_x1, // in_y1, // in_x2, // in_y2, // out_x1, // out_y1, // out_x2, // out_y2); //printf("Overlay 3\n"); if(!translate_engine) translate_engine = new TranslateEngine(this, cpus); translate_engine->translate_output = output; translate_engine->translate_input = translation_input; translate_engine->translate_in_x1 = in_x1; translate_engine->translate_in_y1 = in_y1; translate_engine->translate_in_x2 = in_x2; translate_engine->translate_in_y2 = in_y2; translate_engine->translate_out_x1 = out_x1; translate_engine->translate_out_y1 = out_y1; translate_engine->translate_out_x2 = out_x2; translate_engine->translate_out_y2 = out_y2; translate_engine->translate_alpha = alpha; translate_engine->translate_mode = mode; //printf("Overlay 4\n"); //printf("OverlayFrame::overlay 5 %d\n", mode); translate_engine->process_packages(); } } //printf("OverlayFrame::overlay 2\n"); return 0; } ScalePackage::ScalePackage() { } ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay) : LoadClient(server) { this->overlay = overlay; this->engine = server; } ScaleUnit::~ScaleUnit() { } #define BILINEAR(max, type, components) \ { \ float k_y = 1.0 / scale_h; \ float k_x = 1.0 / scale_w; \ type **in_rows = (type**)input->get_rows(); \ type **out_rows = (type**)output->get_rows(); \ type zero_r, zero_g, zero_b, zero_a; \ int in_h_int = input->get_h(); \ int in_w_int = input->get_w(); \ int *table_int_x, *table_int_y; \ float *table_frac_x, *table_antifrac_x, *table_frac_y, *table_antifrac_y; \ \ zero_r = 0; \ zero_g = ((max + 1) >> 1) * (do_yuv); \ zero_b = ((max + 1) >> 1) * (do_yuv); \ if(components == 4) zero_a = 0; \ \ tabulate_blinear(table_int_x, table_frac_x, table_antifrac_x, k_x, 0, out_w_int, in_w_int); \ tabulate_blinear(table_int_y, table_frac_y, table_antifrac_y, k_y, pkg->out_row1, pkg->out_row2, in_h_int); \ \ for(int i = pkg->out_row1; i < pkg->out_row2; i++) \ { \ int i_y = table_int_y[i - pkg->out_row1]; \ float a = table_frac_y[i - pkg->out_row1]; \ float anti_a = table_antifrac_y[i - pkg->out_row1]; \ type *in_row1 = in_rows[i_y + in_y1_int]; \ type *in_row2 = (i_y + in_y1_int < in_h_int - 1) ? \ in_rows[i_y + in_y1_int + 1] : \ 0; \ type *out_row = out_rows[i]; \ \ for(int j = 0; j < out_w_int; j++) \ { \ int i_x = table_int_x[j]; \ float b = table_frac_x[j]; \ float anti_b = table_antifrac_x[j]; \ int x = i_x + in_x1_int; \ float output1r, output1g, output1b, output1a; \ float output2r, output2g, output2b, output2a; \ float output3r, output3g, output3b, output3a; \ float output4r, output4g, output4b, output4a; \ \ output1r = in_row1[x * components]; \ output1g = in_row1[x * components + 1]; \ output1b = in_row1[x * components + 2]; \ if(components == 4) output1a = in_row1[x * components + 3]; \ \ if(x < in_w_int - 1) \ { \ output2r = in_row1[x * components + components]; \ output2g = in_row1[x * components + components + 1]; \ output2b = in_row1[x * components + components + 2]; \ if(components == 4) output2a = in_row1[x * components + components + 3]; \ \ if(in_row2) \ { \ output4r = in_row2[x * components + components]; \ output4g = in_row2[x * components + components + 1]; \ output4b = in_row2[x * components + components + 2]; \ if(components == 4) output4a = in_row2[x * components + components + 3]; \ } \ else \ { \ output4r = zero_r; \ output4g = zero_g; \ output4b = zero_b; \ if(components == 4) output4a = zero_a; \ } \ } \ else \ { \ output2r = zero_r; \ output2g = zero_g; \ output2b = zero_b; \ if(components == 4) output2a = zero_a; \ output4r = zero_r; \ output4g = zero_g; \ output4b = zero_b; \ if(components == 4) output4a = zero_a; \ } \ \ if(in_row2) \ { \ output3r = in_row2[x * components]; \ output3g = in_row2[x * components + 1]; \ output3b = in_row2[x * components + 2]; \ if(components == 4) output3a = in_row2[x * components + 3]; \ } \ else \ { \ output3r = zero_r; \ output3g = zero_g; \ output3b = zero_b; \ if(components == 4) output3a = zero_a; \ } \ \ out_row[j * components] = \ (type)((anti_a) * (((anti_b) * output1r) + \ (b * output2r)) + \ a * (((anti_b) * output3r) + \ (b * output4r))); \ out_row[j * components + 1] = \ (type)((anti_a) * (((anti_b) * output1g) + \ (b * output2g)) + \ a * (((anti_b) * output3g) + \ (b * output4g))); \ out_row[j * components + 2] = \ (type)((anti_a) * (((anti_b) * output1b) + \ (b * output2b)) + \ a * (((anti_b) * output3b) + \ (b * output4b))); \ if(components == 4) \ out_row[j * components + 3] = \ (type)((anti_a) * (((anti_b) * output1a) + \ (b * output2a)) + \ a * (((anti_b) * output3a) + \ (b * output4a))); \ } \ } \ \ \ delete [] table_int_x; \ delete [] table_frac_x; \ delete [] table_antifrac_x; \ delete [] table_int_y; \ delete [] table_frac_y; \ delete [] table_antifrac_y; \ \ } #define BICUBIC(max, type, components) \ { \ float k_y = 1.0 / scale_h; \ float k_x = 1.0 / scale_w; \ type **in_rows = (type**)input->get_rows(); \ type **out_rows = (type**)output->get_rows(); \ float *bspline_x, *bspline_y; \ int in_h_int = input->get_h(); \ int in_w_int = input->get_w(); \ type zero_r, zero_g, zero_b, zero_a; \ /* printf("BICUBIC\n"); */ \ \ zero_r = 0; \ zero_b = ((max + 1) >> 1) * (do_yuv); \ zero_g = ((max + 1) >> 1) * (do_yuv); \ if(components == 4) \ zero_a = 0; \ \ tabulate_bspline(bspline_x, \ k_x, \ out_w_int, \ -1); \ \ tabulate_bspline(bspline_y, \ k_y, \ out_h_int, \ 1); \ \ for(int i = pkg->out_row1; i < pkg->out_row2; i++) \ { \ int i_y = (int)(k_y * i); \ \ \ for(int j = 0; j < out_w_int; j++) \ { \ int i_x = (int)(k_x * j); \ float output1, output2, output3, output4; \ output1 = 0; \ output2 = 0; \ output3 = 0; \ if(components == 4) \ output4 = 0; \ int table_y = i * 4; \ \ /* Kernel */ \ for(int m = -1; m < 3; m++) \ { \ float r1 = bspline_y[table_y++]; \ int y = in_y1_int + i_y + m; \ int table_x = j * 4; \ \ CLAMP(y, 0, in_h_int - 1); \ \ for(int n = -1; n < 3; n++) \ { \ float r2 = bspline_x[table_x++]; \ int x = in_x1_int + i_x + n; \ float r_square = r1 * r2; \ \ CLAMP(x, 0, in_w_int - 1); \ \ output1 += r_square * in_rows[y][x * components]; \ output2 += r_square * in_rows[y][x * components + 1]; \ output3 += r_square * in_rows[y][x * components + 2]; \ if(components == 4) \ output4 += r_square * in_rows[y][x * components + 3]; \ } \ } \ \ \ out_rows[i][j * components] = (type)output1; \ out_rows[i][j * components + 1] = (type)output2; \ out_rows[i][j * components + 2] = (type)output3; \ if(components == 4) \ out_rows[i][j * components + 3] = (type)output4; \ \ } \ } \ \ delete [] bspline_x; \ delete [] bspline_y; \ } // Pow function is not thread safe in Compaqt C #define CUBE(x) ((x) * (x) * (x)) float ScaleUnit::cubic_bspline(float x) { float a, b, c, d; if((x + 2.0F) <= 0.0F) { a = 0.0F; } else { a = CUBE(x + 2.0F); } if((x + 1.0F) <= 0.0F) { b = 0.0F; } else { b = CUBE(x + 1.0F); } if(x <= 0) { c = 0.0F; } else { c = CUBE(x); } if((x - 1.0F) <= 0.0F) { d = 0.0F; } else { d = CUBE(x - 1.0F); } return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0; } void ScaleUnit::tabulate_bspline(float* &table, float scale, int pixels, float coefficient) { table = new float[pixels * 4]; for(int i = 0, j = 0; i < pixels; i++) { float f_x = (float)i * scale; float a = f_x - floor(f_x); for(float m = -1; m < 3; m++) { table[j++] = cubic_bspline(coefficient * (m - a)); } } } void ScaleUnit::tabulate_blinear(int* &table_int, float* &table_frac, float* &table_antifrac, float scale, int pixel1, int pixel2, total_pixels) { table_int = new int[pixel2 - pixel1]; table_frac = new float[pixel2 - pixel1]; table_antifrac = new float[pixel2 - pixel1]; for(int i = pixel1, j = 0; i < pixel2; i++, j++) { float f_x = (float)i * scale; int i_x = (int)floor(f_x); float a = (f_x - floor(f_x)); table_int[j] = CLAMP(i_x, 0, total_pixels - 1); table_frac[j] = a; table_antifrac[j] = 1.0F - a; } } void ScaleUnit::process_package(LoadPackage *package) { ScalePackage *pkg = (ScalePackage*)package; //printf("ScaleUnit::process_package 1\n"); // Arguments for macros VFrame *output = engine->scale_output; VFrame *input = engine->scale_input; float scale_w = engine->w_scale; float scale_h = engine->h_scale; int in_x1_int = engine->in_x1_int; int in_y1_int = engine->in_y1_int; int out_h_int = engine->out_h_int; int out_w_int = engine->out_w_int; int do_yuv = (input->get_color_model() == BC_YUV888 || input->get_color_model() == BC_YUVA8888 || input->get_color_model() == BC_YUV161616 || input->get_color_model() == BC_YUVA16161616); //printf("ScaleUnit::process_package 2\n"); if(engine->interpolation_type == CUBIC_CUBIC || (engine->interpolation_type == CUBIC_LINEAR && engine->w_scale > 1 && engine->h_scale > 1)) { switch(engine->scale_input->get_color_model()) { case BC_RGB888: case BC_YUV888: BICUBIC(0xff, unsigned char, 3); break; case BC_RGBA8888: case BC_YUVA8888: BICUBIC(0xff, unsigned char, 4); break; case BC_RGB161616: case BC_YUV161616: BICUBIC(0xffff, uint16_t, 3); break; case BC_RGBA16161616: case BC_YUVA16161616: BICUBIC(0xffff, uint16_t, 4); break; } } else // Perform bilinear scaling input -> scale_output { switch(engine->scale_input->get_color_model()) { case BC_RGB888: case BC_YUV888: BILINEAR(0xff, unsigned char, 3); break; case BC_RGBA8888: case BC_YUVA8888: BILINEAR(0xff, unsigned char, 4); break; case BC_RGB161616: case BC_YUV161616: BILINEAR(0xffff, uint16_t, 3); break; case BC_RGBA16161616: case BC_YUVA16161616: BILINEAR(0xffff, uint16_t, 4); break; } } //printf("ScaleUnit::process_package 3\n"); } ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus) : LoadServer(cpus, cpus) { this->overlay = overlay; } ScaleEngine::~ScaleEngine() { } void ScaleEngine::init_packages() { for(int i = 0; i < total_packages; i++) { ScalePackage *package = (ScalePackage*)packages[i]; package->out_row1 = out_h_int / total_packages * i; package->out_row2 = package->out_row1 + out_h_int / total_packages; if(i >= total_packages - 1) package->out_row2 = out_h_int; } } LoadClient* ScaleEngine::new_client() { return new ScaleUnit(this, overlay); } LoadPackage* ScaleEngine::new_package() { return new ScalePackage; } TranslatePackage::TranslatePackage() { } TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay) : LoadClient(server) { this->overlay = overlay; this->engine = server; } TranslateUnit::~TranslateUnit() { } void TranslateUnit::translation_array(transfer_table* &table, float out_x1, float out_x2, float in_x1, float in_x2, int in_total, int out_total, int &out_x1_int, int &out_x2_int) { int out_w_int; float offset = out_x1 - in_x1; out_x1_int = (int)out_x1; out_x2_int = MIN((int)ceil(out_x2), out_total); out_w_int = out_x2_int - out_x1_int; table = new transfer_table[out_w_int]; bzero(table, sizeof(transfer_table) * out_w_int); //printf("OverlayFrame::translation_array 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2); float in_x = in_x1; for(int out_x = out_x1_int; out_x < out_x2_int; out_x++) { transfer_table *entry = &table[out_x - out_x1_int]; entry->in_x1 = (int)in_x; entry->in_x2 = (int)in_x + 1; // Get fraction of output pixel to fill entry->output_fraction = 1; if(out_x1 > out_x) { entry->output_fraction -= out_x1 - out_x; } if(out_x2 < out_x + 1) { entry->output_fraction = (out_x2 - out_x); } // Advance in_x until out_x_fraction is filled float out_x_fraction = entry->output_fraction; float in_x_fraction = floor(in_x + 1) - in_x; if(out_x_fraction <= in_x_fraction) { entry->in_fraction1 = out_x_fraction; entry->in_fraction2 = 0.0; in_x += out_x_fraction; } else { entry->in_fraction1 = in_x_fraction; in_x += out_x_fraction; entry->in_fraction2 = in_x - floor(in_x); } // Clip in_x and zero out fraction. This doesn't work for YUV. if(entry->in_x2 >= in_total) { entry->in_x2 = in_total - 1; entry->in_fraction2 = 0.0; } if(entry->in_x1 >= in_total) { entry->in_x1 = in_total - 1; entry->in_fraction1 = 0.0; } // printf("OverlayFrame::translation_array 2 %d %d %d %f %f %f\n", // out_x, // entry->in_x1, // entry->in_x2, // entry->in_fraction1, // entry->in_fraction2, // entry->output_fraction); } } #define TRANSLATE(max, type, components) \ { \ \ type **in_rows = (type**)input->get_rows(); \ type **out_rows = (type**)output->get_rows(); \ \ /* printf("OverlayFrame::translate 1 %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", */ \ /* (in_x1), in_y1, in_x2, in_y2, out_x1, out_y1, out_x2, out_y2); */ \ \ unsigned int master_opacity = (int)(alpha * max + 0.5); \ unsigned int master_transparency = max - master_opacity; \ type zero_r, zero_g, zero_b, zero_a; \ zero_r = 0; \ zero_b = ((max + 1) >> 1) * (do_yuv); \ zero_g = ((max + 1) >> 1) * (do_yuv); \ if(components == 4) \ zero_a = 0; \ \ /* printf("TRANSLATE %d\n", mode); */ \ \ for(int i = row1; i < row2; i++) \ { \ int in_y1 = y_table[i - out_y1_int].in_x1; \ int in_y2 = y_table[i - out_y1_int].in_x2; \ float y_fraction1 = y_table[i - out_y1_int].in_fraction1; \ float y_fraction2 = y_table[i - out_y1_int].in_fraction2; \ float y_output_fraction = y_table[i - out_y1_int].output_fraction; \ type *in_row1 = in_rows[(in_y1)]; \ type *in_row2 = in_rows[(in_y2)]; \ type *out_row = out_rows[i]; \ \ for(int j = out_x1_int; j < out_x2_int; j++) \ { \ int in_x1 = x_table[j - out_x1_int].in_x1; \ int in_x2 = x_table[j - out_x1_int].in_x2; \ float x_fraction1 = x_table[j - out_x1_int].in_fraction1; \ float x_fraction2 = x_table[j - out_x1_int].in_fraction2; \ float x_output_fraction = x_table[j - out_x1_int].output_fraction; \ type *output = &out_row[j * components]; \ int input1, input2, input3, input4; \ \ input1 = (int)(in_row1[in_x1 * components] * x_fraction1 * y_fraction1 + \ in_row1[in_x2 * components] * x_fraction2 * y_fraction1 + \ in_row2[in_x1 * components] * x_fraction1 * y_fraction2 + \ in_row2[in_x2 * components] * x_fraction2 * y_fraction2 + 0.5); \ input2 = (int)(in_row1[in_x1 * components + 1] * x_fraction1 * y_fraction1 + \ in_row1[in_x2 * components + 1] * x_fraction2 * y_fraction1 + \ in_row2[in_x1 * components + 1] * x_fraction1 * y_fraction2 + \ in_row2[in_x2 * components + 1] * x_fraction2 * y_fraction2 + 0.5); \ input3 = (int)(in_row1[in_x1 * components + 2] * x_fraction1 * y_fraction1 + \ in_row1[in_x2 * components + 2] * x_fraction2 * y_fraction1 + \ in_row2[in_x1 * components + 2] * x_fraction1 * y_fraction2 + \ in_row2[in_x2 * components + 2] * x_fraction2 * y_fraction2 + 0.5); \ if(components == 4) \ input4 = (int)(in_row1[in_x1 * components + 3] * x_fraction1 * y_fraction1 + \ in_row1[in_x2 * components + 3] * x_fraction2 * y_fraction1 + \ in_row2[in_x1 * components + 3] * x_fraction1 * y_fraction2 + \ in_row2[in_x2 * components + 3] * x_fraction2 * y_fraction2 + 0.5); \ \ unsigned int opacity = (int)(master_opacity * \ y_output_fraction * \ x_output_fraction + 0.5); \ unsigned int transparency = max - opacity; \ \ /* if(opacity != max) printf("TRANSLATE %x %d %d\n", opacity, j, i); */ \ \ if(components == 3) \ { \ BLEND_3(max, type); \ } \ else \ { \ BLEND_4(max, type); \ } \ } \ } \ } void TranslateUnit::process_package(LoadPackage *package) { TranslatePackage *pkg = (TranslatePackage*)package; int out_y1_int; int out_y2_int; int out_x1_int; int out_x2_int; // Variables for TRANSLATE VFrame *input = engine->translate_input; VFrame *output = engine->translate_output; float in_x1 = engine->translate_in_x1; float in_y1 = engine->translate_in_y1; float in_x2 = engine->translate_in_x2; float in_y2 = engine->translate_in_y2; float out_x1 = engine->translate_out_x1; float out_y1 = engine->translate_out_y1; float out_x2 = engine->translate_out_x2; float out_y2 = engine->translate_out_y2; float alpha = engine->translate_alpha; int row1 = pkg->out_row1; int row2 = pkg->out_row2; int mode = engine->translate_mode; int in_total_x = input->get_w(); int in_total_y = input->get_h(); int do_yuv = (engine->translate_input->get_color_model() == BC_YUV888 || engine->translate_input->get_color_model() == BC_YUVA8888 || engine->translate_input->get_color_model() == BC_YUV161616 || engine->translate_input->get_color_model() == BC_YUVA16161616); transfer_table *x_table; transfer_table *y_table; translation_array(x_table, out_x1, out_x2, in_x1, in_x2, in_total_x, output->get_w(), out_x1_int, out_x2_int); translation_array(y_table, out_y1, out_y2, in_y1, in_y2, in_total_y, output->get_h(), out_y1_int, out_y2_int); switch(engine->translate_input->get_color_model()) { case BC_RGB888: case BC_YUV888: TRANSLATE(0xff, unsigned char, 3); break; case BC_RGBA8888: case BC_YUVA8888: TRANSLATE(0xff, unsigned char, 4); break; case BC_RGB161616: case BC_YUV161616: TRANSLATE(0xffff, uint16_t, 3); break; case BC_RGBA16161616: case BC_YUVA16161616: TRANSLATE(0xffff, uint16_t, 4); break; } delete [] x_table; delete [] y_table; } TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus) : LoadServer(cpus, cpus) { this->overlay = overlay; } TranslateEngine::~TranslateEngine() { } void TranslateEngine::init_packages() { int out_y1_int = (int)translate_out_y1; int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h()); int out_h = out_y2_int - out_y1_int; for(int i = 0; i < total_packages; i++) { TranslatePackage *package = (TranslatePackage*)packages[i]; package->out_row1 = (int)(out_y1_int + out_h / total_packages * i); package->out_row2 = (int)((float)package->out_row1 + out_h / total_packages); if(i >= total_packages - 1) package->out_row2 = out_y2_int; } } LoadClient* TranslateEngine::new_client() { return new TranslateUnit(this, overlay); } LoadPackage* TranslateEngine::new_package() { return new TranslatePackage; } #define SCALE_TRANSLATE(max, type, components) \ { \ int64_t opacity = (int)(alpha * max + 0.5); \ int64_t transparency = max - opacity; \ int out_w = out_x2 - out_x1; \ \ for(int i = pkg->out_row1; i < pkg->out_row2; i++) \ { \ int in_y = y_table[i - out_y1]; \ type *in_row = (type*)in_rows[in_y] + in_x1 * components; \ type *out_row = (type*)out_rows[i] + out_x1 * components; \ \ /* X direction is scaled and requires a table lookup */ \ if(out_w != in_x2 - in_x1) \ { \ for(int j = 0; j < out_w; j++) \ { \ int in_x = x_table[j]; \ int input1, input2, input3, input4; \ type *output = out_row + j * components; \ \ input1 = in_row[in_x * components]; \ input2 = in_row[in_x * components + 1]; \ input3 = in_row[in_x * components + 2]; \ if(components == 4) \ input4 = in_row[in_x * components + 3]; \ \ if(components == 3) \ { \ BLEND_3(max, type); \ } \ else \ { \ BLEND_4(max, type); \ } \ } \ } \ else \ /* X direction is not scaled */ \ { \ for(int j = 0; j < out_w; j++) \ { \ int input1, input2, input3, input4; \ type *output = out_row + j * components; \ \ input1 = in_row[j * components]; \ input2 = in_row[j * components + 1]; \ input3 = in_row[j * components + 2]; \ if(components == 4) \ input4 = in_row[j * components + 3]; \ \ if(components == 3) \ { \ BLEND_3(max, type); \ } \ else \ { \ BLEND_4(max, type); \ } \ } \ } \ } \ } ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay) : LoadClient(server) { this->overlay = overlay; this->scale_translate = server; } ScaleTranslateUnit::~ScaleTranslateUnit() { } void ScaleTranslateUnit::scale_array(int* &table, int out_x1, int out_x2, int in_x1, int in_x2, int is_x) { float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1); table = new int[out_x2 - out_x1]; if(!is_x) { for(int i = 0; i < out_x2 - out_x1; i++) { table[i] = (int)((float)i / scale + in_x1); } } else { for(int i = 0; i < out_x2 - out_x1; i++) { table[i] = (int)((float)i / scale); } } } void ScaleTranslateUnit::process_package(LoadPackage *package) { ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package; // Args for NEAREST_NEIGHBOR_MACRO VFrame *output = scale_translate->output; VFrame *input = scale_translate->input; int in_x1 = scale_translate->in_x1; int in_y1 = scale_translate->in_y1; int in_x2 = scale_translate->in_x2; int in_y2 = scale_translate->in_y2; int out_x1 = scale_translate->out_x1; int out_y1 = scale_translate->out_y1; int out_x2 = scale_translate->out_x2; int out_y2 = scale_translate->out_y2; float alpha = scale_translate->alpha; int mode = scale_translate->mode; int *x_table; int *y_table; unsigned char **in_rows = input->get_rows(); unsigned char **out_rows = output->get_rows(); scale_array(x_table, out_x1, out_x2, in_x1, in_x2, 1); scale_array(y_table, out_y1, out_y2, in_y1, in_y2, 0); switch(input->get_color_model()) { case BC_RGB888: case BC_YUV888: SCALE_TRANSLATE(0xff, uint8_t, 3); break; case BC_RGBA8888: case BC_YUVA8888: SCALE_TRANSLATE(0xff, uint8_t, 4); break; case BC_RGB161616: case BC_YUV161616: SCALE_TRANSLATE(0xffff, uint16_t, 3); break; case BC_RGBA16161616: case BC_YUVA16161616: SCALE_TRANSLATE(0xffff, uint16_t, 4); break; } delete [] x_table; delete [] y_table; }; ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus) : LoadServer(cpus, cpus) { this->overlay = overlay; } ScaleTranslateEngine::~ScaleTranslateEngine() { } void ScaleTranslateEngine::init_packages() { int out_h = out_y2 - out_y1; for(int i = 0; i < total_packages; i++) { ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i]; package->out_row1 = (int)(out_y1 + out_h / total_packages * i); package->out_row2 = (int)((float)package->out_row1 + out_h / total_packages); if(i >= total_packages - 1) package->out_row2 = out_y2; } } LoadClient* ScaleTranslateEngine::new_client() { return new ScaleTranslateUnit(this, overlay); } LoadPackage* ScaleTranslateEngine::new_package() { return new ScaleTranslatePackage; } ScaleTranslatePackage::ScaleTranslatePackage() { } #define BLEND_ONLY(type, max, components) \ { \ int64_t opacity = (int)(alpha * max + 0.5); \ int64_t transparency = max - opacity; \ \ type** output_rows = (type**)output->get_rows(); \ type** input_rows = (type**)input->get_rows(); \ int w = input->get_w(); \ int h = input->get_h(); \ \ for(int i = pkg->out_row1; i < pkg->out_row2; i++) \ { \ type* in_row = input_rows[i]; \ type* output = output_rows[i]; \ \ for(int j = 0; j < w; j++) \ { \ int input1, input2, input3, input4; \ input1 = in_row[j * components]; \ input2 = in_row[j * components + 1]; \ input3 = in_row[j * components + 2]; \ if(components == 4) input4 = in_row[j * components + 3]; \ \ \ if(components == 3) \ { \ BLEND_3(max, type); \ } \ else \ { \ BLEND_4(max, type); \ } \ \ input += components; \ output += components; \ } \ } \ } BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay) : LoadClient(server) { this->overlay = overlay; this->blend_engine = server; } BlendUnit::~BlendUnit() { } void BlendUnit::process_package(LoadPackage *package) { BlendPackage *pkg = (BlendPackage*)package; VFrame *output = blend_engine->output; VFrame *input = blend_engine->input; float alpha = blend_engine->alpha; int mode = blend_engine->mode; switch(input->get_color_model()) { case BC_RGB888: case BC_YUV888: BLEND_ONLY(unsigned char, 0xff, 3); break; case BC_RGBA8888: case BC_YUVA8888: BLEND_ONLY(unsigned char, 0xff, 4); break; case BC_RGB161616: case BC_YUV161616: BLEND_ONLY(uint16_t, 0xffff, 3); break; case BC_RGBA16161616: case BC_YUVA16161616: BLEND_ONLY(uint16_t, 0xffff, 4); break; } } BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus) : LoadServer(cpus, cpus) { this->overlay = overlay; } BlendEngine::~BlendEngine() { } void BlendEngine::init_packages() { for(int i = 0; i < total_packages; i++) { BlendPackage *package = (BlendPackage*)packages[i]; package->out_row1 = (int)(input->get_h() / total_packages * i); package->out_row2 = (int)((float)package->out_row1 + input->get_h() / total_packages); if(i >= total_packages - 1) package->out_row2 = input->get_h(); } } LoadClient* BlendEngine::new_client() { return new BlendUnit(this, overlay); } LoadPackage* BlendEngine::new_package() { return new BlendPackage; } BlendPackage::BlendPackage() { }