cinelerra/overlaysample.C

   1 #include "overlayframe.h"
   2
   3 /* Fully resampled scale / translate / blend ******************************/
   4 /* resample into a temporary row vector, then blend */
   5
   /*
    * XSAMPLE(FN, temp_type, type, max, components, ofs, round)
    *
    * Body of one color-model case: resamples along an input row and blends
    * the result into the output, one output column at a time.
    *
    *   FN          blend operation, forwarded to ALPHA4_BLEND / ALPHA3_BLEND
    *   temp_type   type of opcty/trnsp and of the r, g, b, a blend temporaries
    *   type        component type of the input and output rows
    *   max         full-scale component value (fade is scaled by it)
    *   components  components per pixel; three are always processed and a
    *               fourth is processed only when components == 4
    *   ofs         subtracted from the 2nd and 3rd components when sampling
    *   round       added when fade * max is converted to temp_type
    *
    * Not self-contained: it expands against locals of the enclosing function
    * (pkg, engine, vinput, voutput, fade, oh, o1i, o1f, o2f, i1i, i2i, i1f,
    * i2f, k, kd, lookup_sk, lookup_sx0, lookup_sx1, lookup_wacc).
    *
    * For each column i in [pkg->out_col1, pkg->out_col2):
    *   1. The input row is chosen from i (i - engine->col_out1 +
    *      engine->row_in), so output columns map onto input rows.
    *   2. temp[] receives oh pixels. When k is null this is a straight copy
    *      starting at input pixel i1i. Otherwise each pixel is a
    *      kernel-weighted sum over input pixels [lookup_sx0[j],
    *      lookup_sx1[j]) multiplied by lookup_wacc[j]; in that path input
    *      pixel i1i is further weighted by i1f and pixel i2i-1 by i2f.
    *   3. The first and last temp pixels are scaled by o1f and o2f.
    *   4. temp pixel j is blended into output row o1i + j at column i.
    *
    * temp is a variable-length array (oh * components floats), which is a
    * compiler extension in C++ rather than standard.
    * NOTE(review): opcty and trnsp are not referenced in this macro itself;
    * presumably ALPHA4_BLEND / ALPHA3_BLEND (defined elsewhere) use them -
    * confirm.
    * The expansion ends in "break", so it must sit inside a switch or loop.
    */
   6 #define XSAMPLE(FN, temp_type, type, max, components, ofs, round) { \
   7         float temp[oh*components]; \
   8         temp_type opcty = fade * max + round, trnsp = max - opcty; \
   9         type **output_rows = (type**)voutput->get_rows() + o1i; \
  10         type **input_rows = (type**)vinput->get_rows(); \
  11  \
  12         for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \
  13                 type *input = input_rows[i - engine->col_out1 + engine->row_in]; \
  14                 float *tempp = temp; \
  15                 if( !k ) { /* direct copy case */ \
  16                         type *ip = input + i1i * components; \
  17                         for(int j = 0; j < oh; j++) { \
  18                                 *tempp++ = *ip++; \
  19                                 *tempp++ = *ip++ - ofs; \
  20                                 *tempp++ = *ip++ - ofs; \
  21                                 if( components == 4 ) *tempp++ = *ip++; \
  22                         } \
  23                 } \
  24                 else { /* resample */ \
  25                         for(int j = 0; j < oh; j++) { \
  26                                 float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \
  27                                 int ki = lookup_sk[j], x = lookup_sx0[j]; \
  28                                 type *ip = input + x * components; \
  29                                 while(x < lookup_sx1[j]) { \
  30                                         float kv = k[abs(ki >> INDEX_FRACTION)]; \
  31                                         /* handle fractional pixels on edges of input */ \
  32                                         if(x == i1i) kv *= i1f; \
  33                                         if(++x == i2i) kv *= i2f; \
  34                                         racc += kv * *ip++; \
  35                                         gacc += kv * (*ip++ - ofs); \
  36                                         bacc += kv * (*ip++ - ofs); \
  37                                         if( components == 4 ) { aacc += kv * *ip++; } \
  38                                         ki += kd; \
  39                                 } \
  40                                 float wacc = lookup_wacc[j]; \
  41                                 *tempp++ = racc * wacc; \
  42                                 *tempp++ = gacc * wacc; \
  43                                 *tempp++ = bacc * wacc; \
  44                                 if( components == 4 ) { *tempp++ = aacc * wacc; } \
  45                         } \
  46                 } \
  47  \
  48                 /* handle fractional pixels on edges of output */ \
  49                 temp[0] *= o1f;   temp[1] *= o1f;   temp[2] *= o1f; \
  50                 if( components == 4 ) temp[3] *= o1f; \
  51                 tempp = temp + (oh-1)*components; \
  52                 tempp[0] *= o2f;  tempp[1] *= o2f;  tempp[2] *= o2f; \
  53                 if( components == 4 ) tempp[3] *= o2f; \
  54                 tempp = temp; \
  55                 /* blend output */ \
  56                 for(int j = 0; j < oh; j++) { \
  57                         type *output = output_rows[j] + i * components; \
  58                         if( components == 4 ) { \
  59                                 temp_type r, g, b, a; \
  60                                 ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
  61                                 ALPHA4_STORE(output, ofs, max); \
  62                         } \
  63                         else { \
  64                                 temp_type r, g, b; \
  65                                 ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
  66                                 ALPHA3_STORE(output, ofs, max); \
  67                         } \
  68                         tempp += components; \
  69                 } \
  70         } \
  71         break; \
  72 }
  73
  /*
   * XBLEND_SAMPLE(FN)
   *
   * Dispatches on the input frame's color model and expands XSAMPLE with the
   * matching temp type, component type, full-scale value, component count,
   * offset and rounding term. Only the YUV models pass a non-zero ofs
   * (0x80 for 8-bit, 0x8000 for 16-bit); the float models use 0.f rounding
   * and the integer models use .5f.
   * Each XSAMPLE expansion ends with its own "break", so cases do not fall
   * through. There is no default case: a color model that is not listed
   * performs no work.
   * The trailing "break" exits whatever switch or loop this macro is expanded
   * into; in this file it is handed to BLEND_SWITCH, which is defined
   * elsewhere.
   */
  74 #define XBLEND_SAMPLE(FN) { \
  75         switch(vinput->get_color_model()) { \
  76         case BC_RGB_FLOAT:      XSAMPLE(FN, z_float,   z_float,    1.f,    3, 0.f,    0.f); \
  77         case BC_RGBA_FLOAT:     XSAMPLE(FN, z_float,   z_float,    1.f,    4, 0.f,    0.f); \
  78         case BC_RGB888:         XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   3, 0,      .5f); \
  79         case BC_YUV888:         XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   3, 0x80,   .5f); \
  80         case BC_RGBA8888:       XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   4, 0,      .5f); \
  81         case BC_YUVA8888:       XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   4, 0x80,   .5f); \
  82         case BC_RGB161616:      XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0,      .5f); \
  83         case BC_YUV161616:      XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
  84         case BC_RGBA16161616:   XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0,      .5f); \
  85         case BC_YUVA16161616:   XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
  86         } \
  87         break; \
  88 }
  89
  90
  /*
   * Constructs an empty package. out_col1 / out_col2 are not set here; they
   * are assigned by SampleEngine::init_packages().
   */
  91 SamplePackage::SamplePackage()
  92 {
  93 }
  94
  /*
   * Constructs a client bound to the given engine. The pointer is passed to
   * the LoadClient base and also kept in this->engine with its SampleEngine
   * type, which process_package() uses to read the engine's parameters.
   */
  95 SampleUnit::SampleUnit(SampleEngine *server)
  96  : LoadClient(server)
  97 {
  98         this->engine = server;
  99 }
 100
 /* Nothing to release here: the destructor body is empty. */
 101 SampleUnit::~SampleUnit()
 102 {
 103 }
 104
 /*
  * Processes one package: resamples and blends the output columns
  * [pkg->out_col1, pkg->out_col2) using the lookup tables stored on the
  * engine (filled by SampleEngine::init_packages()).
  *
  * package is cast to SamplePackage without a check.
  *
  * Most locals below look unused in this function's own text. They are
  * referenced by name inside the XSAMPLE macro, which BLEND_SWITCH /
  * XBLEND_SAMPLE expand at the end of the function, so their names must not
  * change.
  */
 105 void SampleUnit::process_package(LoadPackage *package)
 106 {
 107         SamplePackage *pkg = (SamplePackage*)package;
 108
 109         float i1  = engine->in1;
 110         float i2  = engine->in2;
 111         float o1  = engine->out1;
 112         float o2  = engine->out2;
 113
             /* empty or inverted input/output span: nothing to do */
 114         if(i2 - i1 <= 0 || o2 - o1 <= 0)
 115                 return;
 116
 117         VFrame *voutput = engine->output;
 118         VFrame *vinput = engine->input;
 119         int mode = engine->mode;
             /*
              * Parsed as (has_alpha && mode == TRANSFER_REPLACE) ? 1.f : alpha:
              * full opacity when replacing from an input that has an alpha
              * channel, otherwise the engine's alpha.
              */
 120         float fade =
 121                 BC_CModels::has_alpha(vinput->get_color_model()) &&
 122                 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
 123
 124         //int   iw  = vinput->get_w();
             /*
              * Integer bounds enclosing the input span, and how much of the
              * first (i1f) and last (i2f) input pixel the span covers.
              */
 125         int   i1i = floor(i1);
 126         int   i2i = ceil(i2);
 127         float i1f = 1.f - i1 + i1i;
 128         float i2f = 1.f - i2i + i2;
 129
             /* same for the output span; oh is the number of output samples */
 130         int   o1i = floor(o1);
 131         int   o2i = ceil(o2);
 132         float o1f = 1.f - o1 + o1i;
 133         float o2f = 1.f - o2i + o2;
 134         int   oh  = o2i - o1i;
 135
             /* XSAMPLE takes its direct-copy path when k is null */
 136         float *k  = engine->kernel->lookup;
 137         //float kw  = engine->kernel->width;
 138         //int   kn  = engine->kernel->n;
 139         int   kd = engine->kd;
 140
 141         int *lookup_sx0 = engine->lookup_sx0;
 142         int *lookup_sx1 = engine->lookup_sx1;
 143         int *lookup_sk = engine->lookup_sk;
 144         float *lookup_wacc = engine->lookup_wacc;
 145
             /*
              * NOTE(review): BLEND_SWITCH is defined elsewhere; presumably it
              * selects the blend function from mode and expands XBLEND_SAMPLE
              * with it - confirm.
              */
 146         BLEND_SWITCH(XBLEND_SAMPLE);
 147 }
 148
 149
 /*
  * Constructs the engine, passing cpus to LoadServer for both of its
  * arguments. The lookup tables start out null and kd starts at 0; they are
  * built in init_packages().
  */
 150 SampleEngine::SampleEngine(int cpus)
 151  : LoadServer(cpus, cpus)
 152 {
 153         lookup_sx0 = 0;
 154         lookup_sx1 = 0;
 155         lookup_sk = 0;
 156         lookup_wacc = 0;
 157         kd = 0;
 158 }
 159
 /*
  * Releases the four lookup tables allocated with new[] in init_packages().
  * Each pointer is tested before it is deleted.
  */
 160 SampleEngine::~SampleEngine()
 161 {
 162         if(lookup_sx0) delete [] lookup_sx0;
 163         if(lookup_sx1) delete [] lookup_sx1;
 164         if(lookup_sk) delete [] lookup_sk;
 165         if(lookup_wacc) delete [] lookup_wacc;
 166 }
 167
 168 /*
 169  * unlike the Direct and NN engines, the Sample engine works across
 170  * output columns (it makes for more economical memory addressing
 171  * during convolution)
 172  */
 /*
  * Builds the per-output-sample lookup tables, then splits the output
  * column range [col_out1, col_out2) across the packages.
  *
  * For each output sample i in [0, oh) the tables hold:
  *   lookup_sx0[i]   first source pixel to convolve, or -1 if none
  *   lookup_sx1[i]   one past the last source pixel, or -1 if none
  *   lookup_sk[i]    fixed-point kernel index (INDEX_FRACTION fractional
  *                   bits) that belongs to lookup_sx0[i]
  *   lookup_wacc[i]  reciprocal of the summed kernel weights, or 0 when the
  *                   sum is not positive
  * kd is the fixed-point kernel index step per source pixel.
  *
  * If the input or output span is empty the function returns before
  * touching anything, so the tables, kd and the package column ranges keep
  * their previous values.
  */
 173 void SampleEngine::init_packages()
 174 {
 175         int   iw  = input->get_w();
 176         int   i1i = floor(in1);
 177         int   i2i = ceil(in2);
 178         float i1f = 1.f - in1 + i1i;
 179         float i2f = 1.f - i2i + in2;
 180
 181         int   oy  = floor(out1);
 182         float oyf = out1 - oy;
 183         int   oh  = ceil(out2) - oy;
 184
 185         float *k  = kernel->lookup;
 186         float kw  = kernel->width;
 187         int   kn  = kernel->n;
 188
 189         if(in2 - in1 <= 0 || out2 - out1 <= 0)
 190                 return;
 191
 192         /* determine kernel spatial coverage */
             /*
              * coverage is source pixels per output pixel. When it is >= 1
              * the kernel reach (bound) is widened by coverage and the index
              * scale (coeff) is multiplied by scale.
              * NOTE(review): TRANSFORM_SPP is defined elsewhere; presumably
              * the number of kernel table samples per pixel - confirm.
              */
 193         float scale = (out2 - out1) / (in2 - in1);
 194         float iscale = (in2 - in1) / (out2 - out1);
 195         float coverage = fabs(1.f / scale);
 196         float bound = (coverage < 1.f ? kw : kw * coverage) - (.5f / TRANSFORM_SPP);
 197         float coeff = (coverage < 1.f ? 1.f : scale) * TRANSFORM_SPP;
 198
             /* drop the previous tables and allocate for the current oh */
 199         delete [] lookup_sx0;
 200         delete [] lookup_sx1;
 201         delete [] lookup_sk;
 202         delete [] lookup_wacc;
 203
 204         lookup_sx0 = new int[oh];
 205         lookup_sx1 = new int[oh];
 206         lookup_sk = new int[oh];
 207         lookup_wacc = new float[oh];
 208
             /* kernel index step per source pixel, fixed point, rounded */
 209         kd = (double)coeff * (1 << INDEX_FRACTION) + .5;
 210
 211         /* precompute kernel values and weight sums */
 212         for(int i = 0; i < oh; i++) {
 213                 /* map destination back to source */
 214                 double sx = (i - oyf + .5) * iscale + in1 - .5;
 215
 216                 /*
 217                  * clip iteration to source area but not source plane. Points
 218                  * outside the source plane count as transparent. Points outside
 219                  * the source area don't count at all.  The actual convolution
 220                  * later will be clipped to both, but we need to compute
 221                  * weights.
 222                  */
 223                 int sx0 = mmax((int)floor(sx - bound) + 1, i1i);
 224                 int sx1 = mmin((int)ceil(sx + bound), i2i);
                     /* fixed-point kernel index for source pixel sx0 */
 225                 int ki = (double)(sx0 - sx) * coeff * (1 << INDEX_FRACTION)
 226                                 + (1 << (INDEX_FRACTION - 1)) + .5;
 227                 float wacc=0.;
 228
                     /*
                      * -1 means no in-plane source pixel has been seen yet.
                      * lookup_sk[i] is not given a value in that case; XSAMPLE
                      * still reads it, but its loop runs zero times because
                      * both bounds are -1.
                      */
 229                 lookup_sx0[i] = -1;
 230                 lookup_sx1[i] = -1;
 231
 232                 for(int j= sx0; j < sx1; j++) {
 233                         int kv = (ki >> INDEX_FRACTION);
                             /* index above kn: stop scanning this output sample */
 234                         if(kv > kn) break;
 235                         if(kv >= -kn) {
 236                                 /*
 237                                  * the contribution of the first and last input pixel (if
 238                                  * fractional) are linearly weighted by the fraction
 239                                  */
 240                                 float fk = k[abs(kv)];
 241                                 wacc += j == i1i ? fk * i1f : j+1 == i2i ? fk * i2f : fk;
 242
 243                                 /* this is where we clip the kernel convolution to the source plane */
 244                                 if(j >= 0 && j < iw) {
 245                                         if(lookup_sx0[i] == -1) {
 246                                                 lookup_sx0[i] = j;
 247                                                 lookup_sk[i] = ki;
 248                                         }
 249                                         lookup_sx1[i] = j + 1;
 250                                 }
 251                         }
 252                         ki += kd;
 253                 }
 254                 lookup_wacc[i] = wacc > 0. ? 1. / wacc : 0.;
 255         }
 256
             /*
              * Split the columns as evenly as integer division allows:
              * package i covers [col1, col2) and col2 becomes the next
              * package's col1. The index i is advanced inside the body (++i).
              */
 257         int cols = col_out2 - col_out1;
 258         int pkgs = get_total_packages();
 259         int col1 = col_out1, col2 = col1;
 260         for(int i = 0; i < pkgs; col1=col2 ) {
 261                 SamplePackage *package = (SamplePackage*)get_package(i);
 262                 col2 = ++i * cols / pkgs + col_out1;
 263                 package->out_col1 = col1;
 264                 package->out_col2 = col2;
 265         }
 266 }
 267
 /*
  * Factory: returns a newly allocated SampleUnit bound to this engine.
  * NOTE(review): ownership of the returned object is not visible here;
  * presumably LoadServer takes it over - confirm.
  */
 268 LoadClient* SampleEngine::new_client()
 269 {
 270         return new SampleUnit(this);
 271 }
 272
 /*
  * Factory: returns a newly allocated, empty SamplePackage.
  * NOTE(review): ownership of the returned object is not visible here;
  * presumably LoadServer takes it over - confirm.
  */
 273 LoadPackage* SampleEngine::new_package()
 274 {
 275         return new SamplePackage;
 276 }
 277
 278