Credit Andrew - improve in-tree documentation
[goodguy/cinelerra.git] / cinelerra / overlaysample.C
1 #include "overlayframe.h"
2
3 /* Fully resampled scale / translate / blend ******************************/
4 /* resample into a temporary row vector, then blend */
5
/*
 * XSAMPLE(FN, temp_type, type, max, components, ofs, round)
 *
 * For every output column of the current package, build a run of oh
 * resampled pixels in a float scratch vector and then blend that run
 * into the output frame, one pixel per output row.
 *
 *   FN          blend function tag, forwarded to ALPHA3_BLEND/ALPHA4_BLEND
 *   temp_type   arithmetic type for opcty/trnsp and the r,g,b,a temporaries
 *   type        storage type of one pixel component in the frames
 *   max         full-scale component value
 *   components  components per pixel; only 3 and 4 are handled
 *   ofs         value subtracted from components 1 and 2 on input
 *               (non-zero only for the YUV models in XBLEND_SAMPLE)
 *   round       rounding term added when scaling fade to max
 *
 * Names taken from the scope where the macro is expanded: oh, fade,
 * voutput, vinput, pkg, engine, k, kd, i1i, i2i, i1f, i2f, o1i, o1f,
 * o2f, lookup_sx0, lookup_sx1, lookup_sk, lookup_wacc.
 *
 * The expansion ends in `break;`, so it is meant to be the body of a
 * switch case (see XBLEND_SAMPLE).
 */
6 #define XSAMPLE(FN, temp_type, type, max, components, ofs, round) { \
7         float temp[oh*components]; /* scratch: oh pixels of `components` floats */ \
        /* opcty/trnsp are not referenced by name below; presumably consumed */ \
        /* inside ALPHAn_BLEND - TODO confirm against overlayframe.h */ \
8         temp_type opcty = fade * max + round, trnsp = max - opcty; \
9         type **output_rows = (type**)voutput->get_rows() + o1i; /* first touched output row is o1i */ \
10         type **input_rows = (type**)vinput->get_rows(); \
11  \
12         for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \
                /* the input row is selected by the output column index */ \
13                 type *input = input_rows[i - engine->col_out1 + engine->row_in]; \
14                 float *tempp = temp; \
15                 if( !k ) { /* direct copy case */ \
                        /* no kernel table: copy oh pixels starting at input pixel i1i */ \
16                         type *ip = input + i1i * components; \
17                         for(int j = 0; j < oh; j++) { \
18                                 *tempp++ = *ip++; \
19                                 *tempp++ = *ip++ - ofs; \
20                                 *tempp++ = *ip++ - ofs; \
21                                 if( components == 4 ) *tempp++ = *ip++; \
22                         } \
23                 } \
24                 else { /* resample */ \
25                         for(int j = 0; j < oh; j++) { \
                                /* one accumulator per component (0..3), whatever the color model */ \
26                                 float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \
                                /* ki: fixed-point kernel index of the first tap; x: first input pixel */ \
27                                 int ki = lookup_sk[j], x = lookup_sx0[j]; \
28                                 type *ip = input + x * components; \
                                /* taps run over input pixels [lookup_sx0[j], lookup_sx1[j]) */ \
29                                 while(x < lookup_sx1[j]) { \
                                        /* drop the fractional bits; the table is indexed by magnitude */ \
30                                         float kv = k[abs(ki >> INDEX_FRACTION)]; \
31                                         /* handle fractional pixels on edges of input */ \
32                                         if(x == i1i) kv *= i1f; \
33                                         if(++x == i2i) kv *= i2f; \
34                                         racc += kv * *ip++; \
35                                         gacc += kv * (*ip++ - ofs); \
36                                         bacc += kv * (*ip++ - ofs); \
37                                         if( components == 4 ) { aacc += kv * *ip++; } \
38                                         ki += kd; /* advance one input pixel in kernel space */ \
39                                 } \
                                /* normalization factor precomputed by SampleEngine::init_packages */ \
40                                 float wacc = lookup_wacc[j]; \
41                                 *tempp++ = racc * wacc; \
42                                 *tempp++ = gacc * wacc; \
43                                 *tempp++ = bacc * wacc; \
44                                 if( components == 4 ) { *tempp++ = aacc * wacc; } \
45                         } \
46                 } \
47  \
48                 /* handle fractional pixels on edges of output */ \
                /* first pixel is scaled by o1f, last by o2f; with oh == 1 both hit the same pixel */ \
49                 temp[0] *= o1f;   temp[1] *= o1f;   temp[2] *= o1f; \
50                 if( components == 4 ) temp[3] *= o1f; \
51                 tempp = temp + (oh-1)*components; \
52                 tempp[0] *= o2f;  tempp[1] *= o2f;  tempp[2] *= o2f; \
53                 if( components == 4 ) tempp[3] *= o2f; \
54                 tempp = temp; \
55                 /* blend output */ \
                /* scratch pixel j goes to column i of output row o1i + j */ \
56                 for(int j = 0; j < oh; j++) { \
57                         type *output = output_rows[j] + i * components; \
58                         if( components == 4 ) { \
                                /* r,g,b,a: presumably written by ALPHA4_BLEND and read by ALPHA4_STORE - TODO confirm */ \
59                                 temp_type r, g, b, a; \
60                                 ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
61                                 ALPHA4_STORE(output, ofs, max); \
62                         } \
63                         else { \
64                                 temp_type r, g, b; \
65                                 ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
66                                 ALPHA3_STORE(output, ofs, max); \
67                         } \
68                         tempp += components; \
69                 } \
70         } \
        /* leave the enclosing switch case */ \
71         break; \
72 }
73
/*
 * XBLEND_SAMPLE(FN)
 *
 * Select the XSAMPLE instantiation that matches the input frame's color
 * model.  The argument columns are:
 *   temp_type, type, max, components, ofs, round
 * Float models use max 1.0 and no rounding term; integer models use the
 * full-scale value of their storage type and a .5 rounding term; YUV
 * models pass the mid-scale value (0x80 / 0x8000) as ofs.
 *
 * Every XSAMPLE expansion ends in its own `break`, so the cases do not
 * fall through.  Color models that are not listed match no case and are
 * skipped.  The `break` after the switch applies to whatever construct
 * encloses the expansion of this macro (it is passed to BLEND_SWITCH in
 * SampleUnit::process_package).
 */
74 #define XBLEND_SAMPLE(FN) { \
75         switch(vinput->get_color_model()) { \
76         case BC_RGB_FLOAT:      XSAMPLE(FN, z_float,   z_float,    1.f,    3, 0.f,    0.f); \
77         case BC_RGBA_FLOAT:     XSAMPLE(FN, z_float,   z_float,    1.f,    4, 0.f,    0.f); \
78         case BC_RGB888:         XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   3, 0,      .5f); \
79         case BC_YUV888:         XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   3, 0x80,   .5f); \
80         case BC_RGBA8888:       XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   4, 0,      .5f); \
81         case BC_YUVA8888:       XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   4, 0x80,   .5f); \
82         case BC_RGB161616:      XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0,      .5f); \
83         case BC_YUV161616:      XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
84         case BC_RGBA16161616:   XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0,      .5f); \
85         case BC_YUVA16161616:   XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
86         } \
        /* exits the construct that encloses this macro's expansion */ \
87         break; \
88 }
89
90
91 SamplePackage::SamplePackage()
92 {
93 }
94
95 SampleUnit::SampleUnit(SampleEngine *server)
96  : LoadClient(server)
97 {
98         this->engine = server;
99 }
100
101 SampleUnit::~SampleUnit()
102 {
103 }
104
105 void SampleUnit::process_package(LoadPackage *package)
106 {
107         SamplePackage *pkg = (SamplePackage*)package;
108
109         float i1  = engine->in1;
110         float i2  = engine->in2;
111         float o1  = engine->out1;
112         float o2  = engine->out2;
113
114         if(i2 - i1 <= 0 || o2 - o1 <= 0)
115                 return;
116
117         VFrame *voutput = engine->output;
118         VFrame *vinput = engine->input;
119         int mode = engine->mode;
120         float fade =
121                 BC_CModels::has_alpha(vinput->get_color_model()) &&
122                 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
123
124         //int   iw  = vinput->get_w();
125         int   i1i = floor(i1);
126         int   i2i = ceil(i2);
127         float i1f = 1.f - i1 + i1i;
128         float i2f = 1.f - i2i + i2;
129
130         int   o1i = floor(o1);
131         int   o2i = ceil(o2);
132         float o1f = 1.f - o1 + o1i;
133         float o2f = 1.f - o2i + o2;
134         int   oh  = o2i - o1i;
135
136         float *k  = engine->kernel->lookup;
137         //float kw  = engine->kernel->width;
138         //int   kn  = engine->kernel->n;
139         int   kd = engine->kd;
140
141         int *lookup_sx0 = engine->lookup_sx0;
142         int *lookup_sx1 = engine->lookup_sx1;
143         int *lookup_sk = engine->lookup_sk;
144         float *lookup_wacc = engine->lookup_wacc;
145
146         BLEND_SWITCH(XBLEND_SAMPLE);
147 }
148
149
150 SampleEngine::SampleEngine(int cpus)
151  : LoadServer(cpus, cpus)
152 {
153         lookup_sx0 = 0;
154         lookup_sx1 = 0;
155         lookup_sk = 0;
156         lookup_wacc = 0;
157         kd = 0;
158 }
159
160 SampleEngine::~SampleEngine()
161 {
162         if(lookup_sx0) delete [] lookup_sx0;
163         if(lookup_sx1) delete [] lookup_sx1;
164         if(lookup_sk) delete [] lookup_sk;
165         if(lookup_wacc) delete [] lookup_wacc;
166 }
167
168 /*
169  * unlike the Direct and NN engines, the Sample engine works across
170  * output columns (it makes for more economical memory addressing
171  * during convolution)
172  */
173 void SampleEngine::init_packages()
174 {
175         int   iw  = input->get_w();
176         int   i1i = floor(in1);
177         int   i2i = ceil(in2);
178         float i1f = 1.f - in1 + i1i;
179         float i2f = 1.f - i2i + in2;
180
181         int   oy  = floor(out1);
182         float oyf = out1 - oy;
183         int   oh  = ceil(out2) - oy;
184
185         float *k  = kernel->lookup;
186         float kw  = kernel->width;
187         int   kn  = kernel->n;
188
189         if(in2 - in1 <= 0 || out2 - out1 <= 0)
190                 return;
191
192         /* determine kernel spatial coverage */
193         float scale = (out2 - out1) / (in2 - in1);
194         float iscale = (in2 - in1) / (out2 - out1);
195         float coverage = fabs(1.f / scale);
196         float bound = (coverage < 1.f ? kw : kw * coverage) - (.5f / TRANSFORM_SPP);
197         float coeff = (coverage < 1.f ? 1.f : scale) * TRANSFORM_SPP;
198
199         delete [] lookup_sx0;
200         delete [] lookup_sx1;
201         delete [] lookup_sk;
202         delete [] lookup_wacc;
203
204         lookup_sx0 = new int[oh];
205         lookup_sx1 = new int[oh];
206         lookup_sk = new int[oh];
207         lookup_wacc = new float[oh];
208
209         kd = (double)coeff * (1 << INDEX_FRACTION) + .5;
210
211         /* precompute kernel values and weight sums */
212         for(int i = 0; i < oh; i++) {
213                 /* map destination back to source */
214                 double sx = (i - oyf + .5) * iscale + in1 - .5;
215
216                 /*
217                  * clip iteration to source area but not source plane. Points
218                  * outside the source plane count as transparent. Points outside
219                  * the source area don't count at all.  The actual convolution
220                  * later will be clipped to both, but we need to compute
221                  * weights.
222                  */
223                 int sx0 = mmax((int)floor(sx - bound) + 1, i1i);
224                 int sx1 = mmin((int)ceil(sx + bound), i2i);
225                 int ki = (double)(sx0 - sx) * coeff * (1 << INDEX_FRACTION)
226                                 + (1 << (INDEX_FRACTION - 1)) + .5;
227                 float wacc=0.;
228
229                 lookup_sx0[i] = -1;
230                 lookup_sx1[i] = -1;
231
232                 for(int j= sx0; j < sx1; j++) {
233                         int kv = (ki >> INDEX_FRACTION);
234                         if(kv > kn) break;
235                         if(kv >= -kn) {
236                                 /*
237                                  * the contribution of the first and last input pixel (if
238                                  * fractional) are linearly weighted by the fraction
239                                  */
240                                 float fk = k[abs(kv)];
241                                 wacc += j == i1i ? fk * i1f : j+1 == i2i ? fk * i2f : fk;
242
243                                 /* this is where we clip the kernel convolution to the source plane */
244                                 if(j >= 0 && j < iw) {
245                                         if(lookup_sx0[i] == -1) {
246                                                 lookup_sx0[i] = j;
247                                                 lookup_sk[i] = ki;
248                                         }
249                                         lookup_sx1[i] = j + 1;
250                                 }
251                         }
252                         ki += kd;
253                 }
254                 lookup_wacc[i] = wacc > 0. ? 1. / wacc : 0.;
255         }
256
257         int cols = col_out2 - col_out1;
258         int pkgs = get_total_packages();
259         int col1 = col_out1, col2 = col1;
260         for(int i = 0; i < pkgs; col1=col2 ) {
261                 SamplePackage *package = (SamplePackage*)get_package(i);
262                 col2 = ++i * cols / pkgs + col_out1;
263                 package->out_col1 = col1;
264                 package->out_col2 = col2;
265         }
266 }
267
268 LoadClient* SampleEngine::new_client()
269 {
270         return new SampleUnit(this);
271 }
272
273 LoadPackage* SampleEngine::new_package()
274 {
275         return new SamplePackage;
276 }
277
278