1 #include "overlayframe.h"
3 /* Fully resampled scale / translate / blend ******************************/
4 /* resample into a temporary row vector, then blend */
/*
 * XSAMPLE(FN, temp_type, type, max, components, ofs, round)
 *
 * Expands to a block that, for every column i in
 * [pkg->out_col1, pkg->out_col2), fills the float buffer temp with oh
 * pixels (direct copy when k is null, kernel-weighted resample otherwise),
 * scales the first and last temp pixel by o1f / o2f, and then blends the
 * temp pixels into the output through the ALPHA3 / ALPHA4 BLEND and STORE
 * macros selected by components.
 *
 *   FN          passed through unchanged to ALPHA3_BLEND / ALPHA4_BLEND
 *   temp_type   type of opcty / trnsp and of the r, g, b, a temporaries
 *   type        pixel component type of the input and output rows
 *   max         value used to scale fade into opcty
 *   components  3 or 4; 4 enables the alpha-weighted paths
 *   ofs         offset subtracted from the second and third components
 *   round       rounding term added when computing opcty
 *
 * The expansion site must provide oh, fade, o1i, i1i, i2i, i1f, i2f, o1f,
 * o2f, k, lookup_sk, lookup_sx0, lookup_sx1, pkg, engine, vinput and
 * voutput.
 *
 * NOTE(review): temp is sized by oh * components; unless oh is a
 * compile-time constant at the expansion site this is a variable-length
 * array, which is a compiler extension in C++ - confirm this is intended.
 * NOTE(review): opcty and trnsp are not referenced in the visible part of
 * this macro; presumably the ALPHA*_BLEND macros consume them - confirm.
 */
6 #define XSAMPLE(FN, temp_type, type, max, components, ofs, round) { \
7 float temp[oh*components]; \
8 temp_type opcty = fade * max + round, trnsp = max - opcty; \
9 type **output_rows = (type**)voutput->get_rows() + o1i; \
10 type **input_rows = (type**)vinput->get_rows(); \
12 for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \
13 type *input = input_rows[i - engine->col_out1 + engine->row_in]; \
14 float *tempp = temp; \
15 if( !k ) { /* direct copy case */ \
16 type *ip = input + i1i * components; \
17 for(int j = 0; j < oh; j++) { \
19 *tempp++ = *ip++ - ofs; \
20 *tempp++ = *ip++ - ofs; \
21 if( components == 4 ) *tempp++ = *ip++; \
24 else { /* resample */ \
25 for(int j = 0; j < oh; j++) { \
26 float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \
27 int ki = lookup_sk[j], x = lookup_sx0[j]; \
28 type *ip = input + x * components; \
29 float wacc = 0, awacc = 0; \
30 while(x++ < lookup_sx1[j]) { \
31 float kv = k[abs(ki >> INDEX_FRACTION)]; \
32 /* handle fractional pixels on edges of input */ \
/* the loop test has already advanced x, so the tap being weighted here */ \
/* is x - 1: the first-edge test is (x - 1 == i1i) and the last-edge */ \
/* test is (x == i2i), matching the j == i1i / j + 1 == i2i tests that */ \
/* init_packages applies when it precomputes the weight sums */ \
33 if(x - 1 == i1i) kv *= i1f; \
34 if(x == i2i) kv *= i2f; \
/* with alpha, the colour weight is additionally scaled by the tap's alpha */ \
35 if( components == 4 ) { awacc += kv; kv *= ip[3]; } \
38 gacc += kv * (*ip++ - ofs); \
39 bacc += kv * (*ip++ - ofs); \
40 if( components == 4 ) { aacc += kv; ++ip; } \
/* normalise the accumulators by the reciprocal of the weight sums */ \
43 if(wacc > 0.) wacc = 1. / wacc; \
44 *tempp++ = racc * wacc; \
45 *tempp++ = gacc * wacc; \
46 *tempp++ = bacc * wacc; \
47 if( components == 4 ) { \
48 if(awacc > 0.) awacc = 1. / awacc; \
49 *tempp++ = aacc * awacc; \
54 /* handle fractional pixels on edges of output */ \
55 temp[0] *= o1f; temp[1] *= o1f; temp[2] *= o1f; \
56 if( components == 4 ) temp[3] *= o1f; \
57 tempp = temp + (oh-1)*components; \
58 tempp[0] *= o2f; tempp[1] *= o2f; tempp[2] *= o2f; \
59 if( components == 4 ) tempp[3] *= o2f; \
/* blend the oh temp pixels into column i of successive output rows */ \
62 for(int j = 0; j < oh; j++) { \
63 type *output = output_rows[j] + i * components; \
64 if( components == 4 ) { \
65 temp_type r, g, b, a; \
66 ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
67 ALPHA4_STORE(output, ofs, max); \
71 ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
72 ALPHA3_STORE(output, ofs, max); \
74 tempp += components; \
/*
 * XBLEND_SAMPLE(FN)
 *
 * Switches on the input frame's color model and expands XSAMPLE with the
 * matching arguments, in XSAMPLE's parameter order: temp type, pixel type,
 * max value, component count, offset (ofs) and rounding term. FN is passed
 * through unchanged. Float models use max 1.f with no rounding, 8-bit
 * models use 0xff and 16-bit models use 0xffff with a .5f rounding term;
 * the YUV models pass a non-zero ofs (0x80 / 0x8000).
 *
 * NOTE(review): no break is visible after any case in this macro.
 * Presumably the XSAMPLE expansion itself terminates the case - confirm,
 * otherwise every case falls through into the next one.
 */
80 #define XBLEND_SAMPLE(FN) { \
81 switch(vinput->get_color_model()) { \
82 case BC_RGB_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 3, 0.f, 0.f); \
83 case BC_RGBA_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 4, 0.f, 0.f); \
84 case BC_RGB888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \
85 case BC_YUV888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \
86 case BC_RGBA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \
87 case BC_YUVA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \
88 case BC_RGB161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \
89 case BC_YUV161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
90 case BC_RGBA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \
91 case BC_YUVA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
// Default constructor of SamplePackage, the work unit whose out_col1 /
// out_col2 column range is assigned in SampleEngine::init_packages().
// NOTE(review): the constructor body is not visible in this view.
97 SamplePackage::SamplePackage()
// Constructs a worker unit and remembers the SampleEngine that created it;
// process_package() reads all of its parameters through this pointer.
101 SampleUnit::SampleUnit(SampleEngine *server)
104 this->engine = server;
// Destructor of SampleUnit.
// NOTE(review): the body is not visible in this view.
107 SampleUnit::~SampleUnit()
// Processes one SamplePackage: gathers the engine's input/output ranges,
// frames, transfer mode, kernel table and per-output lookup tables into
// locals, then expands XBLEND_SAMPLE through BLEND_SWITCH, which uses those
// locals by name.
// NOTE(review): several declarations the macros rely on (for example the
// integer bounds i1i, i2i, o1i, o2i and oh) are on lines that are not
// visible in this view.
111 void SampleUnit::process_package(LoadPackage *package)
113 SamplePackage *pkg = (SamplePackage*)package;
// Input range [i1, i2) and output range [o1, o2) as set on the engine.
115 float i1 = engine->in1;
116 float i2 = engine->in2;
117 float o1 = engine->out1;
118 float o2 = engine->out2;
// Guard against an empty or inverted input or output range.
120 if(i2 - i1 <= 0 || o2 - o1 <= 0)
123 VFrame *voutput = engine->output;
124 VFrame *vinput = engine->input;
125 int mode = engine->mode;
// Evaluates to 1.f when the input color model has alpha and the mode is
// TRANSFER_REPLACE, otherwise to engine->alpha (&& binds tighter than ?:).
// NOTE(review): the left-hand side of this expression is not visible here;
// presumably it initialises the fade value used by XSAMPLE - confirm.
127 BC_CModels::has_alpha(vinput->get_color_model()) &&
128 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
130 //int iw = vinput->get_w();
// Weights applied to the first (i1f) and last (i2f) input tap by XSAMPLE.
133 float i1f = 1.f - i1 + i1i;
134 float i2f = 1.f - i2i + i2;
// Weights applied to the first (o1f) and last (o2f) resampled output pixel.
138 float o1f = 1.f - o1 + o1i;
139 float o2f = 1.f - o2i + o2;
// Kernel lookup table; XSAMPLE takes its direct-copy path when this is null.
142 float *k = engine->kernel->lookup;
143 //float kw = engine->kernel->width;
144 //int kn = engine->kernel->n;
// Per-output-position tables built by SampleEngine::init_packages().
147 int *lookup_sx0 = engine->lookup_sx0;
148 int *lookup_sx1 = engine->lookup_sx1;
149 int *lookup_sk = engine->lookup_sk;
150 //float *lookup_wacc = engine->lookup_wacc;
152 BLEND_SWITCH(XBLEND_SAMPLE);
// Constructs the engine, passing cpus to LoadServer as both of its
// constructor arguments.
// NOTE(review): the constructor body is not visible in this view;
// presumably it null-initialises the lookup_* pointers that the destructor
// and init_packages() delete - confirm.
156 SampleEngine::SampleEngine(int cpus)
157 : LoadServer(cpus, cpus)
// Releases the four lookup tables allocated in init_packages().
// The null checks are redundant (delete [] on a null pointer is a no-op)
// but harmless.
166 SampleEngine::~SampleEngine()
168 if(lookup_sx0) delete [] lookup_sx0;
169 if(lookup_sx1) delete [] lookup_sx1;
170 if(lookup_sk) delete [] lookup_sk;
171 if(lookup_wacc) delete [] lookup_wacc;
175 * unlike the Direct and NN engines, the Sample engine works across
176 * output columns (it makes for more economical memory addressing
177 * during convolution)
// Rebuilds the per-output-position lookup tables (one entry for each of the
// oh output positions) and then divides the output columns
// [col_out1, col_out2) into contiguous ranges, one per package.
// NOTE(review): several statements of this function (for example the
// declaration of i2i, the per-iteration initialisation of wacc and of the
// lookup_sx0 / lookup_sk entries) are on lines not visible in this view.
179 void SampleEngine::init_packages()
181 int iw = input->get_w();
182 int i1i = floor(in1);
// Weights of the first and last input tap.
184 float i1f = 1.f - in1 + i1i;
185 float i2f = 1.f - i2i + in2;
// Integer start, fractional start and count of the output positions.
187 int oy = floor(out1);
188 float oyf = out1 - oy;
189 int oh = ceil(out2) - oy;
191 float *k = kernel->lookup;
192 float kw = kernel->width;
195 if(in2 - in1 <= 0 || out2 - out1 <= 0)
198 /* determine kernel spatial coverage */
199 float scale = (out2 - out1) / (in2 - in1);
200 float iscale = (in2 - in1) / (out2 - out1);
201 float coverage = fabs(1.f / scale);
202 float bound = (coverage < 1.f ? kw : kw * coverage) - (.5f / TRANSFORM_SPP);
203 float coeff = (coverage < 1.f ? 1.f : scale) * TRANSFORM_SPP;
// Drop the tables from any previous call before allocating new ones.
205 delete [] lookup_sx0;
206 delete [] lookup_sx1;
208 delete [] lookup_wacc;
210 lookup_sx0 = new int[oh];
211 lookup_sx1 = new int[oh];
212 lookup_sk = new int[oh];
213 lookup_wacc = new float[oh];
// coeff scaled by 1 << INDEX_FRACTION, with .5 added before the conversion.
// NOTE(review): presumably the fixed-point kernel index step per input tap
// - confirm against the loops that advance ki.
215 kd = (double)coeff * (1 << INDEX_FRACTION) + .5;
217 /* precompute kernel values and weight sums */
218 for(int i = 0; i < oh; i++) {
219 /* map destination back to source */
220 double sx = (i - oyf + .5) * iscale + in1 - .5;
223 * clip iteration to source area but not source plane. Points
224 * outside the source plane count as transparent. Points outside
225 * the source area don't count at all. The actual convolution
226 * later will be clipped to both, but we need to compute
229 int sx0 = mmax((int)floor(sx - bound) + 1, i1i);
230 int sx1 = mmin((int)ceil(sx + bound), i2i);
231 int ki = (double)(sx0 - sx) * coeff * (1 << INDEX_FRACTION)
232 + (1 << (INDEX_FRACTION - 1)) + .5;
238 for(int j= sx0; j < sx1; j++) {
239 int kv = (ki >> INDEX_FRACTION);
243 * the contribution of the first and last input pixel (if
244 * fractional) are linearly weighted by the fraction
247 wacc += k[abs(kv)] * i1f;
248 else if(j + 1 == i2i)
249 wacc += k[abs(kv)] * i2f;
253 /* this is where we clip the kernel convolution to the source plane */
254 if(j >= 0 && j < iw) {
255 if(lookup_sx0[i] == -1) {
259 lookup_sx1[i] = j + 1;
// Store the reciprocal of the weight sum, or 0 when no weight accumulated.
264 lookup_wacc[i] = wacc > 0. ? 1. / wacc : 0.;
// Split the output columns evenly: package i covers
// [i * cols / pkgs, (i + 1) * cols / pkgs) offset by col_out1, so the ranges
// are contiguous and the last one ends at col_out2.
267 int cols = col_out2 - col_out1;
268 int pkgs = get_total_packages();
269 int col1 = col_out1, col2 = col1;
270 for(int i = 0; i < pkgs; col1=col2 ) {
271 SamplePackage *package = (SamplePackage*)get_package(i);
272 col2 = ++i * cols / pkgs + col_out1;
273 package->out_col1 = col1;
274 package->out_col2 = col2;
// Factory hook: returns a newly allocated SampleUnit bound to this engine.
// NOTE(review): ownership presumably passes to the LoadServer base - confirm.
278 LoadClient* SampleEngine::new_client()
280 return new SampleUnit(this);
// Factory hook: returns a newly allocated, default-constructed SamplePackage.
// NOTE(review): ownership presumably passes to the LoadServer base - confirm.
283 LoadPackage* SampleEngine::new_package()
285 return new SamplePackage;