1 #include "overlayframe.h"
3 /* Fully resampled scale / translate / blend ******************************/
4 /* resample into a temporary row vector, then blend */
/*
 * XSAMPLE(FN, temp_type, type, max, components, ofs, round)
 *
 * For every index i in [pkg->out_col1, pkg->out_col2) the macro fills the
 * float scratch array `temp` (oh * components entries) from one input row
 * (input_rows[i - engine->col_out1 + engine->row_in]) and blends it into the
 * output frame:
 *   - when `k` is null (the "direct copy case") values are copied starting
 *     at input pixel i1i, subtracting `ofs` from the colour components shown;
 *     a fourth component, when present, is copied unchanged;
 *   - otherwise, for each j in [0, oh), input pixels lookup_sx0[j] up to but
 *     not including lookup_sx1[j] are accumulated with the kernel weight
 *     k[abs(ki >> INDEX_FRACTION)] (ki starts at lookup_sk[j]); the weight is
 *     additionally scaled by i1f when x == i1i and by i2f when the next x
 *     equals i2i, and the sums are multiplied by lookup_wacc[j];
 *   - the first and last pixel of `temp` are scaled by o1f and o2f;
 *   - the final loop advances tempp by `components` per output row j and
 *     blends into output_rows[j] + i * components through
 *     ALPHA4_BLEND / ALPHA4_STORE when components == 4; ALPHA3_BLEND /
 *     ALPHA3_STORE are the three-component counterparts.
 *
 * Parameters:
 *   FN         - forwarded to ALPHA4_BLEND / ALPHA3_BLEND
 *   temp_type  - type of opcty, trnsp and of the r, g, b, a temporaries
 *   type       - element type the frame row pointers are cast to
 *   max        - scales `fade` into opcty (trnsp = max - opcty); also passed
 *                to the blend and store macros
 *   components - values per pixel; the visible instantiations use 3 or 4
 *   ofs        - subtracted from colour components when they are read; also
 *                passed to the blend and store macros
 *   round      - added to fade * max when computing opcty
 *
 * Names that must be in scope where the macro expands: oh, fade, voutput,
 * vinput, o1i, i1i, i2i, i1f, i2f, o1f, o2f, k, pkg, engine, lookup_sx0,
 * lookup_sx1, lookup_sk, lookup_wacc.
 *
 * NOTE(review): if `oh` is not a compile-time constant, `float temp[...]`
 * is a variable-length array, which is a compiler extension in C++ and
 * places oh * components floats on the stack - confirm this is acceptable.
 * NOTE(review): some statements this macro appears to need (accumulation
 * into racc, stepping of ki, resetting tempp before the blend loop, closing
 * braces) are not visible here - verify against the complete file.
 */
6 #define XSAMPLE(FN, temp_type, type, max, components, ofs, round) { \
7 float temp[oh*components]; \
8 temp_type opcty = fade * max + round, trnsp = max - opcty; \
9 type **output_rows = (type**)voutput->get_rows() + o1i; \
10 type **input_rows = (type**)vinput->get_rows(); \
12 for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \
13 type *input = input_rows[i - engine->col_out1 + engine->row_in]; \
14 float *tempp = temp; \
15 if( !k ) { /* direct copy case */ \
16 type *ip = input + i1i * components; \
17 for(int j = 0; j < oh; j++) { \
19 *tempp++ = *ip++ - ofs; \
20 *tempp++ = *ip++ - ofs; \
21 if( components == 4 ) *tempp++ = *ip++; \
24 else { /* resample */ \
25 for(int j = 0; j < oh; j++) { \
26 float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \
27 int ki = lookup_sk[j], x = lookup_sx0[j]; \
28 type *ip = input + x * components; \
29 while(x < lookup_sx1[j]) { \
30 float kv = k[abs(ki >> INDEX_FRACTION)]; \
31 /* handle fractional pixels on edges of input */ \
32 if(x == i1i) kv *= i1f; \
33 if(++x == i2i) kv *= i2f; \
35 gacc += kv * (*ip++ - ofs); \
36 bacc += kv * (*ip++ - ofs); \
37 if( components == 4 ) { aacc += kv * *ip++; } \
40 float wacc = lookup_wacc[j]; \
41 *tempp++ = racc * wacc; \
42 *tempp++ = gacc * wacc; \
43 *tempp++ = bacc * wacc; \
44 if( components == 4 ) { *tempp++ = aacc * wacc; } \
48 /* handle fractional pixels on edges of output */ \
49 temp[0] *= o1f; temp[1] *= o1f; temp[2] *= o1f; \
50 if( components == 4 ) temp[3] *= o1f; \
51 tempp = temp + (oh-1)*components; \
52 tempp[0] *= o2f; tempp[1] *= o2f; tempp[2] *= o2f; \
53 if( components == 4 ) tempp[3] *= o2f; \
56 for(int j = 0; j < oh; j++) { \
57 type *output = output_rows[j] + i * components; \
58 if( components == 4 ) { \
59 temp_type r, g, b, a; \
60 ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
61 ALPHA4_STORE(output, ofs, max); \
65 ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
66 ALPHA3_STORE(output, ofs, max); \
68 tempp += components; \
/*
 * XBLEND_SAMPLE(FN)
 *
 * Switches on the colour model of `vinput` and expands XSAMPLE with the
 * matching arguments (temp_type, type, max, components, ofs, round):
 *   - float models:  z_float / z_float, max 1.f, ofs 0.f, round 0.f
 *   - 8-bit models:  z_int32_t / z_uint8_t, max 0xff, round .5f
 *   - 16-bit models: z_int64_t / z_uint16_t, max 0xffff, round .5f
 * The YUV models pass a non-zero offset (0x80 for 8-bit, 0x8000 for 16-bit)
 * where the RGB models pass 0, and models with an alpha channel pass
 * components == 4 instead of 3.
 *
 * FN is forwarded unchanged to XSAMPLE.
 *
 * NOTE(review): no `break` is visible between the cases here - confirm that
 * the XSAMPLE expansion (or a line not shown) terminates each case.
 */
74 #define XBLEND_SAMPLE(FN) { \
75 switch(vinput->get_color_model()) { \
76 case BC_RGB_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 3, 0.f, 0.f); \
77 case BC_RGBA_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 4, 0.f, 0.f); \
78 case BC_RGB888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \
79 case BC_YUV888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \
80 case BC_RGBA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \
81 case BC_YUVA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \
82 case BC_RGB161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \
83 case BC_YUV161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
84 case BC_RGBA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \
85 case BC_YUVA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
/* Default constructor of SamplePackage, the package type whose out_col1 /
 * out_col2 fields are filled in by SampleEngine::init_packages(). */
91 SamplePackage::SamplePackage()
/* Constructs a processing unit and stores `server` in the `engine` member,
 * which process_package() later reads its parameters from. */
95 SampleUnit::SampleUnit(SampleEngine *server)
98 this->engine = server;
/* Destructor of SampleUnit. */
101 SampleUnit::~SampleUnit()
/*
 * Processes one package: gathers the extents, frames, kernel table and
 * lookup tables from the owning engine into locals and then expands
 * XBLEND_SAMPLE (through BLEND_SWITCH), which does the per-pixel work for
 * the range [pkg->out_col1, pkg->out_col2).
 *
 * package - treated as a SamplePackage.
 *
 * NOTE(review): XSAMPLE also reads fade, i1i, i2i, o1i and oh, whose
 * declarations are not visible in this function - verify against the
 * complete file.
 */
105 void SampleUnit::process_package(LoadPackage *package)
107 SamplePackage *pkg = (SamplePackage*)package;
/* input (i1..i2) and output (o1..o2) extents, taken from the engine */
109 float i1 = engine->in1;
110 float i2 = engine->in2;
111 float o1 = engine->out1;
112 float o2 = engine->out2;
/* true when the input or the output extent is empty or inverted */
114 if(i2 - i1 <= 0 || o2 - o1 <= 0)
117 VFrame *voutput = engine->output;
118 VFrame *vinput = engine->input;
119 int mode = engine->mode;
/* evaluates to 1.f when the input colour model has alpha and the mode is
 * TRANSFER_REPLACE, otherwise to engine->alpha.
 * NOTE(review): the left-hand side of this expression is not visible;
 * XSAMPLE reads a variable named `fade` - presumably this is its value. */
121 BC_CModels::has_alpha(vinput->get_color_model()) &&
122 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
124 //int iw = vinput->get_w();
/* weights XSAMPLE applies to the first (i1i) and last (i2i) input pixel */
127 float i1f = 1.f - i1 + i1i;
128 float i2f = 1.f - i2i + i2;
/* factors XSAMPLE applies to the first and last output pixel */
132 float o1f = 1.f - o1 + o1i;
133 float o2f = 1.f - o2i + o2;
/* kernel lookup table; XSAMPLE treats a null k as the direct copy case */
136 float *k = engine->kernel->lookup;
137 //float kw = engine->kernel->width;
138 //int kn = engine->kernel->n;
/* per-output-position tables filled by SampleEngine::init_packages() */
141 int *lookup_sx0 = engine->lookup_sx0;
142 int *lookup_sx1 = engine->lookup_sx1;
143 int *lookup_sk = engine->lookup_sk;
144 float *lookup_wacc = engine->lookup_wacc;
/* BLEND_SWITCH is defined elsewhere; presumably it chooses the FN argument
 * of XBLEND_SAMPLE from `mode` - confirm against its definition */
146 BLEND_SWITCH(XBLEND_SAMPLE);
/* Constructs the engine, passing `cpus` as both arguments of the
 * LoadServer base-class constructor. */
150 SampleEngine::SampleEngine(int cpus)
151 : LoadServer(cpus, cpus)
/* Releases the four lookup tables allocated by init_packages().
 * The null checks are harmless but not required: delete[] on a null
 * pointer does nothing. */
160 SampleEngine::~SampleEngine()
162 if(lookup_sx0) delete [] lookup_sx0;
163 if(lookup_sx1) delete [] lookup_sx1;
164 if(lookup_sk) delete [] lookup_sk;
165 if(lookup_wacc) delete [] lookup_wacc;
169 * unlike the Direct and NN engines, the Sample engine works across
170 * output columns (it makes for more economical memory addressing
171 * during convolution)
/*
 * Rebuilds the per-output-position lookup tables read by XSAMPLE
 * (lookup_sx0, lookup_sx1, lookup_sk, lookup_wacc - each allocated with
 * `oh` entries) and then gives every package a contiguous range of output
 * columns between col_out1 and col_out2.
 */
173 void SampleEngine::init_packages()
/* input width and the integer / fractional parts of the input extent */
175 int iw = input->get_w();
176 int i1i = floor(in1);
178 float i1f = 1.f - in1 + i1i;
179 float i2f = 1.f - i2i + in2;
/* first output position, its fractional offset, and the number of output
 * positions the extent out1..out2 touches */
181 int oy = floor(out1);
182 float oyf = out1 - oy;
183 int oh = ceil(out2) - oy;
185 float *k = kernel->lookup;
186 float kw = kernel->width;
/* true when the input or the output extent is empty or inverted */
189 if(in2 - in1 <= 0 || out2 - out1 <= 0)
192 /* determine kernel spatial coverage */
193 float scale = (out2 - out1) / (in2 - in1);
194 float iscale = (in2 - in1) / (out2 - out1);
195 float coverage = fabs(1.f / scale);
196 float bound = (coverage < 1.f ? kw : kw * coverage) - (.5f / TRANSFORM_SPP);
197 float coeff = (coverage < 1.f ? 1.f : scale) * TRANSFORM_SPP;
/* drop the previous tables and allocate new ones with oh entries each.
 * NOTE(review): no delete[] of lookup_sk is visible before it is reassigned
 * below - confirm against the complete file that the old array is freed. */
199 delete [] lookup_sx0;
200 delete [] lookup_sx1;
202 delete [] lookup_wacc;
204 lookup_sx0 = new int[oh];
205 lookup_sx1 = new int[oh];
206 lookup_sk = new int[oh];
207 lookup_wacc = new float[oh];
/* coeff expressed in the same 1 << INDEX_FRACTION scale as ki; presumably
 * the per-input-pixel step of the kernel index - its use is not visible */
209 kd = (double)coeff * (1 << INDEX_FRACTION) + .5;
211 /* precompute kernel values and weight sums */
212 for(int i = 0; i < oh; i++) {
213 /* map destination back to source */
214 double sx = (i - oyf + .5) * iscale + in1 - .5;
217 * clip iteration to source area but not source plane. Points
218 * outside the source plane count as transparent. Points outside
219 * the source area don't count at all. The actual convolution
220 * later will be clipped to both, but we need to compute
/* [sx0, sx1): input positions within `bound` of sx, limited to i1i..i2i */
223 int sx0 = mmax((int)floor(sx - bound) + 1, i1i);
224 int sx1 = mmin((int)ceil(sx + bound), i2i);
/* kernel index for sx0, scaled by 1 << INDEX_FRACTION; the table index is
 * recovered below with ki >> INDEX_FRACTION */
225 int ki = (double)(sx0 - sx) * coeff * (1 << INDEX_FRACTION)
226 + (1 << (INDEX_FRACTION - 1)) + .5;
232 for(int j= sx0; j < sx1; j++) {
233 int kv = (ki >> INDEX_FRACTION);
237 * the contribution of the first and last input pixel (if
238 * fractional) are linearly weighted by the fraction
240 float fk = k[abs(kv)];
241 wacc += j == i1i ? fk * i1f : j+1 == i2i ? fk * i2f : fk;
243 /* this is where we clip the kernel convolution to the source plane */
244 if(j >= 0 && j < iw) {
245 if(lookup_sx0[i] == -1) {
249 lookup_sx1[i] = j + 1;
/* store the reciprocal of the summed weight (0 when the sum is not
 * positive); XSAMPLE multiplies its accumulators by this value */
254 lookup_wacc[i] = wacc > 0. ? 1. / wacc : 0.;
/* split the output columns across the packages: package i receives
 * [col1, col2), where col2 = (i + 1) * cols / pkgs + col_out1 and each
 * range starts where the previous one ended */
257 int cols = col_out2 - col_out1;
258 int pkgs = get_total_packages();
259 int col1 = col_out1, col2 = col1;
260 for(int i = 0; i < pkgs; col1=col2 ) {
261 SamplePackage *package = (SamplePackage*)get_package(i);
262 col2 = ++i * cols / pkgs + col_out1;
263 package->out_col1 = col1;
264 package->out_col2 = col2;
/* Returns a SampleUnit, allocated with new, that refers back to this
 * engine. */
268 LoadClient* SampleEngine::new_client()
270 return new SampleUnit(this);
/* Returns a SamplePackage allocated with new. */
273 LoadPackage* SampleEngine::new_package()
275 return new SamplePackage;