export TOPDIR ?= $(CURDIR)/..
include $(TOPDIR)/global_config
-OBJS := \
+OVERLAYS := \
+ $(OBJDIR)/overlay_direct_rgb161616.o \
+ $(OBJDIR)/overlay_direct_rgb888.o \
+ $(OBJDIR)/overlay_direct_rgba16161616.o \
+ $(OBJDIR)/overlay_direct_rgba8888.o \
+ $(OBJDIR)/overlay_direct_rgba_float.o \
+ $(OBJDIR)/overlay_direct_rgb_float.o \
+ $(OBJDIR)/overlay_direct_yuv161616.o \
+ $(OBJDIR)/overlay_direct_yuv888.o \
+ $(OBJDIR)/overlay_direct_yuva16161616.o \
+ $(OBJDIR)/overlay_direct_yuva8888.o \
+ $(OBJDIR)/overlay_nearest_rgb161616.o \
+ $(OBJDIR)/overlay_nearest_rgb888.o \
+ $(OBJDIR)/overlay_nearest_rgba16161616.o \
+ $(OBJDIR)/overlay_nearest_rgba8888.o \
+ $(OBJDIR)/overlay_nearest_rgba_float.o \
+ $(OBJDIR)/overlay_nearest_rgb_float.o \
+ $(OBJDIR)/overlay_nearest_yuv161616.o \
+ $(OBJDIR)/overlay_nearest_yuv888.o \
+ $(OBJDIR)/overlay_nearest_yuva16161616.o \
+ $(OBJDIR)/overlay_nearest_yuva8888.o \
+ $(OBJDIR)/overlay_sample_rgb161616.o \
+ $(OBJDIR)/overlay_sample_rgb888.o \
+ $(OBJDIR)/overlay_sample_rgba16161616.o \
+ $(OBJDIR)/overlay_sample_rgba8888.o \
+ $(OBJDIR)/overlay_sample_rgba_float.o \
+ $(OBJDIR)/overlay_sample_rgb_float.o \
+ $(OBJDIR)/overlay_sample_yuv161616.o \
+ $(OBJDIR)/overlay_sample_yuva8888.o \
+# End of OVERLAYS: the per-colormodel overlay kernel objects, kept in their own list so a single target-specific BFLAGS line can cover them
+OBJS := $(OVERLAYS) \
$(OBJDIR)/aattachmentpoint.o \
$(OBJDIR)/aautomation.o \
$(OBJDIR)/aboutprefs.o \
$(OBJDIR)/mwindowmove.o \
$(OBJDIR)/mwindow.o \
$(OBJDIR)/new.o \
- $(OBJDIR)/overlaydirect.o \
$(OBJDIR)/overlayframe.o \
+ $(OBJDIR)/overlaydirect.o \
$(OBJDIR)/overlaynearest.o \
$(OBJDIR)/overlaysample.o \
$(OBJDIR)/packagedispatcher.o \
tags:
- ctags -R -h default --langmap=c:+.inc . ../guicast/ ../libzmpeg3 ../plugins ../thirdparty/ffmpeg-* ../thirdparty/giflib-*
+ ctags -R -h default --langmap=c:+.inc . ../guicast/ ../libzmpeg3 ../plugins ../thirdparty/ffmpeg-*
+$(OBJDIR)/fileexr.o: BFLAGS:= -Wno-deprecated
+$(OBJDIR)/sha1.o: BFLAGS:= -O3
+$(OVERLAYS): BFLAGS:= -Ofast -g0
$(OBJDIR)/%.o: %.C
- $(CXX) `cat $(OBJDIR)/c_flags` -DMSGQUAL=$* -c $< -o $@
-
-
-$(OBJDIR)/fileexr.o: fileexr.C
- $(CXX) `cat $(OBJDIR)/c_flags` -Wno-deprecated -DMSGQUAL=$* -c $< -o $@
-
-$(OBJDIR)/sha1.o: sha1.C sha1.h
- $(CXX) `cat $(OBJDIR)/c_flags` -O3 -c $< -o $@
+ $(CXX) `cat $(OBJDIR)/c_flags` $(BFLAGS) -DMSGQUAL=$* -c $< -o $@
#lv2
ifneq ($(WANT_LV2),no)
$(CXX) `cat $(OBJDIR)/c_flags` $(GTK2_INCS) -DMSGQUAL=$* -c $< -o $@
$(OBJDIR)/shuttle.o: shuttle.C shuttle_keys.h
- $(CXX) `cat $(OBJDIR)/c_flags` -DMSGQUAL=$* -c $< -o $@
shuttle_keys.h: /usr/include/X11/keysymdef.h
sed < /usr/include/X11/keysymdef.h > shuttle_keys.h -f shuttle.sed
--- /dev/null
+#include "overlaydirect.h"
+// parallel build: only DirectUnit::rgb161616 lives in this file; XBLEND takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f);
+void DirectUnit::rgb161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaydirect.h"
+// parallel build: only DirectUnit::rgb888 lives in this file; XBLEND takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f);
+void DirectUnit::rgb888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaydirect.h"
+// parallel build: only DirectUnit::rgb_float lives in this file; XBLEND takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND(FN, z_float, z_float, 1.f, 3, 0, 0.f);
+void DirectUnit::rgb_float() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaydirect.h"
+// parallel build: only DirectUnit::rgba16161616 lives in this file; XBLEND takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f);
+void DirectUnit::rgba16161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaydirect.h"
+// parallel build: only DirectUnit::rgba8888 lives in this file; XBLEND takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f);
+void DirectUnit::rgba8888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaydirect.h"
+// parallel build: only DirectUnit::rgba_float lives in this file; XBLEND takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND(FN, z_float, z_float, 1.f, 4, 0, 0.f);
+void DirectUnit::rgba_float() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaydirect.h"
+// parallel build: only DirectUnit::yuv161616 lives in this file; XBLEND takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f);
+void DirectUnit::yuv161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaydirect.h"
+// parallel build: only DirectUnit::yuv888 lives in this file; XBLEND takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f);
+void DirectUnit::yuv888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaydirect.h"
+// parallel build: only DirectUnit::yuva16161616 lives in this file; XBLEND takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f);
+void DirectUnit::yuva16161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaydirect.h"
+// parallel build: only DirectUnit::yuva8888 lives in this file; XBLEND takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f);
+void DirectUnit::yuva8888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaynearest.h"
+// parallel build: only NNUnit::rgb161616 lives in this file; XBLEND_3NN takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f);
+void NNUnit::rgb161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaynearest.h"
+// parallel build: only NNUnit::rgb888 lives in this file; XBLEND_3NN takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f);
+void NNUnit::rgb888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaynearest.h"
+// parallel build: only NNUnit::rgb_float lives in this file; XBLEND_3NN takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND_3NN(FN, z_float, z_float, 1.f, 3, 0, 0.f);
+void NNUnit::rgb_float() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaynearest.h"
+// parallel build: only NNUnit::rgba16161616 lives in this file; XBLEND_3NN takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f);
+void NNUnit::rgba16161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaynearest.h"
+// parallel build: only NNUnit::rgba8888 lives in this file; XBLEND_3NN takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f);
+void NNUnit::rgba8888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaynearest.h"
+// parallel build: only NNUnit::rgba_float lives in this file; XBLEND_3NN takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND_3NN(FN, z_float, z_float, 1.f, 4, 0, 0.f);
+void NNUnit::rgba_float() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaynearest.h"
+// parallel build: only NNUnit::yuv161616 lives in this file; XBLEND_3NN takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f);
+void NNUnit::yuv161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaynearest.h"
+// parallel build: only NNUnit::yuv888 lives in this file; XBLEND_3NN takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f);
+void NNUnit::yuv888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaynearest.h"
+// parallel build: only NNUnit::yuva16161616 lives in this file; XBLEND_3NN takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f);
+void NNUnit::yuva16161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaynearest.h"
+// parallel build: only NNUnit::yuva8888 lives in this file; XBLEND_3NN takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f);
+void NNUnit::yuva8888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaysample.h"
+// parallel build: only SampleUnit::rgb161616 lives in this file; XSAMPLE takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f);
+void SampleUnit::rgb161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaysample.h"
+// parallel build: only SampleUnit::rgb888 lives in this file; XSAMPLE takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f);
+void SampleUnit::rgb888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaysample.h"
+// parallel build: only SampleUnit::rgb_float lives in this file; XSAMPLE takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XSAMPLE(FN, z_float, z_float, 1.f, 3, 0, 0.f);
+void SampleUnit::rgb_float() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaysample.h"
+// parallel build: only SampleUnit::rgba16161616 lives in this file; XSAMPLE takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f);
+void SampleUnit::rgba16161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaysample.h"
+// parallel build: only SampleUnit::rgba8888 lives in this file; XSAMPLE takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f);
+void SampleUnit::rgba8888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaysample.h"
+// parallel build: only SampleUnit::rgba_float lives in this file; XSAMPLE takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XSAMPLE(FN, z_float, z_float, 1.f, 4, 0, 0.f);
+void SampleUnit::rgba_float() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaysample.h"
+// parallel build: only SampleUnit::yuv161616 lives in this file; XSAMPLE takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f);
+void SampleUnit::yuv161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaysample.h"
+// parallel build: only SampleUnit::yuv888 lives in this file; XSAMPLE takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f);
+void SampleUnit::yuv888() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaysample.h"
+// parallel build: only SampleUnit::yuva16161616 lives in this file; XSAMPLE takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f);
+void SampleUnit::yuva16161616() { BLEND_SWITCH(BLEND); }
+
--- /dev/null
+#include "overlaysample.h"
+// parallel build: only SampleUnit::yuva8888 lives in this file; XSAMPLE takes (FN, temp_type, type, max, components, ofs, round)
+#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f);
+void SampleUnit::yuva8888() { BLEND_SWITCH(BLEND); }
+
#include "overlayframe.h"
+#include "overlaydirect.h"
/* Direct translate / blend **********************************************/
-#define XBLEND(FN, temp_type, type, max, components, ofs, round) { \
- temp_type opcty = fade * max + round, trnsp = max - opcty; \
- type** output_rows = (type**)output->get_rows(); \
- type** input_rows = (type**)input->get_rows(); \
- ix *= components; ox *= components; \
- \
- for(int i = pkg->out_row1; i < pkg->out_row2; i++) { \
- type* in_row = input_rows[i + iy] + ix; \
- type* output = output_rows[i] + ox; \
- for(int j = 0; j < ow; j++) { \
- if( components == 4 ) { \
- temp_type r, g, b, a; \
- ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
- ALPHA4_STORE(output, ofs, max); \
- } \
- else { \
- temp_type r, g, b; \
- ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
- ALPHA3_STORE(output, ofs, max); \
- } \
- in_row += components; output += components; \
- } \
- } \
- break; \
-}
-
-#define XBLEND_ONLY(FN) { \
- switch(input->get_color_model()) { \
- case BC_RGB_FLOAT: XBLEND(FN, z_float, z_float, 1.f, 3, 0, 0.f); \
- case BC_RGBA_FLOAT: XBLEND(FN, z_float, z_float, 1.f, 4, 0, 0.f); \
- case BC_RGB888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \
- case BC_YUV888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \
- case BC_RGBA8888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \
- case BC_YUVA8888: XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \
- case BC_RGB161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \
- case BC_YUV161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
- case BC_RGBA16161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \
- case BC_YUVA16161616: XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
- } \
- break; \
-}
-
DirectPackage::DirectPackage()
{
}
void DirectUnit::process_package(LoadPackage *package)
{
- DirectPackage *pkg = (DirectPackage*)package;
-
- VFrame *output = engine->output;
- VFrame *input = engine->input;
- int mode = engine->mode;
- float fade =
- BC_CModels::has_alpha(input->get_color_model()) &&
+ pkg = (DirectPackage*)package; // state is stored in DirectUnit members so the out-of-line colormodel kernels can read it
+ output = engine->output;
+ input = engine->input;
+ mode = engine->mode;
+ fade = BC_CModels::has_alpha(input->get_color_model()) && // groups as (has_alpha && mode == TRANSFER_REPLACE) ? 1.f : engine->alpha
 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
-
- int ix = engine->in_x1;
- int ox = engine->out_x1;
- int ow = engine->out_x2 - ox;
- int iy = engine->in_y1 - engine->out_y1;
-
- BLEND_SWITCH(XBLEND_ONLY);
+ ix = engine->in_x1; // ix and ox are reloaded on every call because XBLEND rescales them in place
+ ox = engine->out_x1;
+ ow = engine->out_x2 - ox; // output width in pixels
+ iy = engine->in_y1 - engine->out_y1; // added to an output row index to get the input row index
+ // hand off to the kernel for the input colormodel; a colormodel with no case below is silently skipped
+ switch(input->get_color_model()) {
+ case BC_RGB_FLOAT: rgb_float(); break;
+ case BC_RGBA_FLOAT: rgba_float(); break;
+ case BC_RGB888: rgb888(); break;
+ case BC_YUV888: yuv888(); break;
+ case BC_RGBA8888: rgba8888(); break;
+ case BC_YUVA8888: yuva8888(); break;
+ case BC_RGB161616: rgb161616(); break;
+ case BC_YUV161616: yuv161616(); break;
+ case BC_RGBA16161616: rgba16161616(); break;
+ case BC_YUVA16161616: yuva16161616(); break;
+ }
}
DirectEngine::DirectEngine(int cpus)
--- /dev/null
+#ifndef __OVERLAYDIRECT_H__
+#define __OVERLAYDIRECT_H__
+#include "overlayframe.h"
+/* XBLEND: body of one switch case in a DirectUnit kernel. It names fade, input, output, ix, iy, ox, ow and pkg from the enclosing scope and ends in `break` with no semicolon, which the use site supplies. */
+#define XBLEND(FN, temp_type, type, max, components, ofs, round) { \
+ temp_type opcty = fade * max + round, trnsp = max - opcty; /* not named again in this macro; presumably read by the ALPHA3_BLEND/ALPHA4_BLEND expansions - confirm */ \
+ type** output_rows = (type**)output->get_rows(); \
+ type** input_rows = (type**)input->get_rows(); \
+ ix *= components; ox *= components; /* pixel columns become element offsets; ix and ox themselves are overwritten */ \
+ \
+ for( int i=pkg->out_row1; i<pkg->out_row2; ++i ) { \
+ type* in_row = input_rows[i + iy] + ix; \
+ type* output = output_rows[i] + ox; /* shadows the outer `output` inside the row loop */ \
+ for( int j=ow; --j>=0; ) { /* ow iterations, one per output pixel */ \
+ if( components == 4 ) { \
+ temp_type r, g, b, a; \
+ ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
+ ALPHA4_STORE(output, ofs, max); \
+ } \
+ else { \
+ temp_type r, g, b; \
+ ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
+ ALPHA3_STORE(output, ofs, max); \
+ } \
+ in_row += components; output += components; /* step both cursors to the next pixel */ \
+ } \
+ } \
+} break
+
+#endif
ALPHA_STORE(out, ofs, mx); \
out[3] = aclip(a, mx)
-
#define BLEND_SWITCH(FN) \
switch( mode ) { \
case TRANSFER_NORMAL: FN(NORMAL); \
void process_package(LoadPackage *package);
 DirectEngine *engine;
+ // per-package state: written by process_package(), read by the colormodel kernels declared below
+ DirectPackage *pkg;
+ int ix, iy, ox, ow;
+ VFrame *output, *input;
+ int mode;
+ float fade;
+ // one kernel per colormodel; process_package() picks one from input->get_color_model()
+ void rgb_float();
+ void rgba_float();
+ void rgb888();
+ void yuv888();
+ void rgba8888();
+ void yuva8888();
+ void rgb161616();
+ void yuv161616();
+ void rgba16161616();
+ void yuva16161616();
};
class NNUnit : public LoadClient
~NNUnit();
void process_package(LoadPackage *package);
-
 NNEngine *engine;
+ // per-package state: written by process_package(), read by the colormodel kernels declared below
+ NNPackage *pkg;
+ int ix, iy, ox, ow; // NOTE(review): process_package() assigns only ox and ow here; ix and iy look unused on this path - confirm
+ VFrame *output, *input;
+ int mode;
+ float fade;
+ int *ly; // cursor into engine->in_lookup_y; XBLEND_3NN reads and advances it once per output row
+ // one kernel per colormodel; process_package() picks one from input->get_color_model()
+ void rgb_float();
+ void rgba_float();
+ void rgb888();
+ void yuv888();
+ void rgba8888();
+ void yuva8888();
+ void rgb161616();
+ void yuv161616();
+ void rgba16161616();
+ void yuva16161616();
};
class SampleUnit : public LoadClient
~SampleUnit();
void process_package(LoadPackage *package);
-
 SampleEngine *engine;
+ // per-package state: written by process_package(), read by the colormodel kernels declared below
+ SamplePackage *pkg;
+ VFrame *voutput, *vinput;
+ int mode;
+ float fade;
+ // integer bounds (i1i, i2i, o1i, o2i), output length oh and kernel step kd, followed by the edge weights and the kernel table k
+ int i1i, i2i, o1i, o2i, oh, kd;
+ float i1f, i2f, o1f, o2f, *k;
+ int *lookup_sx0, *lookup_sx1, *lookup_sk;
+ float *lookup_wacc;
+ // one kernel per colormodel; process_package() picks one from vinput->get_color_model()
+ void rgb_float();
+ void rgba_float();
+ void rgb888();
+ void yuv888();
+ void rgba8888();
+ void yuva8888();
+ void rgb161616();
+ void yuv161616();
+ void rgba16161616();
+ void yuva16161616();
};
#include "overlayframe.h"
+#include "overlaynearest.h"
/* Nearest Neighbor scale / translate / blend ********************/
-#define XBLEND_3NN(FN, temp_type, type, max, components, ofs, round) { \
- temp_type opcty = fade * max + round, trnsp = max - opcty; \
- type** output_rows = (type**)output->get_rows(); \
- type** input_rows = (type**)input->get_rows(); \
- ox *= components; \
- \
- for(int i = pkg->out_row1; i < pkg->out_row2; i++) { \
- int *lx = engine->in_lookup_x; \
- type* in_row = input_rows[*ly++]; \
- type* output = output_rows[i] + ox; \
- for(int j = 0; j < ow; j++) { \
- in_row += *lx++; \
- if( components == 4 ) { \
- temp_type r, g, b, a; \
- ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
- ALPHA4_STORE(output, ofs, max); \
- } \
- else { \
- temp_type r, g, b; \
- ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
- ALPHA3_STORE(output, ofs, max); \
- } \
- output += components; \
- } \
- } \
- break; \
-}
-
-#define XBLEND_NN(FN) { \
- switch(input->get_color_model()) { \
- case BC_RGB_FLOAT: XBLEND_3NN(FN, z_float, z_float, 1.f, 3, 0, 0.f); \
- case BC_RGBA_FLOAT: XBLEND_3NN(FN, z_float, z_float, 1.f, 4, 0, 0.f); \
- case BC_RGB888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \
- case BC_YUV888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \
- case BC_RGBA8888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \
- case BC_YUVA8888: XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \
- case BC_RGB161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \
- case BC_YUV161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
- case BC_RGBA16161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \
- case BC_YUVA16161616: XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
- } \
- break; \
-}
-
NNPackage::NNPackage()
{
}
void NNUnit::process_package(LoadPackage *package)
{
- NNPackage *pkg = (NNPackage*)package;
- VFrame *output = engine->output;
- VFrame *input = engine->input;
- int mode = engine->mode;
- float fade =
- BC_CModels::has_alpha(input->get_color_model()) &&
+ pkg = (NNPackage*)package; // state is stored in NNUnit members so the out-of-line colormodel kernels can read it
+ output = engine->output;
+ input = engine->input;
+ mode = engine->mode;
+ fade = BC_CModels::has_alpha(input->get_color_model()) && // groups as (has_alpha && mode == TRANSFER_REPLACE) ? 1.f : engine->alpha
 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
- int ox = engine->out_x1i;
- int ow = engine->out_x2i - ox;
- int *ly = engine->in_lookup_y + pkg->out_row1;
+ ox = engine->out_x1i; // reloaded on every call because XBLEND_3NN rescales ox in place
+ ow = engine->out_x2i - ox; // output width in pixels
+ ly = engine->in_lookup_y + pkg->out_row1; // first row-lookup entry for this package; XBLEND_3NN advances it per row
- BLEND_SWITCH(XBLEND_NN);
+ switch(input->get_color_model()) { // a colormodel with no case below is silently skipped
+ case BC_RGB_FLOAT: rgb_float(); break;
+ case BC_RGBA_FLOAT: rgba_float(); break;
+ case BC_RGB888: rgb888(); break;
+ case BC_YUV888: yuv888(); break;
+ case BC_RGBA8888: rgba8888(); break;
+ case BC_YUVA8888: yuva8888(); break;
+ case BC_RGB161616: rgb161616(); break;
+ case BC_YUV161616: yuv161616(); break;
+ case BC_RGBA16161616: rgba16161616(); break;
+ case BC_YUVA16161616: yuva16161616(); break;
+ }
}
NNEngine::NNEngine(int cpus)
--- /dev/null
+#ifndef __OVERLAYNEAREST_H__
+#define __OVERLAYNEAREST_H__
+#include "overlayframe.h"
+/* XBLEND_3NN: body of one switch case in an NNUnit kernel. It names fade, input, output, ox, ow, ly, pkg and engine from the enclosing scope and ends in `break` with no semicolon, which the use site supplies. */
+#define XBLEND_3NN(FN, temp_type, type, max, components, ofs, round) { \
+ temp_type opcty = fade * max + round, trnsp = max - opcty; /* not named again in this macro; presumably read by the ALPHA3_BLEND/ALPHA4_BLEND expansions - confirm */ \
+ type** output_rows = (type**)output->get_rows(); \
+ type** input_rows = (type**)input->get_rows(); \
+ ox *= components; /* pixel column becomes an element offset; ox itself is overwritten */ \
+ \
+ for( int i=pkg->out_row1; i<pkg->out_row2; ++i ) { \
+ int *lx = engine->in_lookup_x; /* column lookup restarts at the table head for every row */ \
+ type* in_row = input_rows[*ly++]; /* source row comes from the ly table, which is advanced here */ \
+ type* output = output_rows[i] + ox; /* shadows the outer `output` inside the row loop */ \
+ for( int j=ow; --j>=0; ) { /* ow iterations, one per output pixel */ \
+ in_row += *lx++; /* each lx entry is added to the running input pointer, so entries act as relative steps */ \
+ if( components == 4 ) { \
+ temp_type r, g, b, a; \
+ ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
+ ALPHA4_STORE(output, ofs, max); \
+ } \
+ else { \
+ temp_type r, g, b; \
+ ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
+ ALPHA3_STORE(output, ofs, max); \
+ } \
+ output += components; \
+ } \
+ } \
+} break
+
+#endif
#include "overlayframe.h"
+#include "overlaysample.h"
/* Fully resampled scale / translate / blend ******************************/
/* resample into a temporary row vector, then blend */
-#define XSAMPLE(FN, temp_type, type, max, components, ofs, round) { \
- float temp[oh*components]; \
- temp_type opcty = fade * max + round, trnsp = max - opcty; \
- type **output_rows = (type**)voutput->get_rows() + o1i; \
- type **input_rows = (type**)vinput->get_rows(); \
- \
- for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \
- type *input = input_rows[i - engine->col_out1 + engine->row_in]; \
- float *tempp = temp; \
- if( !k ) { /* direct copy case */ \
- type *ip = input + i1i * components; \
- for(int j = 0; j < oh; j++) { \
- *tempp++ = *ip++; \
- *tempp++ = *ip++ - ofs; \
- *tempp++ = *ip++ - ofs; \
- if( components == 4 ) *tempp++ = *ip++; \
- } \
- } \
- else { /* resample */ \
- for(int j = 0; j < oh; j++) { \
- float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \
- int ki = lookup_sk[j], x = lookup_sx0[j]; \
- type *ip = input + x * components; \
- while(x < lookup_sx1[j]) { \
- float kv = k[abs(ki >> INDEX_FRACTION)]; \
- /* handle fractional pixels on edges of input */ \
- if(x == i1i) kv *= i1f; \
- if(++x == i2i) kv *= i2f; \
- racc += kv * *ip++; \
- gacc += kv * (*ip++ - ofs); \
- bacc += kv * (*ip++ - ofs); \
- if( components == 4 ) { aacc += kv * *ip++; } \
- ki += kd; \
- } \
- float wacc = lookup_wacc[j]; \
- *tempp++ = racc * wacc; \
- *tempp++ = gacc * wacc; \
- *tempp++ = bacc * wacc; \
- if( components == 4 ) { *tempp++ = aacc * wacc; } \
- } \
- } \
- \
- /* handle fractional pixels on edges of output */ \
- temp[0] *= o1f; temp[1] *= o1f; temp[2] *= o1f; \
- if( components == 4 ) temp[3] *= o1f; \
- tempp = temp + (oh-1)*components; \
- tempp[0] *= o2f; tempp[1] *= o2f; tempp[2] *= o2f; \
- if( components == 4 ) tempp[3] *= o2f; \
- tempp = temp; \
- /* blend output */ \
- for(int j = 0; j < oh; j++) { \
- type *output = output_rows[j] + i * components; \
- if( components == 4 ) { \
- temp_type r, g, b, a; \
- ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
- ALPHA4_STORE(output, ofs, max); \
- } \
- else { \
- temp_type r, g, b; \
- ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
- ALPHA3_STORE(output, ofs, max); \
- } \
- tempp += components; \
- } \
- } \
- break; \
-}
-
-#define XBLEND_SAMPLE(FN) { \
- switch(vinput->get_color_model()) { \
- case BC_RGB_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 3, 0.f, 0.f); \
- case BC_RGBA_FLOAT: XSAMPLE(FN, z_float, z_float, 1.f, 4, 0.f, 0.f); \
- case BC_RGB888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0, .5f); \
- case BC_YUV888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f); \
- case BC_RGBA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0, .5f); \
- case BC_YUVA8888: XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f); \
- case BC_RGB161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f); \
- case BC_YUV161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
- case BC_RGBA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f); \
- case BC_YUVA16161616: XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
- } \
- break; \
-}
-
-
SamplePackage::SamplePackage()
{
}
void SampleUnit::process_package(LoadPackage *package)
{
- SamplePackage *pkg = (SamplePackage*)package;
+ pkg = (SamplePackage*)package; // state is stored in SampleUnit members so the out-of-line colormodel kernels can read it
 float i1 = engine->in1;
 float i2 = engine->in2;
 if(i2 - i1 <= 0 || o2 - o1 <= 0)
 return;
- VFrame *voutput = engine->output;
- VFrame *vinput = engine->input;
- int mode = engine->mode;
- float fade =
- BC_CModels::has_alpha(vinput->get_color_model()) &&
+ voutput = engine->output;
+ vinput = engine->input;
+ mode = engine->mode;
+ fade = BC_CModels::has_alpha(vinput->get_color_model()) && // groups as (has_alpha && mode == TRANSFER_REPLACE) ? 1.f : engine->alpha
 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
- //int iw = vinput->get_w();
- int i1i = floor(i1);
- int i2i = ceil(i2);
- float i1f = 1.f - i1 + i1i;
- float i2f = 1.f - i2i + i2;
-
- int o1i = floor(o1);
- int o2i = ceil(o2);
- float o1f = 1.f - o1 + o1i;
- float o2f = 1.f - o2i + o2;
- int oh = o2i - o1i;
-
- float *k = engine->kernel->lookup;
- //float kw = engine->kernel->width;
- //int kn = engine->kernel->n;
- int kd = engine->kd;
-
- int *lookup_sx0 = engine->lookup_sx0;
- int *lookup_sx1 = engine->lookup_sx1;
- int *lookup_sk = engine->lookup_sk;
- float *lookup_wacc = engine->lookup_wacc;
-
- BLEND_SWITCH(XBLEND_SAMPLE);
+ //iw = vinput->get_w();
+ i1i = floor(i1);
+ i2i = ceil(i2);
+ i1f = 1.f - i1 + i1i; // 1 minus the fractional part of i1
+ i2f = 1.f - i2i + i2; // 1 minus the gap between i2 and its ceiling
+ // same integer-bound and edge-weight split for the output span o1..o2
+ o1i = floor(o1);
+ o2i = ceil(o2);
+ o1f = 1.f - o1 + o1i;
+ o2f = 1.f - o2i + o2;
+ oh = o2i - o1i; // number of output samples XSAMPLE produces per pass
+ // kernel table and its index step; XSAMPLE takes its direct-copy branch when k is null
+ k = engine->kernel->lookup;
+ //kw = engine->kernel->width;
+ //kn = engine->kernel->n;
+ kd = engine->kd;
+ // lookup tables held by the engine; XSAMPLE indexes each of them by output sample j
+ lookup_sx0 = engine->lookup_sx0;
+ lookup_sx1 = engine->lookup_sx1;
+ lookup_sk = engine->lookup_sk;
+ lookup_wacc = engine->lookup_wacc;
+ // hand off to the kernel for the input colormodel; a colormodel with no case below is silently skipped
+ switch( vinput->get_color_model() ) {
+ case BC_RGB_FLOAT: rgb_float(); break;
+ case BC_RGBA_FLOAT: rgba_float(); break;
+ case BC_RGB888: rgb888(); break;
+ case BC_YUV888: yuv888(); break;
+ case BC_RGBA8888: rgba8888(); break;
+ case BC_YUVA8888: yuva8888(); break;
+ case BC_RGB161616: rgb161616(); break;
+ case BC_YUV161616: yuv161616(); break;
+ case BC_RGBA16161616: rgba16161616(); break;
+ case BC_YUVA16161616: yuva16161616(); break;
+ }
}
--- /dev/null
+#ifndef __OVERLAYSAMPLE_H__
+#define __OVERLAYSAMPLE_H__
+#include "overlayframe.h"
+/* XSAMPLE: body of one switch case in a SampleUnit kernel. For each i in pkg->out_col1..out_col2 it reads one input row into temp[] (plain copy when k is null, kernel-weighted sums otherwise) and blends temp[] into column i of voutput. Ends in `break` with no semicolon, which the use site supplies. */
+#define XSAMPLE(FN, temp_type, type, max, components, ofs, round) { \
+ float temp[oh*components]; /* length depends on the run-time value of oh */ \
+ temp_type opcty = fade * max + round, trnsp = max - opcty; /* not named again in this macro; presumably read by the ALPHA3_BLEND/ALPHA4_BLEND expansions - confirm */ \
+ type **output_rows = (type**)voutput->get_rows() + o1i; /* row 0 of this view is output row o1i */ \
+ type **input_rows = (type**)vinput->get_rows(); \
+ \
+ for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \
+ type *input = input_rows[i - engine->col_out1 + engine->row_in]; \
+ float *tempp = temp; \
+ if( !k ) { /* direct copy case */ \
+ type *ip = input + i1i * components; \
+ for( int j=oh; --j>=0; ) { /* oh iterations */ \
+ *tempp++ = *ip++; \
+ *tempp++ = *ip++ - ofs; /* ofs is removed from the 2nd and 3rd channels only */ \
+ *tempp++ = *ip++ - ofs; \
+ if( components == 4 ) *tempp++ = *ip++; \
+ } \
+ } \
+ else { /* resample */ \
+ for( int j=0; j<oh; ++j ) { \
+ float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \
+ int ki = lookup_sk[j], x = lookup_sx0[j]; \
+ type *ip = input + x * components; \
+ while(x < lookup_sx1[j]) { /* accumulate input pixels lookup_sx0[j] up to, not including, lookup_sx1[j] */ \
+ float kv = k[abs(ki >> INDEX_FRACTION)]; \
+ /* handle fractional pixels on edges of input */ \
+ if(x == i1i) kv *= i1f; \
+ if(++x == i2i) kv *= i2f; /* x is incremented here, before the channels are read through ip */ \
+ racc += kv * *ip++; \
+ gacc += kv * (*ip++ - ofs); \
+ bacc += kv * (*ip++ - ofs); \
+ if( components == 4 ) { aacc += kv * *ip++; } \
+ ki += kd; \
+ } \
+ float wacc = lookup_wacc[j]; /* per-sample factor applied to every accumulated channel */ \
+ *tempp++ = racc * wacc; \
+ *tempp++ = gacc * wacc; \
+ *tempp++ = bacc * wacc; \
+ if( components == 4 ) { *tempp++ = aacc * wacc; } \
+ } \
+ } \
+ \
+ /* handle fractional pixels on edges of output */ \
+ temp[0] *= o1f; temp[1] *= o1f; temp[2] *= o1f; \
+ if( components == 4 ) temp[3] *= o1f; \
+ tempp = temp + (oh-1)*components; /* last sample in temp[] */ \
+ tempp[0] *= o2f; tempp[1] *= o2f; tempp[2] *= o2f; \
+ if( components == 4 ) tempp[3] *= o2f; \
+ tempp = temp; \
+ /* blend output */ \
+ for( int j=0; j<oh; ++j ) { \
+ type *output = output_rows[j] + i * components; /* sample j goes to output row o1i+j, column i */ \
+ if( components == 4 ) { \
+ temp_type r, g, b, a; \
+ ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
+ ALPHA4_STORE(output, ofs, max); \
+ } \
+ else { \
+ temp_type r, g, b; \
+ ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
+ ALPHA3_STORE(output, ofs, max); \
+ } \
+ tempp += components; \
+ } \
+ } \
+} break
+
+#endif
clean:
rm -rf $(OBJDIR) xfer*.[Ch]
+BFLAGS:=-Ofast
+# BFLAGS above is handed to every compile done by the pattern rule below; under GCC, -Ofast also turns on -ffast-math
 $(OBJDIR)/%.o: %.C
- $(CXX) -I.. `cat $(OBJDIR)/c_flags` -O3 -DMSGQUAL=$* -c $< -o $@
+ $(CXX) -I.. `cat $(OBJDIR)/c_flags` $(BFLAGS) -DMSGQUAL=$* -c $< -o $@
$(OUTPUT): $(patsubst %.C,$(OBJDIR)/%.o,$(wildcard *.C))
touch $@
-CFLAGS += -I../ -I$(CINELERRA) -I$(GUICAST) -I../colors -fPIC
+CFLAGS += -I../ -I$(CINELERRA) -I$(GUICAST) -I../colors -fPIC -Ofast
CFLAGS += $(static_incs)
LFLAGS += $(static_libs)