rework overlayframe for parallel build, use -Ofast on frame xfers and plugins
author Good Guy <good1.2guy@gmail.com>
Sat, 2 Mar 2019 04:39:41 +0000 (21:39 -0700)
committer Good Guy <good1.2guy@gmail.com>
Sat, 2 Mar 2019 04:39:41 +0000 (21:39 -0700)
40 files changed:
cinelerra-5.1/cinelerra/Makefile
cinelerra-5.1/cinelerra/overlay_direct_rgb161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_direct_rgb888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_direct_rgb_float.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_direct_rgba16161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_direct_rgba8888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_direct_rgba_float.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_direct_yuv161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_direct_yuv888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_direct_yuva16161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_direct_yuva8888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_nearest_rgb161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_nearest_rgb888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_nearest_rgb_float.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_nearest_rgba16161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_nearest_rgba8888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_nearest_rgba_float.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_nearest_yuv161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_nearest_yuv888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_nearest_yuva16161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_nearest_yuva8888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_sample_rgb161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_sample_rgb888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_sample_rgb_float.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_sample_rgba16161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_sample_rgba8888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_sample_rgba_float.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_sample_yuv161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_sample_yuv888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_sample_yuva16161616.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlay_sample_yuva8888.C [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlaydirect.C
cinelerra-5.1/cinelerra/overlaydirect.h [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlayframe.h
cinelerra-5.1/cinelerra/overlaynearest.C
cinelerra-5.1/cinelerra/overlaynearest.h [new file with mode: 0644]
cinelerra-5.1/cinelerra/overlaysample.C
cinelerra-5.1/cinelerra/overlaysample.h [new file with mode: 0644]
cinelerra-5.1/guicast/xfer/Makefile
cinelerra-5.1/plugin_config

index dfdfd23..be86c3c 100644 (file)
@@ -1,7 +1,39 @@
 export TOPDIR ?= $(CURDIR)/..
 include $(TOPDIR)/global_config
 
-OBJS := \
+OVERLAYS := \
+       $(OBJDIR)/overlay_direct_rgb161616.o \
+       $(OBJDIR)/overlay_direct_rgb888.o \
+       $(OBJDIR)/overlay_direct_rgba16161616.o \
+       $(OBJDIR)/overlay_direct_rgba8888.o \
+       $(OBJDIR)/overlay_direct_rgba_float.o \
+       $(OBJDIR)/overlay_direct_rgb_float.o \
+       $(OBJDIR)/overlay_direct_yuv161616.o \
+       $(OBJDIR)/overlay_direct_yuv888.o \
+       $(OBJDIR)/overlay_direct_yuva16161616.o \
+       $(OBJDIR)/overlay_direct_yuva8888.o \
+       $(OBJDIR)/overlay_nearest_rgb161616.o \
+       $(OBJDIR)/overlay_nearest_rgb888.o \
+       $(OBJDIR)/overlay_nearest_rgba16161616.o \
+       $(OBJDIR)/overlay_nearest_rgba8888.o \
+       $(OBJDIR)/overlay_nearest_rgba_float.o \
+       $(OBJDIR)/overlay_nearest_rgb_float.o \
+       $(OBJDIR)/overlay_nearest_yuv161616.o \
+       $(OBJDIR)/overlay_nearest_yuv888.o \
+       $(OBJDIR)/overlay_nearest_yuva16161616.o \
+       $(OBJDIR)/overlay_nearest_yuva8888.o \
+       $(OBJDIR)/overlay_sample_rgb161616.o \
+       $(OBJDIR)/overlay_sample_rgb888.o \
+       $(OBJDIR)/overlay_sample_rgba16161616.o \
+       $(OBJDIR)/overlay_sample_rgba8888.o \
+       $(OBJDIR)/overlay_sample_rgba_float.o \
+       $(OBJDIR)/overlay_sample_rgb_float.o \
+       $(OBJDIR)/overlay_sample_yuv161616.o \
+       $(OBJDIR)/overlay_sample_yuv888.o \
+       $(OBJDIR)/overlay_sample_yuva16161616.o \
+       $(OBJDIR)/overlay_sample_yuva8888.o \
+
+OBJS := $(OVERLAYS) \
        $(OBJDIR)/aattachmentpoint.o \
        $(OBJDIR)/aautomation.o \
        $(OBJDIR)/aboutprefs.o \
@@ -187,8 +219,8 @@ OBJS := \
        $(OBJDIR)/mwindowmove.o \
        $(OBJDIR)/mwindow.o \
        $(OBJDIR)/new.o \
-       $(OBJDIR)/overlaydirect.o \
        $(OBJDIR)/overlayframe.o \
+       $(OBJDIR)/overlaydirect.o \
        $(OBJDIR)/overlaynearest.o \
        $(OBJDIR)/overlaysample.o \
        $(OBJDIR)/packagedispatcher.o \
@@ -564,18 +596,14 @@ clean:
 
 
 tags:
-       ctags -R -h default --langmap=c:+.inc . ../guicast/ ../libzmpeg3 ../plugins ../thirdparty/ffmpeg-* ../thirdparty/giflib-*
+       ctags -R -h default --langmap=c:+.inc . ../guicast/ ../libzmpeg3 ../plugins ../thirdparty/ffmpeg-*
 
+$(OBJDIR)/fileexr.o:   BFLAGS:= -Wno-deprecated
+$(OBJDIR)/sha1.o:      BFLAGS:= -O3
+$(OVERLAYS):           BFLAGS:= -Ofast -g0
 
 $(OBJDIR)/%.o:         %.C
-       $(CXX) `cat $(OBJDIR)/c_flags` -DMSGQUAL=$* -c $< -o $@
-
-
-$(OBJDIR)/fileexr.o:   fileexr.C
-       $(CXX) `cat $(OBJDIR)/c_flags` -Wno-deprecated -DMSGQUAL=$* -c $< -o $@
-
-$(OBJDIR)/sha1.o:      sha1.C sha1.h
-       $(CXX) `cat $(OBJDIR)/c_flags` -O3 -c $< -o $@
+       $(CXX) `cat $(OBJDIR)/c_flags` $(BFLAGS) -DMSGQUAL=$* -c $< -o $@
 
 #lv2
 ifneq ($(WANT_LV2),no)
@@ -588,7 +616,6 @@ $(OBJDIR)/pluginlv2ui.o $(OBJDIR)/lv2ui.o:
        $(CXX) `cat $(OBJDIR)/c_flags` $(GTK2_INCS) -DMSGQUAL=$* -c $< -o $@
 
 $(OBJDIR)/shuttle.o:   shuttle.C shuttle_keys.h
-       $(CXX) `cat $(OBJDIR)/c_flags` -DMSGQUAL=$* -c $< -o $@
 shuttle_keys.h: /usr/include/X11/keysymdef.h
        sed < /usr/include/X11/keysymdef.h > shuttle_keys.h -f shuttle.sed
 
diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgb161616.C b/cinelerra-5.1/cinelerra/overlay_direct_rgb161616.C
new file mode 100644 (file)
index 0000000..7d2d97e
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaydirect.h"
+// parallel build
+#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f);
+void DirectUnit::rgb161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgb888.C b/cinelerra-5.1/cinelerra/overlay_direct_rgb888.C
new file mode 100644 (file)
index 0000000..bce36c0
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaydirect.h"
+// parallel build 
+#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t,  0xff,   3, 0, .5f);
+void DirectUnit::rgb888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgb_float.C b/cinelerra-5.1/cinelerra/overlay_direct_rgb_float.C
new file mode 100644 (file)
index 0000000..9b13df3
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaydirect.h"
+// parallel build
+#define BLEND(FN) XBLEND(FN, z_float, z_float, 1.f, 3, 0, 0.f);
+void DirectUnit::rgb_float() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgba16161616.C b/cinelerra-5.1/cinelerra/overlay_direct_rgba16161616.C
new file mode 100644 (file)
index 0000000..3ee04e6
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaydirect.h"
+// parallel build 
+#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f);
+void DirectUnit::rgba16161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgba8888.C b/cinelerra-5.1/cinelerra/overlay_direct_rgba8888.C
new file mode 100644 (file)
index 0000000..cb984df
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaydirect.h"
+// parallel build
+#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t,  0xff, 4, 0, .5f);
+void DirectUnit::rgba8888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_direct_rgba_float.C b/cinelerra-5.1/cinelerra/overlay_direct_rgba_float.C
new file mode 100644 (file)
index 0000000..f0c8afc
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaydirect.h"
+// parallel build
+#define BLEND(FN) XBLEND(FN, z_float, z_float, 1.f, 4, 0, 0.f);
+void DirectUnit::rgba_float() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_direct_yuv161616.C b/cinelerra-5.1/cinelerra/overlay_direct_yuv161616.C
new file mode 100644 (file)
index 0000000..7d1673e
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaydirect.h"
+// parallel build
+#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f);
+void DirectUnit::yuv161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_direct_yuv888.C b/cinelerra-5.1/cinelerra/overlay_direct_yuv888.C
new file mode 100644 (file)
index 0000000..19b98b0
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaydirect.h"
+// parallel build
+#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f);
+void DirectUnit::yuv888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_direct_yuva16161616.C b/cinelerra-5.1/cinelerra/overlay_direct_yuva16161616.C
new file mode 100644 (file)
index 0000000..0c0c2e4
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaydirect.h"
+// parallel build
+#define BLEND(FN) XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f);
+void DirectUnit::yuva16161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_direct_yuva8888.C b/cinelerra-5.1/cinelerra/overlay_direct_yuva8888.C
new file mode 100644 (file)
index 0000000..b1e67c6
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaydirect.h"
+// parallel build
+#define BLEND(FN) XBLEND(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f);
+void DirectUnit::yuva8888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgb161616.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgb161616.C
new file mode 100644 (file)
index 0000000..27d4e04
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaynearest.h"
+// parallel build
+#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f);
+void NNUnit::rgb161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgb888.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgb888.C
new file mode 100644 (file)
index 0000000..5e651c7
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaynearest.h"
+// parallel build 
+#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t,  0xff,   3, 0, .5f);
+void NNUnit::rgb888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgb_float.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgb_float.C
new file mode 100644 (file)
index 0000000..61eb333
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaynearest.h"
+// parallel build
+#define BLEND(FN) XBLEND_3NN(FN, z_float, z_float, 1.f, 3, 0, 0.f);
+void NNUnit::rgb_float() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgba16161616.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgba16161616.C
new file mode 100644 (file)
index 0000000..fe202aa
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaynearest.h"
+// parallel build 
+#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f);
+void NNUnit::rgba16161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgba8888.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgba8888.C
new file mode 100644 (file)
index 0000000..2f87451
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaynearest.h"
+// parallel build
+#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t,  0xff, 4, 0, .5f);
+void NNUnit::rgba8888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_rgba_float.C b/cinelerra-5.1/cinelerra/overlay_nearest_rgba_float.C
new file mode 100644 (file)
index 0000000..0e12069
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaynearest.h"
+// parallel build
+#define BLEND(FN) XBLEND_3NN(FN, z_float, z_float, 1.f, 4, 0, 0.f);
+void NNUnit::rgba_float() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_yuv161616.C b/cinelerra-5.1/cinelerra/overlay_nearest_yuv161616.C
new file mode 100644 (file)
index 0000000..6e5eee9
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaynearest.h"
+// parallel build
+#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f);
+void NNUnit::yuv161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_yuv888.C b/cinelerra-5.1/cinelerra/overlay_nearest_yuv888.C
new file mode 100644 (file)
index 0000000..d5b1c96
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaynearest.h"
+// parallel build
+#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f);
+void NNUnit::yuv888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_yuva16161616.C b/cinelerra-5.1/cinelerra/overlay_nearest_yuva16161616.C
new file mode 100644 (file)
index 0000000..0ce0d8e
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaynearest.h"
+// parallel build
+#define BLEND(FN) XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f);
+void NNUnit::yuva16161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_nearest_yuva8888.C b/cinelerra-5.1/cinelerra/overlay_nearest_yuva8888.C
new file mode 100644 (file)
index 0000000..755137a
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaynearest.h"
+// parallel build
+#define BLEND(FN) XBLEND_3NN(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f);
+void NNUnit::yuva8888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgb161616.C b/cinelerra-5.1/cinelerra/overlay_sample_rgb161616.C
new file mode 100644 (file)
index 0000000..ebec458
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaysample.h"
+// parallel build
+#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0, .5f);
+void SampleUnit::rgb161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgb888.C b/cinelerra-5.1/cinelerra/overlay_sample_rgb888.C
new file mode 100644 (file)
index 0000000..a58ae5d
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaysample.h"
+// parallel build 
+#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   3, 0, .5f);
+void SampleUnit::rgb888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgb_float.C b/cinelerra-5.1/cinelerra/overlay_sample_rgb_float.C
new file mode 100644 (file)
index 0000000..5fd1713
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaysample.h"
+// parallel build
+#define BLEND(FN) XSAMPLE(FN, z_float, z_float, 1.f, 3, 0, 0.f);
+void SampleUnit::rgb_float() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgba16161616.C b/cinelerra-5.1/cinelerra/overlay_sample_rgba16161616.C
new file mode 100644 (file)
index 0000000..8956c32
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaysample.h"
+// parallel build 
+#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0, .5f);
+void SampleUnit::rgba16161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgba8888.C b/cinelerra-5.1/cinelerra/overlay_sample_rgba8888.C
new file mode 100644 (file)
index 0000000..c3e625c
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaysample.h"
+// parallel build
+#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff, 4, 0, .5f);
+void SampleUnit::rgba8888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_sample_rgba_float.C b/cinelerra-5.1/cinelerra/overlay_sample_rgba_float.C
new file mode 100644 (file)
index 0000000..84d517e
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaysample.h"
+// parallel build
+#define BLEND(FN) XSAMPLE(FN, z_float, z_float, 1.f, 4, 0, 0.f);
+void SampleUnit::rgba_float() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_sample_yuv161616.C b/cinelerra-5.1/cinelerra/overlay_sample_yuv161616.C
new file mode 100644 (file)
index 0000000..be50f71
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaysample.h"
+// parallel build
+#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f);
+void SampleUnit::yuv161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_sample_yuv888.C b/cinelerra-5.1/cinelerra/overlay_sample_yuv888.C
new file mode 100644 (file)
index 0000000..3692bd2
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaysample.h"
+// parallel build
+#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 3, 0x80, .5f);
+void SampleUnit::yuv888() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_sample_yuva16161616.C b/cinelerra-5.1/cinelerra/overlay_sample_yuva16161616.C
new file mode 100644 (file)
index 0000000..2e18120
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaysample.h"
+// parallel build
+#define BLEND(FN) XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f);
+void SampleUnit::yuva16161616() { BLEND_SWITCH(BLEND); }
+
diff --git a/cinelerra-5.1/cinelerra/overlay_sample_yuva8888.C b/cinelerra-5.1/cinelerra/overlay_sample_yuva8888.C
new file mode 100644 (file)
index 0000000..093b379
--- /dev/null
@@ -0,0 +1,5 @@
+#include "overlaysample.h"
+// parallel build
+#define BLEND(FN) XSAMPLE(FN, z_int32_t, z_uint8_t, 0xff, 4, 0x80, .5f);
+void SampleUnit::yuva8888() { BLEND_SWITCH(BLEND); }
+
index 3297fda..5324401 100644 (file)
@@ -1,49 +1,8 @@
 #include "overlayframe.h"
+#include "overlaydirect.h"
 
 /* Direct translate / blend **********************************************/
 
-#define XBLEND(FN, temp_type, type, max, components, ofs, round) { \
-       temp_type opcty = fade * max + round, trnsp = max - opcty; \
-       type** output_rows = (type**)output->get_rows(); \
-       type** input_rows = (type**)input->get_rows(); \
-       ix *= components;  ox *= components; \
- \
-       for(int i = pkg->out_row1; i < pkg->out_row2; i++) { \
-               type* in_row = input_rows[i + iy] + ix; \
-               type* output = output_rows[i] + ox; \
-               for(int j = 0; j < ow; j++) { \
-                       if( components == 4 ) { \
-                               temp_type r, g, b, a; \
-                               ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
-                               ALPHA4_STORE(output, ofs, max); \
-                       } \
-                       else { \
-                               temp_type r, g, b; \
-                               ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
-                               ALPHA3_STORE(output, ofs, max); \
-                       } \
-                       in_row += components;  output += components; \
-               } \
-       } \
-       break; \
-}
-
-#define XBLEND_ONLY(FN) { \
-       switch(input->get_color_model()) { \
-       case BC_RGB_FLOAT:      XBLEND(FN, z_float,   z_float,    1.f,    3, 0,      0.f); \
-       case BC_RGBA_FLOAT:     XBLEND(FN, z_float,   z_float,    1.f,    4, 0,      0.f); \
-       case BC_RGB888:         XBLEND(FN, z_int32_t, z_uint8_t,  0xff,   3, 0,      .5f); \
-       case BC_YUV888:         XBLEND(FN, z_int32_t, z_uint8_t,  0xff,   3, 0x80,   .5f); \
-       case BC_RGBA8888:       XBLEND(FN, z_int32_t, z_uint8_t,  0xff,   4, 0,      .5f); \
-       case BC_YUVA8888:       XBLEND(FN, z_int32_t, z_uint8_t,  0xff,   4, 0x80,   .5f); \
-       case BC_RGB161616:      XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0,      .5f); \
-       case BC_YUV161616:      XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
-       case BC_RGBA16161616:   XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0,      .5f); \
-       case BC_YUVA16161616:   XBLEND(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
-       } \
-       break; \
-}
-
 DirectPackage::DirectPackage()
 {
 }
@@ -60,21 +19,29 @@ DirectUnit::~DirectUnit()
 
 void DirectUnit::process_package(LoadPackage *package)
 {
-       DirectPackage *pkg = (DirectPackage*)package;
-
-       VFrame *output = engine->output;
-       VFrame *input = engine->input;
-       int mode = engine->mode;
-       float fade =
-               BC_CModels::has_alpha(input->get_color_model()) &&
+       pkg = (DirectPackage*)package;
+       output = engine->output;
+       input = engine->input;
+       mode = engine->mode;
+       fade = BC_CModels::has_alpha(input->get_color_model()) &&
                mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
-
-       int ix = engine->in_x1;
-       int ox = engine->out_x1;
-       int ow = engine->out_x2 - ox;
-       int iy = engine->in_y1 - engine->out_y1;
-
-       BLEND_SWITCH(XBLEND_ONLY);
+       ix = engine->in_x1;
+       ox = engine->out_x1;
+       ow = engine->out_x2 - ox;
+       iy = engine->in_y1 - engine->out_y1;
+
+       switch(input->get_color_model()) {
+       case BC_RGB_FLOAT:      rgb_float();    break;
+       case BC_RGBA_FLOAT:     rgba_float();   break;
+       case BC_RGB888:         rgb888();       break;
+       case BC_YUV888:         yuv888();       break;
+       case BC_RGBA8888:       rgba8888();     break;
+       case BC_YUVA8888:       yuva8888();     break;
+       case BC_RGB161616:      rgb161616();    break;
+       case BC_YUV161616:      yuv161616();    break;
+       case BC_RGBA16161616:   rgba16161616(); break;
+       case BC_YUVA16161616:   yuva16161616();  break;
+       }
 }
 
 DirectEngine::DirectEngine(int cpus)
diff --git a/cinelerra-5.1/cinelerra/overlaydirect.h b/cinelerra-5.1/cinelerra/overlaydirect.h
new file mode 100644 (file)
index 0000000..ebdc30f
--- /dev/null
@@ -0,0 +1,30 @@
+#ifndef __OVERLAYDIRECT_H__
+#define __OVERLAYDIRECT_H__
+#include "overlayframe.h"
+
+#define XBLEND(FN, temp_type, type, max, components, ofs, round) { \
+       temp_type opcty = fade * max + round, trnsp = max - opcty; \
+       type** output_rows = (type**)output->get_rows(); \
+       type** input_rows = (type**)input->get_rows(); \
+       ix *= components;  ox *= components; \
+ \
+       for( int i=pkg->out_row1; i<pkg->out_row2; ++i ) { \
+               type* in_row = input_rows[i + iy] + ix; \
+               type* output = output_rows[i] + ox; \
+               for( int j=ow; --j>=0; ) { \
+                       if( components == 4 ) { \
+                               temp_type r, g, b, a; \
+                               ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
+                               ALPHA4_STORE(output, ofs, max); \
+                       } \
+                       else { \
+                               temp_type r, g, b; \
+                               ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
+                               ALPHA3_STORE(output, ofs, max); \
+                       } \
+                       in_row += components;  output += components; \
+               } \
+       } \
+} break
+
+#endif
index 620f8e1..13c2e4b 100644 (file)
@@ -334,7 +334,6 @@ ZTYP(float);        ZTYP(double);
   ALPHA_STORE(out, ofs, mx); \
   out[3] = aclip(a, mx)
 
-
 #define BLEND_SWITCH(FN) \
        switch( mode ) { \
         case TRANSFER_NORMAL:          FN(NORMAL); \
@@ -420,6 +419,23 @@ public:
 
        void process_package(LoadPackage *package);
        DirectEngine *engine;
+
+       DirectPackage *pkg;
+       int ix, iy, ox, ow;
+       VFrame *output, *input;
+       int mode;
+       float fade;
+
+       void rgb_float();
+       void rgba_float();
+       void rgb888();
+       void yuv888();
+       void rgba8888();
+       void yuva8888();
+       void rgb161616();
+       void yuv161616();
+       void rgba16161616();
+       void yuva16161616();
 };
 
 class NNUnit : public LoadClient
@@ -429,8 +445,25 @@ public:
        ~NNUnit();
 
        void process_package(LoadPackage *package);
-
        NNEngine *engine;
+
+       NNPackage *pkg;
+       int ix, iy, ox, ow;
+       VFrame *output, *input;
+       int mode;
+       float fade;
+       int *ly;
+
+       void rgb_float();
+       void rgba_float();
+       void rgb888();
+       void yuv888();
+       void rgba8888();
+       void yuva8888();
+       void rgb161616();
+       void yuv161616();
+       void rgba16161616();
+       void yuva16161616();
 };
 
 class SampleUnit : public LoadClient
@@ -440,8 +473,28 @@ public:
        ~SampleUnit();
 
        void process_package(LoadPackage *package);
-
        SampleEngine *engine;
+
+       SamplePackage *pkg;
+       VFrame *voutput, *vinput;
+       int mode;
+       float fade;
+
+       int i1i, i2i, o1i, o2i, oh, kd;
+       float i1f, i2f, o1f, o2f, *k;
+       int *lookup_sx0, *lookup_sx1, *lookup_sk;
+       float *lookup_wacc;
+
+       void rgb_float();
+       void rgba_float();
+       void rgb888();
+       void yuv888();
+       void rgba8888();
+       void yuva8888();
+       void rgb161616();
+       void yuv161616();
+       void rgba16161616();
+       void yuva16161616();
 };
 
 
index e4a842d..faa7a65 100644 (file)
@@ -1,51 +1,8 @@
 #include "overlayframe.h"
+#include "overlaynearest.h"
 
 /* Nearest Neighbor scale / translate / blend ********************/
 
-#define XBLEND_3NN(FN, temp_type, type, max, components, ofs, round) { \
-       temp_type opcty = fade * max + round, trnsp = max - opcty; \
-       type** output_rows = (type**)output->get_rows(); \
-       type** input_rows = (type**)input->get_rows(); \
-       ox *= components; \
- \
-       for(int i = pkg->out_row1; i < pkg->out_row2; i++) { \
-               int *lx = engine->in_lookup_x; \
-               type* in_row = input_rows[*ly++]; \
-               type* output = output_rows[i] + ox; \
-               for(int j = 0; j < ow; j++) { \
-                       in_row += *lx++; \
-                       if( components == 4 ) { \
-                               temp_type r, g, b, a; \
-                               ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
-                               ALPHA4_STORE(output, ofs, max); \
-                       } \
-                       else { \
-                               temp_type r, g, b; \
-                               ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
-                               ALPHA3_STORE(output, ofs, max); \
-                       } \
-                       output += components; \
-               } \
-       } \
-       break; \
-}
-
-#define XBLEND_NN(FN) { \
-       switch(input->get_color_model()) { \
-       case BC_RGB_FLOAT:      XBLEND_3NN(FN, z_float,   z_float,    1.f,    3, 0,       0.f); \
-       case BC_RGBA_FLOAT:     XBLEND_3NN(FN, z_float,   z_float,    1.f,    4, 0,       0.f); \
-       case BC_RGB888:         XBLEND_3NN(FN, z_int32_t, z_uint8_t,  0xff,   3, 0,      .5f); \
-       case BC_YUV888:         XBLEND_3NN(FN, z_int32_t, z_uint8_t,  0xff,   3, 0x80,   .5f); \
-       case BC_RGBA8888:       XBLEND_3NN(FN, z_int32_t, z_uint8_t,  0xff,   4, 0,      .5f); \
-       case BC_YUVA8888:       XBLEND_3NN(FN, z_int32_t, z_uint8_t,  0xff,   4, 0x80,   .5f); \
-       case BC_RGB161616:      XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0,      .5f); \
-       case BC_YUV161616:      XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
-       case BC_RGBA16161616:   XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0,      .5f); \
-       case BC_YUVA16161616:   XBLEND_3NN(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
-       } \
-       break; \
-}
-
 NNPackage::NNPackage()
 {
 }
@@ -62,19 +19,29 @@ NNUnit::~NNUnit()
 
 void NNUnit::process_package(LoadPackage *package)
 {
-       NNPackage *pkg = (NNPackage*)package;
-       VFrame *output = engine->output;
-       VFrame *input = engine->input;
-       int mode = engine->mode;
-       float fade =
-               BC_CModels::has_alpha(input->get_color_model()) &&
+       pkg = (NNPackage*)package;
+       output = engine->output;
+       input = engine->input;
+       mode = engine->mode;
+       fade = BC_CModels::has_alpha(input->get_color_model()) &&
                mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
 
-       int ox = engine->out_x1i;
-       int ow = engine->out_x2i - ox;
-       int *ly = engine->in_lookup_y + pkg->out_row1;
+       ox = engine->out_x1i;
+       ow = engine->out_x2i - ox;
+       ly = engine->in_lookup_y + pkg->out_row1;
 
-       BLEND_SWITCH(XBLEND_NN);
+       switch(input->get_color_model()) {
+       case BC_RGB_FLOAT:      rgb_float();    break;
+       case BC_RGBA_FLOAT:     rgba_float();   break;
+       case BC_RGB888:         rgb888();       break;
+       case BC_YUV888:         yuv888();       break;
+       case BC_RGBA8888:       rgba8888();     break;
+       case BC_YUVA8888:       yuva8888();     break;
+       case BC_RGB161616:      rgb161616();    break;
+       case BC_YUV161616:      yuv161616();    break;
+       case BC_RGBA16161616:   rgba16161616(); break;
+       case BC_YUVA16161616:   yuva16161616();  break;
+       }
 }
 
 NNEngine::NNEngine(int cpus)
diff --git a/cinelerra-5.1/cinelerra/overlaynearest.h b/cinelerra-5.1/cinelerra/overlaynearest.h
new file mode 100644 (file)
index 0000000..0f7c62d
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef __OVERLAYNEAREST_H__
+#define __OVERLAYNEAREST_H__
+#include "overlayframe.h"
+
+#define XBLEND_3NN(FN, temp_type, type, max, components, ofs, round) { \
+       temp_type opcty = fade * max + round, trnsp = max - opcty; \
+       type** output_rows = (type**)output->get_rows(); \
+       type** input_rows = (type**)input->get_rows(); \
+       ox *= components; \
+ \
+       for( int i=pkg->out_row1; i<pkg->out_row2; ++i ) { \
+               int *lx = engine->in_lookup_x; \
+               type* in_row = input_rows[*ly++]; \
+               type* output = output_rows[i] + ox; \
+               for( int j=ow; --j>=0; ) { \
+                       in_row += *lx++; \
+                       if( components == 4 ) { \
+                               temp_type r, g, b, a; \
+                               ALPHA4_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
+                               ALPHA4_STORE(output, ofs, max); \
+                       } \
+                       else { \
+                               temp_type r, g, b; \
+                               ALPHA3_BLEND(FN, temp_type, in_row, output, max, ofs, ofs, round); \
+                               ALPHA3_STORE(output, ofs, max); \
+                       } \
+                       output += components; \
+               } \
+       } \
+} break
+
+#endif
index a4b485d..09d3d3a 100644 (file)
@@ -1,93 +1,9 @@
 #include "overlayframe.h"
+#include "overlaysample.h"
 
 /* Fully resampled scale / translate / blend ******************************/
 /* resample into a temporary row vector, then blend */
 
-#define XSAMPLE(FN, temp_type, type, max, components, ofs, round) { \
-       float temp[oh*components]; \
-       temp_type opcty = fade * max + round, trnsp = max - opcty; \
-       type **output_rows = (type**)voutput->get_rows() + o1i; \
-       type **input_rows = (type**)vinput->get_rows(); \
- \
-       for(int i = pkg->out_col1; i < pkg->out_col2; i++) { \
-               type *input = input_rows[i - engine->col_out1 + engine->row_in]; \
-               float *tempp = temp; \
-               if( !k ) { /* direct copy case */ \
-                       type *ip = input + i1i * components; \
-                       for(int j = 0; j < oh; j++) { \
-                               *tempp++ = *ip++; \
-                               *tempp++ = *ip++ - ofs; \
-                               *tempp++ = *ip++ - ofs; \
-                               if( components == 4 ) *tempp++ = *ip++; \
-                       } \
-               } \
-               else { /* resample */ \
-                       for(int j = 0; j < oh; j++) { \
-                               float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \
-                               int ki = lookup_sk[j], x = lookup_sx0[j]; \
-                               type *ip = input + x * components; \
-                               while(x < lookup_sx1[j]) { \
-                                       float kv = k[abs(ki >> INDEX_FRACTION)]; \
-                                       /* handle fractional pixels on edges of input */ \
-                                       if(x == i1i) kv *= i1f; \
-                                       if(++x == i2i) kv *= i2f; \
-                                       racc += kv * *ip++; \
-                                       gacc += kv * (*ip++ - ofs); \
-                                       bacc += kv * (*ip++ - ofs); \
-                                       if( components == 4 ) { aacc += kv * *ip++; } \
-                                       ki += kd; \
-                               } \
-                               float wacc = lookup_wacc[j]; \
-                               *tempp++ = racc * wacc; \
-                               *tempp++ = gacc * wacc; \
-                               *tempp++ = bacc * wacc; \
-                               if( components == 4 ) { *tempp++ = aacc * wacc; } \
-                       } \
-               } \
- \
-               /* handle fractional pixels on edges of output */ \
-               temp[0] *= o1f;   temp[1] *= o1f;   temp[2] *= o1f; \
-               if( components == 4 ) temp[3] *= o1f; \
-               tempp = temp + (oh-1)*components; \
-               tempp[0] *= o2f;  tempp[1] *= o2f;  tempp[2] *= o2f; \
-               if( components == 4 ) tempp[3] *= o2f; \
-               tempp = temp; \
-               /* blend output */ \
-               for(int j = 0; j < oh; j++) { \
-                       type *output = output_rows[j] + i * components; \
-                       if( components == 4 ) { \
-                               temp_type r, g, b, a; \
-                               ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
-                               ALPHA4_STORE(output, ofs, max); \
-                       } \
-                       else { \
-                               temp_type r, g, b; \
-                               ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
-                               ALPHA3_STORE(output, ofs, max); \
-                       } \
-                       tempp += components; \
-               } \
-       } \
-       break; \
-}
-
-#define XBLEND_SAMPLE(FN) { \
-        switch(vinput->get_color_model()) { \
-        case BC_RGB_FLOAT:      XSAMPLE(FN, z_float,   z_float,    1.f,    3, 0.f,    0.f); \
-        case BC_RGBA_FLOAT:     XSAMPLE(FN, z_float,   z_float,    1.f,    4, 0.f,    0.f); \
-        case BC_RGB888:         XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   3, 0,      .5f); \
-        case BC_YUV888:         XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   3, 0x80,   .5f); \
-        case BC_RGBA8888:       XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   4, 0,      .5f); \
-        case BC_YUVA8888:       XSAMPLE(FN, z_int32_t, z_uint8_t,  0xff,   4, 0x80,   .5f); \
-        case BC_RGB161616:      XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0,      .5f); \
-        case BC_YUV161616:      XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 3, 0x8000, .5f); \
-        case BC_RGBA16161616:   XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0,      .5f); \
-        case BC_YUVA16161616:   XSAMPLE(FN, z_int64_t, z_uint16_t, 0xffff, 4, 0x8000, .5f); \
-        } \
-        break; \
-}
-
-
 SamplePackage::SamplePackage()
 {
 }
@@ -104,7 +20,7 @@ SampleUnit::~SampleUnit()
 
 void SampleUnit::process_package(LoadPackage *package)
 {
-       SamplePackage *pkg = (SamplePackage*)package;
+       pkg = (SamplePackage*)package;
 
        float i1  = engine->in1;
        float i2  = engine->in2;
@@ -114,36 +30,46 @@ void SampleUnit::process_package(LoadPackage *package)
        if(i2 - i1 <= 0 || o2 - o1 <= 0)
                return;
 
-       VFrame *voutput = engine->output;
-       VFrame *vinput = engine->input;
-       int mode = engine->mode;
-       float fade =
-               BC_CModels::has_alpha(vinput->get_color_model()) &&
+       voutput = engine->output; // assigned without a local declaration; NOTE(review): presumably SampleUnit members now - confirm in overlayframe.h
+       vinput = engine->input;
+       mode = engine->mode;
+       fade = BC_CModels::has_alpha(vinput->get_color_model()) && // 1.0 only when the input has alpha and mode is TRANSFER_REPLACE, else engine->alpha
                 mode == TRANSFER_REPLACE ? 1.f : engine->alpha;
 
-       //int   iw  = vinput->get_w();
-       int   i1i = floor(i1);
-       int   i2i = ceil(i2);
-       float i1f = 1.f - i1 + i1i;
-       float i2f = 1.f - i2i + i2;
-
-       int   o1i = floor(o1);
-       int   o2i = ceil(o2);
-       float o1f = 1.f - o1 + o1i;
-       float o2f = 1.f - o2i + o2;
-       int   oh  = o2i - o1i;
-
-       float *k  = engine->kernel->lookup;
-       //float kw  = engine->kernel->width;
-       //int   kn  = engine->kernel->n;
-       int   kd = engine->kd;
-
-       int *lookup_sx0 = engine->lookup_sx0;
-       int *lookup_sx1 = engine->lookup_sx1;
-       int *lookup_sk = engine->lookup_sk;
-       float *lookup_wacc = engine->lookup_wacc;
-
-       BLEND_SWITCH(XBLEND_SAMPLE);
+       //iw  = vinput->get_w();
+       i1i = floor(i1); // integer bounds enclosing the input span [i1, i2]
+       i2i = ceil(i2);
+       i1f = 1.f - i1 + i1i; // fraction of the first input pixel that lies inside the span
+       i2f = 1.f - i2i + i2; // fraction of the last input pixel that lies inside the span
+
+       o1i = floor(o1); // integer bounds enclosing the output span [o1, o2]
+       o2i = ceil(o2);
+       o1f = 1.f - o1 + o1i; // fraction of the first output pixel that lies inside the span
+       o2f = 1.f - o2i + o2; // fraction of the last output pixel that lies inside the span
+       oh  = o2i - o1i; // number of output pixels between the integer bounds
+
+       k  = engine->kernel->lookup;
+       //kw  = engine->kernel->width;
+       //kn  = engine->kernel->n;
+       kd = engine->kd;
+
+       lookup_sx0 = engine->lookup_sx0;
+       lookup_sx1 = engine->lookup_sx1;
+       lookup_sk = engine->lookup_sk;
+       lookup_wacc = engine->lookup_wacc;
+
+       switch( vinput->get_color_model() ) { // one call per color model replaces BLEND_SWITCH(XBLEND_SAMPLE); models not listed fall through and do nothing
+       case BC_RGB_FLOAT:      rgb_float();    break;
+       case BC_RGBA_FLOAT:     rgba_float();   break;
+       case BC_RGB888:         rgb888();       break;
+       case BC_YUV888:         yuv888();       break;
+       case BC_RGBA8888:       rgba8888();     break;
+       case BC_YUVA8888:       yuva8888();     break;
+       case BC_RGB161616:      rgb161616();    break;
+       case BC_YUV161616:      yuv161616();    break;
+       case BC_RGBA16161616:   rgba16161616(); break;
+       case BC_YUVA16161616:   yuva16161616();  break;
+       }
 }
 
 
diff --git a/cinelerra-5.1/cinelerra/overlaysample.h b/cinelerra-5.1/cinelerra/overlaysample.h
new file mode 100644 (file)
index 0000000..1d7d44a
--- /dev/null
@@ -0,0 +1,72 @@
+#ifndef __OVERLAYSAMPLE_H__
+#define __OVERLAYSAMPLE_H__
+#include "overlayframe.h"
+/* XSAMPLE: resample into a temporary float vector, then blend it into the output; expands to a block followed by "break", so it must sit inside a switch or loop. */
+#define XSAMPLE(FN, temp_type, type, max, components, ofs, round) { /* uses pkg, engine, vinput, voutput, fade, k, kd, oh, o1i, i1i, i2i, the i/o edge fractions and lookup_* from the expansion site */ \
+       float temp[oh*components]; /* scratch holding oh pixels of float components */ \
+       temp_type opcty = fade * max + round, trnsp = max - opcty; /* opcty: fade scaled to max plus the rounding term; trnsp: its complement */ \
+       type **output_rows = (type**)voutput->get_rows() + o1i; /* entry 0 of this view is output row o1i */ \
+       type **input_rows = (type**)vinput->get_rows(); \
+ \
+       for(int i = pkg->out_col1; i < pkg->out_col2; i++) { /* each i reads one input row and writes pixel column i of the output rows */ \
+               type *input = input_rows[i - engine->col_out1 + engine->row_in]; \
+               float *tempp = temp; \
+               if( !k ) { /* direct copy case */ \
+                       type *ip = input + i1i * components; \
+                       for( int j=oh; --j>=0; ) { \
+                               *tempp++ = *ip++; \
+                               *tempp++ = *ip++ - ofs; /* ofs is removed from the 2nd and 3rd components only */ \
+                               *tempp++ = *ip++ - ofs; \
+                               if( components == 4 ) *tempp++ = *ip++; \
+                       } \
+               } \
+               else { /* resample */ \
+                       for( int j=0; j<oh; ++j ) { \
+                               float racc=0.f, gacc=0.f, bacc=0.f, aacc=0.f; \
+                               int ki = lookup_sk[j], x = lookup_sx0[j]; /* starting kernel position and first input pixel for output j */ \
+                               type *ip = input + x * components; \
+                               while(x < lookup_sx1[j]) { \
+                                       float kv = k[abs(ki >> INDEX_FRACTION)]; /* kernel weight for this input pixel */ \
+                                       /* handle fractional pixels on edges of input */ \
+                                       if(x == i1i) kv *= i1f; \
+                                       if(++x == i2i) kv *= i2f; \
+                                       racc += kv * *ip++; \
+                                       gacc += kv * (*ip++ - ofs); \
+                                       bacc += kv * (*ip++ - ofs); \
+                                       if( components == 4 ) { aacc += kv * *ip++; } \
+                                       ki += kd; /* advance the kernel position by kd per input pixel */ \
+                               } \
+                               float wacc = lookup_wacc[j]; /* per-output factor applied to the sums; NOTE(review): presumably a weight normalisation - confirm where lookup_wacc is built */ \
+                               *tempp++ = racc * wacc; \
+                               *tempp++ = gacc * wacc; \
+                               *tempp++ = bacc * wacc; \
+                               if( components == 4 ) { *tempp++ = aacc * wacc; } \
+                       } \
+               } \
+ \
+               /* handle fractional pixels on edges of output */ \
+               temp[0] *= o1f;   temp[1] *= o1f;   temp[2] *= o1f; \
+               if( components == 4 ) temp[3] *= o1f; \
+               tempp = temp + (oh-1)*components; /* last pixel in temp; the same pixel as temp[0] when oh is 1 */ \
+               tempp[0] *= o2f;  tempp[1] *= o2f;  tempp[2] *= o2f; \
+               if( components == 4 ) tempp[3] *= o2f; \
+               tempp = temp; \
+               /* blend output; temp already had ofs subtracted when it was filled above */ \
+               for( int j=0; j<oh; ++j ) { \
+                       type *output = output_rows[j] + i * components; \
+                       if( components == 4 ) { \
+                               temp_type r, g, b, a; \
+                               ALPHA4_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
+                               ALPHA4_STORE(output, ofs, max); \
+                       } \
+                       else { \
+                               temp_type r, g, b; \
+                               ALPHA3_BLEND(FN, temp_type, tempp, output, max, 0, ofs, round); \
+                               ALPHA3_STORE(output, ofs, max); \
+                       } \
+                       tempp += components; \
+               } \
+       } \
+} break
+
+#endif
index 60fda49..ff12aa4 100644 (file)
@@ -11,8 +11,10 @@ all: $(OUTPUT)
 clean:
        rm -rf $(OBJDIR) xfer*.[Ch]
 
+BFLAGS:=-Ofast
+# BFLAGS (set above) replaces the -O3 that was hard-coded in the compile rule below; note that GCC's -Ofast also turns on -ffast-math.
 $(OBJDIR)/%.o:          %.C
-       $(CXX) -I.. `cat $(OBJDIR)/c_flags` -O3 -DMSGQUAL=$* -c $< -o $@
+       $(CXX) -I.. `cat $(OBJDIR)/c_flags` $(BFLAGS) -DMSGQUAL=$* -c $< -o $@
 
 $(OUTPUT): $(patsubst %.C,$(OBJDIR)/%.o,$(wildcard *.C))
        touch $@
index 56f4351..b4f4d8a 100644 (file)
@@ -1,5 +1,5 @@
 
-CFLAGS += -I../ -I$(CINELERRA) -I$(GUICAST) -I../colors -fPIC
+CFLAGS += -I../ -I$(CINELERRA) -I$(GUICAST) -I../colors -fPIC -Ofast
 
 CFLAGS += $(static_incs)
 LFLAGS += $(static_libs)