View Issue Details

ID | Project | Category | View Status | Last Update
0002005 | ardour | features | public | 2008-01-02 11:57
Reporter: timblech | Assigned To:
Priority: normal | Severity: feature | Reproducibility: always
Status: new | Resolution: open
Summary: 0002005: helping gcc's autovectorizer
Description: The attached patch helps gcc's autovectorizer to vectorize some functions.

- use __restrict__ pointers to avoid aliasing problems
- use the STL min/max functions instead of the (double-precision) fmin/fmax

Using gcc-4.2 with the CXXFLAGS -mfpmath=sse and -ftree-vectorize, some of the code can be translated to vectorized SSE code ...
Tags: No tags attached.

Activities

2008-01-02 11:57

 

0001-helping-the-gcc-autovectorizer.patch (4,397 bytes)   
From 320d9f649a6f5630c6c2387d02e6902fc13ca4a1 Mon Sep 17 00:00:00 2001
From: Tim Blechmann <tim@klingt.org>
Date: Wed, 2 Jan 2008 12:31:31 +0100
Subject: [PATCH] helping the gcc autovectorizer
 only enable __restrict__ attribute with gcc


Signed-off-by: Tim Blechmann <tim@klingt.org>
---
 libs/ardour/ardour/utils.h |    4 ++++
 libs/ardour/audio_track.cc |    2 +-
 libs/ardour/io.cc          |    8 +++++---
 libs/ardour/mix.cc         |   10 +++++-----
 libs/ardour/utils.cc       |    2 +-
 5 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/libs/ardour/ardour/utils.h b/libs/ardour/ardour/utils.h
index 0d4ce08..b950fea 100644
--- a/libs/ardour/ardour/utils.h
+++ b/libs/ardour/ardour/utils.h
@@ -73,5 +73,9 @@ float meter_hold_to_float (ARDOUR::MeterHold);
 std::string CFStringRefToStdString(CFStringRef stringRef);
 #endif // HAVE_COREAUDIO
 
+#ifndef __GNUC__
+#define __restrict__ /* __restrict__ */
+#endif
+
 #endif /* __ardour_utils_h__ */
 
diff --git a/libs/ardour/audio_track.cc b/libs/ardour/audio_track.cc
index c2f76f3..65ddf07 100644
--- a/libs/ardour/audio_track.cc
+++ b/libs/ardour/audio_track.cc
@@ -722,7 +722,7 @@ AudioTrack::export_stuff (vector<Sample*>& buffers, uint32_t nbufs, nframes_t st
 		_gain_automation_curve.get_vector (start, start + nframes, gain_automation, nframes);
 
 		for (bi = buffers.begin(); bi != buffers.end(); ++bi) {
-			Sample *b = *bi;
+            Sample * __restrict__ b = *bi;
 			for (nframes_t n = 0; n < nframes; ++n) {
 				b[n] *= gain_automation[n];
 			}
diff --git a/libs/ardour/io.cc b/libs/ardour/io.cc
index 4f20717..1c416cf 100644
--- a/libs/ardour/io.cc
+++ b/libs/ardour/io.cc
@@ -310,8 +310,8 @@ IO::pan_automated (vector<Sample*>& bufs, uint32_t nbufs, nframes_t start, nfram
 void
 IO::pan (vector<Sample*>& bufs, uint32_t nbufs, nframes_t nframes, nframes_t offset, gain_t gain_coeff)
 {
-	Sample* dst;
-	Sample* src;
+    Sample* __restrict__ dst;
+    Sample* __restrict__ src;
 
 	/* io_lock, not taken: function must be called from Session::process() calltree */
 
@@ -513,8 +513,10 @@ IO::deliver_output_no_pan (vector<Sample *>& bufs, uint32_t nbufs, nframes_t nfr
 		} else if (actual_gain == 0.0f) {
 			memset (dst, 0, sizeof (Sample) * nframes);
 		} else {
+            Sample * __restrict__ dest = dst;
+            Sample * __restrict__ source = src;
 			for (nframes_t x = 0; x < nframes; ++x) {
-				dst[x] = src[x] * actual_gain;
+                dest[x] = source[x] * actual_gain;
 			}
 		}
 		
diff --git a/libs/ardour/mix.cc b/libs/ardour/mix.cc
index 2d31c8c..7f8039b 100644
--- a/libs/ardour/mix.cc
+++ b/libs/ardour/mix.cc
@@ -83,7 +83,7 @@ float
 compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current) 
 {
 	for (nframes_t i = 0; i < nsamples; ++i) {
-		current = f_max (current, fabsf (buf[i]));
+        current = std::max (current, std::abs(buf[i]));
 	}
 
 	return current;
@@ -100,8 +100,8 @@ find_peaks (ARDOUR::Sample *buf, nframes_t nframes, float *min, float *max)
 
 	for (i = 0; i < nframes; i++) 
 	{
-		a = fmax (buf[i], a);
-		b = fmin (buf[i], b);
+        a = std::max (buf[i], a);
+        b = std::min (buf[i], b);
 	}
 
 	*max = a;
@@ -116,7 +116,7 @@ apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain)
 }
 
 void
-mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain)
+mix_buffers_with_gain (ARDOUR::Sample * __restrict__ dst, ARDOUR::Sample * __restrict__ src, nframes_t nframes, float gain)
 {
 	for (nframes_t i = 0; i < nframes; i++) {
 		dst[i] += src[i] * gain;
@@ -124,7 +124,7 @@ mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nfram
 }
 
 void
-mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes)
+mix_buffers_no_gain (ARDOUR::Sample * __restrict__ dst, ARDOUR::Sample * __restrict__ src, nframes_t nframes)
 {
 	for (nframes_t i=0; i < nframes; i++) {
 		dst[i] += src[i];
diff --git a/libs/ardour/utils.cc b/libs/ardour/utils.cc
index e34fdd7..85c1276 100644
--- a/libs/ardour/utils.cc
+++ b/libs/ardour/utils.cc
@@ -316,7 +316,7 @@ CFStringRefToStdString(CFStringRef stringRef)
 #endif // HAVE_COREAUDIO
 
 void
-compute_equal_power_fades (nframes_t nframes, float* in, float* out)
+compute_equal_power_fades (nframes_t nframes, float* __restrict__ in, float* __restrict__ out)
 {
 	double step;
 
-- 
1.5.3.7

Issue History

Date Modified | Username | Field | Change
2008-01-02 11:57 | timblech | New Issue |
2008-01-02 11:57 | timblech | File Added: 0001-helping-the-gcc-autovectorizer.patch |