View Issue Details
| ID | Project | Category | View Status | Date Submitted | Last Update |
|---|---|---|---|---|---|
| 0002005 | ardour | features | public | 2008-01-02 11:57 | 2008-01-02 11:57 |
| Reporter | timblech | Assigned To | |||
| Priority | normal | Severity | feature | Reproducibility | always |
| Status | new | Resolution | open | ||
| Summary | 0002005: helping gcc's autovectorizer | ||||
| Description | The attached patch helps gcc's autovectorizer to vectorize some functions: - use `__restrict__` pointers to avoid aliasing problems - use the STL min/max functions instead of the (double-precision) fmin/fmax. Using gcc-4.2 with the CXXFLAGS `-mfpmath=sse` and `-ftree-vectorize`, some of the code can be translated to vectorized SSE code ... | ||||
| Tags | No tags attached. | ||||
|
2008-01-02 11:57
|
0001-helping-the-gcc-autovectorizer.patch (4,397 bytes)
From 320d9f649a6f5630c6c2387d02e6902fc13ca4a1 Mon Sep 17 00:00:00 2001
From: Tim Blechmann <tim@klingt.org>
Date: Wed, 2 Jan 2008 12:31:31 +0100
Subject: [PATCH] helping the gcc autovectorizer
only enable __restrict__ attribute with gcc
Signed-off-by: Tim Blechmann <tim@klingt.org>
---
libs/ardour/ardour/utils.h | 4 ++++
libs/ardour/audio_track.cc | 2 +-
libs/ardour/io.cc | 8 +++++---
libs/ardour/mix.cc | 10 +++++-----
libs/ardour/utils.cc | 2 +-
5 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/libs/ardour/ardour/utils.h b/libs/ardour/ardour/utils.h
index 0d4ce08..b950fea 100644
--- a/libs/ardour/ardour/utils.h
+++ b/libs/ardour/ardour/utils.h
@@ -73,5 +73,9 @@ float meter_hold_to_float (ARDOUR::MeterHold);
std::string CFStringRefToStdString(CFStringRef stringRef);
#endif // HAVE_COREAUDIO
+#ifndef __GNUC__
+#define __restrict__ /* __restrict__ */
+#endif
+
#endif /* __ardour_utils_h__ */
diff --git a/libs/ardour/audio_track.cc b/libs/ardour/audio_track.cc
index c2f76f3..65ddf07 100644
--- a/libs/ardour/audio_track.cc
+++ b/libs/ardour/audio_track.cc
@@ -722,7 +722,7 @@ AudioTrack::export_stuff (vector<Sample*>& buffers, uint32_t nbufs, nframes_t st
_gain_automation_curve.get_vector (start, start + nframes, gain_automation, nframes);
for (bi = buffers.begin(); bi != buffers.end(); ++bi) {
- Sample *b = *bi;
+ Sample * __restrict__ b = *bi;
for (nframes_t n = 0; n < nframes; ++n) {
b[n] *= gain_automation[n];
}
diff --git a/libs/ardour/io.cc b/libs/ardour/io.cc
index 4f20717..1c416cf 100644
--- a/libs/ardour/io.cc
+++ b/libs/ardour/io.cc
@@ -310,8 +310,8 @@ IO::pan_automated (vector<Sample*>& bufs, uint32_t nbufs, nframes_t start, nfram
void
IO::pan (vector<Sample*>& bufs, uint32_t nbufs, nframes_t nframes, nframes_t offset, gain_t gain_coeff)
{
- Sample* dst;
- Sample* src;
+ Sample* __restrict__ dst;
+ Sample* __restrict__ src;
/* io_lock, not taken: function must be called from Session::process() calltree */
@@ -513,8 +513,10 @@ IO::deliver_output_no_pan (vector<Sample *>& bufs, uint32_t nbufs, nframes_t nfr
} else if (actual_gain == 0.0f) {
memset (dst, 0, sizeof (Sample) * nframes);
} else {
+ Sample * __restrict__ dest = dst;
+ Sample * __restrict__ source = src;
for (nframes_t x = 0; x < nframes; ++x) {
- dst[x] = src[x] * actual_gain;
+ dest[x] = source[x] * actual_gain;
}
}
diff --git a/libs/ardour/mix.cc b/libs/ardour/mix.cc
index 2d31c8c..7f8039b 100644
--- a/libs/ardour/mix.cc
+++ b/libs/ardour/mix.cc
@@ -83,7 +83,7 @@ float
compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current)
{
for (nframes_t i = 0; i < nsamples; ++i) {
- current = f_max (current, fabsf (buf[i]));
+ current = std::max (current, std::abs(buf[i]));
}
return current;
@@ -100,8 +100,8 @@ find_peaks (ARDOUR::Sample *buf, nframes_t nframes, float *min, float *max)
for (i = 0; i < nframes; i++)
{
- a = fmax (buf[i], a);
- b = fmin (buf[i], b);
+ a = std::max (buf[i], a);
+ b = std::min (buf[i], b);
}
*max = a;
@@ -116,7 +116,7 @@ apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain)
}
void
-mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain)
+mix_buffers_with_gain (ARDOUR::Sample * __restrict__ dst, ARDOUR::Sample * __restrict__ src, nframes_t nframes, float gain)
{
for (nframes_t i = 0; i < nframes; i++) {
dst[i] += src[i] * gain;
@@ -124,7 +124,7 @@ mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nfram
}
void
-mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes)
+mix_buffers_no_gain (ARDOUR::Sample * __restrict__ dst, ARDOUR::Sample * __restrict__ src, nframes_t nframes)
{
for (nframes_t i=0; i < nframes; i++) {
dst[i] += src[i];
diff --git a/libs/ardour/utils.cc b/libs/ardour/utils.cc
index e34fdd7..85c1276 100644
--- a/libs/ardour/utils.cc
+++ b/libs/ardour/utils.cc
@@ -316,7 +316,7 @@ CFStringRefToStdString(CFStringRef stringRef)
#endif // HAVE_COREAUDIO
void
-compute_equal_power_fades (nframes_t nframes, float* in, float* out)
+compute_equal_power_fades (nframes_t nframes, float* __restrict__ in, float* __restrict__ out)
{
double step;
--
1.5.3.7
|