Download raw body.
UPDATE: libsamplerate
Implement SSE2 lrint() and lrintf() on amd64.
Index: Makefile
===================================================================
RCS file: /cvs/ports/audio/libsamplerate/Makefile,v
retrieving revision 1.27
diff -u -p -u -p -r1.27 Makefile
--- Makefile 5 Sep 2023 16:13:38 -0000 1.27
+++ Makefile 27 Apr 2024 00:26:05 -0000
@@ -2,7 +2,7 @@ COMMENT= audio sample rate conversion li
VER= 0.2.2
DISTNAME= libsamplerate-${VER}
-REVISION= 0
+REVISION= 1
CATEGORIES= audio
EXTRACT_SUFX= .tar.xz
@@ -18,7 +18,9 @@ SITES= https://github.com/libsndfile/lib
WANTLIB= m
-CONFIGURE_STYLE=gnu
+AUTOCONF_VERSION= 2.71
+AUTOMAKE_VERSION= 1.16
+CONFIGURE_STYLE=autoreconf
CONFIGURE_ARGS= --disable-cpu-clip \
--disable-fftw \
--disable-sndfile
Index: patches/patch-configure_ac
===================================================================
RCS file: patches/patch-configure_ac
diff -N patches/patch-configure_ac
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ patches/patch-configure_ac 27 Apr 2024 00:26:05 -0000
@@ -0,0 +1,43 @@
+- Implement SSE2 lrint() and lrintf()
+ 7a81766b14fa03e97822cf1e0b1651648df13116
+- use sse2 intrinsics for lrint/lrintf only on windows x64
+ c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
+- sse2 lrint/lrintf updates
+ c164eaa25ffdeedc7d25e731172cc45a25f483d4
+
+Index: configure.ac
+--- configure.ac.orig
++++ configure.ac
+@@ -89,7 +89,7 @@ m4_define([abi_version_patch], [lt_revision])
+
+ dnl ====================================================================================
+
+-AC_CHECK_HEADERS([stdbool.h stdint.h sys/times.h unistd.h])
++AC_CHECK_HEADERS([stdbool.h stdint.h sys/times.h unistd.h immintrin.h])
+
+ dnl ====================================================================================
+ dnl Couple of initializations here. Fill in real values later.
+@@ -105,6 +105,9 @@ AC_ARG_ENABLE([werror],
+ AC_ARG_ENABLE([cpu-clip],
+ [AS_HELP_STRING([--disable-cpu-clip], [disable tricky cpu specific clipper])])
+
++AC_ARG_ENABLE([sse2-lrint],
++ [AS_HELP_STRING([--enable-sse2-lrint], [implement lrintf using SSE2 on x86 CPUs if possible])])
++
+ AC_ARG_ENABLE([sndfile],
+ [AS_HELP_STRING([--disable-sndfile], [disable support for sndfile (default=autodetect)])], [], [enable_sndfile=auto])
+
+@@ -178,6 +181,13 @@ AS_IF([test "x$enable_cpu_clip" != "xno"], [
+
+ AC_DEFINE_UNQUOTED([CPU_CLIPS_POSITIVE], [${ac_cv_c_clip_positive}], [Host processor clips on positive float to int conversion.])
+ AC_DEFINE_UNQUOTED([CPU_CLIPS_NEGATIVE], [${ac_cv_c_clip_negative}], [Host processor clips on negative float to int conversion.])
++
++dnl ====================================================================================
++dnl Determine if the user enabled lrint implementations using SSE2.
++
++AS_IF([test "x$enable_sse2_lrint" = "xyes"], [
++ CFLAGS="$CFLAGS -DENABLE_SSE2_LRINT"
++ ])
+
+ dnl ====================================================================================
+ dnl Check for libsndfile which is required for the test and example programs.
Index: patches/patch-examples_audio_out_c
===================================================================
RCS file: patches/patch-examples_audio_out_c
diff -N patches/patch-examples_audio_out_c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ patches/patch-examples_audio_out_c 27 Apr 2024 00:26:05 -0000
@@ -0,0 +1,19 @@
+- Implement SSE2 lrint() and lrintf()
+ 7a81766b14fa03e97822cf1e0b1651648df13116
+- use sse2 intrinsics for lrint/lrintf only on windows x64
+ c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
+- sse2 lrint/lrintf updates
+ c164eaa25ffdeedc7d25e731172cc45a25f483d4
+
+Index: examples/audio_out.c
+--- examples/audio_out.c.orig
++++ examples/audio_out.c
+@@ -960,7 +960,7 @@ solaris_play (get_audio_callback_t callback, AUDIO_OUT
+
+ while ((read_frames = callback (callback_data, float_buffer, BUFFER_LEN / solaris_out->channels)))
+ { for (k = 0 ; k < read_frames * solaris_out->channels ; k++)
+- buffer [k] = lrint (32767.0 * float_buffer [k]) ;
++ buffer [k] = psf_lrint (32767.0 * float_buffer [k]) ;
+ write (solaris_out->fd, buffer, read_frames * solaris_out->channels * sizeof (short)) ;
+ } ;
+
Index: patches/patch-src_common_h
===================================================================
RCS file: patches/patch-src_common_h
diff -N patches/patch-src_common_h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ patches/patch-src_common_h 27 Apr 2024 00:26:05 -0000
@@ -0,0 +1,98 @@
+- Implement SSE2 lrint() and lrintf()
+ 7a81766b14fa03e97822cf1e0b1651648df13116
+- use sse2 intrinsics for lrint/lrintf only on windows x64
+ c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
+- sse2 lrint/lrintf updates
+ c164eaa25ffdeedc7d25e731172cc45a25f483d4
+
+Index: src/common.h
+--- src/common.h.orig
++++ src/common.h
+@@ -14,6 +14,36 @@
+ #include <stdbool.h>
+ #endif
+
++#if defined(__x86_64__) || defined(_M_X64)
++# define HAVE_SSE2_INTRINSICS
++#elif defined(ENABLE_SSE2_LRINT) && (defined(_M_IX86) || defined(__i386__))
++# if defined(_MSC_VER)
++# define HAVE_SSE2_INTRINSICS
++# elif defined(__clang__)
++# ifdef __SSE2__
++# define HAVE_SSE2_INTRINSICS
++# elif (__has_attribute(target))
++# define HAVE_SSE2_INTRINSICS
++# define USE_TARGET_ATTRIBUTE
++# endif
++# elif defined(__GNUC__)
++# ifdef __SSE2__
++# define HAVE_SSE2_INTRINSICS
++# elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))
++# define HAVE_SSE2_INTRINSICS
++# define USE_TARGET_ATTRIBUTE
++# endif
++# endif
++#endif
++
++#ifdef HAVE_SSE2_INTRINSICS
++#ifdef HAVE_IMMINTRIN_H
++#include <immintrin.h>
++#else
++#include <emmintrin.h>
++#endif
++#endif /* HAVE_SSE2_INTRINSICS */
++
+ #include <math.h>
+
+ #ifdef HAVE_VISIBILITY
+@@ -163,6 +193,41 @@ const char* zoh_get_description (int src_enum) ;
+ SRC_STATE *zoh_state_new (int channels, SRC_ERROR *error) ;
+
+ /*----------------------------------------------------------
++** SIMD optimized math functions.
++*/
++
++#ifdef HAVE_SSE2_INTRINSICS
++static inline int
++#ifdef USE_TARGET_ATTRIBUTE
++__attribute__((target("sse2")))
++#endif
++psf_lrintf (float x)
++{
++ return _mm_cvtss_si32 (_mm_load_ss (&x)) ;
++}
++static inline int
++#ifdef USE_TARGET_ATTRIBUTE
++__attribute__((target("sse2")))
++#endif
++psf_lrint (double x)
++{
++ return _mm_cvtsd_si32 (_mm_load_sd (&x)) ;
++}
++
++#else
++
++static inline int psf_lrintf (float x)
++{
++ return lrintf (x) ;
++} /* psf_lrintf */
++
++static inline int psf_lrint (double x)
++{
++ return lrint (x) ;
++} /* psf_lrint */
++#endif
++
++/*----------------------------------------------------------
+ ** Common static inline functions.
+ */
+
+@@ -170,7 +235,7 @@ static inline double
+ fmod_one (double x)
+ { double res ;
+
+- res = x - lrint (x) ;
++ res = x - psf_lrint (x) ;
+ if (res < 0.0)
+ return res + 1.0 ;
+
Index: patches/patch-src_samplerate_c
===================================================================
RCS file: patches/patch-src_samplerate_c
diff -N patches/patch-src_samplerate_c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ patches/patch-src_samplerate_c 27 Apr 2024 00:26:05 -0000
@@ -0,0 +1,28 @@
+- Implement SSE2 lrint() and lrintf()
+ 7a81766b14fa03e97822cf1e0b1651648df13116
+- use sse2 intrinsics for lrint/lrintf only on windows x64
+ c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
+- sse2 lrint/lrintf updates
+ c164eaa25ffdeedc7d25e731172cc45a25f483d4
+
+Index: src/samplerate.c
+--- src/samplerate.c.orig
++++ src/samplerate.c
+@@ -445,7 +445,7 @@ src_float_to_short_array (const float *in, short *out,
+ else if (scaled_value <= -32768.f)
+ out [i] = -32768 ;
+ else
+- out [i] = (short) (lrintf (scaled_value)) ;
++ out [i] = (short) (psf_lrintf (scaled_value)) ;
+ }
+ } /* src_float_to_short_array */
+
+@@ -477,7 +477,7 @@ src_float_to_int_array (const float *in, int *out, int
+ continue ;
+ } ;
+ #endif
+- out [i] = (int) lrint (scaled_value) ;
++ out [i] = (int) psf_lrint (scaled_value) ;
+ } ;
+
+ } /* src_float_to_int_array */
Index: patches/patch-src_src_linear_c
===================================================================
RCS file: patches/patch-src_src_linear_c
diff -N patches/patch-src_src_linear_c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ patches/patch-src_src_linear_c 27 Apr 2024 00:26:05 -0000
@@ -0,0 +1,28 @@
+- Implement SSE2 lrint() and lrintf()
+ 7a81766b14fa03e97822cf1e0b1651648df13116
+- use sse2 intrinsics for lrint/lrintf only on windows x64
+ c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
+- sse2 lrint/lrintf updates
+ c164eaa25ffdeedc7d25e731172cc45a25f483d4
+
+Index: src/src_linear.c
+--- src/src_linear.c.orig
++++ src/src_linear.c
+@@ -102,7 +102,7 @@ linear_vari_process (SRC_STATE *state, SRC_DATA *data)
+ } ;
+
+ rem = fmod_one (input_index) ;
+- priv->in_used += state->channels * lrint (input_index - rem) ;
++ priv->in_used += state->channels * psf_lrint (input_index - rem) ;
+ input_index = rem ;
+
+ /* Main processing loop. */
+@@ -128,7 +128,7 @@ linear_vari_process (SRC_STATE *state, SRC_DATA *data)
+ input_index += 1.0 / src_ratio ;
+ rem = fmod_one (input_index) ;
+
+- priv->in_used += state->channels * lrint (input_index - rem) ;
++ priv->in_used += state->channels * psf_lrint (input_index - rem) ;
+ input_index = rem ;
+ } ;
+
Index: patches/patch-src_src_sinc_c
===================================================================
RCS file: patches/patch-src_src_sinc_c
diff -N patches/patch-src_src_sinc_c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ patches/patch-src_src_sinc_c 27 Apr 2024 00:26:05 -0000
@@ -0,0 +1,148 @@
+- Implement SSE2 lrint() and lrintf()
+ 7a81766b14fa03e97822cf1e0b1651648df13116
+- use sse2 intrinsics for lrint/lrintf only on windows x64
+ c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
+- sse2 lrint/lrintf updates
+ c164eaa25ffdeedc7d25e731172cc45a25f483d4
+
+Index: src/src_sinc.c
+--- src/src_sinc.c.orig
++++ src/src_sinc.c
+@@ -132,7 +132,7 @@ static SRC_STATE_VT sinc_mono_state_vt =
+
+ static inline increment_t
+ double_to_fp (double x)
+-{ return (increment_t) (lrint ((x) * FP_ONE)) ;
++{ return (increment_t) (psf_lrint ((x) * FP_ONE)) ;
+ } /* double_to_fp */
+
+ static inline increment_t
+@@ -240,7 +240,7 @@ sinc_filter_new (int converter_type, int channels)
+ #endif
+ }
+
+- priv->b_len = 3 * (int) lrint ((priv->coeff_half_len + 2.0) / priv->index_inc * SRC_MAX_RATIO + 1) ;
++ priv->b_len = 3 * (int) psf_lrint ((priv->coeff_half_len + 2.0) / priv->index_inc * SRC_MAX_RATIO + 1) ;
+ priv->b_len = MAX (priv->b_len, 4096) ;
+ priv->b_len *= channels ;
+ priv->b_len += 1 ; // There is a <= check against samples_in_hand requiring a buffer bigger than the calculation above
+@@ -458,12 +458,12 @@ sinc_mono_vari_process (SRC_STATE *state, SRC_DATA *da
+ count /= MIN (state->last_ratio, data->src_ratio) ;
+
+ /* Maximum coefficientson either side of center point. */
+- half_filter_chan_len = state->channels * (int) (lrint (count) + 1) ;
++ half_filter_chan_len = state->channels * (int) (psf_lrint (count) + 1) ;
+
+ input_index = state->last_position ;
+
+ rem = fmod_one (input_index) ;
+- filter->b_current = (filter->b_current + state->channels * lrint (input_index - rem)) % filter->b_len ;
++ filter->b_current = (filter->b_current + state->channels * psf_lrint (input_index - rem)) % filter->b_len ;
+ input_index = rem ;
+
+ terminate = 1.0 / src_ratio + 1e-20 ;
+@@ -505,7 +505,7 @@ sinc_mono_vari_process (SRC_STATE *state, SRC_DATA *da
+ input_index += 1.0 / src_ratio ;
+ rem = fmod_one (input_index) ;
+
+- filter->b_current = (filter->b_current + state->channels * lrint (input_index - rem)) % filter->b_len ;
++ filter->b_current = (filter->b_current + state->channels * psf_lrint (input_index - rem)) % filter->b_len ;
+ input_index = rem ;
+ } ;
+
+@@ -614,12 +614,12 @@ sinc_stereo_vari_process (SRC_STATE *state, SRC_DATA *
+ count /= MIN (state->last_ratio, data->src_ratio) ;
+
+ /* Maximum coefficientson either side of center point. */
+- half_filter_chan_len = state->channels * (int) (lrint (count) + 1) ;
++ half_filter_chan_len = state->channels * (int) (psf_lrint (count) + 1) ;
+
+ input_index = state->last_position ;
+
+ rem = fmod_one (input_index) ;
+- filter->b_current = (filter->b_current + state->channels * lrint (input_index - rem)) % filter->b_len ;
++ filter->b_current = (filter->b_current + state->channels * psf_lrint (input_index - rem)) % filter->b_len ;
+ input_index = rem ;
+
+ terminate = 1.0 / src_ratio + 1e-20 ;
+@@ -660,7 +660,7 @@ sinc_stereo_vari_process (SRC_STATE *state, SRC_DATA *
+ input_index += 1.0 / src_ratio ;
+ rem = fmod_one (input_index) ;
+
+- filter->b_current = (filter->b_current + state->channels * lrint (input_index - rem)) % filter->b_len ;
++ filter->b_current = (filter->b_current + state->channels * psf_lrint (input_index - rem)) % filter->b_len ;
+ input_index = rem ;
+ } ;
+
+@@ -770,12 +770,12 @@ sinc_quad_vari_process (SRC_STATE *state, SRC_DATA *da
+ count /= MIN (state->last_ratio, data->src_ratio) ;
+
+ /* Maximum coefficientson either side of center point. */
+- half_filter_chan_len = state->channels * (int) (lrint (count) + 1) ;
++ half_filter_chan_len = state->channels * (int) (psf_lrint (count) + 1) ;
+
+ input_index = state->last_position ;
+
+ rem = fmod_one (input_index) ;
+- filter->b_current = (filter->b_current + state->channels * lrint (input_index - rem)) % filter->b_len ;
++ filter->b_current = (filter->b_current + state->channels * psf_lrint (input_index - rem)) % filter->b_len ;
+ input_index = rem ;
+
+ terminate = 1.0 / src_ratio + 1e-20 ;
+@@ -816,7 +816,7 @@ sinc_quad_vari_process (SRC_STATE *state, SRC_DATA *da
+ input_index += 1.0 / src_ratio ;
+ rem = fmod_one (input_index) ;
+
+- filter->b_current = (filter->b_current + state->channels * lrint (input_index - rem)) % filter->b_len ;
++ filter->b_current = (filter->b_current + state->channels * psf_lrint (input_index - rem)) % filter->b_len ;
+ input_index = rem ;
+ } ;
+
+@@ -925,12 +925,12 @@ sinc_hex_vari_process (SRC_STATE *state, SRC_DATA *dat
+ count /= MIN (state->last_ratio, data->src_ratio) ;
+
+ /* Maximum coefficientson either side of center point. */
+- half_filter_chan_len = state->channels * (int) (lrint (count) + 1) ;
++ half_filter_chan_len = state->channels * (int) (psf_lrint (count) + 1) ;
+
+ input_index = state->last_position ;
+
+ rem = fmod_one (input_index) ;
+- filter->b_current = (filter->b_current + state->channels * lrint (input_index - rem)) % filter->b_len ;
++ filter->b_current = (filter->b_current + state->channels * psf_lrint (input_index - rem)) % filter->b_len ;
+ input_index = rem ;
+
+ terminate = 1.0 / src_ratio + 1e-20 ;
+@@ -971,7 +971,7 @@ sinc_hex_vari_process (SRC_STATE *state, SRC_DATA *dat
+ input_index += 1.0 / src_ratio ;
+ rem = fmod_one (input_index) ;
+
+- filter->b_current = (filter->b_current + state->channels * lrint (input_index - rem)) % filter->b_len ;
++ filter->b_current = (filter->b_current + state->channels * psf_lrint (input_index - rem)) % filter->b_len ;
+ input_index = rem ;
+ } ;
+
+@@ -1090,12 +1090,12 @@ sinc_multichan_vari_process (SRC_STATE *state, SRC_DAT
+ count /= MIN (state->last_ratio, data->src_ratio) ;
+
+ /* Maximum coefficientson either side of center point. */
+- half_filter_chan_len = state->channels * (int) (lrint (count) + 1) ;
++ half_filter_chan_len = state->channels * (int) (psf_lrint (count) + 1) ;
+
+ input_index = state->last_position ;
+
+ rem = fmod_one (input_index) ;
+- filter->b_current = (filter->b_current + state->channels * lrint (input_index - rem)) % filter->b_len ;
++ filter->b_current = (filter->b_current + state->channels * psf_lrint (input_index - rem)) % filter->b_len ;
+ input_index = rem ;
+
+ terminate = 1.0 / src_ratio + 1e-20 ;
+@@ -1136,7 +1136,7 @@ sinc_multichan_vari_process (SRC_STATE *state, SRC_DAT
+ input_index += 1.0 / src_ratio ;
+ rem = fmod_one (input_index) ;
+
+- filter->b_current = (filter->b_current + state->channels * lrint (input_index - rem)) % filter->b_len ;
++ filter->b_current = (filter->b_current + state->channels * psf_lrint (input_index - rem)) % filter->b_len ;
+ input_index = rem ;
+ } ;
+
Index: patches/patch-src_src_zoh_c
===================================================================
RCS file: patches/patch-src_src_zoh_c
diff -N patches/patch-src_src_zoh_c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ patches/patch-src_src_zoh_c 27 Apr 2024 00:26:05 -0000
@@ -0,0 +1,28 @@
+- Implement SSE2 lrint() and lrintf()
+ 7a81766b14fa03e97822cf1e0b1651648df13116
+- use sse2 intrinsics for lrint/lrintf only on windows x64
+ c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
+- sse2 lrint/lrintf updates
+ c164eaa25ffdeedc7d25e731172cc45a25f483d4
+
+Index: src/src_zoh.c
+--- src/src_zoh.c.orig
++++ src/src_zoh.c
+@@ -99,7 +99,7 @@ zoh_vari_process (SRC_STATE *state, SRC_DATA *data)
+ } ;
+
+ rem = fmod_one (input_index) ;
+- priv->in_used += state->channels * lrint (input_index - rem) ;
++ priv->in_used += state->channels * psf_lrint (input_index - rem) ;
+ input_index = rem ;
+
+ /* Main processing loop. */
+@@ -117,7 +117,7 @@ zoh_vari_process (SRC_STATE *state, SRC_DATA *data)
+ input_index += 1.0 / src_ratio ;
+ rem = fmod_one (input_index) ;
+
+- priv->in_used += state->channels * lrint (input_index - rem) ;
++ priv->in_used += state->channels * psf_lrint (input_index - rem) ;
+ input_index = rem ;
+ } ;
+
UPDATE: libsamplerate