Commit 6a1cf64f authored by DrKLO's avatar DrKLO

Update to 3.18.0

parent 64e8ec3f

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

[submodule "TMessagesProj/jni/libtgvoip"]
path = TMessagesProj/jni/libtgvoip
url = https://github.com/grishka/libtgvoip
...@@ -9,19 +9,22 @@ configurations { ...@@ -9,19 +9,22 @@ configurations {
} }
dependencies { dependencies {
compile 'com.google.android.gms:play-services-gcm:9.6.1' compile 'com.google.android.gms:play-services-gcm:10.2.0'
compile 'com.google.android.gms:play-services-maps:9.6.1' compile 'com.google.android.gms:play-services-maps:10.2.0'
compile 'com.google.android.gms:play-services-vision:9.6.1' compile 'com.google.android.gms:play-services-vision:10.2.0'
compile 'com.android.support:support-core-ui:24.2.1' compile 'com.android.support:support-core-ui:25.3.0'
compile 'com.android.support:support-compat:24.2.1' compile 'com.android.support:support-compat:25.3.0'
compile 'com.android.support:support-core-utils:24.2.1' compile 'com.android.support:support-core-utils:25.3.0'
compile 'net.hockeyapp.android:HockeySDK:4.0.1' compile 'com.android.support:support-v13:25.3.0'
compile 'com.android.support:palette-v7:25.3.0'
compile 'net.hockeyapp.android:HockeySDK:4.1.2'
compile 'com.googlecode.mp4parser:isoparser:1.0.6' compile 'com.googlecode.mp4parser:isoparser:1.0.6'
compile 'com.stripe:stripe-android:2.0.2'
} }
android { android {
compileSdkVersion 24 compileSdkVersion 25
buildToolsVersion '24.0.2' buildToolsVersion '25.0.2'
useLibrary 'org.apache.http.legacy' useLibrary 'org.apache.http.legacy'
defaultConfig.applicationId = "org.telegram.messenger" defaultConfig.applicationId = "org.telegram.messenger"
...@@ -34,6 +37,10 @@ android { ...@@ -34,6 +37,10 @@ android {
} }
} }
dexOptions {
jumboMode = true
}
compileOptions { compileOptions {
sourceCompatibility JavaVersion.VERSION_1_7 sourceCompatibility JavaVersion.VERSION_1_7
targetCompatibility JavaVersion.VERSION_1_7 targetCompatibility JavaVersion.VERSION_1_7
...@@ -70,6 +77,7 @@ android { ...@@ -70,6 +77,7 @@ android {
jniDebuggable false jniDebuggable false
signingConfig signingConfigs.release signingConfig signingConfigs.release
minifyEnabled false minifyEnabled false
shrinkResources false
proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
} }
...@@ -80,7 +88,7 @@ android { ...@@ -80,7 +88,7 @@ android {
} }
} }
defaultConfig.versionCode = 851 defaultConfig.versionCode = 957
sourceSets.debug { sourceSets.debug {
manifest.srcFile 'config/debug/AndroidManifest.xml' manifest.srcFile 'config/debug/AndroidManifest.xml'
...@@ -107,20 +115,52 @@ android { ...@@ -107,20 +115,52 @@ android {
} }
versionCode = 1 versionCode = 1
} }
fat { x86_SDK23 {
ndk {
abiFilter "x86"
}
sourceSets.debug {
manifest.srcFile 'config/debug/AndroidManifest_SDK23.xml'
}
sourceSets.release {
manifest.srcFile 'config/release/AndroidManifest_SDK23.xml'
}
minSdkVersion 23
versionCode = 4
}
armv7_SDK23 {
ndk {
abiFilter "armeabi-v7a"
}
sourceSets.debug {
manifest.srcFile 'config/debug/AndroidManifest_SDK23.xml'
}
sourceSets.release {
manifest.srcFile 'config/release/AndroidManifest_SDK23.xml'
}
minSdkVersion 23
versionCode = 3 versionCode = 3
} }
fat {
sourceSets.debug {
manifest.srcFile 'config/debug/AndroidManifest_SDK23.xml'
}
sourceSets.release {
manifest.srcFile 'config/release/AndroidManifest_SDK23.xml'
}
versionCode = 5
}
} }
applicationVariants.all { variant -> applicationVariants.all { variant ->
def abiVersion = variant.productFlavors.get(0).versionCode def abiVersion = variant.productFlavors.get(0).versionCode
variant.mergedFlavor.versionCode = defaultConfig.versionCode * 10 + abiVersion; variant.mergedFlavor.versionCode = defaultConfig.versionCode * 10 + abiVersion
} }
defaultConfig { defaultConfig {
minSdkVersion 14 minSdkVersion 14
targetSdkVersion 24 targetSdkVersion 25
versionName "3.13.1" versionName "3.18.0"
externalNativeBuild { externalNativeBuild {
ndkBuild { ndkBuild {
......
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools"
package="org.telegram.messenger"
android:installLocation="auto">
<uses-feature android:name="android.hardware.location.gps" android:required="false" />
<uses-feature android:name="android.hardware.location.network" android:required="false" />
<uses-feature android:name="android.hardware.location" android:required="false" />
<uses-feature android:name="android.hardware.LOCATION" android:required="false" />
<uses-permission android:name="com.google.android.c2dm.permission.RECEIVE" />
<uses-permission android:name="org.telegram.messenger.permission.MAPS_RECEIVE"/>
<uses-permission android:name="org.telegram.messenger.permission.C2D_MESSAGE" />
<uses-permission android:name="com.google.android.providers.gsf.permission.READ_GSERVICES"/>
<uses-permission android:name="android.permission.ACCESS_COARSE_LOCATION" />
<uses-permission android:name="android.permission.ACCESS_FINE_LOCATION" />
<uses-permission android:name="android.permission.CALL_PHONE" />
<uses-permission android:name="android.permission.READ_CALL_LOG" />
<uses-permission android:name="android.permission.WRITE_CALL_LOG" />
<permission android:name="org.telegram.messenger.permission.MAPS_RECEIVE" android:protectionLevel="signature"/>
<permission android:name="org.telegram.messenger.permission.C2D_MESSAGE" android:protectionLevel="signature" />
<application
android:allowBackup="false"
android:icon="@drawable/ic_launcher"
android:label="@string/AppNameBeta"
android:theme="@style/Theme.TMessages.Start"
android:name=".ApplicationLoader"
android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true">
<meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" />
<activity android:name="net.hockeyapp.android.UpdateActivity" />
<receiver
android:name="com.google.android.gms.gcm.GcmReceiver"
android:exported="true"
android:permission="com.google.android.c2dm.permission.SEND" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
<action android:name="com.google.android.c2dm.intent.REGISTRATION" />
<category android:name="org.telegram.messenger" />
</intent-filter>
</receiver>
<service
android:name=".GcmPushListenerService"
android:exported="false" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
</intent-filter>
</service>
<service
android:name=".GcmInstanceIDListenerService"
android:exported="false">
<intent-filter>
<action android:name="com.google.android.gms.iid.InstanceID" />
</intent-filter>
</service>
<service
android:name=".GcmRegistrationIntentService"
android:exported="false">
</service>
<uses-library android:name="com.google.android.maps" android:required="false"/>
<receiver
tools:replace="android:enabled"
android:name="com.google.android.gms.measurement.AppMeasurementReceiver"
android:enabled="false">
<intent-filter>
<action android:name="com.google.android.gms.measurement.UPLOAD" />
</intent-filter>
</receiver>
<service
tools:replace="android:enabled"
android:name="com.google.android.gms.measurement.AppMeasurementService"
android:enabled="false"
android:exported="false" />
</application>
</manifest>
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools"
package="org.telegram.messenger"
android:installLocation="auto">
<uses-feature android:name="android.hardware.location.gps" android:required="false" />
<uses-feature android:name="android.hardware.location.network" android:required="false" />
<uses-feature android:name="android.hardware.location" android:required="false" />
<uses-feature android:name="android.hardware.LOCATION" android:required="false" />
<uses-permission android:name="com.google.android.c2dm.permission.RECEIVE" />
<uses-permission android:name="org.telegram.messenger.permission.MAPS_RECEIVE"/>
<uses-permission android:name="org.telegram.messenger.permission.C2D_MESSAGE" />
<uses-permission android:name="com.google.android.providers.gsf.permission.READ_GSERVICES"/>
<uses-permission android:name="android.permission.ACCESS_COARSE_LOCATION" />
<uses-permission android:name="android.permission.ACCESS_FINE_LOCATION" />
<uses-permission android:name="android.permission.CALL_PHONE" />
<uses-permission android:name="android.permission.READ_CALL_LOG" />
<uses-permission android:name="android.permission.WRITE_CALL_LOG" />
<permission android:name="org.telegram.messenger.permission.MAPS_RECEIVE" android:protectionLevel="signature"/>
<permission android:name="org.telegram.messenger.permission.C2D_MESSAGE" android:protectionLevel="signature" />
<application
android:allowBackup="false"
android:icon="@drawable/ic_launcher"
android:label="@string/AppName"
android:theme="@style/Theme.TMessages.Start"
android:name=".ApplicationLoader"
android:hardwareAccelerated="@bool/useHardwareAcceleration"
android:largeHeap="true">
<meta-data android:name="com.google.android.maps.v2.API_KEY" android:value="AIzaSyA-t0jLPjUt2FxrA8VPK2EiYHcYcboIR6k" />
<activity android:name="net.hockeyapp.android.UpdateActivity" />
<receiver
android:name="com.google.android.gms.gcm.GcmReceiver"
android:exported="true"
android:permission="com.google.android.c2dm.permission.SEND" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
<action android:name="com.google.android.c2dm.intent.REGISTRATION" />
<category android:name="org.telegram.messenger" />
</intent-filter>
</receiver>
<service
android:name=".GcmPushListenerService"
android:exported="false" >
<intent-filter>
<action android:name="com.google.android.c2dm.intent.RECEIVE" />
</intent-filter>
</service>
<service
android:name=".GcmInstanceIDListenerService"
android:exported="false">
<intent-filter>
<action android:name="com.google.android.gms.iid.InstanceID" />
</intent-filter>
</service>
<service
android:name=".GcmRegistrationIntentService"
android:exported="false">
</service>
<uses-library android:name="com.google.android.maps" android:required="false"/>
<receiver
tools:replace="android:enabled"
android:name="com.google.android.gms.measurement.AppMeasurementReceiver"
android:enabled="false">
<intent-filter>
<action android:name="com.google.android.gms.measurement.UPLOAD" />
</intent-filter>
</receiver>
<service
tools:replace="android:enabled"
android:name="com.google.android.gms.measurement.AppMeasurementService"
android:enabled="false"
android:exported="false" />
</application>
</manifest>
...@@ -108,6 +108,64 @@ include $(BUILD_STATIC_LIBRARY) ...@@ -108,6 +108,64 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS) include $(CLEAR_VARS)
LOCAL_MODULE := WebRtcAec
LOCAL_SRC_FILES := ./libtgvoip/external/libWebRtcAec_android_$(TARGET_ARCH_ABI).a
include $(PREBUILT_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := voip
LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -finline-functions -ffast-math -Os -fno-strict-aliasing -O3
LOCAL_CFLAGS := -O3 -DUSE_KISS_FFT -fexceptions
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
# LOCAL_CPPFLAGS += -mfloat-abi=softfp -mfpu=neon
# LOCAL_CFLAGS += -mfloat-abi=softfp -mfpu=neon -DFLOATING_POINT
# LOCAL_ARM_NEON := true
else
LOCAL_CFLAGS += -DFIXED_POINT
ifeq ($(TARGET_ARCH_ABI),armeabi)
# LOCAL_CPPFLAGS += -mfloat-abi=softfp -mfpu=neon
# LOCAL_CFLAGS += -mfloat-abi=softfp -mfpu=neon
else
ifeq ($(TARGET_ARCH_ABI),x86)
endif
endif
endif
MY_DIR := libtgvoip
LOCAL_C_INCLUDES := jni/opus/include jni/boringssl/include/
LOCAL_SRC_FILES := \
./libtgvoip/logging.cpp \
./libtgvoip/VoIPController.cpp \
./libtgvoip/BufferInputStream.cpp \
./libtgvoip/BufferOutputStream.cpp \
./libtgvoip/BlockingQueue.cpp \
./libtgvoip/audio/AudioInput.cpp \
./libtgvoip/os/android/AudioInputOpenSLES.cpp \
./libtgvoip/MediaStreamItf.cpp \
./libtgvoip/audio/AudioOutput.cpp \
./libtgvoip/OpusEncoder.cpp \
./libtgvoip/os/android/AudioOutputOpenSLES.cpp \
./libtgvoip/JitterBuffer.cpp \
./libtgvoip/OpusDecoder.cpp \
./libtgvoip/BufferPool.cpp \
./libtgvoip/os/android/OpenSLEngineWrapper.cpp \
./libtgvoip/os/android/AudioInputAndroid.cpp \
./libtgvoip/os/android/AudioOutputAndroid.cpp \
./libtgvoip/EchoCanceller.cpp \
./libtgvoip/CongestionControl.cpp \
./libtgvoip/VoIPServerConfig.cpp
include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -frtti -DHAVE_PTHREAD -finline-functions -ffast-math -O0 LOCAL_CPPFLAGS := -Wall -std=c++11 -DANDROID -frtti -DHAVE_PTHREAD -finline-functions -ffast-math -O0
LOCAL_C_INCLUDES += ./jni/boringssl/include/ LOCAL_C_INCLUDES += ./jni/boringssl/include/
LOCAL_ARM_MODE := arm LOCAL_ARM_MODE := arm
...@@ -237,13 +295,13 @@ include $(BUILD_STATIC_LIBRARY) ...@@ -237,13 +295,13 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS) include $(CLEAR_VARS)
LOCAL_PRELINK_MODULE := false LOCAL_PRELINK_MODULE := false
LOCAL_MODULE := tmessages.24 LOCAL_MODULE := tmessages.26
LOCAL_CFLAGS := -w -std=c11 -Os -DNULL=0 -DSOCKLEN_T=socklen_t -DLOCALE_NOT_USED -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64 LOCAL_CFLAGS := -w -std=c11 -Os -DNULL=0 -DSOCKLEN_T=socklen_t -DLOCALE_NOT_USED -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64
LOCAL_CFLAGS += -Drestrict='' -D__EMX__ -DOPUS_BUILD -DFIXED_POINT -DUSE_ALLOCA -DHAVE_LRINT -DHAVE_LRINTF -fno-math-errno LOCAL_CFLAGS += -Drestrict='' -D__EMX__ -DOPUS_BUILD -DFIXED_POINT -DUSE_ALLOCA -DHAVE_LRINT -DHAVE_LRINTF -fno-math-errno
LOCAL_CFLAGS += -DANDROID_NDK -DDISABLE_IMPORTGL -fno-strict-aliasing -fprefetch-loop-arrays -DAVOID_TABLES -DANDROID_TILE_BASED_DECODE -DANDROID_ARMV6_IDCT -ffast-math -D__STDC_CONSTANT_MACROS LOCAL_CFLAGS += -DANDROID_NDK -DDISABLE_IMPORTGL -fno-strict-aliasing -fprefetch-loop-arrays -DAVOID_TABLES -DANDROID_TILE_BASED_DECODE -DANDROID_ARMV6_IDCT -ffast-math -D__STDC_CONSTANT_MACROS
LOCAL_CPPFLAGS := -DBSD=1 -ffast-math -Os -funroll-loops -std=c++11 LOCAL_CPPFLAGS := -DBSD=1 -ffast-math -Os -funroll-loops -std=c++11
LOCAL_LDLIBS := -ljnigraphics -llog -lz -latomic LOCAL_LDLIBS := -ljnigraphics -llog -lz -latomic -lOpenSLES
LOCAL_STATIC_LIBRARIES := webp sqlite tgnet breakpad avformat avcodec avutil LOCAL_STATIC_LIBRARIES := webp sqlite tgnet breakpad avformat avcodec avutil voip WebRtcAec
LOCAL_SRC_FILES := \ LOCAL_SRC_FILES := \
./opus/src/opus.c \ ./opus/src/opus.c \
...@@ -261,6 +319,14 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) ...@@ -261,6 +319,14 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_ARM_MODE := arm LOCAL_ARM_MODE := arm
LOCAL_CPPFLAGS += -DLIBYUV_NEON LOCAL_CPPFLAGS += -DLIBYUV_NEON
LOCAL_CFLAGS += -DLIBYUV_NEON LOCAL_CFLAGS += -DLIBYUV_NEON
LOCAL_CFLAGS += -DOPUS_HAVE_RTCD -DOPUS_ARM_ASM
LOCAL_SRC_FILES += \
# ./opus/celt/arm/celt_neon_intr.c \
# ./opus/silk/arm/NSQ_neon.c \
./opus/silk/arm/arm_silk_map.c
# LOCAL_SRC_FILES += ./opus/celt/arm/celt_pitch_xcorr_arm-gnu.S
else else
ifeq ($(TARGET_ARCH_ABI),armeabi) ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_ARM_MODE := arm LOCAL_ARM_MODE := arm
...@@ -270,8 +336,25 @@ else ...@@ -270,8 +336,25 @@ else
LOCAL_CFLAGS += -Dx86fix LOCAL_CFLAGS += -Dx86fix
LOCAL_CPPFLAGS += -Dx86fix LOCAL_CPPFLAGS += -Dx86fix
LOCAL_ARM_MODE := arm LOCAL_ARM_MODE := arm
LOCAL_SRC_FILE += \ # LOCAL_SRC_FILES += \
./libyuv/source/row_x86.asm # ./libyuv/source/row_x86.asm
# LOCAL_SRC_FILES += \
# ./opus/celt/x86/celt_lpc_sse.c \
# ./opus/celt/x86/pitch_sse.c \
# ./opus/celt/x86/pitch_sse2.c \
# ./opus/celt/x86/pitch_sse4_1.c \
# ./opus/celt/x86/vq_sse2.c \
# ./opus/celt/x86/x86_celt_map.c \
# ./opus/celt/x86/x86cpu.c \
# ./opus/silk/fixed/x86/burg_modified_FIX_sse.c \
# ./opus/silk/fixed/x86/vector_ops_FIX_sse.c \
# ./opus/silk/x86/NSQ_del_dec_sse.c \
# ./opus/silk/x86/NSQ_sse.c \
# ./opus/silk/x86/VAD_sse.c \
# ./opus/silk/x86/VQ_WMat_sse.c \
# ./opus/silk/x86/x86_silk_map.c
endif endif
endif endif
endif endif
...@@ -352,7 +435,8 @@ LOCAL_SRC_FILES += \ ...@@ -352,7 +435,8 @@ LOCAL_SRC_FILES += \
./opus/silk/stereo_decode_pred.c \ ./opus/silk/stereo_decode_pred.c \
./opus/silk/stereo_encode_pred.c \ ./opus/silk/stereo_encode_pred.c \
./opus/silk/stereo_find_predictor.c \ ./opus/silk/stereo_find_predictor.c \
./opus/silk/stereo_quant_pred.c ./opus/silk/stereo_quant_pred.c \
./opus/silk/LPC_fit.c
LOCAL_SRC_FILES += \ LOCAL_SRC_FILES += \
./opus/silk/fixed/LTP_analysis_filter_FIX.c \ ./opus/silk/fixed/LTP_analysis_filter_FIX.c \
...@@ -364,12 +448,10 @@ LOCAL_SRC_FILES += \ ...@@ -364,12 +448,10 @@ LOCAL_SRC_FILES += \
./opus/silk/fixed/find_pitch_lags_FIX.c \ ./opus/silk/fixed/find_pitch_lags_FIX.c \
./opus/silk/fixed/find_pred_coefs_FIX.c \ ./opus/silk/fixed/find_pred_coefs_FIX.c \
./opus/silk/fixed/noise_shape_analysis_FIX.c \ ./opus/silk/fixed/noise_shape_analysis_FIX.c \
./opus/silk/fixed/prefilter_FIX.c \
./opus/silk/fixed/process_gains_FIX.c \ ./opus/silk/fixed/process_gains_FIX.c \
./opus/silk/fixed/regularize_correlations_FIX.c \ ./opus/silk/fixed/regularize_correlations_FIX.c \
./opus/silk/fixed/residual_energy16_FIX.c \ ./opus/silk/fixed/residual_energy16_FIX.c \
./opus/silk/fixed/residual_energy_FIX.c \ ./opus/silk/fixed/residual_energy_FIX.c \
./opus/silk/fixed/solve_LS_FIX.c \
./opus/silk/fixed/warped_autocorrelation_FIX.c \ ./opus/silk/fixed/warped_autocorrelation_FIX.c \
./opus/silk/fixed/apply_sine_window_FIX.c \ ./opus/silk/fixed/apply_sine_window_FIX.c \
./opus/silk/fixed/autocorr_FIX.c \ ./opus/silk/fixed/autocorr_FIX.c \
...@@ -483,7 +565,8 @@ LOCAL_SRC_FILES += \ ...@@ -483,7 +565,8 @@ LOCAL_SRC_FILES += \
./gifvideo.cpp \ ./gifvideo.cpp \
./SqliteWrapper.cpp \ ./SqliteWrapper.cpp \
./TgNetWrapper.cpp \ ./TgNetWrapper.cpp \
./NativeLoader.cpp ./NativeLoader.cpp \
./libtgvoip/client/android/tg_voip_jni.cpp
include $(BUILD_SHARED_LIBRARY) include $(BUILD_SHARED_LIBRARY)
......
APP_PLATFORM := android-9 APP_PLATFORM := android-14
APP_ABI := armeabi armeabi-v7a APP_ABI := armeabi armeabi-v7a
NDK_TOOLCHAIN_VERSION := 4.9 NDK_TOOLCHAIN_VERSION := 4.9
APP_STL := gnustl_static APP_STL := gnustl_static
\ No newline at end of file
...@@ -26,6 +26,8 @@ jmethodID jclass_ConnectionsManager_onLogout; ...@@ -26,6 +26,8 @@ jmethodID jclass_ConnectionsManager_onLogout;
jmethodID jclass_ConnectionsManager_onConnectionStateChanged; jmethodID jclass_ConnectionsManager_onConnectionStateChanged;
jmethodID jclass_ConnectionsManager_onInternalPushReceived; jmethodID jclass_ConnectionsManager_onInternalPushReceived;
jmethodID jclass_ConnectionsManager_onUpdateConfig; jmethodID jclass_ConnectionsManager_onUpdateConfig;
jmethodID jclass_ConnectionsManager_onBytesSent;
jmethodID jclass_ConnectionsManager_onBytesReceived;
jint createLoadOpetation(JNIEnv *env, jclass c, jint dc_id, jlong id, jlong volume_id, jlong access_hash, jint local_id, jbyteArray encKey, jbyteArray encIv, jstring extension, jint version, jint size, jstring dest, jstring temp, jobject delegate) { jint createLoadOpetation(JNIEnv *env, jclass c, jint dc_id, jlong id, jlong volume_id, jlong access_hash, jint local_id, jbyteArray encKey, jbyteArray encIv, jstring extension, jint version, jint size, jstring dest, jstring temp, jobject delegate) {
if (encKey != nullptr && encIv == nullptr || encKey == nullptr && encIv != nullptr || extension == nullptr || dest == nullptr || temp == nullptr) { if (encKey != nullptr && encIv == nullptr || encKey == nullptr && encIv != nullptr || extension == nullptr || dest == nullptr || temp == nullptr) {
...@@ -168,7 +170,7 @@ void sendRequest(JNIEnv *env, jclass c, jint object, jobject onComplete, jobject ...@@ -168,7 +170,7 @@ void sendRequest(JNIEnv *env, jclass c, jint object, jobject onComplete, jobject
if (onQuickAck != nullptr) { if (onQuickAck != nullptr) {
onQuickAck = env->NewGlobalRef(onQuickAck); onQuickAck = env->NewGlobalRef(onQuickAck);
} }
ConnectionsManager::getInstance().sendRequest(request, ([onComplete](TLObject *response, TL_error *error) { ConnectionsManager::getInstance().sendRequest(request, ([onComplete](TLObject *response, TL_error *error, int32_t networkType) {
TL_api_response *resp = (TL_api_response *) response; TL_api_response *resp = (TL_api_response *) response;
jint ptr = 0; jint ptr = 0;
jint errorCode = 0; jint errorCode = 0;
...@@ -180,7 +182,7 @@ void sendRequest(JNIEnv *env, jclass c, jint object, jobject onComplete, jobject ...@@ -180,7 +182,7 @@ void sendRequest(JNIEnv *env, jclass c, jint object, jobject onComplete, jobject
errorText = jniEnv->NewStringUTF(error->text.c_str()); errorText = jniEnv->NewStringUTF(error->text.c_str());
} }
if (onComplete != nullptr) { if (onComplete != nullptr) {
jniEnv->CallVoidMethod(onComplete, jclass_RequestDelegateInternal_run, ptr, errorCode, errorText); jniEnv->CallVoidMethod(onComplete, jclass_RequestDelegateInternal_run, ptr, errorCode, errorText, networkType);
} }
if (errorText != nullptr) { if (errorText != nullptr) {
jniEnv->DeleteLocalRef(errorText); jniEnv->DeleteLocalRef(errorText);
...@@ -246,8 +248,8 @@ void setUseIpv6(JNIEnv *env, jclass c, bool value) { ...@@ -246,8 +248,8 @@ void setUseIpv6(JNIEnv *env, jclass c, bool value) {
ConnectionsManager::getInstance().setUseIpv6(value); ConnectionsManager::getInstance().setUseIpv6(value);
} }
void setNetworkAvailable(JNIEnv *env, jclass c, jboolean value) { void setNetworkAvailable(JNIEnv *env, jclass c, jboolean value, jint networkType) {
ConnectionsManager::getInstance().setNetworkAvailable(value); ConnectionsManager::getInstance().setNetworkAvailable(value, networkType);
} }
void setPushConnectionEnabled(JNIEnv *env, jclass c, jboolean value) { void setPushConnectionEnabled(JNIEnv *env, jclass c, jboolean value) {
...@@ -289,9 +291,17 @@ class Delegate : public ConnectiosManagerDelegate { ...@@ -289,9 +291,17 @@ class Delegate : public ConnectiosManagerDelegate {
void onInternalPushReceived() { void onInternalPushReceived() {
jniEnv->CallStaticVoidMethod(jclass_ConnectionsManager, jclass_ConnectionsManager_onInternalPushReceived); jniEnv->CallStaticVoidMethod(jclass_ConnectionsManager, jclass_ConnectionsManager_onInternalPushReceived);
} }
void onBytesReceived(int32_t amount, int32_t networkType) {
jniEnv->CallStaticVoidMethod(jclass_ConnectionsManager, jclass_ConnectionsManager_onBytesReceived, amount, networkType);
}
void onBytesSent(int32_t amount, int32_t networkType) {
jniEnv->CallStaticVoidMethod(jclass_ConnectionsManager, jclass_ConnectionsManager_onBytesSent, amount, networkType);
}
}; };
void init(JNIEnv *env, jclass c, jint version, jint layer, jint apiId, jstring deviceModel, jstring systemVersion, jstring appVersion, jstring langCode, jstring configPath, jstring logPath, jint userId, jboolean enablePushConnection) { void init(JNIEnv *env, jclass c, jint version, jint layer, jint apiId, jstring deviceModel, jstring systemVersion, jstring appVersion, jstring langCode, jstring configPath, jstring logPath, jint userId, jboolean enablePushConnection, jboolean hasNetwork, jint networkType) {
const char *deviceModelStr = env->GetStringUTFChars(deviceModel, 0); const char *deviceModelStr = env->GetStringUTFChars(deviceModel, 0);
const char *systemVersionStr = env->GetStringUTFChars(systemVersion, 0); const char *systemVersionStr = env->GetStringUTFChars(systemVersion, 0);
const char *appVersionStr = env->GetStringUTFChars(appVersion, 0); const char *appVersionStr = env->GetStringUTFChars(appVersion, 0);
...@@ -299,7 +309,7 @@ void init(JNIEnv *env, jclass c, jint version, jint layer, jint apiId, jstring d ...@@ -299,7 +309,7 @@ void init(JNIEnv *env, jclass c, jint version, jint layer, jint apiId, jstring d
const char *configPathStr = env->GetStringUTFChars(configPath, 0); const char *configPathStr = env->GetStringUTFChars(configPath, 0);
const char *logPathStr = env->GetStringUTFChars(logPath, 0); const char *logPathStr = env->GetStringUTFChars(logPath, 0);
ConnectionsManager::getInstance().init(version, layer, apiId, std::string(deviceModelStr), std::string(systemVersionStr), std::string(appVersionStr), std::string(langCodeStr), std::string(configPathStr), std::string(logPathStr), userId, true, enablePushConnection); ConnectionsManager::getInstance().init(version, layer, apiId, std::string(deviceModelStr), std::string(systemVersionStr), std::string(appVersionStr), std::string(langCodeStr), std::string(configPathStr), std::string(logPathStr), userId, true, enablePushConnection, hasNetwork, networkType);
if (deviceModelStr != 0) { if (deviceModelStr != 0) {
env->ReleaseStringUTFChars(deviceModel, deviceModelStr); env->ReleaseStringUTFChars(deviceModel, deviceModelStr);
...@@ -339,13 +349,13 @@ static JNINativeMethod ConnectionsManagerMethods[] = { ...@@ -339,13 +349,13 @@ static JNINativeMethod ConnectionsManagerMethods[] = {
{"native_applyDatacenterAddress", "(ILjava/lang/String;I)V", (void *) applyDatacenterAddress}, {"native_applyDatacenterAddress", "(ILjava/lang/String;I)V", (void *) applyDatacenterAddress},
{"native_getConnectionState", "()I", (void *) getConnectionState}, {"native_getConnectionState", "()I", (void *) getConnectionState},
{"native_setUserId", "(I)V", (void *) setUserId}, {"native_setUserId", "(I)V", (void *) setUserId},
{"native_init", "(IIILjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;IZ)V", (void *) init}, {"native_init", "(IIILjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;IZZI)V", (void *) init},
{"native_switchBackend", "()V", (void *) switchBackend}, {"native_switchBackend", "()V", (void *) switchBackend},
{"native_pauseNetwork", "()V", (void *) pauseNetwork}, {"native_pauseNetwork", "()V", (void *) pauseNetwork},
{"native_resumeNetwork", "(Z)V", (void *) resumeNetwork}, {"native_resumeNetwork", "(Z)V", (void *) resumeNetwork},
{"native_updateDcSettings", "()V", (void *) updateDcSettings}, {"native_updateDcSettings", "()V", (void *) updateDcSettings},
{"native_setUseIpv6", "(Z)V", (void *) setUseIpv6}, {"native_setUseIpv6", "(Z)V", (void *) setUseIpv6},
{"native_setNetworkAvailable", "(Z)V", (void *) setNetworkAvailable}, {"native_setNetworkAvailable", "(ZI)V", (void *) setNetworkAvailable},
{"native_setPushConnectionEnabled", "(Z)V", (void *) setPushConnectionEnabled}, {"native_setPushConnectionEnabled", "(Z)V", (void *) setPushConnectionEnabled},
{"native_setJava", "(Z)V", (void *) setJava} {"native_setJava", "(Z)V", (void *) setJava}
}; };
...@@ -381,7 +391,7 @@ extern "C" int registerNativeTgNetFunctions(JavaVM *vm, JNIEnv *env) { ...@@ -381,7 +391,7 @@ extern "C" int registerNativeTgNetFunctions(JavaVM *vm, JNIEnv *env) {
if (jclass_RequestDelegateInternal == 0) { if (jclass_RequestDelegateInternal == 0) {
return JNI_FALSE; return JNI_FALSE;
} }
jclass_RequestDelegateInternal_run = env->GetMethodID(jclass_RequestDelegateInternal, "run", "(IILjava/lang/String;)V"); jclass_RequestDelegateInternal_run = env->GetMethodID(jclass_RequestDelegateInternal, "run", "(IILjava/lang/String;I)V");
if (jclass_RequestDelegateInternal_run == 0) { if (jclass_RequestDelegateInternal_run == 0) {
return JNI_FALSE; return JNI_FALSE;
} }
...@@ -447,6 +457,14 @@ extern "C" int registerNativeTgNetFunctions(JavaVM *vm, JNIEnv *env) { ...@@ -447,6 +457,14 @@ extern "C" int registerNativeTgNetFunctions(JavaVM *vm, JNIEnv *env) {
if (jclass_ConnectionsManager_onUpdateConfig == 0) { if (jclass_ConnectionsManager_onUpdateConfig == 0) {
return JNI_FALSE; return JNI_FALSE;
} }
jclass_ConnectionsManager_onBytesSent = env->GetStaticMethodID(jclass_ConnectionsManager, "onBytesSent", "(II)V");
if (jclass_ConnectionsManager_onBytesSent == 0) {
return JNI_FALSE;
}
jclass_ConnectionsManager_onBytesReceived = env->GetStaticMethodID(jclass_ConnectionsManager, "onBytesReceived", "(II)V");
if (jclass_ConnectionsManager_onBytesReceived == 0) {
return JNI_FALSE;
}
ConnectionsManager::getInstance().setDelegate(new Delegate()); ConnectionsManager::getInstance().setDelegate(new Delegate());
return JNI_TRUE; return JNI_TRUE;
......
...@@ -127,7 +127,7 @@ void ec_GFp_nistp_points_make_affine_internal( ...@@ -127,7 +127,7 @@ void ec_GFp_nistp_points_make_affine_internal(
* *
* A left-shift followed by subtraction of the original value yields a new * A left-shift followed by subtraction of the original value yields a new
* representation of the same value, using signed bits s_i = b_(i+1) - b_i. * representation of the same value, using signed bits s_i = b_(i+1) - b_i.
* This representation from Booth's paper has since appeared in the * This representation from Booth's paper has since onAppeared in the
* literature under a variety of different names including "reversed binary * literature under a variety of different names including "reversed binary
* form", "alternating greedy expansion", "mutual opposite form", and * form", "alternating greedy expansion", "mutual opposite form", and
* "sign-alternating {+-1}-representation". * "sign-alternating {+-1}-representation".
......
...@@ -273,6 +273,5 @@ jint Java_org_telegram_ui_Components_AnimatedFileDrawable_getVideoFrame(JNIEnv * ...@@ -273,6 +273,5 @@ jint Java_org_telegram_ui_Components_AnimatedFileDrawable_getVideoFrame(JNIEnv *
return 1; return 1;
} }
} }
return 0;
} }
} }
Subproject commit eb813e1d133fdfb96f5efe9c767e86136aab1133
...@@ -58,16 +58,12 @@ ...@@ -58,16 +58,12 @@
# define S_MUL(a,b) MULT16_32_Q15(b, a) # define S_MUL(a,b) MULT16_32_Q15(b, a)
# define C_MUL(m,a,b) \ # define C_MUL(m,a,b) \
do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0) (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
# define C_MULC(m,a,b) \ # define C_MULC(m,a,b) \
do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
# define C_MUL4(m,a,b) \
do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
(m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0)
# define C_MULBYSCALAR( c, s ) \ # define C_MULBYSCALAR( c, s ) \
do{ (c).r = S_MUL( (c).r , s ) ;\ do{ (c).r = S_MUL( (c).r , s ) ;\
...@@ -81,17 +77,17 @@ ...@@ -81,17 +77,17 @@
DIVSCALAR( (c).i , div); }while (0) DIVSCALAR( (c).i , div); }while (0)
#define C_ADD( res, a,b)\ #define C_ADD( res, a,b)\
do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \ do {(res).r=ADD32_ovflw((a).r,(b).r); (res).i=ADD32_ovflw((a).i,(b).i); \
}while(0) }while(0)
#define C_SUB( res, a,b)\ #define C_SUB( res, a,b)\
do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \ do {(res).r=SUB32_ovflw((a).r,(b).r); (res).i=SUB32_ovflw((a).i,(b).i); \
}while(0) }while(0)
#define C_ADDTO( res , a)\ #define C_ADDTO( res , a)\
do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\ do {(res).r = ADD32_ovflw((res).r, (a).r); (res).i = ADD32_ovflw((res).i,(a).i);\
}while(0) }while(0)
#define C_SUBFROM( res , a)\ #define C_SUBFROM( res , a)\
do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \ do {(res).r = ADD32_ovflw((res).r,(a).r); (res).i = SUB32_ovflw((res).i,(a).i); \
}while(0) }while(0)
#if defined(OPUS_ARM_INLINE_ASM) #if defined(OPUS_ARM_INLINE_ASM)
...@@ -101,6 +97,9 @@ ...@@ -101,6 +97,9 @@
#if defined(OPUS_ARM_INLINE_EDSP) #if defined(OPUS_ARM_INLINE_EDSP)
#include "arm/kiss_fft_armv5e.h" #include "arm/kiss_fft_armv5e.h"
#endif #endif
#if defined(MIPSr1_ASM)
#include "mips/kiss_fft_mipsr1.h"
#endif
#else /* not FIXED_POINT*/ #else /* not FIXED_POINT*/
......
...@@ -46,6 +46,14 @@ ...@@ -46,6 +46,14 @@
# endif # endif
# endif # endif
#if OPUS_GNUC_PREREQ(3, 0)
#define opus_likely(x) (__builtin_expect(!!(x), 1))
#define opus_unlikely(x) (__builtin_expect(!!(x), 0))
#else
#define opus_likely(x) (!!(x))
#define opus_unlikely(x) (!!(x))
#endif
#define CELT_SIG_SCALE 32768.f #define CELT_SIG_SCALE 32768.f
#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__); #define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
...@@ -69,11 +77,8 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) ...@@ -69,11 +77,8 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
#define IMUL32(a,b) ((a)*(b)) #define IMUL32(a,b) ((a)*(b))
#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */
#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */
#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */
#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */
#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */
#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */
#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */
#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */
...@@ -81,6 +86,15 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) ...@@ -81,6 +86,15 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
#define UADD32(a,b) ((a)+(b)) #define UADD32(a,b) ((a)+(b))
#define USUB32(a,b) ((a)-(b)) #define USUB32(a,b) ((a)-(b))
/* Set this if opus_int64 is a native type of the CPU. */
/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
(which can be ILP32 for x32) and Win64 (which is LLP64). */
#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
#define OPUS_FAST_INT64 1
#else
#define OPUS_FAST_INT64 0
#endif
#define PRINT_MIPS(file) #define PRINT_MIPS(file)
#ifdef FIXED_POINT #ifdef FIXED_POINT
...@@ -95,6 +109,9 @@ typedef opus_val32 celt_ener; ...@@ -95,6 +109,9 @@ typedef opus_val32 celt_ener;
#define Q15ONE 32767 #define Q15ONE 32767
#define SIG_SHIFT 12 #define SIG_SHIFT 12
/* Safe saturation value for 32-bit signals. Should be less than
2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
#define SIG_SAT (300000000)
#define NORM_SCALING 16384 #define NORM_SCALING 16384
...@@ -108,13 +125,22 @@ typedef opus_val32 celt_ener; ...@@ -108,13 +125,22 @@ typedef opus_val32 celt_ener;
#define SCALEIN(a) (a) #define SCALEIN(a) (a)
#define SCALEOUT(a) (a) #define SCALEOUT(a) (a)
#define ABS16(x) ((x) < 0 ? (-(x)) : (x))
#define ABS32(x) ((x) < 0 ? (-(x)) : (x))
static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
}
#ifdef FIXED_DEBUG #ifdef FIXED_DEBUG
#include "fixed_debug.h" #include "fixed_debug.h"
#else #else
#include "fixed_generic.h" #include "fixed_generic.h"
#ifdef OPUS_ARM_INLINE_EDSP #ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
#include "arm/fixed_arm64.h"
#elif OPUS_ARM_INLINE_EDSP
#include "arm/fixed_armv5e.h" #include "arm/fixed_armv5e.h"
#elif defined (OPUS_ARM_INLINE_ASM) #elif defined (OPUS_ARM_INLINE_ASM)
#include "arm/fixed_armv4.h" #include "arm/fixed_armv4.h"
...@@ -137,6 +163,22 @@ typedef float celt_sig; ...@@ -137,6 +163,22 @@ typedef float celt_sig;
typedef float celt_norm; typedef float celt_norm;
typedef float celt_ener; typedef float celt_ener;
#ifdef FLOAT_APPROX
/* This code should reliably detect NaN/inf even when -ffast-math is used.
Assumes IEEE 754 format. */
static OPUS_INLINE int celt_isnan(float x)
{
union {float f; opus_uint32 i;} in;
in.f = x;
return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0;
}
#else
#ifdef __FAST_MATH__
#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input
#endif
#define celt_isnan(x) ((x)!=(x))
#endif
#define Q15ONE 1.0f #define Q15ONE 1.0f
#define NORM_SCALING 1.f #define NORM_SCALING 1.f
...@@ -146,11 +188,16 @@ typedef float celt_ener; ...@@ -146,11 +188,16 @@ typedef float celt_ener;
#define VERY_LARGE16 1e15f #define VERY_LARGE16 1e15f
#define Q15_ONE ((opus_val16)1.f) #define Q15_ONE ((opus_val16)1.f)
/* This appears to be the same speed as C99's fabsf() but it's more portable. */
#define ABS16(x) ((float)fabs(x))
#define ABS32(x) ((float)fabs(x))
#define QCONST16(x,bits) (x) #define QCONST16(x,bits) (x)
#define QCONST32(x,bits) (x) #define QCONST32(x,bits) (x)
#define NEG16(x) (-(x)) #define NEG16(x) (-(x))
#define NEG32(x) (-(x)) #define NEG32(x) (-(x))
#define NEG32_ovflw(x) (-(x))
#define EXTRACT16(x) (x) #define EXTRACT16(x) (x)
#define EXTEND32(x) (x) #define EXTEND32(x) (x)
#define SHR16(a,shift) (a) #define SHR16(a,shift) (a)
...@@ -167,6 +214,7 @@ typedef float celt_ener; ...@@ -167,6 +214,7 @@ typedef float celt_ener;
#define SATURATE16(x) (x) #define SATURATE16(x) (x)
#define ROUND16(a,shift) (a) #define ROUND16(a,shift) (a)
#define SROUND16(a,shift) (a)
#define HALF16(x) (.5f*(x)) #define HALF16(x) (.5f*(x))
#define HALF32(x) (.5f*(x)) #define HALF32(x) (.5f*(x))
...@@ -174,6 +222,8 @@ typedef float celt_ener; ...@@ -174,6 +222,8 @@ typedef float celt_ener;
#define SUB16(a,b) ((a)-(b)) #define SUB16(a,b) ((a)-(b))
#define ADD32(a,b) ((a)+(b)) #define ADD32(a,b) ((a)+(b))
#define SUB32(a,b) ((a)-(b)) #define SUB32(a,b) ((a)-(b))
#define ADD32_ovflw(a,b) ((a)+(b))
#define SUB32_ovflw(a,b) ((a)-(b))
#define MULT16_16_16(a,b) ((a)*(b)) #define MULT16_16_16(a,b) ((a)*(b))
#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b)) #define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b))
#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b)) #define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b))
...@@ -184,6 +234,7 @@ typedef float celt_ener; ...@@ -184,6 +234,7 @@ typedef float celt_ener;
#define MULT32_32_Q31(a,b) ((a)*(b)) #define MULT32_32_Q31(a,b) ((a)*(b))
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b))
#define MULT16_16_Q11_32(a,b) ((a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b))
#define MULT16_16_Q11(a,b) ((a)*(b)) #define MULT16_16_Q11(a,b) ((a)*(b))
...@@ -201,6 +252,8 @@ typedef float celt_ener; ...@@ -201,6 +252,8 @@ typedef float celt_ener;
#define SCALEIN(a) ((a)*CELT_SIG_SCALE) #define SCALEIN(a) ((a)*CELT_SIG_SCALE)
#define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE))
#define SIG2WORD16(x) (x)
#endif /* !FIXED_POINT */ #endif /* !FIXED_POINT */
#ifndef GLOBAL_STACK_SIZE #ifndef GLOBAL_STACK_SIZE
......
#!/usr/bin/perl #!/usr/bin/perl
# Copyright (C) 2002-2013 Xiph.org Foundation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
my $bigend; # little/big endian my $bigend; # little/big endian
my $nxstack; my $nxstack;
my $apple = 0;
my $symprefix = "";
$nxstack = 0; $nxstack = 0;
...@@ -10,11 +36,16 @@ eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}' ...@@ -10,11 +36,16 @@ eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
while ($ARGV[0] =~ /^-/) { while ($ARGV[0] =~ /^-/) {
$_ = shift; $_ = shift;
last if /^--/; last if /^--$/;
if (/^-n/) { if (/^-n$/) {
$nflag++; $nflag++;
next; next;
} }
if (/^--apple$/) {
$apple = 1;
$symprefix = "_";
next;
}
die "I don't recognize this switch: $_\\n"; die "I don't recognize this switch: $_\\n";
} }
$printit++ unless $nflag; $printit++ unless $nflag;
...@@ -25,6 +56,8 @@ $n=0; ...@@ -25,6 +56,8 @@ $n=0;
$thumb = 0; # ARM mode by default, not Thumb. $thumb = 0; # ARM mode by default, not Thumb.
@proc_stack = (); @proc_stack = ();
printf (" .syntax unified\n");
LINE: LINE:
while (<>) { while (<>) {
...@@ -53,7 +86,7 @@ while (<>) { ...@@ -53,7 +86,7 @@ while (<>) {
s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/; s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/;
s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/; s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/;
s/\bIMPORT\b/.extern/; s/\bIMPORT\b/.extern/;
s/\bEXPORT\b/.global/; s/\bEXPORT\b\s*/.global $symprefix/;
s/^(\s+)\[/$1IF/; s/^(\s+)\[/$1IF/;
s/^(\s+)\|/$1ELSE/; s/^(\s+)\|/$1ELSE/;
s/^(\s+)\]/$1ENDIF/; s/^(\s+)\]/$1ENDIF/;
...@@ -109,7 +142,7 @@ while (<>) { ...@@ -109,7 +142,7 @@ while (<>) {
# won't match the original source file (we could use the .line # won't match the original source file (we could use the .line
# directive, which is documented to be obsolete, but then gdb will # directive, which is documented to be obsolete, but then gdb will
# show the wrong line in the translated source file). # show the wrong line in the translated source file).
s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/; s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/ unless ($apple);
} }
} }
...@@ -131,9 +164,13 @@ while (<>) { ...@@ -131,9 +164,13 @@ while (<>) {
$prefix = ""; $prefix = "";
if ($proc) if ($proc)
{ {
$prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc); $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc) unless ($apple);
# Make sure we $prefix isn't empty here (for the $apple case).
# We handle mangling the label here, make sure it doesn't match
# the label handling below (if $prefix would be empty).
$prefix = "; ";
push(@proc_stack, $proc); push(@proc_stack, $proc);
s/^[A-Za-z_\.]\w+/$&:/; s/^[A-Za-z_\.]\w+/$symprefix$&:/;
} }
$prefix = $prefix."\t.thumb_func; " if ($thumb); $prefix = $prefix."\t.thumb_func; " if ($thumb);
s/\bPROC\b/@ $&/; s/\bPROC\b/@ $&/;
...@@ -146,7 +183,7 @@ while (<>) { ...@@ -146,7 +183,7 @@ while (<>) {
my $proc; my $proc;
s/\bENDP\b/@ $&/; s/\bENDP\b/@ $&/;
$proc = pop(@proc_stack); $proc = pop(@proc_stack);
$_ = "\t.size $proc, .-$proc".$_ if ($proc); $_ = "\t.size $proc, .-$proc".$_ if ($proc && !$apple);
} }
s/\bSUBT\b/@ $&/; s/\bSUBT\b/@ $&/;
s/\bDATA\b/@ $&/; # DATA directive is deprecated -- Asm guide, p.7-25 s/\bDATA\b/@ $&/; # DATA directive is deprecated -- Asm guide, p.7-25
...@@ -311,6 +348,6 @@ while (<>) { ...@@ -311,6 +348,6 @@ while (<>) {
} }
#If we had a code section, mark that this object doesn't need an executable #If we had a code section, mark that this object doesn't need an executable
# stack. # stack.
if ($nxstack) { if ($nxstack && !$apple) {
printf (" .section\t.note.GNU-stack,\"\",\%\%progbits\n"); printf (" .section\t.note.GNU-stack,\"\",\%\%progbits\n");
} }
...@@ -30,10 +30,15 @@ ...@@ -30,10 +30,15 @@
#endif #endif
#include "pitch.h" #include "pitch.h"
#include "kiss_fft.h"
#include "mdct.h"
#if defined(OPUS_HAVE_RTCD) #if defined(OPUS_HAVE_RTCD)
# if defined(FIXED_POINT) # if defined(FIXED_POINT)
# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
(defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int , int) = { const opus_val16 *, opus_val32 *, int , int) = {
celt_pitch_xcorr_c, /* ARMv4 */ celt_pitch_xcorr_c, /* ARMv4 */
...@@ -41,9 +46,98 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, ...@@ -41,9 +46,98 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
}; };
# else
# error "Floating-point implementation is not supported by ARM asm yet." \ # endif
"Reconfigure with --disable-rtcd or send patches." # else /* !FIXED_POINT */
# endif # if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
celt_pitch_xcorr_c, /* EDSP */
celt_pitch_xcorr_c, /* Media */
celt_pitch_xcorr_float_neon /* Neon */
};
# endif
# endif /* FIXED_POINT */
#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \
defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len
) = {
xcorr_kernel_c, /* ARMv4 */
xcorr_kernel_c, /* EDSP */
xcorr_kernel_c, /* Media */
xcorr_kernel_neon_fixed, /* Neon */
};
#endif #endif
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# if defined(HAVE_ARM_NE10)
# if defined(CUSTOM_MODES)
int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_alloc_arch_c, /* ARMv4 */
opus_fft_alloc_arch_c, /* EDSP */
opus_fft_alloc_arch_c, /* Media */
opus_fft_alloc_arm_neon /* Neon with NE10 library support */
};
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_free_arch_c, /* ARMv4 */
opus_fft_free_arch_c, /* EDSP */
opus_fft_free_arch_c, /* Media */
opus_fft_free_arm_neon /* Neon with NE10 */
};
# endif /* CUSTOM_MODES */
void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout) = {
opus_fft_c, /* ARMv4 */
opus_fft_c, /* EDSP */
opus_fft_c, /* Media */
opus_fft_neon /* Neon with NE10 */
};
void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout) = {
opus_ifft_c, /* ARMv4 */
opus_ifft_c, /* EDSP */
opus_ifft_c, /* Media */
opus_ifft_neon /* Neon with NE10 */
};
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift,
int stride, int arch) = {
clt_mdct_forward_c, /* ARMv4 */
clt_mdct_forward_c, /* EDSP */
clt_mdct_forward_c, /* Media */
clt_mdct_forward_neon /* Neon with NE10 */
};
void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift,
int stride, int arch) = {
clt_mdct_backward_c, /* ARMv4 */
clt_mdct_backward_c, /* EDSP */
clt_mdct_backward_c, /* Media */
clt_mdct_backward_neon /* Neon with NE10 */
};
# endif /* HAVE_ARM_NE10 */
# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
#endif /* OPUS_HAVE_RTCD */
...@@ -37,11 +37,12 @@ ...@@ -37,11 +37,12 @@
#include "cpu_support.h" #include "cpu_support.h"
#include "os_support.h" #include "os_support.h"
#include "opus_types.h" #include "opus_types.h"
#include "arch.h"
#define OPUS_CPU_ARM_V4 (1) #define OPUS_CPU_ARM_V4_FLAG (1<<OPUS_ARCH_ARM_V4)
#define OPUS_CPU_ARM_EDSP (1<<1) #define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP)
#define OPUS_CPU_ARM_MEDIA (1<<2) #define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
#define OPUS_CPU_ARM_NEON (1<<3) #define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON)
#if defined(_MSC_VER) #if defined(_MSC_VER)
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
...@@ -55,29 +56,31 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ ...@@ -55,29 +56,31 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
/* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
* instructions via their assembled hex code. * instructions via their assembled hex code.
* All of these instructions should be essentially nops. */ * All of these instructions should be essentially nops. */
# if defined(OPUS_ARM_MAY_HAVE_EDSP) # if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{ __try{
/*PLD [r13]*/ /*PLD [r13]*/
__emit(0xF5DDF000); __emit(0xF5DDF000);
flags|=OPUS_CPU_ARM_EDSP; flags|=OPUS_CPU_ARM_EDSP_FLAG;
} }
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/ /*Ignore exception.*/
} }
# if defined(OPUS_ARM_MAY_HAVE_MEDIA) # if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{ __try{
/*SHADD8 r3,r3,r3*/ /*SHADD8 r3,r3,r3*/
__emit(0xE6333F93); __emit(0xE6333F93);
flags|=OPUS_CPU_ARM_MEDIA; flags|=OPUS_CPU_ARM_MEDIA_FLAG;
} }
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/ /*Ignore exception.*/
} }
# if defined(OPUS_ARM_MAY_HAVE_NEON) # if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{ __try{
/*VORR q0,q0,q0*/ /*VORR q0,q0,q0*/
__emit(0xF2200150); __emit(0xF2200150);
flags|=OPUS_CPU_ARM_NEON; flags|=OPUS_CPU_ARM_NEON_FLAG;
} }
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/ /*Ignore exception.*/
...@@ -107,26 +110,26 @@ opus_uint32 opus_cpu_capabilities(void) ...@@ -107,26 +110,26 @@ opus_uint32 opus_cpu_capabilities(void)
while(fgets(buf, 512, cpuinfo) != NULL) while(fgets(buf, 512, cpuinfo) != NULL)
{ {
# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) # if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Search for edsp and neon flag */ /* Search for edsp and neon flag */
if(memcmp(buf, "Features", 8) == 0) if(memcmp(buf, "Features", 8) == 0)
{ {
char *p; char *p;
# if defined(OPUS_ARM_MAY_HAVE_EDSP)
p = strstr(buf, " edsp"); p = strstr(buf, " edsp");
if(p != NULL && (p[5] == ' ' || p[5] == '\n')) if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_EDSP; flags |= OPUS_CPU_ARM_EDSP_FLAG;
# endif
# if defined(OPUS_ARM_MAY_HAVE_NEON) # if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
p = strstr(buf, " neon"); p = strstr(buf, " neon");
if(p != NULL && (p[5] == ' ' || p[5] == '\n')) if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_NEON; flags |= OPUS_CPU_ARM_NEON_FLAG;
# endif # endif
} }
# endif # endif
# if defined(OPUS_ARM_MAY_HAVE_MEDIA) # if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Search for media capabilities (>= ARMv6) */ /* Search for media capabilities (>= ARMv6) */
if(memcmp(buf, "CPU architecture:", 17) == 0) if(memcmp(buf, "CPU architecture:", 17) == 0)
{ {
...@@ -134,7 +137,7 @@ opus_uint32 opus_cpu_capabilities(void) ...@@ -134,7 +137,7 @@ opus_uint32 opus_cpu_capabilities(void)
version = atoi(buf+17); version = atoi(buf+17);
if(version >= 6) if(version >= 6)
flags |= OPUS_CPU_ARM_MEDIA; flags |= OPUS_CPU_ARM_MEDIA_FLAG;
} }
# endif # endif
} }
...@@ -156,18 +159,26 @@ int opus_select_arch(void) ...@@ -156,18 +159,26 @@ int opus_select_arch(void)
opus_uint32 flags = opus_cpu_capabilities(); opus_uint32 flags = opus_cpu_capabilities();
int arch = 0; int arch = 0;
if(!(flags & OPUS_CPU_ARM_EDSP)) if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) {
/* Asserts ensure arch values are sequential */
celt_assert(arch == OPUS_ARCH_ARM_V4);
return arch; return arch;
}
arch++; arch++;
if(!(flags & OPUS_CPU_ARM_MEDIA)) if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) {
celt_assert(arch == OPUS_ARCH_ARM_EDSP);
return arch; return arch;
}
arch++; arch++;
if(!(flags & OPUS_CPU_ARM_NEON)) if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) {
celt_assert(arch == OPUS_ARCH_ARM_MEDIA);
return arch; return arch;
}
arch++; arch++;
celt_assert(arch == OPUS_ARCH_ARM_NEON);
return arch; return arch;
} }
......
...@@ -66,6 +66,12 @@ ...@@ -66,6 +66,12 @@
# if defined(OPUS_HAVE_RTCD) # if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void); int opus_select_arch(void);
#define OPUS_ARCH_ARM_V4 (0)
#define OPUS_ARCH_ARM_EDSP (1)
#define OPUS_ARCH_ARM_MEDIA (2)
#define OPUS_ARCH_ARM_NEON (3)
# endif # endif
#endif #endif
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_ne10_fft.c
@brief ARM Neon optimizations for fft using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#endif
#include <NE10_init.h>
#include <NE10_dsp.h>
#include "os_support.h"
#include "kiss_fft.h"
#include "stack_alloc.h"
#if !defined(FIXED_POINT)
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
#else
# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft)
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
#endif
#if defined(CUSTOM_MODES)
/* nfft lengths in NE10 that support scaled fft */
# define NE10_FFTSCALED_SUPPORT_MAX 4
static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
480, 240, 120, 60
};
int opus_fft_alloc_arm_neon(kiss_fft_state *st)
{
int i;
size_t memneeded = sizeof(struct arch_fft_state);
st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
if (!st->arch_fft)
return -1;
for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
if(st->nfft == ne10_fft_scaled_support[i])
break;
}
if (i == NE10_FFTSCALED_SUPPORT_MAX) {
/* This nfft length (scaled fft) is not supported in NE10 */
st->arch_fft->is_supported = 0;
st->arch_fft->priv = NULL;
}
else {
st->arch_fft->is_supported = 1;
st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
if (st->arch_fft->priv == NULL) {
return -1;
}
}
return 0;
}
void opus_fft_free_arm_neon(kiss_fft_state *st)
{
NE10_FFT_CFG_TYPE_T cfg;
if (!st->arch_fft)
return;
cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
if (cfg)
NE10_FFT_DESTROY_C2C_TYPE(cfg);
opus_free(st->arch_fft);
}
#endif
void opus_fft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
{
NE10_FFT_STATE_TYPE_T state;
NE10_FFT_CFG_TYPE_T cfg = &state;
VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
SAVE_STACK;
ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_fft_c(st, fin, fout);
}
else {
memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
#if !defined(FIXED_POINT)
state.is_forward_scaled = 1;
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 0);
#else
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 0, 1);
#endif
}
RESTORE_STACK;
}
void opus_ifft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
{
NE10_FFT_STATE_TYPE_T state;
NE10_FFT_CFG_TYPE_T cfg = &state;
VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
SAVE_STACK;
ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_ifft_c(st, fin, fout);
}
else {
memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
#if !defined(FIXED_POINT)
state.is_backward_scaled = 0;
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 1);
#else
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 1, 0);
#endif
}
RESTORE_STACK;
}
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_ne10_mdct.c
@brief ARM Neon optimizations for mdct using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#endif
#include "kiss_fft.h"
#include "_kiss_fft_guts.h"
#include "mdct.h"
#include "stack_alloc.h"
void clt_mdct_forward_neon(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar, f);
VARDECL(kiss_fft_cpx, f2);
const kiss_fft_state *st = l->kfft[shift];
const kiss_twiddle_scalar *trig;
SAVE_STACK;
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
ALLOC(f2, N4, kiss_fft_cpx);
/* Consider the input to be composed of four blocks: [a, b, c, d] */
/* Window, shuffle, fold */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
for(i=0;i<((overlap+3)>>2);i++)
{
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
*yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
*yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
wp1 = window;
wp2 = window+overlap-1;
for(;i<N4-((overlap+3)>>2);i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = *xp2;
*yp++ = *xp1;
xp1+=2;
xp2-=2;
}
for(;i<N4;i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
*yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
}
/* Pre-rotation */
{
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar *t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_cpx yc;
kiss_twiddle_scalar t0, t1;
kiss_fft_scalar re, im, yr, yi;
t0 = t[i];
t1 = t[N4+i];
re = *yp++;
im = *yp++;
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
yc.r = yr;
yc.i = yi;
f2[i] = yc;
}
}
opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
/* Post-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
const kiss_twiddle_scalar *t = &trig[0];
/* Temp pointers to make it really clear to the compiler what we're doing */
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
*yp1 = yr;
*yp2 = yi;
fp++;
yp1 += 2*stride;
yp2 -= 2*stride;
}
}
RESTORE_STACK;
}
void clt_mdct_backward_neon(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window,
int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar, f);
const kiss_twiddle_scalar *trig;
const kiss_fft_state *st = l->kfft[shift];
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
/* Pre-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
yp[2*i] = yr;
yp[2*i+1] = yi;
xp1+=2*stride;
xp2-=2*stride;
}
}
opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch);
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
it in-place. */
{
kiss_fft_scalar * yp0 = out+(overlap>>1);
kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
const kiss_twiddle_scalar *t = &trig[0];
/* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
middle pair will be computed twice. */
for(i=0;i<(N4+1)>>1;i++)
{
kiss_fft_scalar re, im, yr, yi;
kiss_twiddle_scalar t0, t1;
re = yp0[0];
im = yp0[1];
t0 = t[i];
t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
re = yp1[0];
im = yp1[1];
yp0[0] = yr;
yp1[1] = yi;
t0 = t[(N4-i-1)];
t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
yp1[0] = yr;
yp0[1] = yi;
yp0 += 2;
yp1 -= 2;
}
}
/* Mirror on both sides for TDAC */
{
kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
const opus_val16 * OPUS_RESTRICT wp1 = window;
const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
for(i = 0; i < overlap/2; i++)
{
kiss_fft_scalar x1, x2;
x1 = *xp1;
x2 = *yp1;
*yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
*xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
wp1++;
wp2--;
}
}
RESTORE_STACK;
}
/* Copyright (c) 2014-2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_neon_intr.c
@brief ARM Neon Intrinsic optimizations for celt
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#include "../pitch.h"
#if defined(FIXED_POINT)
void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
int32x4_t a = vld1q_s32(sum);
/* Load y[0...3] */
/* This requires len>0 to always be valid (which we assert in the C code). */
int16x4_t y0 = vld1_s16(y);
y += 4;
for (j = 0; j + 8 <= len; j += 8)
{
/* Load x[0...7] */
int16x8_t xx = vld1q_s16(x);
int16x4_t x0 = vget_low_s16(xx);
int16x4_t x4 = vget_high_s16(xx);
/* Load y[4...11] */
int16x8_t yy = vld1q_s16(y);
int16x4_t y4 = vget_low_s16(yy);
int16x4_t y8 = vget_high_s16(yy);
int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0);
int16x4_t y1 = vext_s16(y0, y4, 1);
int16x4_t y5 = vext_s16(y4, y8, 1);
int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1);
int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1);
int16x4_t y2 = vext_s16(y0, y4, 2);
int16x4_t y6 = vext_s16(y4, y8, 2);
int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2);
int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2);
int16x4_t y3 = vext_s16(y0, y4, 3);
int16x4_t y7 = vext_s16(y4, y8, 3);
int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3);
int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3);
y0 = y8;
a = a7;
x += 8;
y += 8;
}
for (; j < len; j++)
{
int16x4_t x0 = vld1_dup_s16(x); /* load next x */
int32x4_t a0 = vmlal_s16(a, y0, x0);
int16x4_t y4 = vld1_dup_s16(y); /* load next y */
y0 = vext_s16(y0, y4, 1);
a = a0;
x++;
y++;
}
vst1q_s32(sum, a);
}
#else
/*
* Function: xcorr_kernel_neon_float
* ---------------------------------
* Computes 4 correlation values and stores them in sum[4]
*/
static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
float32_t sum[4], int len) {
float32x4_t YY[3];
float32x4_t YEXT[3];
float32x4_t XX[2];
float32x2_t XX_2;
float32x4_t SUMM;
const float32_t *xi = x;
const float32_t *yi = y;
celt_assert(len>0);
YY[0] = vld1q_f32(yi);
SUMM = vdupq_n_f32(0);
/* Consume 8 elements in x vector and 12 elements in y
* vector. However, the 12'th element never really gets
* touched in this loop. So, if len == 8, then we only
* must access y[0] to y[10]. y[11] must not be accessed
* hence make sure len > 8 and not len >= 8
*/
while (len > 8) {
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
YY[2] = vld1q_f32(yi);
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
YEXT[0] = vextq_f32(YY[0], YY[1], 1);
SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
YEXT[1] = vextq_f32(YY[0], YY[1], 2);
SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
YEXT[2] = vextq_f32(YY[0], YY[1], 3);
SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
SUMM = vmlaq_lane_f32(SUMM, YY[1], vget_low_f32(XX[1]), 0);
YEXT[0] = vextq_f32(YY[1], YY[2], 1);
SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[1]), 1);
YEXT[1] = vextq_f32(YY[1], YY[2], 2);
SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[1]), 0);
YEXT[2] = vextq_f32(YY[1], YY[2], 3);
SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[1]), 1);
YY[0] = YY[2];
len -= 8;
}
/* Consume 4 elements in x vector and 8 elements in y
* vector. However, the 8'th element in y never really gets
* touched in this loop. So, if len == 4, then we only
* must access y[0] to y[6]. y[7] must not be accessed
* hence make sure len>4 and not len>=4
*/
if (len > 4) {
yi += 4;
YY[1] = vld1q_f32(yi);
XX[0] = vld1q_f32(xi);
xi += 4;
SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
YEXT[0] = vextq_f32(YY[0], YY[1], 1);
SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
YEXT[1] = vextq_f32(YY[0], YY[1], 2);
SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
YEXT[2] = vextq_f32(YY[0], YY[1], 3);
SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
YY[0] = YY[1];
len -= 4;
}
while (--len > 0) {
XX_2 = vld1_dup_f32(xi++);
SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
YY[0]= vld1q_f32(++yi);
}
XX_2 = vld1_dup_f32(xi);
SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
vst1q_f32(sum, SUMM);
}
/*
* Function: xcorr_kernel_neon_float_process1
* ---------------------------------
* Computes single correlation values and stores in *sum
*/
static void xcorr_kernel_neon_float_process1(const float32_t *x,
const float32_t *y, float32_t *sum, int len) {
float32x4_t XX[4];
float32x4_t YY[4];
float32x2_t XX_2;
float32x2_t YY_2;
float32x4_t SUMM;
float32x2_t SUMM_2[2];
const float32_t *xi = x;
const float32_t *yi = y;
SUMM = vdupq_n_f32(0);
/* Work on 16 values per iteration */
while (len >= 16) {
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
XX[2] = vld1q_f32(xi);
xi += 4;
XX[3] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
YY[2] = vld1q_f32(yi);
yi += 4;
YY[3] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
SUMM = vmlaq_f32(SUMM, YY[2], XX[2]);
SUMM = vmlaq_f32(SUMM, YY[3], XX[3]);
len -= 16;
}
/* Work on 8 values */
if (len >= 8) {
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
len -= 8;
}
/* Work on 4 values */
if (len >= 4) {
XX[0] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
len -= 4;
}
/* Start accumulating results */
SUMM_2[0] = vget_low_f32(SUMM);
if (len >= 2) {
/* While at it, consume 2 more values if available */
XX_2 = vld1_f32(xi);
xi += 2;
YY_2 = vld1_f32(yi);
yi += 2;
SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2);
len -= 2;
}
SUMM_2[1] = vget_high_f32(SUMM);
SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]);
SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]);
/* Ok, now we have result accumulated in SUMM_2[0].0 */
if (len > 0) {
/* Case when you have one value left */
XX_2 = vld1_dup_f32(xi);
YY_2 = vld1_dup_f32(yi);
SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2);
}
vst1_lane_f32(sum, SUMM_2[0], 0);
}
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch) {
int i;
celt_assert(max_pitch > 0);
celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
for (i = 0; i < (max_pitch-3); i += 4) {
xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i,
(float32_t *)xcorr+i, len);
}
/* In case max_pitch isn't multiple of 4
* compute single correlation value per iteration
*/
for (; i < max_pitch; i++) {
xcorr_kernel_neon_float_process1((const float32_t *)_x,
(const float32_t *)_y+i, (float32_t *)xcorr+i, len);
}
}
#endif
This diff is collapsed.
...@@ -42,6 +42,7 @@ IF OPUS_ARM_MAY_HAVE_NEON ...@@ -42,6 +42,7 @@ IF OPUS_ARM_MAY_HAVE_NEON
; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 ; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
xcorr_kernel_neon PROC xcorr_kernel_neon PROC
xcorr_kernel_neon_start
; input: ; input:
; r3 = int len ; r3 = int len
; r4 = opus_val16 *x ; r4 = opus_val16 *x
...@@ -181,7 +182,7 @@ celt_pitch_xcorr_neon_process4 ...@@ -181,7 +182,7 @@ celt_pitch_xcorr_neon_process4
VEOR q0, q0, q0 VEOR q0, q0, q0
; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3. ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
; So we don't save/restore any other registers. ; So we don't save/restore any other registers.
BL xcorr_kernel_neon BL xcorr_kernel_neon_start
SUBS r6, r6, #4 SUBS r6, r6, #4
VST1.32 {q0}, [r2]! VST1.32 {q0}, [r2]!
; _y += 4 ; _y += 4
...@@ -257,6 +258,7 @@ IF OPUS_ARM_MAY_HAVE_EDSP ...@@ -257,6 +258,7 @@ IF OPUS_ARM_MAY_HAVE_EDSP
; This will get used on ARMv7 devices without NEON, so it has been optimized ; This will get used on ARMv7 devices without NEON, so it has been optimized
; to take advantage of dual-issuing where possible. ; to take advantage of dual-issuing where possible.
xcorr_kernel_edsp PROC xcorr_kernel_edsp PROC
xcorr_kernel_edsp_start
; input: ; input:
; r3 = int len ; r3 = int len
; r4 = opus_val16 *_x (must be 32-bit aligned) ; r4 = opus_val16 *_x (must be 32-bit aligned)
...@@ -309,7 +311,7 @@ xcorr_kernel_edsp_process4_done ...@@ -309,7 +311,7 @@ xcorr_kernel_edsp_process4_done
SUBS r2, r2, #1 ; j-- SUBS r2, r2, #1 ; j--
; Stall ; Stall
SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0) SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0)
LDRGTH r14, [r4], #2 ; r14 = *x++ LDRHGT r14, [r4], #2 ; r14 = *x++
SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1) SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1)
SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2) SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2)
SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3) SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3)
...@@ -319,7 +321,7 @@ xcorr_kernel_edsp_process4_done ...@@ -319,7 +321,7 @@ xcorr_kernel_edsp_process4_done
SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2) SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2)
LDRH r10, [r5], #2 ; r10 = y_4 = *y++ LDRH r10, [r5], #2 ; r10 = y_4 = *y++
SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3) SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3)
LDRGTH r12, [r4], #2 ; r12 = *x++ LDRHGT r12, [r4], #2 ; r12 = *x++
SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4) SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4)
BLE xcorr_kernel_edsp_done BLE xcorr_kernel_edsp_done
SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2) SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2)
...@@ -327,7 +329,7 @@ xcorr_kernel_edsp_process4_done ...@@ -327,7 +329,7 @@ xcorr_kernel_edsp_process4_done
SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3) SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3)
LDRH r2, [r5], #2 ; r2 = y_5 = *y++ LDRH r2, [r5], #2 ; r2 = y_5 = *y++
SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4) SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4)
LDRGTH r14, [r4] ; r14 = *x LDRHGT r14, [r4] ; r14 = *x
SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5) SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5)
BLE xcorr_kernel_edsp_done BLE xcorr_kernel_edsp_done
SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3) SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3)
...@@ -387,11 +389,11 @@ celt_pitch_xcorr_edsp_process1u_loop4 ...@@ -387,11 +389,11 @@ celt_pitch_xcorr_edsp_process1u_loop4
celt_pitch_xcorr_edsp_process1u_loop4_done celt_pitch_xcorr_edsp_process1u_loop4_done
ADDS r12, r12, #4 ADDS r12, r12, #4
celt_pitch_xcorr_edsp_process1u_loop1 celt_pitch_xcorr_edsp_process1u_loop1
LDRGEH r6, [r4], #2 LDRHGE r6, [r4], #2
; Stall ; Stall
SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y)
SUBGES r12, r12, #1 SUBSGE r12, r12, #1
LDRGTH r8, [r5], #2 LDRHGT r8, [r5], #2
BGT celt_pitch_xcorr_edsp_process1u_loop1 BGT celt_pitch_xcorr_edsp_process1u_loop1
; Restore _x ; Restore _x
SUB r4, r4, r3, LSL #1 SUB r4, r4, r3, LSL #1
...@@ -416,7 +418,7 @@ celt_pitch_xcorr_edsp_process4 ...@@ -416,7 +418,7 @@ celt_pitch_xcorr_edsp_process4
MOV r7, #0 MOV r7, #0
MOV r8, #0 MOV r8, #0
MOV r9, #0 MOV r9, #0
BL xcorr_kernel_edsp ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) BL xcorr_kernel_edsp_start ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
CMP r0, r6 CMP r0, r6
; _y+=4 ; _y+=4
...@@ -474,7 +476,7 @@ celt_pitch_xcorr_edsp_process2_1 ...@@ -474,7 +476,7 @@ celt_pitch_xcorr_edsp_process2_1
ADDS r12, r12, #1 ADDS r12, r12, #1
; Stall ; Stall
SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0)
LDRGTH r7, [r4], #2 LDRHGT r7, [r4], #2
SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1)
BLE celt_pitch_xcorr_edsp_process2_done BLE celt_pitch_xcorr_edsp_process2_done
LDRH r9, [r5], #2 LDRH r9, [r5], #2
...@@ -527,8 +529,8 @@ celt_pitch_xcorr_edsp_process1a_loop_done ...@@ -527,8 +529,8 @@ celt_pitch_xcorr_edsp_process1a_loop_done
SUBGE r12, r12, #2 SUBGE r12, r12, #2
SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1)
ADDS r12, r12, #1 ADDS r12, r12, #1
LDRGEH r6, [r4], #2 LDRHGE r6, [r4], #2
LDRGEH r8, [r5], #2 LDRHGE r8, [r5], #2
; Stall ; Stall
SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y)
; maxcorr = max(maxcorr, sum) ; maxcorr = max(maxcorr, sum)
......
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file fft_arm.h
@brief ARM Neon Intrinsic optimizations for fft using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined(FFT_ARM_H)
#define FFT_ARM_H
#include "config.h"
#include "kiss_fft.h"
#if defined(HAVE_ARM_NE10)
int opus_fft_alloc_arm_neon(kiss_fft_state *st);
void opus_fft_free_arm_neon(kiss_fft_state *st);
void opus_fft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
void opus_ifft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OPUS_FFT (1)
#define opus_fft_alloc_arch(_st, arch) \
((void)(arch), opus_fft_alloc_arm_neon(_st))
#define opus_fft_free_arch(_st, arch) \
((void)(arch), opus_fft_free_arm_neon(_st))
#define opus_fft(_st, _fin, _fout, arch) \
((void)(arch), opus_fft_neon(_st, _fin, _fout))
#define opus_ifft(_st, _fin, _fout, arch) \
((void)(arch), opus_ifft_neon(_st, _fin, _fout))
#endif /* OPUS_HAVE_RTCD */
#endif /* HAVE_ARM_NE10 */
#endif
/* Copyright (C) 2015 Vidyo */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FIXED_ARM64_H
#define FIXED_ARM64_H
#include <arm_neon.h>
#undef SIG2WORD16
#define SIG2WORD16(x) (vqmovns_s32(PSHR32((x), SIG_SHIFT)))
#endif
...@@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) ...@@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
#undef MAC16_32_Q15 #undef MAC16_32_Q15
#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) #define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
Result fits in 32 bits. */
#undef MAC16_32_Q16
#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b))
/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
#undef MULT32_32_Q31 #undef MULT32_32_Q31
......
...@@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, ...@@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
} }
#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
Result fits in 32 bits. */
#undef MAC16_32_Q16
static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a,
opus_val32 b)
{
int res;
__asm__(
"#MAC16_32_Q16\n\t"
"smlawb %0, %1, %2, %3;\n"
: "=r"(res)
: "r"(b), "r"(a), "r"(c)
);
return res;
}
#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b))
/** 16x16 multiply-add where the result fits in 32 bits */ /** 16x16 multiply-add where the result fits in 32 bits */
#undef MAC16_16 #undef MAC16_16
static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
...@@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b) ...@@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
} }
#define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) #define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
#ifdef OPUS_ARM_INLINE_MEDIA
#undef SIG2WORD16
static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x)
{
celt_sig res;
__asm__(
"#SIG2WORD16\n\t"
"ssat %0, #16, %1, ASR #12\n\t"
: "=r"(res)
: "r"(x+2048)
);
return EXTRACT16(res);
}
#define SIG2WORD16(x) (SIG2WORD16_armv6(x))
#endif /* OPUS_ARM_INLINE_MEDIA */
#endif #endif
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file arm_mdct.h
@brief ARM Neon Intrinsic optimizations for mdct using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined(MDCT_ARM_H)
#define MDCT_ARM_H
#include "config.h"
#include "mdct.h"
#if defined(HAVE_ARM_NE10)
/** Compute a forward MDCT and scale by 4/N, trashes the input array */
void clt_mdct_forward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap,
int shift, int stride, int arch);
void clt_mdct_backward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap,
int shift, int stride, int arch);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OPUS_MDCT (1)
#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
clt_mdct_forward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
#define clt_mdct_backward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
clt_mdct_backward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
#endif /* OPUS_HAVE_RTCD */
#endif /* HAVE_ARM_NE10 */
#endif
...@@ -46,12 +46,81 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, ...@@ -46,12 +46,81 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch); opus_val32 *xcorr, int len, int max_pitch);
# endif # endif
# if !defined(OPUS_HAVE_RTCD) # if defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
(defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
extern opus_val32
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# elif defined(OPUS_ARM_PRESUME_EDSP) || \
defined(OPUS_ARM_PRESUME_MEDIA) || \
defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_PITCH_XCORR (1) # define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
# endif
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void xcorr_kernel_neon_fixed(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
# endif
# if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
# define OVERRIDE_XCORR_KERNEL (1)
# define xcorr_kernel(x, y, sum, len, arch) \
((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_XCORR_KERNEL (1)
# define xcorr_kernel(x, y, sum, len, arch) \
((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len))
# endif
#else /* Start !FIXED_POINT */
/* Float case */
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
#endif
# if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
# endif # endif
# endif #endif /* end !FIXED_POINT */
#endif #endif
This diff is collapsed.
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
* @param X Spectrum * @param X Spectrum
* @param bandE Square root of the energy for each band (returned) * @param bandE Square root of the energy for each band (returned)
*/ */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M); void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM);
/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/ /*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
...@@ -59,14 +59,15 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel ...@@ -59,14 +59,15 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
* @param bandE Square root of the energy for each band * @param bandE Square root of the energy for each band
*/ */
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M); celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start,
int end, int M, int downsample, int silence);
#define SPREAD_NONE (0) #define SPREAD_NONE (0)
#define SPREAD_LIGHT (1) #define SPREAD_LIGHT (1)
#define SPREAD_NORMAL (2) #define SPREAD_NORMAL (2)
#define SPREAD_AGGRESSIVE (3) #define SPREAD_AGGRESSIVE (3)
int spreading_decision(const CELTMode *m, celt_norm *X, int *average, int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
int last_decision, int *hf_average, int *tapset_decision, int update_hf, int last_decision, int *hf_average, int *tapset_decision, int update_hf,
int end, int C, int M); int end, int C, int M);
...@@ -97,15 +98,20 @@ void haar1(celt_norm *X, int N0, int stride); ...@@ -97,15 +98,20 @@ void haar1(celt_norm *X, int N0, int stride);
* @param LM log2() of the number of 2.5 subframes in the frame * @param LM log2() of the number of 2.5 subframes in the frame
* @param codedBands Last band to receive bits + 1 * @param codedBands Last band to receive bits + 1
* @param seed Random generator seed * @param seed Random generator seed
* @param arch Run-time architecture (see opus_select_arch())
*/ */
void quant_all_bands(int encode, const CELTMode *m, int start, int end, void quant_all_bands(int encode, const CELTMode *m, int start, int end,
celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, celt_norm * X, celt_norm * Y, unsigned char *collapse_masks,
int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed); int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed,
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, int complexity, int arch, int disable_inv);
int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
opus_val16 *prev2logE, int *pulses, opus_uint32 seed); void anti_collapse(const CELTMode *m, celt_norm *X_,
unsigned char *collapse_masks, int LM, int C, int size, int start,
int end, const opus_val16 *logE, const opus_val16 *prev1logE,
const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed,
int arch);
opus_uint32 celt_lcg_rand(opus_uint32 seed); opus_uint32 celt_lcg_rand(opus_uint32 seed);
......
...@@ -54,6 +54,10 @@ ...@@ -54,6 +54,10 @@
#define PACKAGE_VERSION "unknown" #define PACKAGE_VERSION "unknown"
#endif #endif
#if defined(MIPSr1_ASM)
#include "mips/celt_mipsr1.h"
#endif
int resampling_factor(opus_int32 rate) int resampling_factor(opus_int32 rate)
{ {
...@@ -85,8 +89,77 @@ int resampling_factor(opus_int32 rate) ...@@ -85,8 +89,77 @@ int resampling_factor(opus_int32 rate)
return ret; return ret;
} }
#ifndef OVERRIDE_COMB_FILTER_CONST #if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C)
static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, /* This version should be faster on ARM */
#ifdef OPUS_ARM_ASM
#ifndef NON_STATIC_COMB_FILTER_CONST_C
static
#endif
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
opus_val32 x0, x1, x2, x3, x4;
int i;
x4 = SHL32(x[-T-2], 1);
x3 = SHL32(x[-T-1], 1);
x2 = SHL32(x[-T], 1);
x1 = SHL32(x[-T+1], 1);
for (i=0;i<N-4;i+=5)
{
opus_val32 t;
x0=SHL32(x[i-T+2],1);
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
t = SATURATE(t, SIG_SAT);
y[i] = t;
x4=SHL32(x[i-T+3],1);
t = MAC16_32_Q16(x[i+1], g10, x1);
t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
t = SATURATE(t, SIG_SAT);
y[i+1] = t;
x3=SHL32(x[i-T+4],1);
t = MAC16_32_Q16(x[i+2], g10, x0);
t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
t = SATURATE(t, SIG_SAT);
y[i+2] = t;
x2=SHL32(x[i-T+5],1);
t = MAC16_32_Q16(x[i+3], g10, x4);
t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
t = SATURATE(t, SIG_SAT);
y[i+3] = t;
x1=SHL32(x[i-T+6],1);
t = MAC16_32_Q16(x[i+4], g10, x3);
t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
t = SATURATE(t, SIG_SAT);
y[i+4] = t;
}
#ifdef CUSTOM_MODES
for (;i<N;i++)
{
opus_val32 t;
x0=SHL32(x[i-T+2],1);
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
t = SATURATE(t, SIG_SAT);
y[i] = t;
x4=x3;
x3=x2;
x2=x1;
x1=x0;
}
#endif
}
#else
#ifndef NON_STATIC_COMB_FILTER_CONST_C
static
#endif
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12) opus_val16 g10, opus_val16 g11, opus_val16 g12)
{ {
opus_val32 x0, x1, x2, x3, x4; opus_val32 x0, x1, x2, x3, x4;
...@@ -102,6 +175,7 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, ...@@ -102,6 +175,7 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+ MULT16_32_Q15(g10,x2) + MULT16_32_Q15(g10,x2)
+ MULT16_32_Q15(g11,ADD32(x1,x3)) + MULT16_32_Q15(g11,ADD32(x1,x3))
+ MULT16_32_Q15(g12,ADD32(x0,x4)); + MULT16_32_Q15(g12,ADD32(x0,x4));
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3; x4=x3;
x3=x2; x3=x2;
x2=x1; x2=x1;
...@@ -110,10 +184,12 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, ...@@ -110,10 +184,12 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
} }
#endif #endif
#endif
#ifndef OVERRIDE_comb_filter
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap) const opus_val16 *window, int overlap, int arch)
{ {
int i; int i;
/* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
...@@ -131,16 +207,23 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, ...@@ -131,16 +207,23 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
OPUS_MOVE(y, x, N); OPUS_MOVE(y, x, N);
return; return;
} }
g00 = MULT16_16_Q15(g0, gains[tapset0][0]); /* When the gain is zero, T0 and/or T1 is set to zero. We need
g01 = MULT16_16_Q15(g0, gains[tapset0][1]); to have then be at least 2 to avoid processing garbage data. */
g02 = MULT16_16_Q15(g0, gains[tapset0][2]); T0 = IMAX(T0, COMBFILTER_MINPERIOD);
g10 = MULT16_16_Q15(g1, gains[tapset1][0]); T1 = IMAX(T1, COMBFILTER_MINPERIOD);
g11 = MULT16_16_Q15(g1, gains[tapset1][1]); g00 = MULT16_16_P15(g0, gains[tapset0][0]);
g12 = MULT16_16_Q15(g1, gains[tapset1][2]); g01 = MULT16_16_P15(g0, gains[tapset0][1]);
g02 = MULT16_16_P15(g0, gains[tapset0][2]);
g10 = MULT16_16_P15(g1, gains[tapset1][0]);
g11 = MULT16_16_P15(g1, gains[tapset1][1]);
g12 = MULT16_16_P15(g1, gains[tapset1][2]);
x1 = x[-T1+1]; x1 = x[-T1+1];
x2 = x[-T1 ]; x2 = x[-T1 ];
x3 = x[-T1-1]; x3 = x[-T1-1];
x4 = x[-T1-2]; x4 = x[-T1-2];
/* If the filter didn't change, we don't need the overlap */
if (g0==g1 && T0==T1 && tapset0==tapset1)
overlap=0;
for (i=0;i<overlap;i++) for (i=0;i<overlap;i++)
{ {
opus_val16 f; opus_val16 f;
...@@ -153,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, ...@@ -153,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+ MULT16_32_Q15(MULT16_16_Q15(f,g10),x2) + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
+ MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3)) + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
+ MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4)); + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3; x4=x3;
x3=x2; x3=x2;
x2=x1; x2=x1;
...@@ -168,14 +252,20 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, ...@@ -168,14 +252,20 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
} }
/* Compute the part with the constant filter. */ /* Compute the part with the constant filter. */
comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12); comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12, arch);
} }
#endif /* OVERRIDE_comb_filter */
/* TF change table. Positive values mean better frequency resolution (longer
effective window), whereas negative values mean better time resolution
(shorter effective window). The second index is computed as:
4*isTransient + 2*tf_select + per_band_flag */
const signed char tf_select_table[4][8] = { const signed char tf_select_table[4][8] = {
{0, -1, 0, -1, 0,-1, 0,-1}, /*isTransient=0 isTransient=1 */
{0, -1, 0, -2, 1, 0, 1,-1}, {0, -1, 0, -1, 0,-1, 0,-1}, /* 2.5 ms */
{0, -2, 0, -3, 2, 0, 1,-1}, {0, -1, 0, -2, 1, 0, 1,-1}, /* 5 ms */
{0, -2, 0, -3, 3, 0, 1,-1}, {0, -2, 0, -3, 2, 0, 1,-1}, /* 10 ms */
{0, -2, 0, -3, 3, 0, 1,-1}, /* 20 ms */
}; };
...@@ -213,6 +303,9 @@ const char *opus_strerror(int error) ...@@ -213,6 +303,9 @@ const char *opus_strerror(int error)
const char *opus_get_version_string(void) const char *opus_get_version_string(void)
{ {
return "libopus " PACKAGE_VERSION return "libopus " PACKAGE_VERSION
/* Applications may rely on the presence of this substring in the version
string to determine if they have a fixed-point or floating-point build
at runtime. */
#ifdef FIXED_POINT #ifdef FIXED_POINT
"-fixed" "-fixed"
#endif #endif
......
...@@ -57,13 +57,21 @@ typedef struct { ...@@ -57,13 +57,21 @@ typedef struct {
float noisiness; float noisiness;
float activity; float activity;
float music_prob; float music_prob;
int bandwidth; int bandwidth;
}AnalysisInfo; float activity_probability;
} AnalysisInfo;
typedef struct {
int signalType;
int offset;
} SILKInfo;
#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) #define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr))) #define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
#define __celt_check_silkinfo_ptr(ptr) ((ptr) + ((ptr) - (const SILKInfo*)(ptr)))
/* Encoder/decoder Requests */ /* Encoder/decoder Requests */
/* Expose this option again when variable framesize actually works */ /* Expose this option again when variable framesize actually works */
...@@ -116,6 +124,9 @@ typedef struct { ...@@ -116,6 +124,9 @@ typedef struct {
#define OPUS_SET_ENERGY_MASK_REQUEST 10026 #define OPUS_SET_ENERGY_MASK_REQUEST 10026
#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) #define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
#define CELT_SET_SILK_INFO_REQUEST 10028
#define CELT_SET_SILK_INFO(x) CELT_SET_SILK_INFO_REQUEST, __celt_check_silkinfo_ptr(x)
/* Encoder stuff */ /* Encoder stuff */
int celt_encoder_get_size(int channels); int celt_encoder_get_size(int channels);
...@@ -134,7 +145,8 @@ int celt_decoder_get_size(int channels); ...@@ -134,7 +145,8 @@ int celt_decoder_get_size(int channels);
int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels); int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec); int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
#define celt_encoder_ctl opus_custom_encoder_ctl #define celt_encoder_ctl opus_custom_encoder_ctl
#define celt_decoder_ctl opus_custom_decoder_ctl #define celt_decoder_ctl opus_custom_decoder_ctl
...@@ -200,15 +212,25 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES ...@@ -200,15 +212,25 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap); const opus_val16 *window, int overlap, int arch);
#ifdef NON_STATIC_COMB_FILTER_CONST_C
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12);
#endif
#ifndef OVERRIDE_COMB_FILTER_CONST
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
#endif
void init_caps(const CELTMode *m,int *cap,int LM,int C); void init_caps(const CELTMode *m,int *cap,int LM,int C);
#ifdef RESYNTH #ifdef RESYNTH
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch); void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
celt_sig * OPUS_RESTRICT out_mem[], int C, int LM); int LM, int downsample, int silence);
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
......
This diff is collapsed.
This diff is collapsed.
...@@ -49,8 +49,7 @@ int p ...@@ -49,8 +49,7 @@ int p
float *lpc = _lpc; float *lpc = _lpc;
#endif #endif
for (i = 0; i < p; i++) OPUS_CLEAR(lpc, p);
lpc[i] = 0;
if (ac[0] != 0) if (ac[0] != 0)
{ {
for (i = 0; i < p; i++) { for (i = 0; i < p; i++) {
...@@ -88,12 +87,15 @@ int p ...@@ -88,12 +87,15 @@ int p
#endif #endif
} }
void celt_fir(const opus_val16 *_x,
void celt_fir_c(
const opus_val16 *_x,
const opus_val16 *num, const opus_val16 *num,
opus_val16 *_y, opus_val16 *_y,
int N, int N,
int ord, int ord,
opus_val16 *mem) opus_val16 *mem,
int arch)
{ {
int i,j; int i,j;
VARDECL(opus_val16, rnum); VARDECL(opus_val16, rnum);
...@@ -111,6 +113,7 @@ void celt_fir(const opus_val16 *_x, ...@@ -111,6 +113,7 @@ void celt_fir(const opus_val16 *_x,
for(i=0;i<ord;i++) for(i=0;i<ord;i++)
mem[i] = _x[N-i-1]; mem[i] = _x[N-i-1];
#ifdef SMALL_FOOTPRINT #ifdef SMALL_FOOTPRINT
(void)arch;
for (i=0;i<N;i++) for (i=0;i<N;i++)
{ {
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
...@@ -124,7 +127,7 @@ void celt_fir(const opus_val16 *_x, ...@@ -124,7 +127,7 @@ void celt_fir(const opus_val16 *_x,
for (i=0;i<N-3;i+=4) for (i=0;i<N-3;i+=4)
{ {
opus_val32 sum[4]={0,0,0,0}; opus_val32 sum[4]={0,0,0,0};
xcorr_kernel(rnum, x+i, sum, ord); xcorr_kernel(rnum, x+i, sum, ord, arch);
_y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT))); _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT)));
_y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT))); _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
_y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT))); _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
...@@ -146,10 +149,12 @@ void celt_iir(const opus_val32 *_x, ...@@ -146,10 +149,12 @@ void celt_iir(const opus_val32 *_x,
opus_val32 *_y, opus_val32 *_y,
int N, int N,
int ord, int ord,
opus_val16 *mem) opus_val16 *mem,
int arch)
{ {
#ifdef SMALL_FOOTPRINT #ifdef SMALL_FOOTPRINT
int i,j; int i,j;
(void)arch;
for (i=0;i<N;i++) for (i=0;i<N;i++)
{ {
opus_val32 sum = _x[i]; opus_val32 sum = _x[i];
...@@ -161,7 +166,7 @@ void celt_iir(const opus_val32 *_x, ...@@ -161,7 +166,7 @@ void celt_iir(const opus_val32 *_x,
{ {
mem[j]=mem[j-1]; mem[j]=mem[j-1];
} }
mem[0] = ROUND16(sum,SIG_SHIFT); mem[0] = SROUND16(sum, SIG_SHIFT);
_y[i] = sum; _y[i] = sum;
} }
#else #else
...@@ -187,23 +192,23 @@ void celt_iir(const opus_val32 *_x, ...@@ -187,23 +192,23 @@ void celt_iir(const opus_val32 *_x,
sum[1]=_x[i+1]; sum[1]=_x[i+1];
sum[2]=_x[i+2]; sum[2]=_x[i+2];
sum[3]=_x[i+3]; sum[3]=_x[i+3];
xcorr_kernel(rden, y+i, sum, ord); xcorr_kernel(rden, y+i, sum, ord, arch);
/* Patch up the result to compensate for the fact that this is an IIR */ /* Patch up the result to compensate for the fact that this is an IIR */
y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT); y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
_y[i ] = sum[0]; _y[i ] = sum[0];
sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT); y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
_y[i+1] = sum[1]; _y[i+1] = sum[1];
sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT); y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
_y[i+2] = sum[2]; _y[i+2] = sum[2];
sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]); sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT); y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
_y[i+3] = sum[3]; _y[i+3] = sum[3];
} }
for (;i<N;i++) for (;i<N;i++)
...@@ -211,7 +216,7 @@ void celt_iir(const opus_val32 *_x, ...@@ -211,7 +216,7 @@ void celt_iir(const opus_val32 *_x,
opus_val32 sum = _x[i]; opus_val32 sum = _x[i];
for (j=0;j<ord;j++) for (j=0;j<ord;j++)
sum -= MULT16_16(rden[j],y[i+j]); sum -= MULT16_16(rden[j],y[i+j]);
y[i+ord] = ROUND16(sum,SIG_SHIFT); y[i+ord] = SROUND16(sum,SIG_SHIFT);
_y[i] = sum; _y[i] = sum;
} }
for(i=0;i<ord;i++) for(i=0;i<ord;i++)
......
...@@ -29,24 +29,37 @@ ...@@ -29,24 +29,37 @@
#define PLC_H #define PLC_H
#include "arch.h" #include "arch.h"
#include "cpu_support.h"
#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
#include "x86/celt_lpc_sse.h"
#endif
#define LPC_ORDER 24 #define LPC_ORDER 24
void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p); void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
void celt_fir(const opus_val16 *x, void celt_fir_c(
const opus_val16 *x,
const opus_val16 *num, const opus_val16 *num,
opus_val16 *y, opus_val16 *y,
int N, int N,
int ord, int ord,
opus_val16 *mem); opus_val16 *mem,
int arch);
#if !defined(OVERRIDE_CELT_FIR)
#define celt_fir(x, num, y, N, ord, mem, arch) \
(celt_fir_c(x, num, y, N, ord, mem, arch))
#endif
void celt_iir(const opus_val32 *x, void celt_iir(const opus_val32 *x,
const opus_val16 *den, const opus_val16 *den,
opus_val32 *y, opus_val32 *y,
int N, int N,
int ord, int ord,
opus_val16 *mem); opus_val16 *mem,
int arch);
int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
const opus_val16 *window, int overlap, int lag, int n, int arch); const opus_val16 *window, int overlap, int lag, int n, int arch);
......
...@@ -31,7 +31,8 @@ ...@@ -31,7 +31,8 @@
#include "opus_types.h" #include "opus_types.h"
#include "opus_defines.h" #include "opus_defines.h"
#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM) #if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/armcpu.h" #include "arm/armcpu.h"
/* We currently support 4 ARM variants: /* We currently support 4 ARM variants:
...@@ -42,6 +43,22 @@ ...@@ -42,6 +43,22 @@
*/ */
#define OPUS_ARCHMASK 3 #define OPUS_ARCHMASK 3
#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
#include "x86/x86cpu.h"
/* We currently support 5 x86 variants:
* arch[0] -> non-sse
* arch[1] -> sse
* arch[2] -> sse2
* arch[3] -> sse4.1
* arch[4] -> avx
*/
#define OPUS_ARCHMASK 7
int opus_select_arch(void);
#else #else
#define OPUS_ARCHMASK 0 #define OPUS_ARCHMASK 0
...@@ -50,5 +67,4 @@ static OPUS_INLINE int opus_select_arch(void) ...@@ -50,5 +67,4 @@ static OPUS_INLINE int opus_select_arch(void)
return 0; return 0;
} }
#endif #endif
#endif #endif
This diff is collapsed.
...@@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac); ...@@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac);
void encode_pulses(const int *_y, int N, int K, ec_enc *enc); void encode_pulses(const int *_y, int N, int K, ec_enc *enc);
void decode_pulses(int *_y, int N, int K, ec_dec *dec); opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec);
#endif /* CWRS_H */ #endif /* CWRS_H */
This diff is collapsed.
This diff is collapsed.
...@@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){ ...@@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){
unsigned ec_decode(ec_dec *_this,unsigned _ft){ unsigned ec_decode(ec_dec *_this,unsigned _ft){
unsigned s; unsigned s;
_this->ext=_this->rng/_ft; _this->ext=celt_udiv(_this->rng,_ft);
s=(unsigned)(_this->val/_this->ext); s=(unsigned)(_this->val/_this->ext);
return _ft-EC_MINI(s+1,_ft); return _ft-EC_MINI(s+1,_ft);
} }
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment