diff --git a/.gitmodules b/.gitmodules index 27efb8e..42df2be 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,6 @@ [submodule "subprojects/miniaudio"] path = subprojects/miniaudio url = https://github.com/mackron/miniaudio +[submodule "subprojects/rnnoise"] + path = subprojects/rnnoise + url = https://github.com/xiph/rnnoise diff --git a/CMakeLists.txt b/CMakeLists.txt index c4fdf7a..6bdacc8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,7 @@ option(USE_LIBHANDY "Enable features that require libhandy (default)" ON) option(ENABLE_VOICE "Enable voice suppport" ON) option(USE_KEYCHAIN "Store the token in the keychain (default)" ON) option(ENABLE_NOTIFICATION_SOUNDS "Enable notification sounds (default)" ON) +option(ENABLE_RNNOISE "Enable RNNoise for voice activity detection (default)" ON) find_package(nlohmann_json REQUIRED) find_package(CURL) @@ -150,6 +151,35 @@ if (ENABLE_VOICE) target_link_libraries(abaddon PkgConfig::libsodium) target_link_libraries(abaddon ${CMAKE_DL_LIBS}) + + if (ENABLE_RNNOISE) + find_package(rnnoise QUIET) + if (NOT rnnoise_FOUND) + message("rnnoise was not found and will be included as a submodule") + # This is potentially really stupid + add_library(rnnoise + subprojects/rnnoise/src/arch.h + subprojects/rnnoise/src/celt_lpc.c + subprojects/rnnoise/src/celt_lpc.h + subprojects/rnnoise/src/common.h + subprojects/rnnoise/src/denoise.c + subprojects/rnnoise/src/kiss_fft.c + subprojects/rnnoise/src/kiss_fft.h + subprojects/rnnoise/src/opus_types.h + subprojects/rnnoise/src/pitch.c + subprojects/rnnoise/src/pitch.h + subprojects/rnnoise/src/rnn_data.c + subprojects/rnnoise/src/rnn_data.h + subprojects/rnnoise/src/rnn_reader.c + subprojects/rnnoise/src/rnn.c + subprojects/rnnoise/src/rnn.h + subprojects/rnnoise/src/tansig_table.h + subprojects/rnnoise/src/_kiss_fft_guts.h + subprojects/rnnoise/include/rnnoise.h) + target_include_directories(rnnoise PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/subprojects/rnnoise/include") + target_link_libraries(abaddon rnnoise) + endif () + endif () endif () if (${ENABLE_NOTIFICATION_SOUNDS}) diff --git a/src/audio/manager.cpp b/src/audio/manager.cpp index 6e85ed8..9aacd29 100644 --- a/src/audio/manager.cpp +++ b/src/audio/manager.cpp @@ -55,6 +55,9 @@ void capture_data_callback(ma_device *pDevice, void *pOutput, const void *pInput AudioManager::AudioManager() { m_ok = true; + m_rnnoise = rnnoise_create(nullptr); + spdlog::get("audio")->info("RNNoise expects {} frames", rnnoise_get_frame_size()); + int err; m_encoder = opus_encoder_create(48000, 2, OPUS_APPLICATION_VOIP, &err); if (err != OPUS_OK) { @@ -134,6 +137,7 @@ AudioManager::~AudioManager() { ma_device_uninit(&m_capture_device); ma_context_uninit(&m_context); RemoveAllSSRCs(); + rnnoise_destroy(m_rnnoise); } void AudioManager::AddSSRC(uint32_t ssrc) { @@ -410,7 +414,14 @@ void AudioManager::OnCapturedPCM(const int16_t *pcm, ma_uint32 frames) { UpdateCaptureVolume(new_pcm.data(), frames); - if (m_capture_peak_meter / 32768.0 < m_capture_gate) return; + static float idc[480]; + static float rnnoise_input[480]; + // take left channel + for (int i = 0; i < 480; i++) { + rnnoise_input[i] = static_cast(pcm[i * 2]); + } + float prob = rnnoise_process_frame(m_rnnoise, idc, rnnoise_input); + if (prob < m_capture_gate) return; m_enc_mutex.lock(); int payload_len = opus_encode(m_encoder, new_pcm.data(), 480, static_cast(m_opus_buffer), 1275); diff --git a/src/audio/manager.hpp b/src/audio/manager.hpp index 9cd7f42..28d6d74 100644 --- a/src/audio/manager.hpp +++ b/src/audio/manager.hpp @@ -14,6 +14,8 @@ #include #include #include +#include + #include "devices.hpp" // clang-format on @@ -113,6 +115,8 @@ private: AudioDevices m_devices; + DenoiseState *m_rnnoise; + public: using type_signal_opus_packet = sigc::signal; type_signal_opus_packet signal_opus_packet(); diff --git a/subprojects/rnnoise b/subprojects/rnnoise new file mode 160000 index 0000000..1cbdbcf --- /dev/null +++ b/subprojects/rnnoise @@ -0,0 +1 @@ +Subproject commit 1cbdbcf1283499bbb2230a6b0f126eb9b236defd