Not working.. - Githubissues

What does work

The code in root

What doesn't work

The code in VoiceServer.

Note that engine.cpp is doing some neat little things to call our Python file directly in voices/.

We don't need to do that. I wonder about doing one of the following instead:

a. Calling our pipe service directly, e.g.


#include <windows.h>

#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <json/json.h>  // Include a JSON library for serializing the request

// Function to send request to pipe server
bool SendRequestToPipe(const std::string& text, std::vector<char>& audio_data) {
    // Connect to the pipe
    HANDLE pipe = CreateFile(
        R"(\\.\pipe\AACSpeakHelper)", // Pipe name
        GENERIC_READ | GENERIC_WRITE,
        0,
        NULL,
        OPEN_EXISTING,
        0,
        NULL);

    if (pipe == INVALID_HANDLE_VALUE) {
        std::cerr << "Error: Could not connect to pipe server.\n";
        return false;
    }

    // Create JSON request
    Json::Value request;
    request["action"] = "speak";
    request["text"] = text;
    request["engine"] = "AzureNeural"; // Or any other engine

    std::string request_data = Json::writeString(Json::StreamWriterBuilder(), request);

    DWORD bytes_written;
    WriteFile(pipe, request_data.c_str(), request_data.size(), &bytes_written, NULL);

    // Read response
    char buffer[65536];
    DWORD bytes_read;
    ReadFile(pipe, buffer, sizeof(buffer), &bytes_read, NULL);

    // Deserialize JSON response
    Json::Value response;
    Json::CharReaderBuilder reader;
    std::string errors;
    std::string response_data(buffer, bytes_read);

    if (!Json::parseFromStream(reader, response_data, &response, &errors)) {
        std::cerr << "Error parsing response from pipe server: " << errors << std::endl;
        CloseHandle(pipe);
        return false;
    }

    if (response["status"] == "success") {
        // Extract audio data
        const Json::Value& audio_chunks = response["audio_data"];
        for (const auto& chunk : audio_chunks) {
            std::vector<char> chunk_data = chunk.asCString();
            audio_data.insert(audio_data.end(), chunk_data.begin(), chunk_data.end());
        }

        CloseHandle(pipe);
        return true;
    }

    CloseHandle(pipe);
    return false;
}

// Speaks each text fragment in `pTextFragList`: the fragment text is converted
// to UTF-8, sent to the pipe server via SendRequestToPipe(), and the returned
// audio bytes are written to `pOutputSite`.
//
// Returns S_OK when every fragment was processed (or when handle_actions()
// returns 1, which ends the call early), and E_FAIL if the pipe server fails
// or the output site does not accept all of the audio.
//
// NOTE(review): dwSpeakFlags, rguidFormatId and pWaveFormatEx are not
// consulted; the audio returned by the pipe server is assumed to already be in
// the format the output site expects — confirm.
HRESULT __stdcall Engine::Speak(DWORD dwSpeakFlags, REFGUID rguidFormatId, const WAVEFORMATEX* pWaveFormatEx,
                                const SPVTEXTFRAG* pTextFragList, ISpTTSEngineSite* pOutputSite)
{
    slog("Engine::Speak");

    for (const auto* text_frag = pTextFragList; text_frag != nullptr; text_frag = text_frag->pNext) {
        // A return value of 1 ends the call early (presumably an abort/skip
        // request from the site — confirm against handle_actions).
        if (handle_actions(pOutputSite) == 1) {
            return S_OK;
        }

        // The fragment is delimited by ulTextLen, not by a terminating NUL, so
        // take a bounded copy and use it for both logging and synthesis
        // instead of handing the raw pointer to the logger.
        const std::wstring fragment_text = (text_frag->pTextStart != nullptr)
            ? std::wstring(text_frag->pTextStart, text_frag->ulTextLen)
            : std::wstring();

        slog(L"action={}, offset={}, length={}, text=\"{}\"",
            (int)text_frag->State.eAction,
            text_frag->ulTextSrcOffset,
            text_frag->ulTextLen,
            fragment_text);

        // Nothing to synthesize: skip the pipe round-trip.
        if (fragment_text.empty()) {
            continue;
        }

        // Convert wide string to UTF-8
        const std::string text = utf8_encode(fragment_text);

        std::vector<char> audio_data;
        if (!SendRequestToPipe(text, audio_data)) {
            std::cerr << "Failed to get audio data from pipe server.\n";
            return E_FAIL;
        }

        if (audio_data.empty()) {
            continue;
        }

        // ISpTTSEngineSite::Write takes a ULONG byte count; refuse to truncate.
        if (audio_data.size() > MAXDWORD) {
            std::cerr << "Error writing audio data to output site.\n";
            return E_FAIL;
        }
        const ULONG audio_size = static_cast<ULONG>(audio_data.size());

        // Write audio data to the output
        ULONG written = 0;
        const HRESULT result = pOutputSite->Write(audio_data.data(), audio_size, &written);
        if (result != S_OK || written != audio_size) {
            std::cerr << "Error writing audio data to output site.\n";
            return E_FAIL;
        }

        slog("Engine::Speak written={} bytes", written);
    }

    return S_OK;
}

b. Calling an executable (e.g. a frozen Python exe) that calls our pipe service

e.g.

#include <windows.h>
#include <iostream>
#include <vector>
#include <string>

// Function to execute the external executable and get its output (audio data)
bool RunExternalTTSProcess(const std::string& text, std::vector<char>& audio_data) {
    STARTUPINFO si;
    PROCESS_INFORMATION pi;
    SECURITY_ATTRIBUTES sa;
    HANDLE hReadPipe, hWritePipe;
    char buffer[4096];
    DWORD bytes_read;

    ZeroMemory(&si, sizeof(si));
    si.cb = sizeof(si);
    ZeroMemory(&pi, sizeof(pi));

    sa.nLength = sizeof(SECURITY_ATTRIBUTES);
    sa.bInheritHandle = TRUE;
    sa.lpSecurityDescriptor = NULL;

    // Create a pipe for the child process's STDOUT
    if (!CreatePipe(&hReadPipe, &hWritePipe, &sa, 0)) {
        std::cerr << "CreatePipe failed\n";
        return false;
    }

    // Ensure the read handle to the pipe is not inherited
    if (!SetHandleInformation(hReadPipe, HANDLE_FLAG_INHERIT, 0)) {
        std::cerr << "SetHandleInformation failed\n";
        return false;
    }

    std::string command = "tts_pipe_exe " + text;  // Call your TTS exe
    if (!CreateProcess(NULL, (LPSTR)command.c_str(), NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi)) {
        std::cerr << "CreateProcess failed\n";
        return false;
    }

    // Read output from the pipe
    while (ReadFile(hReadPipe, buffer, sizeof(buffer), &bytes_read, NULL) && bytes_read > 0) {
        audio_data.insert(audio_data.end(), buffer, buffer + bytes_read);
    }

    // Wait for process to exit and clean up
    WaitForSingleObject(pi.hProcess, INFINITE);
    CloseHandle(pi.hProcess);
    CloseHandle(pi.hThread);
    CloseHandle(hReadPipe);
    CloseHandle(hWritePipe);

    return true;
}

HRESULT __stdcall Engine::Speak(DWORD dwSpeakFlags, REFGUID rguidFormatId, const WAVEFORMATEX* pWaveFormatEx,
                                const SPVTEXTFRAG* pTextFragList, ISpTTSEngineSite* pOutputSite)
{
    slog("Engine::Speak");

    for (const auto* text_frag = pTextFragList; text_frag != nullptr; text_frag = text_frag->pNext) {
        if (handle_actions(pOutputSite) == 1) {
            return S_OK;
        }

        slog(L"action={}, offset={}, length={}, text=\"{}\"",
            (int)text_frag->State.eAction,
            text_frag->ulTextSrcOffset,
            text_frag->ulTextLen,
            text_frag->pTextStart);

        // Convert wide string to UTF-8
        std::string text = utf8_encode(std::wstring(text_frag->pTextStart, text_frag->ulTextLen));

        std::vector<char> audio_data;
        if (!RunExternalTTSProcess(text, audio_data)) {
            std::cerr << "Failed to run external TTS process.\n";
            return E_FAIL;
        }

        // Write audio data to the output
        ULONG written;
        HRESULT result = pOutputSite->Write(audio_data.data(), audio_data.size(), &written);
        if (result != S_OK || written != audio_data.size()) {
            std

AceCentre / SAPI-POC

Not working.. #2

Update