k2-fsa / sherpa-onnx

Speech-to-text, text-to-speech, speaker recognition, and VAD using next-gen Kaldi with onnxruntime without Internet connection. Support embedded systems, Android, iOS, Raspberry Pi, RISC-V, x86_64 servers, websocket server/client, C/C++, Python, Kotlin, C#, Go, NodeJS, Java, Swift, Dart, JavaScript, Flutter, Object Pascal, Lazarus, Rust
https://k2-fsa.github.io/sherpa/onnx/index.html
Apache License 2.0
3.08k stars 355 forks source link

Unable to load DLL 'sherpa-onnx-c-api' or one of its dependencies #1050

Closed qiaokesi closed 2 months ago

qiaokesi commented 2 months ago

`// Copyright (c) 2023 Xiaomi Corporation // // This file shows how to use a streaming model for real-time speech // recognition from a microphone. // Please refer to // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html // to download streaming models

using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Threading;

using CommandLine;
using CommandLine.Text;
using PortAudioSharp;
using SherpaOnnx;

class SpeechRecognitionFromMicrophone
{
    // Command-line options for the streaming (online) recognizer demo.
    //
    // NOTE: Main() constructs this class directly with `new Options()` and never
    // invokes CommandLine.Parser, so the [Option(Default = ...)] values are NOT
    // applied automatically. Every property with a declared Default therefore
    // carries a matching C# initializer so that direct construction yields the
    // documented defaults. (Previously EnableEndpoint and the three Rule*
    // thresholds had no initializers and came back as false/0, which zeroed out
    // the endpoint rules copied into OnlineRecognizerConfig.)
    class Options
    {
        [Option(Required = true, HelpText = "Path to tokens.txt")]
        public string Tokens { get; set; }

        [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
        public string Provider { get; set; } = "cpu";

        [Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
        public string Encoder { get; set; }

        [Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
        public string Decoder { get; set; }

        [Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
        public string Joiner { get; set; }

        [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
        public string ParaformerEncoder { get; set; }

        [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
        public string ParaformerDecoder { get; set; }

        [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
        public int NumThreads { get; set; } = 1;

        [Option("decoding-method", Required = false, Default = "greedy_search",
            HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
        public string DecodingMethod { get; set; } = "greedy_search";

        [Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
        public bool Debug { get; set; }

        [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
        public int SampleRate { get; set; } = 16000;

        // Fixed HelpText typo: it previously read "--decoding--method" (double dash).
        [Option("max-active-paths", Required = false, Default = 4,
            HelpText = "Used only when --decoding-method is modified_beam_search. It specifies number of active paths to keep during the search")]
        public int MaxActivePaths { get; set; } = 4;

        // Initializer added so direct construction matches Default = true.
        [Option("enable-endpoint", Required = false, Default = true,
            HelpText = "True to enable endpoint detection.")]
        public bool EnableEndpoint { get; set; } = true;

        // Initializers added for the three endpoint rules below: Main() reads
        // them without running the parser, so without initializers they were
        // all 0 and endpointing triggered immediately.
        [Option("rule1-min-trailing-silence", Required = false, Default = 2.4F,
            HelpText = "An endpoint is detected if trailing silence in seconds is larger than this value even if nothing has been decoded. Used only when --enable-endpoint is true.")]
        public float Rule1MinTrailingSilence { get; set; } = 2.4F;

        [Option("rule2-min-trailing-silence", Required = false, Default = 0.8F,
            HelpText = "An endpoint is detected if trailing silence in seconds is larger than this value after something that is not blank has been decoded. Used only when --enable-endpoint is true.")]
        public float Rule2MinTrailingSilence { get; set; } = 0.8F;

        [Option("rule3-min-utterance-length", Required = false, Default = 20.0F,
            HelpText = "An endpoint is detected if the utterance in seconds is larger than this value. Used only when --enable-endpoint is true.")]
        public float Rule3MinUtteranceLength { get; set; } = 20.0F;
    }

// Entry point: streams microphone audio through a sherpa-onnx online
// (streaming) recognizer and prints partial results, starting a new output
// line at each detected endpoint.
//
// NOTE(review): the command-line args are ignored; model paths are hard-coded
// below relative to the executable directory, and the [Option(Default = ...)]
// values on Options are never applied because CommandLine.Parser is not
// invoked — unset properties keep their plain C# defaults.
static void Main(string[] args) {

// Path.Combine requires System.IO — TODO confirm a `using System.IO;` (or
// full qualification) exists in this file's using list.
Options options = new Options();
options.Tokens = Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt");
options.Encoder = Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx");
options.Decoder = Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx");
options.Joiner = Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx");

OnlineRecognizerConfig config = new OnlineRecognizerConfig();
config.FeatConfig.SampleRate = options.SampleRate;

// All models from icefall using feature dim 80.
// You can change it if your model has a different feature dim.
config.FeatConfig.FeatureDim = 80;

config.ModelConfig.Transducer.Encoder = options.Encoder;
config.ModelConfig.Transducer.Decoder = options.Decoder;
config.ModelConfig.Transducer.Joiner = options.Joiner;

// Paraformer paths are null here (never assigned above) — only the
// transducer model is actually configured in this demo.
config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;

config.ModelConfig.Tokens = options.Tokens;
config.ModelConfig.Provider = options.Provider;
config.ModelConfig.NumThreads = options.NumThreads;
// NOTE(review): hard-coded to 1 (verbose model info), ignoring options.Debug.
config.ModelConfig.Debug = 1;

config.DecodingMethod = options.DecodingMethod;
config.MaxActivePaths = options.MaxActivePaths;
// NOTE(review): hard-coded to 1, ignoring options.EnableEndpoint. The three
// Rule* thresholds below come from options; if Options was constructed
// without the parser and has no initializers, they are all 0 here — confirm.
config.EnableEndpoint = 1;

config.Rule1MinTrailingSilence = options.Rule1MinTrailingSilence;
config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;

// Loads the native sherpa-onnx-c-api library; this is the line that throws
// DllNotFoundException when the native DLL (or a dependency) cannot load.
OnlineRecognizer recognizer = new OnlineRecognizer(config);

OnlineStream s = recognizer.CreateStream();

Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize();

// Enumerate audio devices for diagnostics before picking the default input.
Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
for (int i = 0; i != PortAudio.DeviceCount; ++i)
{
  Console.WriteLine($" Device {i}");
  DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
  Console.WriteLine($"   Name: {deviceInfo.name}");
  Console.WriteLine($"   Max input channels: {deviceInfo.maxInputChannels}");
  Console.WriteLine($"   Default sample rate: {deviceInfo.defaultSampleRate}");
}
int deviceIndex = PortAudio.DefaultInputDevice;
if (deviceIndex == PortAudio.NoDevice)
{
  Console.WriteLine("No default input device found");
  Environment.Exit(1);
}

DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);

Console.WriteLine();
Console.WriteLine($"Use default device {deviceIndex} ({info.name})");

// Mono float32 capture at the model's sample rate.
StreamParameters param = new StreamParameters();
param.device = deviceIndex;
param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32;
param.suggestedLatency = info.defaultLowInputLatency;
param.hostApiSpecificStreamInfo = IntPtr.Zero;

// Audio callback: copies the captured frames out of native memory and feeds
// them to the recognizer stream. NOTE(review): allocates a new float[] on
// every callback and calls AcceptWaveform from the audio thread while the
// main thread decodes — presumably the sherpa-onnx stream buffers this
// safely; confirm against the library's threading contract.
PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
    UInt32 frameCount,
    ref StreamCallbackTimeInfo timeInfo,
    StreamCallbackFlags statusFlags,
    IntPtr userData
    ) =>
{
  float[] samples = new float[frameCount];
  Marshal.Copy(input, samples, 0, (Int32)frameCount);

  s.AcceptWaveform(options.SampleRate, samples);

  return StreamCallbackResult.Continue;
};

PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: options.SampleRate,
    framesPerBuffer: 0,
    streamFlags: StreamFlags.ClipOff,
    callback: callback,
    userData: IntPtr.Zero
    );

// Prints the struct's type name only; kept as-is (diagnostic leftover).
Console.WriteLine(param);
Console.WriteLine("Started! Please speak");

stream.Start();

String lastText = "";
int segmentIndex = 0;

// Main decode loop: drain ready feature frames, print the partial result on
// the same console line, and start a new segment when an endpoint fires.
while (true)
{
  while (recognizer.IsReady(s))
  {
    recognizer.Decode(s);
  }

  var text = recognizer.GetResult(s).Text;
  bool isEndpoint = recognizer.IsEndpoint(s);
  if (!string.IsNullOrWhiteSpace(text) && lastText != text)
  {
    lastText = text;
    Console.Write($"\r{segmentIndex}: {lastText}");
  }

  if (isEndpoint)
  {
    if (!string.IsNullOrWhiteSpace(text))
    {
      ++segmentIndex;
      Console.WriteLine();
    }
    recognizer.Reset(s);
  }

  Thread.Sleep(200); // ms
}

// NOTE(review): unreachable — the while(true) loop above never exits, so
// PortAudio is never terminated cleanly (process exit via Ctrl+C only).
PortAudio.Terminate();

} } `

system: win10,i9 cpu

error: Unhandled exception. System.DllNotFoundException: Unable to load DLL 'sherpa-onnx-c-api' or one of its dependencies: 动态链接库(DLL)初始化例程失败。 (0x8007045A) at SherpaOnnx.OnlineRecognizer.CreateOnlineRecognizer(OnlineRecognizerConfig& config) at SherpaOnnx.OnlineRecognizer..ctor(OnlineRecognizerConfig config) at SpeechRecognitionFromMicrophone.Main(String[] args) in

qiaokesi commented 2 months ago

为啥C#的案例运行起来后都是 sherpa-onnx-c-api 动态链接库(DLL)初始化例程失败。 (0x8007045A)

csukuangfj commented 2 months ago

我本地没问题, CI 也没问题,你是第二位有这个问题的同学。我不清楚是什么原因导致的.

qiaokesi commented 2 months ago

我本地没问题, CI 也没问题,你是第二位有这个问题的同学。我不清楚是什么原因导致的.

有群或者QQ嘛,也许可以远程对比排查下。

csukuangfj commented 2 months ago

我本地没问题, CI 也没问题,你是第二位有这个问题的同学。我不清楚是什么原因导致的.

有群或者QQ嘛,也许可以远程对比排查下。

请看 https://k2-fsa.github.io/sherpa/social-groups.html

建议加 qq 群

csukuangfj commented 2 months ago

@qiaokesi please try the latest version. It should work now.