k2-fsa/sherpa-ncnn:在没有互联网连接的情况下使用带有 ncnn 的下一代 Kaldi 进行实时语音识别。支持iOS、Android、Raspberry Pi、VisionFive2、LicheePi4A等。 (github.com)
如果是PC端可以直接使用ssssssilver大佬的 https://github.com/ssssssilver/sherpa-ncnn-unity.git
我这边要折腾的是WebGL版本的,所以修改了一番
1、WebSocket:客户端使用了 psygames/UnityWebSocket(The Best Unity WebSocket Plugin for All Platforms,github.com)。
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using UnityEngine;
using UnityEngine.UI;
using UnityWebSocket;

/// <summary>
/// Unity WebGL client: forwards microphone samples to a WebSocket server as raw
/// bytes and appends the recognized text the server sends back to a UI label.
/// </summary>
public class uSherpaWebGL : MonoBehaviour
{
    IWebSocket ws;

    /// <summary>UI label that accumulates the recognized text.</summary>
    public Text text;

    // Messages received from the socket, waiting to be shown by Update().
    Queue<string> msgs = new Queue<string>();

    // Most recent byte payload handed to the socket.
    byte[] desArray;

    // Start is called before the first frame update
    void Start()
    {
        ws = new WebSocket("ws://127.0.0.1:9999");
        ws.OnOpen += OnOpen;
        ws.OnMessage += OnMessage;
        ws.OnError += OnError;
        ws.OnClose += OnClose;
        ws.ConnectAsync();
    }

    // Update is called once per frame
    void Update()
    {
        // Drain everything that arrived since the last frame so the label
        // does not lag behind when several messages arrive together.
        while (msgs.Count > 0)
        {
            text.text += msgs.Dequeue();
        }
    }

    /// <summary>
    /// Callback for the microphone plugin: receives one buffer of samples
    /// and forwards it to the server.
    /// </summary>
    /// <param name="input">Audio samples as floats.</param>
    public void OnData(float[] input)
    {
        if (input == null)
        {
            return;
        }

        Debug.Log("input.Length:" + input.Length);
        SendData(input);
    }

    /// <summary>
    /// Sends the samples as their raw in-memory bytes (4 bytes per float).
    /// Does nothing when the buffer is empty or the socket is not open.
    /// </summary>
    /// <param name="input">Audio samples to send.</param>
    void SendData(float[] input)
    {
        if (input == null || input.Length == 0)
        {
            return;
        }

        // Check the connection first so no buffer is allocated for data
        // that would be dropped anyway.
        if (ws == null || ws.ReadyState != WebSocketState.Open)
        {
            return;
        }

        // Buffer.BlockCopy works on managed arrays directly. The previous
        // Marshal.UnsafeAddrOfPinnedArrayElement call required the array to
        // be pinned, which it was not.
        int byteCount = Buffer.ByteLength(input);
        desArray = new byte[byteCount];
        Buffer.BlockCopy(input, 0, desArray, 0, byteCount);

        ws.SendAsync(desArray);
    }

    void OnOpen(object sender, OpenEventArgs e)
    {
        Debug.Log("WS connected!");
    }

    /// <summary>
    /// Queues binary messages, decoded as UTF-8, for display. Text frames
    /// are ignored.
    /// </summary>
    void OnMessage(object sender, MessageEventArgs e)
    {
        if (!e.IsBinary)
        {
            return;
        }

        string str = Encoding.UTF8.GetString(e.RawData);
        Debug.Log("WS received message: " + str);
        msgs.Enqueue(str);
    }

    void OnError(object sender, ErrorEventArgs e)
    {
        Debug.Log("WS error: " + e.Message);
    }

    void OnClose(object sender, CloseEventArgs e)
    {
        Debug.Log(string.Format("Closed: StatusCode: {0}, Reason: {1}", e.StatusCode, e.Reason));
    }

    // Close the connection when the application shuts down.
    private void OnApplicationQuit()
    {
        if (ws != null && ws.ReadyState != WebSocketState.Closed)
        {
            ws.CloseAsync();
        }
    }
}
服务器端使用了Fleck
// See https://aka.ms/new-console-template for more information
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading;
using Fleck;

namespace uSherpaServer
{
    /// <summary>
    /// Console server that receives float audio samples over a WebSocket,
    /// feeds them to a sherpa-ncnn online recognizer and sends the recognized
    /// text back to the connected client as UTF-8 bytes.
    /// </summary>
    internal class Program
    {
        // Recognizer and the single stream all incoming audio is fed into.
        static SherpaNcnn.OnlineRecognizer recognizer;
        static SherpaNcnn.OnlineStream onlineStream;

        // Model file names, resolved relative to modelPath.
        static string tokensPath = "tokens.txt";
        static string encoderParamPath = "encoder_jit_trace-pnnx.ncnn.param";
        static string encoderBinPath = "encoder_jit_trace-pnnx.ncnn.bin";
        static string decoderParamPath = "decoder_jit_trace-pnnx.ncnn.param";
        static string decoderBinPath = "decoder_jit_trace-pnnx.ncnn.bin";
        static string joinerParamPath = "joiner_jit_trace-pnnx.ncnn.param";
        static string joinerBinPath = "joiner_jit_trace-pnnx.ncnn.bin";

        static int numThreads = 1;
        static string decodingMethod = "greedy_search";
        static string modelPath;
        static float sampleRate = 16000;

        // Connection that recognition results are sent to (the most recently
        // opened one). Written from the socket callbacks, read by Update().
        static IWebSocketConnection client;

        // Text already sent to the client for the current utterance.
        static string lastText = "";

        static void Main(string[] args)
        {
            // The model folder must be copied into the directory the exe runs from.
            modelPath = Environment.CurrentDirectory + "/sherpa-ncnn-streaming-zipformer-small-bilingual-zh-en-2023-02-16";

            // Build the recognizer configuration.
            SherpaNcnn.OnlineRecognizerConfig config = new SherpaNcnn.OnlineRecognizerConfig
            {
                FeatConfig = { SampleRate = sampleRate, FeatureDim = 80 },
                ModelConfig =
                {
                    Tokens = Path.Combine(modelPath, tokensPath),
                    EncoderParam = Path.Combine(modelPath, encoderParamPath),
                    EncoderBin = Path.Combine(modelPath, encoderBinPath),
                    DecoderParam = Path.Combine(modelPath, decoderParamPath),
                    DecoderBin = Path.Combine(modelPath, decoderBinPath),
                    JoinerParam = Path.Combine(modelPath, joinerParamPath),
                    JoinerBin = Path.Combine(modelPath, joinerBinPath),
                    UseVulkanCompute = 0,
                    NumThreads = numThreads
                },
                DecoderConfig =
                {
                    DecodingMethod = decodingMethod,
                    NumActivePaths = 4
                },
                EnableEndpoint = 1,
                Rule1MinTrailingSilence = 2.4F,
                Rule2MinTrailingSilence = 1.2F,
                Rule3MinUtteranceLength = 20.0F
            };

            // Create the recognizer and its online stream.
            recognizer = new SherpaNcnn.OnlineRecognizer(config);
            onlineStream = recognizer.CreateStream();

            StartWebServer();
            Update(); // loops forever; the line below is never reached
            Console.ReadLine();
        }

        /// <summary>
        /// Starts the WebSocket server on ws://127.0.0.1:9999 and wires up the
        /// per-connection callbacks.
        /// </summary>
        static void StartWebServer()
        {
            // Pool of currently open connections.
            var connectSocketPool = new List<IWebSocketConnection>();

            // Create the WebSocket server and listen on local port 9999.
            var server = new WebSocketServer("ws://127.0.0.1:9999");

            server.Start(socket =>
            {
                // A client connected: make it the active result receiver.
                socket.OnOpen = () =>
                {
                    client = socket;
                    Console.WriteLine("Open");
                    lock (connectSocketPool)
                    {
                        connectSocketPool.Add(socket);
                    }
                };

                // A client disconnected.
                socket.OnClose = () =>
                {
                    // Only clear the active client if it is the one that closed;
                    // otherwise a stale connection closing would silence a newer one.
                    if (ReferenceEquals(client, socket))
                    {
                        client = null;
                    }

                    Console.WriteLine("Close");
                    lock (connectSocketPool)
                    {
                        connectSocketPool.Remove(socket);
                    }
                };

                // Binary payload: raw float samples, 4 bytes each.
                socket.OnBinary = message =>
                {
                    int sampleCount = message.Length / sizeof(float);
                    if (sampleCount == 0)
                    {
                        return;
                    }

                    // Copy only whole samples; trailing bytes of a malformed
                    // payload would otherwise make BlockCopy throw.
                    float[] floatArray = new float[sampleCount];
                    Buffer.BlockCopy(message, 0, floatArray, 0, sampleCount * sizeof(float));

                    // Hand the captured audio to the recognizer.
                    onlineStream.AcceptWaveform(sampleRate, floatArray);
                };
            });
        }

        /// <summary>
        /// Polling loop: decodes pending audio, sends newly recognized text to
        /// the active client, and resets the stream at each endpoint.
        /// Never returns.
        /// </summary>
        static void Update()
        {
            while (true)
            {
                // Decode everything that is ready, not just one chunk per tick,
                // so recognition does not fall behind the incoming audio.
                while (recognizer.IsReady(onlineStream))
                {
                    recognizer.Decode(onlineStream);
                }

                var text = recognizer.GetResult(onlineStream).Text;
                bool isEndpoint = recognizer.IsEndpoint(onlineStream);

                // Snapshot the field once: the socket callbacks may change it
                // between a null check and the Send call.
                IWebSocketConnection current = client;

                if (!string.IsNullOrWhiteSpace(text) && lastText != text)
                {
                    if (string.IsNullOrWhiteSpace(lastText))
                    {
                        lastText = text;
                        if (current != null)
                        {
                            current.Send(Encoding.UTF8.GetBytes(text));
                        }
                    }
                    else if (current != null)
                    {
                        // Send only what was appended since the last update.
                        // string.Replace would strip every occurrence of the old
                        // text, losing repeated content (e.g. "好" -> "好好").
                        string delta = text.StartsWith(lastText, StringComparison.Ordinal)
                            ? text.Substring(lastText.Length)
                            : text;
                        current.Send(Encoding.UTF8.GetBytes(delta));
                        lastText = text;
                    }
                }

                if (isEndpoint)
                {
                    if (!string.IsNullOrWhiteSpace(text) && current != null)
                    {
                        // Terminate the utterance with a full stop.
                        current.Send(Encoding.UTF8.GetBytes("。"));
                    }

                    recognizer.Reset(onlineStream);

                    // Start the next utterance from scratch so that a repeat of
                    // the same words is still sent.
                    lastText = "";
                }

                Thread.Sleep(200); // ms
            }
        }
    }
}
2、Unity 录音插件使用了 uMicrophoneWebGL,绑定 DataEvent 事件即可实时获取话筒数据(float 数组)。
最后放上工程地址
客户端 uSherpa:fork 自 https://github.com/ssssssilver/sherpa-ncnn-unity.git ,改成了 Unity WebGL 版
服务器端 GitHub - xue-fei/uSherpaServer: uSherpaServer 给Unity提供流式语音识别的websocket服务
还没有评论,来说两句吧...