上一篇中使用的是Unity的Microphone类,遗留的问题是:协程里的yield return new WaitForSecondsRealtime(0.04f)导致音频数据发送得很慢,语音识别不及时。
上一篇链接:Unity对接科大讯飞实时语音转写WebAPI(Windows平台)_unity webgl对接讯飞实时语音听写-CSDN博客
本篇改用NAudio.Wave.WaveIn类获取麦克风语音数据,通过其BufferMilliseconds属性可以控制每次数据回调的时间间隔;发送部分依然使用WebSocketSharp.WebSocket。下面直接贴代码:
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System;
using WebSocketSharp;
using System.Text;
using System.Security.Cryptography;
using LitJson;
using Newtonsoft.Json;
using NAudio.Wave;
/// <summary>
/// Captures microphone audio with NAudio's <see cref="WaveIn"/> and streams it over a
/// WebSocketSharp <see cref="WebSocket"/> to the iFlytek real-time transcription endpoint.
/// Recognition results are queued by the socket callback and delivered to the registered
/// callbacks from <see cref="Update"/>, i.e. on Unity's main thread.
/// </summary>
public class SpeechHelper2 : MonoBehaviour
{
    // Service credentials. The defaults are the same placeholders the code shipped with;
    // real values can now be supplied from the Inspector instead of editing the source.
    [SerializeField] private string appId = "appid";
    [SerializeField] private string appSecret = "appsecret";

    private WaveIn audioClip;
    private WebSocket speechWebSocket;
    private Action<string> resultCallback;
    private Action errorCallback;

    // The socket's OnMessage handler enqueues from a non-Unity thread while Update dequeues,
    // and Queue<T> is not thread-safe, so every queue access goes through this gate.
    private readonly object queueGate = new object();
    private readonly Queue<string> messageQueue = new Queue<string>();
    private readonly Queue<byte[]> requestQueue = new Queue<byte[]>();

    /// <summary>Registers the callbacks used to report recognition output.</summary>
    /// <param name="textCallback">Invoked with the text of every final recognition result.</param>
    /// <param name="_errorCallback">Invoked when the service answers with a non-zero code.</param>
    public void InitSpeechHelper(System.Action<string> textCallback, Action _errorCallback)
    {
        resultCallback = textCallback;
        errorCallback = _errorCallback;
    }

    /// <summary>Queues one captured audio chunk for sending from <see cref="Update"/>.</summary>
    private void AudioClip_DataAvailable(object sender, WaveInEventArgs e)
    {
        if (e.BytesRecorded <= 0)
        {
            return;
        }

        // Copy exactly the recorded bytes: the recorder recycles its buffers, so keeping a
        // reference to e.Buffer would let later captures overwrite data that is still queued,
        // and the buffer may be longer than what was actually recorded.
        byte[] chunk = new byte[e.BytesRecorded];
        Array.Copy(e.Buffer, chunk, e.BytesRecorded);

        lock (queueGate)
        {
            requestQueue.Enqueue(chunk);
        }
    }

    /// <summary>
    /// Starts recording (16 kHz, 16-bit, mono, 80 ms buffers) and opens the recognition socket.
    /// </summary>
    /// <returns>
    /// <c>true</c> when recording started and the connection attempt was issued; <c>false</c>
    /// when a previous session is still open, no input device exists, or the socket could not
    /// be created.
    /// </returns>
    public bool StartSpeech()
    {
        if (speechWebSocket != null && speechWebSocket.ReadyState == WebSocketState.Open)
        {
            MessageMng.Ins.ShowFloatTip("请等待上次识别结束");
            return false;
        }

        if (WaveIn.DeviceCount <= 0)
        {
            MessageMng.Ins.ShowTipMsg("找不到麦克风!");
            return false;
        }

        // A recorder left over from an earlier call would otherwise keep running and leak.
        ReleaseRecorder();

        // Drop results and audio that belong to a previous session.
        lock (queueGate)
        {
            messageQueue.Clear();
            requestQueue.Clear();
        }

        audioClip = new WaveIn();
        audioClip.BufferMilliseconds = 80;
        audioClip.WaveFormat = new WaveFormat(16000, 16, 1);
        audioClip.DataAvailable += AudioClip_DataAvailable;

        try
        {
            audioClip.StartRecording();
        }
        catch
        {
            // Same exception as before reaches the caller, but the device is released first.
            ReleaseRecorder();
            throw;
        }

        if (!ConnectSpeechWebSocket())
        {
            // Without a socket nothing would ever consume the audio; do not keep recording.
            ReleaseRecorder();
            return false;
        }

        return true;
    }

    /// <summary>
    /// Stops recording, sends the chunks that are still queued and then the end-of-stream
    /// marker. Safe to call when no session is active.
    /// </summary>
    public void StopSpeech()
    {
        Debug.Log("识别结束,停止录音");
        ReleaseRecorder();

        // The end marker must follow the last audio chunk, so flush the queue first.
        FlushPendingAudio();
        SendEndMsg(null);
    }

    /// <summary>Stops and disposes the current recorder, if any; never throws.</summary>
    private void ReleaseRecorder()
    {
        WaveIn recorder = audioClip;
        if (recorder == null)
        {
            return;
        }

        audioClip = null;
        recorder.DataAvailable -= AudioClip_DataAvailable;

        try
        {
            try
            {
                recorder.StopRecording();
            }
            finally
            {
                recorder.Dispose();
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    /// <summary>Creates the socket, wires its events and starts connecting asynchronously.</summary>
    /// <returns><c>false</c> when the socket could not be constructed.</returns>
    bool ConnectSpeechWebSocket()
    {
        WebSocket socket;
        try
        {
            socket = new WebSocket(GetWebSocketUrl());
        }
        catch (Exception ex)
        {
            UnityEngine.Debug.LogError(ex.Message);
            return false;
        }

        socket.OnOpen += (sender, e) => Debug.Log("OnOpen");
        // Subscribed up front so a close that happens before the socket opens is logged too.
        socket.OnClose += OnWebSocketClose;
        socket.OnMessage += OnInitMessage;
        socket.OnError += OnError;

        speechWebSocket = socket;
        socket.ConnectAsync();
        return true;
    }

    void OnWebSocketClose(object sender, CloseEventArgs e)
    {
        Debug.Log("OnWebSocketClose");
    }

    /// <summary>Socket callback: queues the raw payload for processing in <see cref="Update"/>.</summary>
    void OnInitMessage(object sender, MessageEventArgs e)
    {
        UnityEngine.Debug.Log("qqqqqqqqqqqqqWebSocket数据返回:" + e.Data);
        lock (queueGate)
        {
            messageQueue.Enqueue(e.Data);
        }
    }

    /// <summary>
    /// Handles one service message: reports an error for a non-zero code, otherwise forwards
    /// the text of final "result" messages to the result callback.
    /// </summary>
    private void MainThreadOnMessage(string message)
    {
        try
        {
            XFResponse response = JsonConvert.DeserializeObject<XFResponse>(message);
            if (response == null)
            {
                return;
            }

            if (0 != response.code)
            {
                errorCallback?.Invoke();
                MessageMng.Ins.ShowFloatTip("连接出错,请重试!");
                return;
            }

            if (response.action == null || !response.action.Equals("result"))
            {
                return;
            }

            var result = ParseXunfeiRecognitionResult(response.data);
            if (result.IsFinal)
            {
                // Intermediate hypotheses are ignored; only final text is reported.
                resultCallback?.Invoke(result.Text);
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    void OnError(object sender, WebSocketSharp.ErrorEventArgs e)
    {
        UnityEngine.Debug.Log("WebSoclet:发生错误:" + e.Message);
    }

    /// <summary>
    /// Extracts the recognized text from a result payload by concatenating every
    /// <c>cn.st.rt[].ws[].cw[].w</c> entry.
    /// </summary>
    /// <param name="dataJson">JSON text of the message's data field.</param>
    /// <returns>
    /// The text gathered so far (possibly empty if parsing failed; the failure is logged) and
    /// <c>IsFinal</c>, which is set unless <c>cn.st.ed</c> equals "0".
    /// </returns>
    public SpeechRecognitionResult ParseXunfeiRecognitionResult(string dataJson)
    {
        StringBuilder builder = new StringBuilder();
        SpeechRecognitionResult res = new SpeechRecognitionResult();
        try
        {
            JsonData st = JsonMapper.ToObject(dataJson)["cn"]["st"];
            res.IsFinal = !st["ed"].ToString().Equals("0");

            foreach (JsonData rtObject in st["rt"])
            {
                foreach (JsonData wsObject in rtObject["ws"])
                {
                    foreach (JsonData cwObject in wsObject["cw"])
                    {
                        builder.Append(cwObject["w"].ToString());
                    }
                }
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }

        res.Text = builder.ToString();
        res.type = ASRResultType.TEXT;
        return res;
    }

    /// <summary>
    /// Sends one audio chunk if the socket is open; chunks offered while it is not open are
    /// dropped, as before.
    /// </summary>
    void SendData(byte[] voiceData)
    {
        WebSocket socket = speechWebSocket;

        // ReadyState is a cheap state read. IsAlive is deliberately not used here: it performs
        // a ping round-trip, which is too expensive to run for every audio chunk.
        if (voiceData == null || socket == null || socket.ReadyState != WebSocketState.Open)
        {
            return;
        }

        try
        {
            socket.SendAsync(voiceData, success =>
            {
                if (!success)
                {
                    UnityEngine.Debug.Log("WebSoclet:发送失败:");
                }
            });
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    /// <summary>Sends the end-of-stream marker if the socket is open.</summary>
    /// <param name="callback">
    /// Invoked once the send attempt has completed (successfully or not). Not invoked when the
    /// socket is not open and nothing was sent.
    /// </param>
    void SendEndMsg(System.Action callback)
    {
        byte[] data = Encoding.UTF8.GetBytes("{\"end\": true}");
        WebSocket socket = speechWebSocket;
        if (socket == null || socket.ReadyState != WebSocketState.Open)
        {
            return;
        }

        try
        {
            socket.SendAsync(data, success =>
            {
                if (success)
                {
                    UnityEngine.Debug.Log("WebSoclet:发送END成功:" + data.Length);
                }
                else
                {
                    UnityEngine.Debug.Log("WebSoclet:发送END失败:");
                }
                callback?.Invoke();
            });
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    /// <summary>
    /// Builds the signed endpoint URL: signa = Base64(HmacSHA1(MD5(appid + ts), secret)).
    /// </summary>
    private string GetWebSocketUrl()
    {
        string ts = GetCurrentUnixTimestampSeconds().ToString();
        string signa = CalculateHmacSha1(GetMD5Hash(appId + ts), appSecret);

        // Base64 may contain '+', '/' and '=', which are not safe inside a query string.
        // The signed URL is intentionally not logged: it carries a usable credential.
        return string.Format(
            "ws://rtasr.xfyun.cn/v1/ws?appid={0}&ts={1}&signa={2}",
            appId, ts, Uri.EscapeDataString(signa));
    }

    /// <summary>Returns the current Unix time in whole seconds, computed from UTC.</summary>
    private long GetCurrentUnixTimestampSeconds()
    {
        // UTC arithmetic avoids the one-hour error local-time subtraction can produce when
        // daylight saving is in effect.
        DateTime epoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
        return (long)(DateTime.UtcNow - epoch).TotalSeconds;
    }

    /// <summary>Returns the lowercase hexadecimal MD5 digest of the UTF-8 bytes of <paramref name="input"/>.</summary>
    public string GetMD5Hash(string input)
    {
        using (MD5 md5Hasher = MD5.Create())
        {
            // UTF-8 rather than Encoding.Default, whose code page differs between machines.
            byte[] digest = md5Hasher.ComputeHash(Encoding.UTF8.GetBytes(input));
            StringBuilder hex = new StringBuilder(digest.Length * 2);
            foreach (byte b in digest)
            {
                hex.Append(b.ToString("x2"));
            }
            return hex.ToString();
        }
    }

    /// <summary>Returns the Base64-encoded HMAC-SHA1 of <paramref name="data"/> keyed with <paramref name="key"/> (both UTF-8).</summary>
    public string CalculateHmacSha1(string data, string key)
    {
        using (HMACSHA1 hmac = new HMACSHA1(Encoding.UTF8.GetBytes(key)))
        {
            return Convert.ToBase64String(hmac.ComputeHash(Encoding.UTF8.GetBytes(data)));
        }
    }

    /// <summary>Removes the next item from <paramref name="queue"/> under the queue lock.</summary>
    private bool TryDequeue<T>(Queue<T> queue, out T item)
    {
        lock (queueGate)
        {
            if (queue.Count > 0)
            {
                item = queue.Dequeue();
                return true;
            }
        }

        item = default(T);
        return false;
    }

    /// <summary>Sends every audio chunk currently queued.</summary>
    private void FlushPendingAudio()
    {
        byte[] chunk;
        while (TryDequeue(requestQueue, out chunk))
        {
            SendData(chunk);
        }
    }

    private void Update()
    {
        // Drain both queues completely: handling a single item per frame falls permanently
        // behind whenever the frame time exceeds the 80 ms capture interval.
        string message;
        while (TryDequeue(messageQueue, out message))
        {
            MainThreadOnMessage(message);
        }

        FlushPendingAudio();
    }

    /// <summary>Releases the recorder and closes the socket; safe to call repeatedly.</summary>
    private void Shutdown()
    {
        ReleaseRecorder();

        WebSocket socket = speechWebSocket;
        speechWebSocket = null;
        if (socket == null)
        {
            return;
        }

        try
        {
            socket.CloseAsync();
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    private void OnDestroy()
    {
        Shutdown();
    }

    private void OnApplicationQuit()
    {
        Shutdown();
    }
}
这里BufferMilliseconds设置为80,按16kHz、16位、单声道计算,每次回调的数据长度约为2560字节,大于1280字节,讯飞同样可以完成识别。