/// <summary>
/// Sends a speech.config message followed by an SSML message on the connection returned by
/// <c>ObtainConnection</c>, then collects the audio frames the server sends back for that request.
/// </summary>
/// <param name="settings">Supplies the voice name and the speed, pitch and volume values written into the SSML.</param>
/// <param name="text">Content placed verbatim inside the SSML <c>prosody</c> element.</param>
/// <returns>
/// The concatenated audio payload of all <c>Path:audio</c> frames, or <c>null</c> when no connection
/// could be obtained (cancelled).
/// </returns>
/// <exception cref="IOException">
/// The server sent a message that is out of sequence, truncated, or tagged with a different request id,
/// or it closed the connection.
/// </exception>
private byte[] Synthesis(EdgeTTSSettings settings, string text)
{
    var ws = ObtainConnection();
    if (ws == null)
    {
        // Cancelled
        return null;
    }

    lock (ws)
    {
        // Everything written to the wire is formatted with the invariant culture so the protocol
        // text does not change with the user's regional settings (calendar, time separator, sign).
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // Send request
        var requestId = Guid.NewGuid().ToString("N");
        var timestamp = DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss.fffK", invariant);
        try
        {
            ws.SendText(
                "Path:speech.config\r\n" +
                $"X-RequestId:{requestId}\r\n" +
                $"X-Timestamp:{timestamp}\r\n" +
                "Content-Type:application/json\r\n" +
                "\r\n" +
                "{\"context\":{\"synthesis\":{\"audio\":{\"metadataoptions\":{\"sentenceBoundaryEnabled\":\"false\",\"wordBoundaryEnabled\":\"false\"},\"outputFormat\":\"audio-24khz-48kbitrate-mono-mp3\"}}}}\r\n",
                _wsCancellationSource
            );

            var rate = Convert.ToString(settings.Speed - 100, invariant);
            var pitch = Convert.ToString((settings.Pitch - 100) / 2, invariant);
            var volume = Convert.ToString(settings.Volume.Clamp(1, 100), invariant);

            // NOTE(review): text is embedded as-is; presumably the caller has already made it
            // XML-safe - confirm against the call site.
            ws.SendText(
                "Path:ssml\r\n" +
                $"X-RequestId:{requestId}\r\n" +
                $"X-Timestamp:{timestamp}\r\n" +
                "Content-Type:application/ssml+xml\r\n" +
                "\r\n" +
                "<speak xmlns=\"http://www.w3.org/2001/10/synthesis\" xmlns:mstts=\"http://www.w3.org/2001/mstts\" xmlns:emo=\"http://www.w3.org/2009/10/emotionml\" version=\"1.0\" xml:lang=\"en-US\">" +
                $"<voice name=\"{settings.Voice}\">" +
                $"<prosody rate=\"{rate}%\" pitch=\"{pitch}%\" volume=\"{volume}\">" +
                text +
                "</prosody></voice></speak>\r\n",
                _wsCancellationSource
            );
        }
        catch (Exception)
        {
            // A failed send leaves the connection unusable: tear it down before propagating.
            ws.Abort();
            ws.Dispose();
            throw;
        }

        // Start receiving
        using (var buffer = new MemoryStream())
        {
            var session = new WebSocketHelper.Session(ws);
            var state = ProtocolState.NotStarted;
            while (true)
            {
                var message = WebSocketHelper.ReceiveNextMessage(session, _wsCancellationSource);
                Logger.Debug($"Received WS message\n{message}");

                if (message.Type == WebSocketMessageType.Text)
                {
                    if (message.MessageStr.Contains(requestId))
                    {
                        switch (state)
                        {
                            case ProtocolState.NotStarted:
                                if (message.MessageStr.Contains("Path:turn.start"))
                                {
                                    state = ProtocolState.TurnStarted;
                                }
                                break;

                            case ProtocolState.TurnStarted:
                                // Between turn.start and the first audio frame, other text
                                // messages for this request are ignored.
                                if (message.MessageStr.Contains("Path:turn.end"))
                                {
                                    throw new IOException("Unexpected turn.end");
                                }
                                else if (message.MessageStr.Contains("Path:turn.start"))
                                {
                                    throw new IOException("Turn already started");
                                }
                                break;

                            case ProtocolState.Streaming:
                                if (message.MessageStr.Contains("Path:turn.end"))
                                {
                                    // All done
                                    return buffer.ToArray();
                                }
                                else
                                {
                                    throw new IOException($"Unexpected message during streaming: {message.MessageStr}");
                                }

                            default:
                                throw new ArgumentOutOfRangeException();
                        }
                    }
                    else
                    {
                        if (state != ProtocolState.NotStarted)
                        {
                            throw new IOException("Unexpected request id during streaming");
                        }
                        // Otherwise ignore: a text message for another request before our turn started.
                    }
                }
                else if (message.Type == WebSocketMessageType.Binary)
                {
                    switch (state)
                    {
                        case ProtocolState.NotStarted:
                            // Do nothing
                            break;

                        case ProtocolState.TurnStarted:
                        case ProtocolState.Streaming:
                            // Parsing message
                            // The first 2 bytes are the header length (most significant byte first)
                            if (message.MessageBinary.Length < 2)
                            {
                                throw new IOException("Message too short");
                            }

                            var headerLen = (message.MessageBinary[0] << 8) + message.MessageBinary[1];
                            if (message.MessageBinary.Length < 2 + headerLen)
                            {
                                throw new IOException("Message too short");
                            }

                            var header = Encoding.UTF8.GetString(message.MessageBinary, 2, headerLen);
                            // Protocol token: compare ordinally, not with the current culture.
                            if (header.EndsWith("Path:audio\r\n", StringComparison.Ordinal))
                            {
                                if (!header.Contains(requestId))
                                {
                                    throw new IOException("Unexpected request id during streaming");
                                }

                                state = ProtocolState.Streaming;
                                // Everything after the header is audio payload.
                                buffer.Write(message.MessageBinary, 2 + headerLen, message.MessageBinary.Length - 2 - headerLen);
                            }
                            else
                            {
                                Logger.Warn($"Unexpected message with header {header}");
                            }
                            break;

                        default:
                            throw new ArgumentOutOfRangeException();
                    }
                }
                else if (message.Type == WebSocketMessageType.Close)
                {
                    throw new IOException("Unexpected closing of connection");
                }
                else
                {
                    throw new ArgumentOutOfRangeException();
                }
            }
        }
    }
}
/// <summary>
/// Opens a signed WebSocket connection to <c>tts-api.xfyun.cn</c>, submits <paramref name="text"/>
/// as a single request and collects the audio chunks from the JSON responses.
/// </summary>
/// <param name="settings">Supplies the API secret, API key, app id, voice, pitch, speed and volume.</param>
/// <param name="text">Text to synthesize; it is sent UTF-8 encoded and Base64 wrapped.</param>
/// <returns>
/// The concatenated decoded audio bytes once a response with data status 2 arrives, or <c>null</c> when
/// credentials are missing, the encoded text exceeds 8000 characters, the server answers the handshake
/// with 401/403, or a response carries a non-zero code.
/// </returns>
/// <exception cref="IOException">A binary message was received or the server closed the connection.</exception>
private byte[] Synthesis(XfyunTTSSettings settings, string text)
{
    var apiSecret = settings.ApiSecret;
    var apiKey = settings.ApiKey;
    var appId = settings.AppId;
    if (string.IsNullOrEmpty(apiKey) || string.IsNullOrEmpty(apiSecret) || string.IsNullOrEmpty(appId))
    {
        Logger.Error(strings.msgErrorEmptyApiSecretKey);
        _settingsControl.NotifyEmptyApiKey();
        return null;
    }

    var base64Text = Convert.ToBase64String(Encoding.UTF8.GetBytes(text));
    if (base64Text.Length > 8000)
    {
        Logger.Error("Convert string too long. No more than 2000 chinese characters.");
        return null;
    }

    using (var ws = SystemClientWebSocket.CreateClientWebSocket())
    {
        // Connect: the URL carries an HMAC-SHA256 signature over host, date and request line.
        var date = DateTime.UtcNow.ToString("R");
        var sign = $"host: tts-api.xfyun.cn\ndate: {date}\nGET /v2/tts HTTP/1.1";
        string sha;
        using (var hash = new HMACSHA256(Encoding.UTF8.GetBytes(apiSecret)))
        {
            sha = Convert.ToBase64String(hash.ComputeHash(Encoding.UTF8.GetBytes(sign)));
        }

        var authorization = $"api_key=\"{apiKey}\"," +
                            $" algorithm=\"hmac-sha256\"," +
                            $" headers=\"host date request-line\"," +
                            $" signature=\"{sha}\"";
        var url = $"wss://tts-api.xfyun.cn/v2/tts?host=tts-api.xfyun.cn" +
                  $"&date={WebUtility.UrlEncode(date).Replace("+", "%20")}" +
                  $"&authorization={Convert.ToBase64String(Encoding.UTF8.GetBytes(authorization))}";

        try
        {
            ws.ConnectAsync(new Uri(url), _wsCancellationSource.Token).Wait();
        }
        catch (AggregateException e)
        {
            var inner = e.InnerExceptions.First().GetBaseException();

            // WebException.Response is null when the failure produced no HTTP response
            // (e.g. name resolution or connect failure). Only inspect it when it is present,
            // so the original connection error is what gets rethrown.
            if (inner is WebException webException && webException.Response is HttpWebResponse httpResponse)
            {
                string body = null;
                using (var stream = httpResponse.GetResponseStream())
                {
                    if (stream != null)
                    {
                        var encoding = string.IsNullOrEmpty(httpResponse.CharacterSet)
                            ? Encoding.UTF8
                            : Encoding.GetEncoding(httpResponse.CharacterSet);
                        using (var reader = new StreamReader(stream, encoding))
                        {
                            body = reader.ReadToEnd();
                        }
                    }
                }

                Logger.Error($"Unable to connect to server: {body}");
                switch (httpResponse.StatusCode)
                {
                    case HttpStatusCode.Unauthorized:
                    case HttpStatusCode.Forbidden:
                        Logger.Error(strings.msgErrorXfyunAuthFail);
                        return null;
                }
            }

            throw;
        }

        // Send request
        var request = new TTSRequest
        {
            Common = new TTSRequest.CommonParam
            {
                AppId = settings.AppId,
            },
            Business = new TTSRequest.BusinessParam
            {
                Voice = settings.Voice,
                Pitch = settings.Pitch * 10,
                Speed = settings.Speed * 10,
                Volume = settings.Volume * 10,
            },
            Data = new TTSRequest.DataParam
            {
                Status = 2,
                Text = base64Text,
            }
        };
        ws.SendText(JsonConvert.SerializeObject(request), _wsCancellationSource);

        // Start receiving
        using (var buffer = new MemoryStream())
        {
            var session = new WebSocketHelper.Session(ws);
            while (true)
            {
                var message = WebSocketHelper.ReceiveNextMessage(session, _wsCancellationSource);
                Logger.Debug($"Received WS message\n{message}");

                if (message.Type == WebSocketMessageType.Text)
                {
                    var response = JsonConvert.DeserializeObject<TTSResponse>(message.MessageStr);
                    if (response.Code != 0)
                    {
                        Logger.Error($"Unexpected response code received: {response.Code}: {response.Message}");
                        switch (response.Code)
                        {
                            case 10005:
                            case 10313:
                                Logger.Error(strings.msgErrorXfyunWrongAppId);
                                break;

                            case 11200:
                            case 11201:
                                Logger.Error(strings.msgErrorXfyunInsufficientApiQuota);
                                break;
                        }

                        return null;
                    }

                    // Success! Responses without a data section are skipped.
                    if (response.Data != null)
                    {
                        // A frame may carry a status without audio; decoding null would throw.
                        if (!string.IsNullOrEmpty(response.Data.Audio))
                        {
                            var data = Convert.FromBase64String(response.Data.Audio);
                            buffer.Write(data, 0, data.Length);
                        }

                        if (response.Data.Status == 2)
                        {
                            // Complete!
                            return buffer.ToArray();
                        }
                    }
                }
                else if (message.Type == WebSocketMessageType.Binary)
                {
                    throw new IOException("Unexpected binary message received");
                }
                else if (message.Type == WebSocketMessageType.Close)
                {
                    throw new IOException("Unexpected closing of connection");
                }
                else
                {
                    throw new ArgumentOutOfRangeException();
                }
            }
        }
    }
}