/// <summary>
/// Text matching: pairs each recognized speech segment with a passage of the original text
/// and reports the similarity returned by the matching helpers.
/// </summary>
/// <param name="fileUrls">File URL of each segment, read at the same index as <paramref name="baidu"/>.</param>
/// <param name="timeSpans">Timestamp of each segment, read at the same index as <paramref name="baidu"/>.</param>
/// <param name="baidu">Recognized text of each segment; failed recognitions are reported with a "0%" score.</param>
/// <param name="originalText">The original text, which is split into sentences before matching.</param>
/// <param name="lan">Language code; "zh" selects RecursionHz, any other value selects RecursionYw.</param>
/// <returns>One result per entry of <paramref name="baidu"/>, in input order.</returns>
private List<TextContrastResult> GetTextContrast(string[] fileUrls, double[] timeSpans, string[] baidu, string originalText, string lan)
{
    // Append a '|' marker after every sentence terminator, then split on the marker so each
    // sentence keeps its punctuation. Blank and single-space fragments are dropped.
    // NOTE(review): the "?" and "!" replacements each appear twice; presumably one of each pair
    // was meant to target the full-width character - confirm against the original encoding.
    string marked = originalText
        .Replace(".", ".|")
        .Replace("。", "。|")
        .Replace("?", "?|")
        .Replace("?", "?|")
        .Replace("!", "!|")
        .Replace("!", "!|")
        .Replace("……", "……|");
    string[] sentences = marked.Split('|').Where(part => part != " " && part != "").ToArray();

    // Passed by reference to RecursionHz and kept across segments.
    int index = 0;
    List<TextContrastResult> matches = new List<TextContrastResult>();

    for (int pos = 0; pos < baidu.Length; pos++)
    {
        string recognized = baidu[pos];
        double stamp = timeSpans[pos];
        string url = fileUrls[pos];

        TextContrastResult entry = new TextContrastResult
        {
            baiduText = recognized,
            file_url = url,
            timespan = stamp
        };

        // Segments whose speech-to-text conversion failed (or produced nothing) are not matched.
        bool recognitionFailed = recognized == "3301-百度语音转文字出错" || !Util.isNotNull(recognized);
        if (recognitionFailed)
        {
            entry.contractText = "";
            entry.precent = "0%";
        }
        else
        {
            string matchedText;
            double similarity;
            if (lan == "zh")
            {
                // Spaces are stripped from the recognized text before the Chinese comparison.
                RecursionHz(recognized.Replace(" ", ""), sentences, out matchedText, out similarity, ref index);
            }
            else
            {
                RecursionYw(recognized, sentences, out matchedText, out similarity, en_index);
            }

            // A similarity of 50 or less is reported as "no match found".
            entry.contractText = similarity <= 50 ? "未找到匹配的字符串!" : matchedText;
            entry.precent = similarity + "%";
        }

        matches.Add(entry);
    }

    return matches;
}
/// <summary>
/// Cuts an audio file into clips using the timestamps of an LRC file, uploads each clip,
/// records one result per LRC entry, loads the original text, and finally removes the working files.
/// </summary>
/// <param name="sound_path">
/// Path of the source audio. When it ends with ".mp3" (any casing) it is passed to
/// NAudioHelper.GetWavPath and the returned path is used for all further processing.
/// </param>
/// <param name="word_path">
/// Path of the text file. When its extension contains "lrc" it is parsed for timestamps and text;
/// in every case it is also read with Util.ReadTxt to populate the original text.
/// </param>
/// <param name="language">Not used by this method.</param>
/// <param name="splitTime">Not used by this method.</param>
/// <remarks>
/// Exceptions raised while processing are logged and swallowed. <c>isFinish</c> is set to true
/// whether or not processing succeeded. Cleanup deletes the cut clips, the original MP3 (if any),
/// the text file and the processed audio file, removing each containing folder once it is empty.
/// Every cleanup step is best-effort: a failure is logged and the remaining steps still run.
/// </remarks>
public void GetTimeSpanLrc(string sound_path, string word_path, string language, double splitTime = 1.5)
{
    string sound_path_mp3 = ""; // original MP3 path, set only when a format conversion took place
    List<string> cut_sound_files = new List<string>(); // clips produced by the cutter
    string folder = Path.GetDirectoryName(sound_path);

    // Best-effort removal of one file and, optionally, of its folder once that folder is empty.
    // Failures are logged instead of thrown so that one locked file cannot abort the remaining
    // cleanup or escape from the finally block.
    Action<string, bool> removeFile = (path, removeEmptyFolder) =>
    {
        try
        {
            if (!File.Exists(path))
            {
                return;
            }

            File.Delete(path);
            if (!removeEmptyFolder)
            {
                return;
            }

            // Directory.Delete(path) fails on a folder that still contains sub-directories,
            // so the emptiness check must count every entry, not only files.
            string parent = Path.GetDirectoryName(path);
            if (Directory.Exists(parent) && Directory.GetFileSystemEntries(parent).Length == 0)
            {
                Directory.Delete(parent);
            }
        }
        catch (Exception ex)
        {
            LogHelper.Error(ex.Message);
        }
    };

    try
    {
        // Ordinal comparison: a file extension is not linguistic text.
        if (sound_path.EndsWith(".mp3", StringComparison.OrdinalIgnoreCase))
        {
            sound_path_mp3 = sound_path;
            // Convert the file to WAV through NAudio; the helper returns the new file path.
            sound_path = NAudioHelper.GetWavPath(sound_path);
        }

        // Audio processing: use the LRC timestamps to cut the audio and pair it with the text.
        if (Path.GetExtension(word_path).Contains("lrc"))
        {
            // 1. Read timestamps and text from the LRC file.
            Dictionary<TimeSpan, string> lrcs = Util.ReadLrc(word_path);
            List<TimeSpan> times = lrcs.Keys.ToList();

            for (int i = 0; i < times.Count; i++)
            {
                // 2. Cut the audio between this timestamp and the next one
                //    (or the end of the media for the last entry).
                TimeSpan _start = times[i];
                TimeSpan _end = i == times.Count - 1
                    ? TimeSpan.Parse(FfmpegHelper.getMediaDuration(sound_path))
                    : times[i + 1];

                string text = lrcs[_start];
                string cut_sound_file = folder + "\\" + i + "_" + sys.getRandomStr() + Path.GetExtension(sound_path);

                TextContrastResult _result = new TextContrastResult();
                _result.timespan = _start.TotalSeconds;
                _result.contractText = text;
                _result.baiduText = text;
                _result.precent = "100%";

                bool is_success = FfmpegHelper.CutAudioFile(sound_path, cut_sound_file, _start, _end - _start);
                if (is_success)
                {
                    cut_sound_files.Add(cut_sound_file);
                }

                // Upload the clip to the server.
                if (File.Exists(cut_sound_file))
                {
                    _result.file_url = UploadFile.PostFile(cut_sound_file);
                }

                results.Add(_result);
            }
        }

        // Original text, with the '|' separators removed.
        string _originalText = Util.ReadTxt(word_path);
        originalText = string.Join("", _originalText.Split('|'));
    }
    catch (Exception ex)
    {
        LogHelper.Error(ex.Message);
    }
    finally
    {
        isFinish = true;

        // Delete the cut clips.
        foreach (string cut_sound_file in cut_sound_files)
        {
            if (Util.isNotNull(cut_sound_file))
            {
                removeFile(cut_sound_file, false);
            }
        }

        // Delete the original MP3 (only set when a conversion happened) and its folder.
        if (Util.isNotNull(sound_path_mp3))
        {
            removeFile(sound_path_mp3, true);
        }

        // Delete the text file and its folder.
        removeFile(word_path, true);

        // Delete the processed audio file and its folder.
        removeFile(sound_path, true);
    }
}