Example #1
0
        /// <summary>
        /// Text matching: pairs every speech-recognition transcript with the stretch of the
        /// original text that the matcher considers most similar to it.
        /// </summary>
        /// <param name="fileUrls">Audio clip URLs; element <c>i</c> is copied to the result built for <c>baidu[i]</c>.</param>
        /// <param name="timeSpans">Time values; element <c>i</c> is copied to the result built for <c>baidu[i]</c>.</param>
        /// <param name="baidu">Recognised texts. Entries equal to the "3301-..." error marker, or rejected by <c>Util.isNotNull</c>, are not matched.</param>
        /// <param name="originalText">Reference text; it is split into sentences on ., 。, ?, ？, !, ！ and ……</param>
        /// <param name="lan">Language code: <c>"zh"</c> selects <c>RecursionHz</c>, any other value selects <c>RecursionYw</c>.</param>
        /// <returns>One <c>TextContrastResult</c> per element of <paramref name="baidu"/>, in the same order.</returns>
        private List <TextContrastResult> GetTextContrast(string[] fileUrls, double[] timeSpans, string[] baidu, string originalText, string lan)
        {
            // Append '|' after every sentence terminator and split on it, so that each sentence
            // keeps its own punctuation. Blank and single-space fragments are discarded.
            // The full-width question mark (U+FF1F) and exclamation mark (U+FF01) are written as
            // escapes so they cannot be silently normalised to their ASCII look-alikes.
            string[] originalList = originalText
                                    .Replace(".", ".|")
                                    .Replace("。", "。|")
                                    .Replace("?", "?|")
                                    .Replace("\uFF1F", "\uFF1F|")
                                    .Replace("!", "!|")
                                    .Replace("\uFF01", "\uFF01|")
                                    .Replace("……", "……|")
                                    .Split('|')
                                    .Where(o => o != " " && o != "")
                                    .ToArray();

            // Matching strategy (implemented by RecursionHz / RecursionYw, per the original notes):
            // 1. Transcripts that failed recognition are skipped.
            // 2. The original text normally has at least as many sentences as there are transcripts.
            // 3. A transcript is compared with the first sentence, then with the first plus the second,
            //    and so on while the similarity keeps rising; once it drops, the tail is compared.

            double curr_precent = 0;  // similarity reported by the matcher
            string org_contrast = ""; // matched portion of the original text
            int    index        = 0;  // cursor shared across iterations by the Chinese matcher (passed by ref)
            List <TextContrastResult> list = new List <TextContrastResult>();

            for (int i = 0; i < baidu.Length; i++)
            {
                string bd = baidu[i];

                // fileUrls and timeSpans are read in lock-step with baidu, so both must be at
                // least as long as baidu.
                TextContrastResult result = new TextContrastResult();
                result.baiduText = bd;
                result.timespan  = timeSpans[i];
                result.file_url  = fileUrls[i];

                if (bd != "3301-百度语音转文字出错" && Util.isNotNull(bd))
                {
                    if (lan == "zh")
                    {
                        // Spaces are stripped from the transcript before the Chinese comparison.
                        RecursionHz(bd.Replace(" ", ""), originalList, out org_contrast, out curr_precent, ref index);
                    }
                    else
                    {
                        // NOTE(review): en_index is not declared in this method — presumably a member
                        // of the enclosing class; confirm. Unlike `index` it is not passed by ref.
                        RecursionYw(bd, originalList, out org_contrast, out curr_precent, en_index);
                    }

                    // A similarity of 50 or less is reported as "no match found".
                    if (curr_precent <= 50)
                    {
                        org_contrast = "未找到匹配的字符串!";
                    }
                    result.contractText = org_contrast;
                    result.precent      = curr_precent + "%";
                }
                else
                {
                    // Recognition failed for this clip: emit an empty placeholder so the output
                    // stays aligned with the input arrays.
                    result.contractText = "";
                    result.precent      = "0%";
                }
                list.Add(result);
            }
            return(list);
        }
Example #2
0
        /// <summary>
        /// Prepares an audio file and its accompanying text file: converts an MP3 input to WAV,
        /// and, when the text file is an LRC file, cuts the audio at each LRC timestamp, uploads
        /// every clip and appends one entry per clip to <c>results</c>. The text file's content
        /// (with '|' removed) is stored in <c>originalText</c>.
        /// All input and temporary files are deleted afterwards, and <c>isFinish</c> is set to
        /// <c>true</c> whether or not processing succeeded. Errors are logged, not thrown.
        /// </summary>
        /// <param name="sound_path">Path of the audio file to process. The file is deleted when the method finishes.</param>
        /// <param name="word_path">Path of the text/LRC file. The file is deleted when the method finishes.</param>
        /// <param name="language">Not used by this method body.</param>
        /// <param name="splitTime">Not used by this method body.</param>
        public void GetTimeSpanLrc(string sound_path, string word_path, string language, double splitTime = 1.5)
        {
            string        sound_path_mp3  = "";                   // original MP3 path; set only when the input was converted
            List <string> cut_sound_files = new List <string> (); // every clip path handed to ffmpeg, for cleanup
            string        folder          = Path.GetDirectoryName(sound_path);

            try
            {
                if (sound_path.EndsWith(".mp3", StringComparison.OrdinalIgnoreCase))
                {
                    sound_path_mp3 = sound_path;
                    // NAudio converts the file to WAV and returns the new file's path.
                    sound_path = NAudioHelper.GetWavPath(sound_path);
                }

                // When the text file is an LRC file, use its timestamps to cut the audio.
                if (Path.GetExtension(word_path).IndexOf("lrc", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    Dictionary <TimeSpan, string> lrcs = Util.ReadLrc(word_path);
                    // Dictionary enumeration order is unspecified, and each segment ends where the
                    // next one starts, so the timestamps must be processed in ascending order.
                    List <TimeSpan> times = lrcs.Keys.OrderBy(t => t).ToList();
                    for (int i = 0; i < times.Count; i++)
                    {
                        string cut_sound_file = folder + "\\" + i + "_" + sys.getRandomStr() + Path.GetExtension(sound_path);

                        // The last segment runs to the end of the media; every other segment
                        // runs to the next timestamp.
                        TimeSpan _start = times[i];
                        TimeSpan _end   = (i == times.Count - 1)
                                          ? TimeSpan.Parse(FfmpegHelper.getMediaDuration(sound_path))
                                          : times[i + 1];

                        TextContrastResult _result = new TextContrastResult();
                        _result.timespan     = _start.TotalSeconds;
                        _result.contractText = lrcs[_start];
                        _result.baiduText    = lrcs[_start];
                        _result.precent      = "100%";

                        // Register the clip before cutting so that a partial file left behind by
                        // a failed cut (or by an exception further down) is still cleaned up.
                        cut_sound_files.Add(cut_sound_file);
                        bool is_success = FfmpegHelper.CutAudioFile(sound_path, cut_sound_file, _start, _end - _start);
                        if (!is_success)
                        {
                            LogHelper.Error("CutAudioFile failed for segment " + i + ": " + cut_sound_file);
                        }

                        // Upload the clip to the server when it was produced.
                        if (File.Exists(cut_sound_file))
                        {
                            _result.file_url = UploadFile.PostFile(cut_sound_file);
                        }
                        results.Add(_result);
                    }
                }

                // Original text: the file content with the '|' separators removed.
                string _originalText = Util.ReadTxt(word_path);
                originalText = string.Join("", _originalText.Split('|'));
            }
            catch (Exception ex)
            {
                // Log the full exception (type, message and stack trace), not just the message.
                LogHelper.Error(ex.ToString());
            }
            finally
            {
                isFinish = true;

                // Cleanup is best-effort: one file that cannot be deleted must not stop the
                // remaining deletions or throw out of this finally block.

                // Clips first, so their folder can become empty before the folder checks below.
                foreach (string cut_sound_file in cut_sound_files)
                {
                    if (Util.isNotNull(cut_sound_file))
                    {
                        DeleteFileQuietly(cut_sound_file, false);
                    }
                }
                // Original MP3 (only set when a conversion took place).
                if (Util.isNotNull(sound_path_mp3))
                {
                    DeleteFileQuietly(sound_path_mp3, true);
                }
                // Text / LRC file.
                DeleteFileQuietly(word_path, true);
                // Audio file that was processed (the converted WAV, or the original input).
                DeleteFileQuietly(sound_path, true);
            }
        }

        /// <summary>
        /// Deletes <paramref name="filePath"/> if it exists and, when requested, removes its
        /// parent directory once that directory contains no files. Failures are logged, never thrown.
        /// </summary>
        /// <param name="filePath">File to delete; null, empty or missing paths are ignored.</param>
        /// <param name="removeEmptyFolder">Whether to delete the parent directory when no files remain in it.</param>
        private static void DeleteFileQuietly(string filePath, bool removeEmptyFolder)
        {
            try
            {
                if (!File.Exists(filePath))
                {
                    return;
                }
                File.Delete(filePath);

                if (!removeEmptyFolder)
                {
                    return;
                }
                string parent = Path.GetDirectoryName(filePath);
                if (Directory.Exists(parent) && Directory.GetFiles(parent).Length == 0)
                {
                    Directory.Delete(parent);
                }
            }
            catch (Exception ex)
            {
                LogHelper.Error(ex.ToString());
            }
        }