/// <summary>
/// Finds the new words in the subtitles: each line is tokenized, every token is taken
/// in its base form, and each word is then checked; a word the annotator returns
/// nothing for is skipped, any other word is annotated and collected.
/// </summary>
/// <param name="subtitles">Subtitle lines whose <c>EnglishText</c> is scanned.</param>
/// <returns>
/// The annotated words, keyed by the key of the pair returned by <c>sentenceParse.Pickup</c>
/// (presumably the base form of the word). Only the first occurrence of a key is recorded.
/// </returns>
private IDictionary<string, SubtitleWord> PickNewWords(ICollection<SubtitleLine> subtitles)
{
    var newWords = new Dictionary<string, SubtitleWord>();
    var englishLines = subtitles.Select(subtitle => subtitle.EnglishText).ToList();

    foreach (var sentence in englishLines)
    {
        foreach (KeyValuePair<string, string> pair in sentenceParse.Pickup(sentence))
        {
            // A key that is already recorded keeps the data of its first occurrence.
            if (newWords.ContainsKey(pair.Key))
            {
                continue;
            }

            string baseForm = pair.Key;
            string surfaceForm = pair.Value;

            var annotation = sentenceParse.RemarkWord(sentence, surfaceForm, baseForm);
            if (annotation == null)
            {
                // No annotation was produced for this word, so it is not collected.
                continue;
            }

            // Use the default meaning when there is one, otherwise the first listed meaning.
            string selectedMean;
            if (annotation.DefaultMean == null)
            {
                selectedMean = annotation.Means[0].ToString();
            }
            else
            {
                selectedMean = annotation.DefaultMean.ToString();
            }

            var entry = new SubtitleWord()
            {
                Word = annotation.Word,
                WordInSubitle = surfaceForm,
                Means = annotation.Means,
                SubtitleSentence = sentence,
                SelectMean = selectedMean
            };
            newWords.Add(baseForm, entry);
        }
    }

    return newWords;
}
/// <summary>
/// Finds the new words in the subtitles: each line is tokenized, every token is taken
/// in its base form, and each word is then checked; a word the annotator returns
/// nothing for is skipped, any other word is annotated and collected.
/// Repeated occurrences of an already collected word increase its <c>ShowCount</c> and
/// add any not-yet-seen surface form to its <c>WordInSubtitle</c> list.
/// </summary>
/// <param name="subtitles">Subtitle lines whose <c>EnglishText</c> is scanned.</param>
/// <returns>
/// The annotated words, keyed by the key of the pair returned by <c>sentenceParse.Pickup</c>
/// (presumably the base form of the word).
/// </returns>
private IDictionary<string, SubtitleWord> PickNewWords(ICollection<SubtitleLine> subtitles)
{
    Dictionary<string, SubtitleWord> result = new Dictionary<string, SubtitleWord>();

    // Word -> IsStar flag of the vocabulary entries returned by GetAllUserUnKnownVocabulary.
    var unknownWords = DbOperator.Instance.GetAllUserUnKnownVocabulary().ToDictionary(s => s.Word, s => s.IsStar);
    var texts = subtitles.Select(s => s.EnglishText).ToList();

    foreach (var line in texts)
    {
        var lineResult = sentenceParse.Pickup(line);
        foreach (KeyValuePair<string, string> keyValuePair in lineResult)
        {
            string original = keyValuePair.Key;
            string word = keyValuePair.Value;

            // Single lookup instead of ContainsKey followed by repeated indexer reads.
            SubtitleWord existing;
            if (result.TryGetValue(original, out existing))
            {
                existing.ShowCount++;
                if (!existing.WordInSubtitle.Contains(word))
                {
                    existing.WordInSubtitle.Add(word);
                }
                continue;
            }

            var mean = sentenceParse.RemarkWord(line, word, original);
            if (mean == null)
            {
                // No annotation was produced for this word, so it is not collected.
                continue;
            }

            var wd = new SubtitleWord()
            {
                Word = mean.Word,
                ShowCount = 1,
                WordInSubtitle = new List<string>() { word },
                Means = mean.Means,
                SubtitleSentence = line,
                // Use the default meaning when there is one, otherwise the first listed meaning.
                SelectMean = mean.DefaultMean == null ? mean.Means[0].ToString() : mean.DefaultMean.ToString()
            };

            // Carry over the star flag when the annotated word is in the user's vocabulary list.
            if (unknownWords.ContainsKey(mean.Word))
            {
                wd.IsStar = unknownWords[mean.Word];
            }

            result.Add(original, wd);
        }
    }

    return result;
}
/// <summary>
/// Click handler: lets the user choose a folder, parses every file in that folder as a
/// subtitle file, collects the English text of all subtitle bodies, picks the new words
/// out of those sentences and passes them to <c>DisplayPreviewWords</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnPickupNewWords_Click(object sender, EventArgs e)
{
    if (folderBrowserDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    SentenceParse sentenceParse = new SentenceParse();
    DirectoryInfo directoryInfo = new DirectoryInfo(folderBrowserDialog1.SelectedPath);

    // Gather the English text of every subtitle body from each file in the folder.
    List<string> sentences = new List<string>();
    foreach (FileInfo fileInfo in directoryInfo.GetFiles())
    {
        string filePath = fileInfo.FullName;
        var txt = FileOperationHelper.ReadFile(filePath);
        var stOperator = SubtitleHelper.GetOperatorByFileName(filePath);
        var srts = stOperator.Parse(txt);
        srts = stOperator.RemoveChinese(srts);
        sentences.AddRange(srts.Bodies.Values.Select(l => l.EnglishText));
    }

    Splash.Show();
    try
    {
        // Status text shown on the splash ("Parsing subtitles...").
        Splash.Status = "解析字幕中...";

        IDictionary<string, VPreviewWord> previewWords = new Dictionary<string, VPreviewWord>();
        foreach (var sentence in sentences)
        {
            var newWords = sentenceParse.Pickup(sentence);
            foreach (KeyValuePair<string, string> keyValuePair in newWords)
            {
                string original = keyValuePair.Key;
                string word = keyValuePair.Value;

                // Already collected: only count the additional occurrence.
                VPreviewWord existing;
                if (previewWords.TryGetValue(original, out existing))
                {
                    existing.Rank++;
                    continue;
                }

                var mean = sentenceParse.RemarkWord(sentence, word, original);
                if (mean == null)
                {
                    // No annotation was produced for this word, so it is not collected.
                    continue;
                }

                var wd = new VPreviewWord()
                {
                    Word = mean.Word,
                    Rank = 1,
                    Sentence = sentence,
                    // Use the default meaning when there is one, otherwise the first listed meaning.
                    Mean = mean.DefaultMean == null ? mean.Means[0].ToString() : mean.DefaultMean.ToString()
                };
                previewWords.Add(original, wd);
            }
        }

        DisplayPreviewWords(previewWords.Values);
    }
    finally
    {
        // Close the splash even when parsing or display throws, so it is never left open.
        Splash.Close();
    }
}
/// <summary>
/// Click handler: lets the user choose a folder, parses every file in that folder as a
/// subtitle file, collects the English text of all subtitle bodies, picks the new words
/// out of those sentences and passes them to <c>DisplayPreviewWords</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnPickupNewWords_Click(object sender, EventArgs e)
{
    if (folderBrowserDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    SentenceParse sentenceParse = new SentenceParse();
    DirectoryInfo directoryInfo = new DirectoryInfo(folderBrowserDialog1.SelectedPath);

    // Gather the English text of every subtitle body from each file in the folder.
    List<string> sentences = new List<string>();
    foreach (FileInfo fileInfo in directoryInfo.GetFiles())
    {
        string filePath = fileInfo.FullName;
        var txt = FileOperationHelper.ReadFile(filePath);
        var stOperator = SubtitleHelper.GetOperatorByFileName(filePath);
        var srts = stOperator.Parse(txt);
        srts = stOperator.RemoveChinese(srts);
        sentences.AddRange(srts.Bodies.Values.Select(l => l.EnglishText));
    }

    Splash.Show();
    try
    {
        // Status text shown on the splash ("Parsing subtitles...").
        Splash.Status = "解析字幕中...";

        IDictionary<string, VPreviewWord> previewWords = new Dictionary<string, VPreviewWord>();
        foreach (var sentence in sentences)
        {
            var newWords = sentenceParse.Pickup(sentence);
            foreach (KeyValuePair<string, string> keyValuePair in newWords)
            {
                string original = keyValuePair.Key;
                string word = keyValuePair.Value;

                // One lookup replaces the ContainsKey check plus the indexer read.
                VPreviewWord existing;
                if (previewWords.TryGetValue(original, out existing))
                {
                    existing.Rank++;
                    continue;
                }

                var mean = sentenceParse.RemarkWord(sentence, word, original);
                if (mean == null)
                {
                    // No annotation was produced for this word, so it is not collected.
                    continue;
                }

                var wd = new VPreviewWord()
                {
                    Word = mean.Word,
                    Rank = 1,
                    Sentence = sentence,
                    // Use the default meaning when there is one, otherwise the first listed meaning.
                    Mean = mean.DefaultMean == null ? mean.Means[0].ToString() : mean.DefaultMean.ToString()
                };
                previewWords.Add(original, wd);
            }
        }

        DisplayPreviewWords(previewWords.Values);
    }
    finally
    {
        // Guarantee the splash is dismissed on every exit path, including exceptions.
        Splash.Close();
    }
}