/// <summary>
/// Searches for a same-pinyin replacement of the word span described by
/// <paramref name="correctPoint"/> and reports whether the best replacement
/// scores sufficiently lower than the original text.
/// </summary>
/// <param name="originWords">The segmented words of the input sentence.</param>
/// <param name="correctPoint">
/// The span under test (<c>WordIndex</c>, <c>Length</c>). When at least one candidate
/// is found, its <c>Score</c> and <c>Word</c> are overwritten with the lowest-scoring
/// candidate.
/// </param>
/// <returns>
/// <c>false</c> when no candidate was selected; otherwise whether the original
/// window's score minus the best candidate's score is greater than <c>ThresholdGap</c>.
/// </returns>
private bool CheckCorrectPoint(IList<string> originWords, CorrectPoint correctPoint)
{
    var start = correctPoint.WordIndex;
    var length = correctPoint.Length;
    var end = start + length;

    // Two parallel windows: the original text, and the same context with the
    // span collapsed into a single slot that each candidate is written into.
    var originalWindow = new List<string>();
    var candidateWindow = new List<string>();

    // Left context: the word immediately before the span, if there is one.
    if (start != 0)
    {
        var previousWord = originWords[start - 1];
        originalWindow.Add(previousWord);
        candidateWindow.Add(previousWord);
    }

    originalWindow.AddRange(originWords.Skip(start).Take(length));

    // The slot sits right after the optional left-context word (index 0 or 1).
    var slotIndex = candidateWindow.Count;
    candidateWindow.Add("[PlaceHolder]");

    // Right context: the word immediately after the span, if there is one.
    if (end < originWords.Count)
    {
        var nextWord = originWords[end];
        originalWindow.Add(nextWord);
        candidateWindow.Add(nextWord);
    }

    var originScore = CalculateScore(originalWindow);

    var spanText = string.Join("", originWords.Skip(start).Take(length));
    var pinyinSeqCandidates = PinyinTool.ChineseWord2PinyinSeqCandidates(spanText);

    string bestWord = null;
    double bestScore = double.MaxValue;

    foreach (var pinyinSeq in pinyinSeqCandidates)
    {
        var syllables = pinyinSeq.Split(' ');
        foreach (var word in PinyinTool.PinyinSequence2ChineseWordsCandidates(syllables))
        {
            candidateWindow[slotIndex] = word;
            var score = CalculateScore(candidateWindow);

            // Strictly lower wins, so the first candidate seen keeps a tie.
            if (score < bestScore)
            {
                bestWord = word;
                bestScore = score;
            }
        }
    }

    if (bestWord == null)
    {
        return false;
    }

    correctPoint.Score = bestScore;
    correctPoint.Word = bestWord;
    return originScore - correctPoint.Score > ThresholdGap;
}
/// <summary>
/// Demo entry point. Runs the normalizer and the Jieba segmenter on a sample
/// sentence, exercises the <c>PinyinTool</c> lookups, then runs a few
/// misspelled/expected sentence pairs through <c>SpellerModel.DoCorrect</c>,
/// printing every mismatch. Blocks on <c>Console.ReadLine</c> before exiting.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // NOTE: every resource path below is a hard-coded absolute path.
    const string stopwordsPath = @"D:\zhijie\ChineseSpeller\ChineseSpeller\packages\jieba.NET.0.38.3\Resources\stopwords.txt";
    const string modelFolder = @"D:\cmcc_task\CMCC\Data\chat\outputfolder";
    const string wordDictPath = @"D:\zhijie\ChineseSpeller\ChineseSpeller\data\ChineseWordDict\dict.txt";
    const string pinyinTablePath = @"D:\zhijie\ChineseSpeller\ChineseSpeller\data\py\ChinesePinyinTable.txt";
    const string sampleSentence = "这个问提不好解答";

    // Demo of ChineseNormalier.
    ChineseNormalier normalizer = new ChineseNormalier(stopwordsPath, modelFolder);
    var normalized = normalizer.Normalize(sampleSentence, false, true, true);
    Console.WriteLine(normalized);

    // Demo of the Jieba segmenter: one token per output line.
    JiebaSegmenter jieba = new JiebaSegmenter();
    var segments = jieba.Cut(sampleSentence, false, false);
    foreach (var segment in segments)
    {
        Console.WriteLine(segment);
    }

    // Demo of PinyinTool; the lookup results are not used any further here.
    PinyinTool.InitChineseWordTable(wordDictPath);
    PinyinTool.Init(pinyinTablePath);
    var charPinyins = PinyinTool.ChineseCharToPinyinList("里");
    var pinyinChars = PinyinTool.PinyinToChineseCharList("tian");
    var wordPinyinSeqs = PinyinTool.ChineseWord2PinyinSeqCandidates("使用");
    var seqWords = PinyinTool.PinyinSequence2ChineseWordsCandidates(new List<string> { "xiang", "yong" });

    // Training step, left disabled:
    //var trainer = new Trainer(new JBSegmenter(),
    //    @"D:\cmcc_task\CMCC\Data\chat\inputfolder",
    //    @"D:\cmcc_task\CMCC\Data\chat\outputfolder");
    //trainer.Execution();

    SpellerModel speller = new SpellerModel(modelFolder, new JBSegmenter());

    // Input sentence -> expected corrected sentence.
    var testPairs = new Dictionary<string, string>
    {
        [string.Empty] = string.Empty,
        ["我要够买流量包"] = "我要购买流量包",
        ["如何订狗流亮包"] = "如何订购流量包",
        ["本机有承诺连续12个月使用88元或以上4G主体套餐使用流亮年包的优惠未到其"] = "本机有承诺连续12个月使用88元或以上4G主体套餐使用流量年包的优惠未到期",
    };

    // Tallied per matching pair, but never printed or returned.
    int rightCnt = 0;
    foreach (var pair in testPairs)
    {
        var corrected = speller.DoCorrect(pair.Key);
        if (corrected != pair.Value)
        {
            Console.WriteLine($"result should be {pair.Value} but is {corrected}");
            continue;
        }

        rightCnt++;
    }

    // Keep the console window open until Enter is pressed.
    Console.ReadLine();
}