예제 #1
0
        /// <summary>
        /// Evaluates one suspected correction point: scores the original words together with
        /// their immediate neighbours, then scores every replacement candidate produced by
        /// <c>PinyinTool</c> in the same neighbour context and keeps the candidate with the
        /// lowest <c>CalculateScore</c> value.
        /// </summary>
        /// <param name="originWords">The word sequence that contains the correction point.</param>
        /// <param name="correctPoint">
        /// The point to evaluate (<c>WordIndex</c>, <c>Length</c>). When at least one candidate
        /// exists, its <c>Score</c> and <c>Word</c> are overwritten with the best candidate,
        /// regardless of the returned value.
        /// </param>
        /// <returns>
        /// <c>false</c> when no candidate was produced; otherwise whether the original score
        /// exceeds the best candidate score by more than <c>ThresholdGap</c>.
        /// </returns>
        private bool CheckCorrectPoint(IList <string> originWords, CorrectPoint correctPoint)
        {
            var start = correctPoint.WordIndex;
            var span  = correctPoint.Length;
            var end   = start + span;

            // originalContext: [previous word] + words under test + [next word]
            // candidateContext: [previous word] + replacement slot + [next word]
            var originalContext  = new List<string>();
            var candidateContext = new List<string>();

            if (start != 0)
            {
                var previousWord = originWords[start - 1];
                originalContext.Add(previousWord);
                candidateContext.Add(previousWord);
            }

            // The words under test are needed twice (context and pinyin lookup), so slice once.
            var targetWords = originWords.Skip(start).Take(span).ToList();
            originalContext.AddRange(targetWords);

            // The slot lands at index 0 when there is no previous word, otherwise at index 1.
            var slotIndex = candidateContext.Count;
            candidateContext.Add("[PlaceHolder]");

            if (end < originWords.Count)
            {
                var nextWord = originWords[end];
                originalContext.Add(nextWord);
                candidateContext.Add(nextWord);
            }

            var originScore   = CalculateScore(originalContext);
            var pinyinOptions = PinyinTool.ChineseWord2PinyinSeqCandidates(string.Join("", targetWords));

            double bestScore = double.MaxValue;
            string bestWord  = null;

            foreach (var pinyinSeq in pinyinOptions)
            {
                foreach (var candidate in PinyinTool.PinyinSequence2ChineseWordsCandidates(pinyinSeq.Split(' ')))
                {
                    // Reuse the same context list; only the slot changes between candidates.
                    candidateContext[slotIndex] = candidate;
                    var score = CalculateScore(candidateContext);
                    if (score < bestScore)
                    {
                        bestScore = score;
                        bestWord  = candidate;
                    }
                }
            }

            if (bestWord == null)
            {
                // No candidate at all: leave correctPoint untouched.
                return false;
            }

            correctPoint.Score = bestScore;
            correctPoint.Word  = bestWord;
            return originScore - correctPoint.Score > ThresholdGap;
        }
예제 #2
0
        /// <summary>
        /// Demo driver: runs the normalizer and the Jieba segmenter on a sample sentence,
        /// exercises the <c>PinyinTool</c> lookups, then feeds a few misspelled sentences to
        /// <c>SpellerModel.DoCorrect</c> and prints every result that differs from the
        /// expected text. All resource paths are hard-coded local paths.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string sampleSentence = "这个问提不好解答";

            // Demo of ChineseNormalier: normalize the sample sentence and print the result.
            var normalizer = new ChineseNormalier(
                @"D:\zhijie\ChineseSpeller\ChineseSpeller\packages\jieba.NET.0.38.3\Resources\stopwords.txt",
                @"D:\cmcc_task\CMCC\Data\chat\outputfolder");
            Console.WriteLine(normalizer.Normalize(sampleSentence, false, true, true));

            // Segment the same sentence and print one token per line.
            var jieba = new JiebaSegmenter();
            foreach (var token in jieba.Cut(sampleSentence, false, false))
            {
                Console.WriteLine(token);
            }

            // Load the word and pinyin tables, then exercise each lookup once.
            // The results are not used further; they are handy to inspect in a debugger.
            PinyinTool.InitChineseWordTable(@"D:\zhijie\ChineseSpeller\ChineseSpeller\data\ChineseWordDict\dict.txt");
            PinyinTool.Init(@"D:\zhijie\ChineseSpeller\ChineseSpeller\data\py\ChinesePinyinTable.txt");
            var pinyinsOfChar  = PinyinTool.ChineseCharToPinyinList("里");
            var charsOfPinyin  = PinyinTool.PinyinToChineseCharList("tian");
            var pinyinSeqsOfWord = PinyinTool.ChineseWord2PinyinSeqCandidates("使用");
            var wordsOfPinyinSeq = PinyinTool.PinyinSequence2ChineseWordsCandidates(new List<string> { "xiang", "yong" });

            // Training step, currently disabled:
            //var trainer = new Trainer(new JBSegmenter(),
            //    @"D:\cmcc_task\CMCC\Data\chat\inputfolder",
            //    @"D:\cmcc_task\CMCC\Data\chat\outputfolder");
            //trainer.Execution();

            var speller = new SpellerModel(@"D:\cmcc_task\CMCC\Data\chat\outputfolder", new JBSegmenter());

            // Key: input sentence; value: expected corrected sentence.
            var expectedCorrections = new Dictionary<string, string>
            {
                [string.Empty] = string.Empty,
                ["我要够买流量包"] = "我要购买流量包",
                ["如何订狗流亮包"] = "如何订购流量包",
                ["本机有承诺连续12个月使用88元或以上4G主体套餐使用流亮年包的优惠未到其"] = "本机有承诺连续12个月使用88元或以上4G主体套餐使用流量年包的优惠未到期",
            };

            // Number of exact matches; tallied but not printed.
            int matched = 0;

            foreach (var pair in expectedCorrections)
            {
                var actual = speller.DoCorrect(pair.Key);
                if (actual == pair.Value)
                {
                    matched++;
                    continue;
                }

                Console.WriteLine($"result should be {pair.Value} but is {actual}");
            }

            // Keep the console window open until Enter is pressed.
            Console.ReadLine();
        }