/// <summary>
/// Builds a word segmenter: allocates fresh core and bigram
/// <c>WordDictionary</c> instances, the <c>Span</c> tagger, three
/// <c>UnknowWord</c> recognizers and the <c>Segment</c> engine, hooks this
/// object's <c>OnSegmentEventHandler</c> onto the engine's
/// <c>OnSegmentEvent</c>, and records the smoothing parameter.
/// </summary>
/// <param name="SmoothingParameter">Value stored in <c>m_dSmoothingPara</c>.</param>
public WordSegment(double SmoothingParameter)
{
    // Dictionaries are created first because the segmenter below is built on them.
    m_dictCore = new WordDictionary();
    m_dictBigram = new WordDictionary();

    m_POSTagger = new Span();

    // NOTE(review): judging by the field names these handle person names,
    // transliterated person names and place names -- confirm against UnknowWord usage.
    m_uPerson = new UnknowWord();
    m_uTransPerson = new UnknowWord();
    m_uPlace = new UnknowWord();

    // The segmenter takes the bigram dictionary first, then the core dictionary.
    m_Seg = new Segment(m_dictBigram, m_dictCore);
    m_Seg.OnSegmentEvent += this.OnSegmentEventHandler;

    m_dSmoothingPara = SmoothingParameter; // Smoothing parameter
}
/// <summary>
/// Console demo for bigram segmentation. Loads the core and bigram
/// dictionaries from <c>coreDictFile</c> and <c>biDictFile</c>, then for each
/// sample sentence prints its atom segmentation followed by every word
/// sequence that <c>Segment.BiSegment</c> leaves in <c>m_pWordSeg</c>.
/// Prints an error message and returns early if either dictionary fails to load.
/// </summary>
public static void TestBiSegment()
{
    // Each row is { sentence, description }; the description is printed as the section title.
    string[][] samples = new string[][]
    {
        new string[] { @"他说的的确实在理", @"普通分词测试" },                      // ordinary segmentation
        new string[] { @"张华平3-4月份来北京开会", @"数字切分" },                    // number splitting
        new string[] { @"1.加强管理", @"剔除多余的“.”" },                          // drop the redundant "."
        new string[] { @"他出生于1980年1月1日10点", @"日期合并" },                   // date merging
        new string[] { @"他出生于甲子年", @"年份识别" },                            // year recognition
        new string[] { @"馆内陈列周恩来和邓颖超生前使用过的物品", @"姓名识别" }      // person-name recognition
    };

    WordDictionary coreDict = new WordDictionary();
    if (!coreDict.Load(coreDictFile))
    {
        Console.WriteLine("coreDict 字典装入错误!");
        return;
    }

    WordDictionary biDict = new WordDictionary();
    if (!biDict.Load(biDictFile))
    {
        Console.WriteLine("字典装入错误!");
        return;
    }

    foreach (string[] sample in samples)
    {
        Console.WriteLine("\r\n============ {0} ============", sample[1]);

        // The segmenter is fed the sentence wrapped in the begin/end markers.
        string wrapped = Predefine.SENTENCE_BEGIN + sample[0] + Predefine.SENTENCE_END;

        List<AtomNode> atoms = Segment.AtomSegment(wrapped);
        Console.WriteLine("原子切分:");
        foreach (AtomNode atom in atoms)
        {
            Console.Write("{0}, ", atom.sWord);
        }

        Console.WriteLine("\r\n\r\n实际切分:");
        // A new Segment is created for every sentence, exactly as before.
        Segment segmenter = new Segment(biDict, coreDict);
        segmenter.BiSegment(wrapped, 0.1, 1);

        // One output line per candidate word sequence.
        for (int row = 0; row < segmenter.m_pWordSeg.Count; row++)
        {
            for (int col = 0; col < segmenter.m_pWordSeg[row].Length; col++)
            {
                Console.Write("{0}, ", segmenter.m_pWordSeg[row][col].sWord);
            }
            Console.WriteLine();
        }
    }
}
/// <summary>
/// Console demo for the N-shortest-path search. Initialises a
/// <c>WordSegment</c> from the "Data" directory under the current directory,
/// obtains the cost array for a fixed sample sentence via
/// <c>Segment.TestSegment</c>, runs <c>NShortPath.Calculate</c> on it, and
/// prints: the result by index, every path for each of the n ranks, the best
/// path, and at most five paths from <c>NShortPath.GetNPaths</c>.
/// </summary>
public static void TestNShortPath()
{
    const int n = 2;
    const int maxPaths = 5;

    // Dictionary setup (edited by SharpKey).
    string dictPath = Path.Combine(Environment.CurrentDirectory, "Data") + Path.DirectorySeparatorChar;
    Console.WriteLine("正在初始化字典库,请稍候...");

    WordSegment wordSegment = new WordSegment();
    wordSegment.InitWordSegment(dictPath);

    // Segmenter built on the dictionaries that wordSegment exposes.
    Segment segmenter = new Segment(wordSegment.m_dictBigram, wordSegment.m_dictCore);

    ColumnFirstDynamicArray<ChainContent> apCost = segmenter.TestSegment("始##始这个人的确实在末##末", 0.1, 2);
    Console.WriteLine(apCost.ToString());

    NShortPath.Calculate(apCost, n);
    NShortPath.printResultByIndex();

    // ---- All paths ----
    Console.WriteLine("\r\n\r\n所有路径:");
    for (int rank = 0; rank < n; rank++)
    {
        foreach (int[] path in NShortPath.GetPaths(rank))
        {
            WritePathLine(path);
        }
        Console.WriteLine("========================");
    }

    // ---- Best path ----
    Console.WriteLine("\r\n最佳路径:");
    WritePathLine(NShortPath.GetBestPath());

    // ---- At most maxPaths paths ----
    Console.WriteLine("\r\n最多 {0} 条路径:", maxPaths);
    foreach (int[] path in NShortPath.GetNPaths(maxPaths))
    {
        WritePathLine(path);
    }
}

/// <summary>
/// Writes every element of <paramref name="path"/> followed by ", " and then
/// terminates the console line.
/// </summary>
/// <param name="path">Path elements to print.</param>
private static void WritePathLine(int[] path)
{
    foreach (int node in path)
    {
        Console.Write("{0}, ", node);
    }
    Console.WriteLine();
}