/// <summary>
/// Segments one whole sentence. Entries matched by the user-configured
/// ambiguity forest take priority; the text between and after those matches
/// is handed to <see cref="Analysis"/>.
/// </summary>
/// <param name="temp">The sentence to segment.</param>
private void AnalysisStr(string temp)
{
    var gp = new Graph(temp);
    var startOffe = 0;

    if (_ambiguityForest != null)
    {
        var gw = new GetWord(_ambiguityForest, gp.Chars);
        while (gw.GetFrontWords() != null)
        {
            // Analyze the text lying between the previous position and this match.
            if (gw.Offe > startOffe)
            {
                Analysis(gp, startOffe, gw.Offe);
            }

            // The parameters are consumed as flat pairs: element i is the word
            // text, element i + 1 is passed to TermNature as its first argument.
            var @params = gw.GetParams();
            startOffe = gw.Offe;
            for (var i = 0; i < @params.Length; i += 2)
            {
                gp.AddTerm(new Term(@params[i], startOffe, new TermNatures(new TermNature(@params[i + 1], 1))));
                startOffe += @params[i].Length;
            }
        }
    }

    // Analyze whatever is left after the last ambiguity match. The bound is
    // Chars.Length (not Chars.Length - 1) so that a single remaining character,
    // including a one-character sentence, is still analyzed.
    if (startOffe < gp.Chars.Length)
    {
        Analysis(gp, startOffe, gp.Chars.Length);
    }

    var result = GetResult(gp);
    _terms.AddAll(result);
}
/// <summary>
/// Scans <c>gp.Chars</c> over the half-open range [startOffe, endOffe) and adds
/// the terms it finds to the graph.
/// </summary>
/// <param name="gp">Graph supplying the characters and receiving the terms.</param>
/// <param name="startOffe">Index of the first character to scan (inclusive).</param>
/// <param name="endOffe">Index at which scanning stops (exclusive).</param>
private void Analysis(Graph gp, int startOffe, int endOffe)
{
    var chars = gp.Chars;
    var pos = startOffe;

    while (pos < endOffe)
    {
        var status = DatDictionary.Status(chars[pos]);

        if (status == 0)
        {
            // Status 0: emit the single character as a term with the Null nature.
            gp.AddTerm(new Term(chars[pos].ToString(), pos, TermNatures.Null));
            pos++;
        }
        else if (status == 4 || status == 5)
        {
            // Collect the maximal run of consecutive characters sharing this status.
            var runStart = pos;
            var runLength = 1;
            pos++;
            while (pos < endOffe && DatDictionary.Status(chars[pos]) == status)
            {
                runLength++;
                pos++;
            }

            if (status == 4)
            {
                // Status 4 runs go through AlertEnglish and get the En nature.
                string english = WordAlert.AlertEnglish(chars, runStart, runLength);
                gp.AddTerm(new Term(english, runStart, TermNatures.En));
            }
            else
            {
                // Status 5 runs go through AlertNumber and get the M nature.
                string number = WordAlert.AlertNumber(chars, runStart, runLength);
                gp.AddTerm(new Term(number, runStart, TermNatures.M));
            }
            // pos already rests on the first character after the run.
        }
        else
        {
            // Advance over the run of characters whose InSystem entry is positive.
            // "current" ends up as the first character outside the run, or as the
            // last character of the run when the range end is reached first.
            var runStart = pos;
            var current = chars[pos];
            while (DatDictionary.InSystem[current] > 0)
            {
                pos++;
                if (pos >= endOffe)
                {
                    break;
                }
                current = chars[pos];
            }

            if (pos == runStart)
            {
                // Empty run: emit the character as a Null-nature term and move on.
                gp.AddTerm(new Term(current.ToString(), pos, TermNatures.Null));
                pos++;
                continue;
            }

            // Add every word the word getter reports inside [runStart, pos).
            _getWordsImpl.SetChars(chars, runStart, pos);
            string word;
            while ((word = _getWordsImpl.AllWords()) != null)
            {
                gp.AddTerm(new Term(word, _getWordsImpl.Offe, _getWordsImpl.GetItem()));
            }

            // If the stopping character is neither in the system table nor of
            // status > 3, add it to the graph as an unknown character and step
            // past it. Otherwise leave pos where it is: either the range is
            // exhausted, or the next iteration handles that character itself.
            var leaveForNextPass = DatDictionary.InSystem[current] > 0 || DatDictionary.Status(current) > 3;
            if (!leaveForNextPass)
            {
                gp.AddTerm(new Term(current.ToString(), pos, TermNatures.Null));
                pos++;
            }
        }
    }
}