/// <summary>
/// Produces the term list for <paramref name="graph"/> by handing the graph and
/// this analysis instance to an IndexAnalysisMerger and executing it.
/// </summary>
/// <param name="graph">The graph to merge.</param>
/// <returns>Whatever the merger's <c>Execute</c> call returns.</returns>
protected override List<Term> GetResult(Graph graph)
{
    var merger = new IndexAnalysisMerger
    {
        Graph = graph,
        IndexAnalysis = this
    };

    return merger.Execute();
}
/// <summary>
/// Stores the supplied split-word instance and then runs the enabled
/// person-name recognition passes over <paramref name="graph"/>.
/// </summary>
/// <param name="graph">The graph handed to the recognition passes.</param>
/// <param name="splitWord">Stored in the <c>splitWord</c> field before recognition runs.</param>
public void Learn(Graph graph, App.Crf.SplitWord splitWord)
{
    this.splitWord = splitWord;

    // Asian person-name recognition
    if (_isAsianName)
    {
        FindAsianPerson(graph);
    }

    // Foreign person-name recognition
    if (_isForeignName)
    {
        FindForeignPerson(graph);
    }
}
/// <summary>
/// Flattens the graph into a single list: for every slot of <c>graph.Terms</c>
/// except the last one, the term stored there and every term reachable from it
/// through <c>GetNext()</c> is appended in order.
/// </summary>
/// <param name="graph">The graph whose term chains are collected.</param>
/// <returns>All collected terms, in slot order and then chain order.</returns>
protected override List<Term> GetResult(Graph graph)
{
    // A List<Term> is required here: LinkedList<T> exposes no public Add method
    // and cannot be returned where List<Term> is expected.
    var result = new List<Term>();

    // The final slot of graph.Terms is intentionally not visited (Length - 1).
    var length = graph.Terms.Length - 1;
    for (var i = 0; i < length; i++)
    {
        // Walk the chain that starts at this slot; empty slots contribute nothing.
        for (var term = graph.Terms[i]; term != null; term = term.GetNext())
        {
            result.Add(term);
        }
    }

    return result;
}
/// <summary>
/// Wraps the path walk and term collection in a merger created through
/// <c>Merger.Create</c> and returns the result of executing it.
/// </summary>
/// <param name="graph">The graph to walk and collect terms from.</param>
/// <returns>The value returned by the merger's <c>Execute</c> call.</returns>
protected override List<Term> GetResult(Graph graph)
{
    return Merger.Create(() =>
    {
        graph.WalkPath();

        var collected = new List<Term>();

        // Every slot except the last one is inspected; null slots are skipped.
        var lastIndex = graph.Terms.Length - 1;
        for (var index = 0; index < lastIndex; index++)
        {
            if (graph.Terms[index] == null)
            {
                continue;
            }

            collected.Add(graph.Terms[index]);
        }

        SetRealName(graph, collected);
        return collected;
    }).Execute();
}
/// <summary>
/// Segments one whole sentence. Matches from the user-supplied ambiguity
/// forest take priority; the text between and after those matches goes
/// through the regular <c>Analysis</c> pass.
/// </summary>
/// <param name="temp">The sentence to segment.</param>
private void AnalysisStr(string temp)
{
    var sentenceGraph = new Graph(temp);
    var cursor = 0;

    if (_ambiguityForest != null)
    {
        var matcher = new GetWord(_ambiguityForest, sentenceGraph.Chars);
        while (matcher.GetFrontWords() != null)
        {
            // Analyse the text lying between the previous position and this match.
            if (cursor < matcher.Offe)
            {
                Analysis(sentenceGraph, cursor, matcher.Offe);
            }

            // The parameters are consumed as consecutive (word, nature) pairs.
            var pairs = matcher.GetParams();
            cursor = matcher.Offe;
            for (var k = 0; k < pairs.Length; k += 2)
            {
                sentenceGraph.AddTerm(
                    new Term(pairs[k], cursor, new TermNatures(new TermNature(pairs[k + 1], 1))));
                cursor += pairs[k].Length;
            }
        }
    }

    // NOTE(review): with this condition a remainder of exactly one character
    // (cursor == Chars.Length - 1) is not analysed - confirm this is intended.
    if (cursor < sentenceGraph.Chars.Length - 1)
    {
        Analysis(sentenceGraph, cursor, sentenceGraph.Chars.Length);
    }

    _terms.AddAll(GetResult(sentenceGraph));
}
/// <summary>
/// Produces the list of terms for a graph. The strategy is supplied by each
/// concrete subclass.
/// </summary>
/// <param name="graph">The graph to extract terms from.</param>
/// <returns>The terms obtained from <paramref name="graph"/>.</returns>
protected abstract List<Term> GetResult(Graph graph);
/// <summary>
/// Copies the non-normalised text of each term into its <c>RealName</c>,
/// taking the slice from <c>graph.RealStr</c> that starts at the term's offset
/// and is as long as the term's name. Does nothing when
/// <c>MyStaticValue.IsRealName</c> is false.
/// </summary>
/// <param name="graph">The graph that supplies <c>RealStr</c>.</param>
/// <param name="result">The terms whose <c>RealName</c> is assigned.</param>
protected void SetRealName(Graph graph, List<Term> result)
{
    if (!MyStaticValue.IsRealName)
    {
        return;
    }

    var str = graph.RealStr;
    foreach (var term in result)
    {
        // String.Substring takes (startIndex, length), not (begin, end):
        // the second argument must be the name's length, not an end offset.
        term.RealName = str.Substring(term.Offe, term.Name.Length);
    }
}
/// <summary>
/// Scans <c>gp.Chars</c> from <paramref name="startOffe"/> (inclusive) to
/// <paramref name="endOffe"/> (exclusive) and adds terms to the graph,
/// dispatching on <c>DatDictionary.Status</c> of the current character.
/// </summary>
/// <param name="gp">The graph that receives the terms.</param>
/// <param name="startOffe">Index of the first character to scan.</param>
/// <param name="endOffe">Index one past the last character to scan.</param>
private void Analysis(Graph gp, int startOffe, int endOffe)
{
    var chars = gp.Chars;
    for (var i = startOffe; i < endOffe; i++)
    {
        int start;
        int end;
        string str;
        switch (DatDictionary.Status(chars[i]))
        {
            // Status 0: the character becomes a single-character term with no nature.
            case 0:
                gp.AddTerm(new Term(chars[i].ToString(), i, TermNatures.Null));
                break;
            // Status 4: gather the run of consecutive status-4 characters.
            // Here "end" is the number of characters in the run, not an index.
            case 4:
                start = i;
                end = 1;
                while (++i < endOffe && DatDictionary.Status(chars[i]) == 4)
                {
                    end++;
                }
                str = WordAlert.AlertEnglish(chars, start, end);
                gp.AddTerm(new Term(str, start, TermNatures.En));
                // The while loop stopped one position past the run; step back so
                // the for-loop increment lands on the first character after it.
                i--;
                break;
            // Status 5: same run-gathering as status 4, passed through
            // WordAlert.AlertNumber and tagged TermNatures.M.
            case 5:
                start = i;
                end = 1;
                while (++i < endOffe && DatDictionary.Status(chars[i]) == 5)
                {
                    end++;
                }
                str = WordAlert.AlertNumber(chars, start, end);
                gp.AddTerm(new Term(str, start, TermNatures.M));
                i--;
                break;
            default:
                // Here "end" is an index: extend [start, end) while the current
                // character has a positive DatDictionary.InSystem entry.
                start = i;
                end = i;
                var c = chars[start];
                while (DatDictionary.InSystem[c] > 0)
                {
                    end++;
                    // On reaching the range end, c keeps the last in-system character.
                    if (++i >= endOffe) break;
                    c = chars[i];
                }
                // Nothing was gathered: emit the character on its own and move on
                // to the next for-loop iteration.
                if (start == end)
                {
                    gp.AddTerm(new Term(c.ToString(), i, TermNatures.Null));
                    continue;
                }
                // Add every word the matcher yields for the gathered span.
                _getWordsImpl.SetChars(chars, start, end);
                while ((str = _getWordsImpl.AllWords()) != null)
                {
                    gp.AddTerm(new Term(str, _getWordsImpl.Offe, _getWordsImpl.GetItem()));
                }
                /*
                 * If no word was segmented, add it to gp as an unknown character.
                 */
                // Either the range end was hit (c is still in-system) or c has a
                // status above 3: step back so the for-loop increment revisits
                // position i. Otherwise c is added as a single-character term.
                if (DatDictionary.InSystem[c] > 0 || DatDictionary.Status(c) > 3)
                {
                    i -= 1;
                }
                else
                {
                    gp.AddTerm(new Term(c.ToString(), i, TermNatures.Null));
                }
                break;
        }
    }
}
/// <summary>
/// Runs user-dictionary recognition over the graph's terms using the supplied
/// forests, then calls <c>RemoveLittlePath</c> and <c>WalkPathByScore</c> on
/// the graph, in that order.
/// </summary>
/// <param name="graph">The graph whose terms are processed.</param>
/// <param name="forests">The forests passed to the recogniser.</param>
private void userDefineRecognition(Graph graph, params IWoodInterface[] forests)
{
    var recognizer = new UserDefineRecognition(graph.Terms, forests);
    recognizer.Recognition();

    graph.RemoveLittlePath();
    graph.WalkPathByScore();
}
/// <summary>
/// Runs foreign person-name recognition over the graph's terms and passes the
/// words it reports to <c>AddListToTerm</c>.
/// </summary>
/// <param name="graph">The graph whose terms are inspected.</param>
private void FindForeignPerson(Graph graph)
{
    var recognizer = new ForeignPersonRecognition(graph.Terms);
    AddListToTerm(recognizer.GetNewWords());
}