/// <summary>
/// Adds spell-based index entries for <c>Text</c> to <paramref name="result"/>.
/// Does nothing when <c>Text</c> is longer than <c>SettingCache.MaxIndex</c>.
/// </summary>
/// <param name="result">The word list that <c>AddIndex</c> is asked to append to.</param>
public void AfterSegment(SuperLinkedList<WordInfo> result)
{
    if (Text.Length > SettingCache.MaxIndex)
    {
        return;
    }

    var parts = SpellUtils.FormatFirstWord(Text);
    var combined = new StringBuilder(10);

    for (int index = 0; index < parts.Count; index++)
    {
        var part = parts[index];

        // Parts that are not Chinese/numeric only contribute their first character.
        if (!SpellUtils.IsCnOrNum(part))
        {
            combined.Append(part[0]);
            continue;
        }

        combined.Append(part);

        // Single-character parts get no per-part spell entries.
        if (part.Length < 2)
        {
            continue;
        }

        var partSpells = SpellUtils.GetSpellSegment(part, false, true);
        if (partSpells == null)
        {
            continue;
        }

        foreach (var spell in partSpells)
        {
            // For every part except the first, the spell is followed by Text with
            // all occurrences of the part removed.
            var entry = index == 0
                ? spell
                : string.Format("{0}{1}", spell, Text.Replace(part, ""));
            AddIndex(result, entry, index, SettingCache.MaxIndex - index);
        }
    }

    // Finally index the spell segments of the combined string at position 0.
    var combinedSpells = SpellUtils.GetSpellSegment(combined.ToString(), false, true);
    if (combinedSpells == null)
    {
        return;
    }

    foreach (var spell in combinedSpells)
    {
        AddIndex(result, spell, 0, SettingCache.MaxIndex);
    }
}
/// <summary>
/// Manual smoke check: initialises the engine, calls the spell/segment helpers on
/// sample inputs, and drains a <c>PanGuTokenizer</c> over a short text.
/// It contains no assertions; the locals exist so results can be inspected in a debugger.
/// </summary>
public void SpellTest()
{
    LuceneEngine engine = new LuceneEngine();
    engine.Init();

    var firsts = SpellUtils.GetCnSegment("NBA常规赛-快船vs凯尔特人");
    var spells = SpellUtils.GetSpellSegment("战重警和");

    var segment = new Segment();
    var collection = segment.DoSegment("国际足球100509K联赛釜山-大田");
    var list = new List<string>();
    foreach (WordInfo word in collection)
    {
        // The segment result may contain null entries; skip them.
        if (word == null)
        {
            continue;
        }
        list.Add(word.Word);
    }

    string sss = "san国yan义";
    string ssss = Synacast.LuceneNetSearcher.Searcher.Searcher.SegmentKeyWord(sss);

    string text = "重庆";
    string ss = string.Empty;

    // Dispose the reader deterministically once the tokenizer has been drained.
    using (TextReader tr = new StringReader(text))
    {
        PanGuTokenizer ct = new PanGuTokenizer(tr);
        int end = 0;
        while (end < text.Length)
        {
            Lucene.Net.Analysis.Token t = ct.Next();

            // Stop if the tokenizer is exhausted before the end offset reaches the
            // text length, instead of dereferencing a null token.
            if (t == null)
            {
                break;
            }

            end = t.EndOffset();
            ss = ss + t.TermText() + "/ ";
        }
    }
}
/// <summary>
/// Creates a tokenizer over <paramref name="input"/>. The whole text is read up front,
/// then <c>_segmentList</c> and/or <c>_wordList</c> are filled according to the mode
/// reported by <c>AnalyzInput()</c> and the markers found in the text.
/// </summary>
/// <param name="input">Reader supplying the text to tokenize; it is read to the end here.</param>
public PanGuTokenizer(TextReader input)
    : base(input)
{
    _inputText = base.input.ReadToEnd();
    if (string.IsNullOrEmpty(_inputText))
    {
        // Fallback: ReadToEnd produced nothing, so try pulling the text in chunks.
        char[] readBuf = new char[1024];
        int relCount = base.input.Read(readBuf, 0, readBuf.Length);
        StringBuilder inputStr = new StringBuilder(readBuf.Length);
        while (relCount > 0)
        {
            inputStr.Append(readBuf, 0, relCount);
            relCount = input.Read(readBuf, 0, readBuf.Length);
        }
        if (inputStr.Length > 0)
        {
            _inputText = inputStr.ToString();
        }
    }

    // NOTE(review): AnalyzInput() is defined elsewhere; presumably it inspects
    // _inputText for the mode markers used below — confirm.
    _isFlag = AnalyzInput();
    if (!_isFlag)
    {
        // PanGu segmentation: segment the text and collect spell segments per word.
        global::PanGu.Segment segment = new Segment();
        var wordInfos = segment.DoSegment(_inputText);
        foreach (var wi in wordInfos)
        {
            // DoSegment results may contain null entries (other callers skip them);
            // skip them here too rather than dereferencing null.
            if (wi == null)
            {
                continue;
            }

            var list = SpellUtils.GetSpellSegment(wi.Word);
            if (list != null)
            {
                _segmentList.AddRange(list);
            }
        }
        _wordList = new List<WordInfo>(wordInfos);
    }
    else
    {
        if (_inputText.EndsWith(_indexFlag))
        {
            // Comma/space segmentation: strip the marker, split on _splitFlag, and add
            // each piece followed by its spell segments.
            string[] sources = _inputText.Replace(_indexFlag, "").Split(_splitFlag, StringSplitOptions.RemoveEmptyEntries);
            foreach (string source in sources)
            {
                _segmentList.Add(source);
                var spells = SpellUtils.GetSpellSegment(source);
                if (spells != null)
                {
                    _segmentList.AddRange(spells);
                }
            }
        }
        else if (_inputText.EndsWith(_indexCnName))
        {
            // Pinyin first-letter segmentation: strip the marker and delegate to GetCnSegment.
            string source = _inputText.Replace(_indexCnName, "");
            _wordList = SpellUtils.GetCnSegment(source);
        }
        else
        {
            // Flag segmentation: drop every '|' and, for each start character, emit the
            // character itself plus '|'-joined chains of it with each trailing run.
            // Example: "abc" yields a, a|b, a|b|c, a|c, b, b|c, c (in that order).
            string source = _inputText.Replace("|", "");
            for (int i = 0; i < source.Length; i++)
            {
                string first = source[i].ToString();
                _segmentList.Add(first);
                int f = 1;
                for (int j = i + 1; j < source.Length; j++)
                {
                    string s = string.Format("{0}|{1}", first, source[j]);
                    _segmentList.Add(s);
                    first = s;
                    if (j == source.Length - 1)
                    {
                        // End of this chain: restart from source[i] with the chain
                        // beginning one character further along (the loop's j++ then
                        // moves j to i + f).
                        f++;
                        j = i + f - 1;
                        first = source[i].ToString();
                    }
                }
            }
        }
    }
}