/// <summary>
/// Get a free slot index, growing the backing array when no free index is left.
/// </summary>
/// <returns>An index into <c>Array</c> that is not currently on the free list.</returns>
private int getFreeIndex()
{
    // Fast path: reuse an index that was released earlier.
    if (FreeIndexs.Length != 0)
    {
        return FreeIndexs.UnsafePopOnly();
    }

    // Slow path: double the storage (with a floor of sizeof(int) slots).
    int oldLength = Array.Length;
    int requestedLength = Math.Max(oldLength << 1, sizeof(int));
    Array = Array.copyNew(requestedLength);

    // Reserve room on the free list before the unchecked adds below.
    FreeIndexs.PrepLength(Array.Length - oldLength);

    // Push every new slot except the first one, highest index first, so the
    // lowest new index ends up on top of the free list.
    int candidate = Array.Length - 1;
    while (candidate != oldLength)
    {
        FreeIndexs.UnsafeAdd(candidate);
        --candidate;
    }

    // The first new slot is handed straight to the caller.
    return oldLength;
}
/// <summary>
/// Advance this cursor to the next node, descending into <paramref name="next"/> when it is not empty.
/// </summary>
/// <param name="next">Node array to descend into; when its length is 0 the cursor only steps forward in the current array.</param>
/// <param name="indexs">Saved cursors: the current position is pushed here before descending and popped from here when the current array is exhausted.</param>
/// <returns>The node at the new position, or null when the current array is exhausted and no saved cursor remains.</returns>
public Node MoveNext(ref LeftArray <Node> next, ref LeftArray <NodeIndex> indexs)
{
    // Both paths step past the current element first; the emptiness of
    // "next" is sampled before the increment, as in the original order.
    bool hasNext = next.Length != 0;
    bool isExhausted = (++Index == Array.Length);

    if (hasNext)
    {
        // Remember where to resume, unless nothing is left in this array.
        if (!isExhausted)
        {
            indexs.Add(this);
        }
        Array = next;
        Index = 0;
        return next.Array[0];
    }

    if (isExhausted)
    {
        if (indexs.Length == 0)
        {
            return null;
        }
        // Resume from the most recently saved cursor.
        this = indexs.UnsafePopOnly();
    }
    return Array.Array[Index];
}
/// <summary>
/// Get the word segmentation result for a text.
/// Clears <c>result</c>, writes a formatted copy of the text into <c>formatText</c>, collects words
/// through <c>addWord</c> in up to two scanning passes (a trie-graph pass that only runs when a
/// non-default character type table is in use, then a character-class pass), and finally fills
/// <c>result</c> from the collected <c>words</c>.
/// </summary>
/// <param name="text">Text to segment. Presumably must not be empty: the scan loops are do/while loops that read the first character before comparing against the end pointer - TODO confirm with callers.</param>
protected void getResult(string text)
{
    result.Clear();
    // One extra character is requested beyond the text length; that extra slot (*end below) is used as a scan sentinel.
    formatText = AutoCSer.Extension.StringExtension.FastAllocateString((foramtLength = text.Length) + 1);
    fixed(char *textFixed = formatText)
    {
        // Presumably writes a normalized copy of text into the pinned buffer - TODO confirm against Simplified.FormatNotEmpty.
        Simplified.FormatNotEmpty(text, textFixed, foramtLength);
        words.Length = matchs.Length = 0;
        char *start = textFixed, end = textFixed + foramtLength;
        byte type, nextType, wordType;
        // checkMatchMap() is called at most once per invocation, on the first segment that produces matches.
        bool isMatchMap = false;
        // Pass 1 (trie graph): skipped when the default character type table is in use.
        if (charTypeData != StringTrieGraph.DefaultCharTypeData.Byte)
        {
            StaticStringTrieGraph trieGraph = searcher.trieGraph;
            int count, index, startIndex;
            char trieGraphHeadChar = trieGraph.AnyHeadChar;
            do
            {
                if (((type = charTypeData[*start]) & StringTrieGraph.TrieGraphHeadFlag) == 0)
                {
                    // Sentinel: presumably AnyHeadChar carries TrieGraphHeadFlag, so the scan below always stops at end - TODO confirm.
                    *end = trieGraphHeadChar;
                    // Skip forward to the next character that can start a trie-graph segment.
                    do
                    {
                        // A skipped character flagged both Chinese and TrieGraph is emitted as a single-character Chinese word.
                        if ((type & ((byte)WordType.Chinese | (byte)WordType.TrieGraph)) == ((byte)WordType.Chinese | (byte)WordType.TrieGraph))
                        {
                            addWord((int)(start - textFixed), 1, WordType.Chinese);
                        }
                        if (((nextType = charTypeData[*++start]) & StringTrieGraph.TrieGraphHeadFlag) != 0)
                        {
                            // Reached the sentinel slot: pass 1 is finished.
                            if (start == end)
                            {
                                goto TRIEGRAPHEND;
                            }
                            // Accept the head character when it is Chinese, or when it does not share a
                            // letter/number/keep class with the previous character (i.e. it is not in the middle of such a run).
                            if ((nextType & (byte)WordType.Chinese) != 0 || (type & nextType & ((byte)WordType.OtherLetter | (byte)WordType.Letter | (byte)WordType.Number | (byte)WordType.Keep)) == 0)
                            {
                                goto TRIEGRAPH;
                            }
                        }
                        type = nextType;
                    }while (true);
                }
                TRIEGRAPH:
                // Replace the sentinel with a space; presumably a space has no TrieGraph bit, so the segment scan below stops at end - TODO confirm.
                *end = ' ';
                // NOTE(review): when this label is reached through the goto above, type still holds the type of the
                // character before start (the goto precedes "type = nextType"); when reached by falling through it is
                // the type of *start. Confirm the end-flag test below is intended to use that value.
                char *segment = start, segmentEnd = (type & StringTrieGraph.TrieGraphEndFlag) == 0 ? start++ : ++start;
                // Extend the segment over consecutive TrieGraph characters; segmentEnd tracks the position just past the last character carrying TrieGraphEndFlag.
                while (((type = charTypeData[*start]) & (byte)WordType.TrieGraph) != 0)
                {
                    ++start;
                    if ((type & StringTrieGraph.TrieGraphEndFlag) != 0)
                    {
                        segmentEnd = start;
                    }
                }
                if ((int)(start - segment) == 1)
                {
                    // NOTE(review): at this point type describes the character at start (the one after the segment)
                    // and its TrieGraph bit is clear because the loop above just exited, so the conditional below
                    // always selects WordType.Chinese. Confirm whether the segment character's type was intended.
                    if ((type & (byte)WordType.Chinese) != 0)
                    {
                        addWord((int)(segment - textFixed), 1, (type & (byte)WordType.TrieGraph) != 0 ? WordType.TrieGraph : WordType.Chinese);
                    }
                }
                else
                {
                    if (segment != segmentEnd)
                    {
                        matchs.Length = 0;
                        trieGraph.LeftRightMatchs(segment, segmentEnd, ref matchs);
                        if ((count = matchs.Length) == 0)
                        {
                            // No match: rewind segmentEnd so the CHINESE loop below covers the whole segment.
                            segmentEnd = segment;
                            goto CHINESE;
                        }
                        if (!isMatchMap)
                        {
                            checkMatchMap();
                            isMatchMap = true;
                        }
                        startIndex = (int)(segment - textFixed);
                        // The backing array is enumerated directly, so count stops the loop after matchs.Length entries.
                        foreach (KeyValue <int, int> value in matchs.Array)
                        {
                            // value.Key is used as an offset relative to the segment start, value.Value as the word length.
                            addWord(index = value.Key + startIndex, value.Value, WordType.TrieGraph);
                            matchMap.Set(index, value.Value);
                            if (--count == 0)
                            {
                                break;
                            }
                        }
                        index = (int)(segmentEnd - textFixed);
                        // Chinese characters for which matchMap.Get returns 0 are emitted as single-character words.
                        do
                        {
                            if (matchMap.Get(startIndex) == 0 && (charTypeData[textFixed[startIndex]] & (byte)WordType.Chinese) != 0)
                            {
                                addWord(startIndex, 1, WordType.Chinese);
                            }
                        }while (++startIndex != index);
                    }
                    CHINESE:
                    // Emit each Chinese character between segmentEnd and start as a single-character word.
                    while (segmentEnd != start)
                    {
                        if ((charTypeData[*segmentEnd] & (byte)WordType.Chinese) != 0)
                        {
                            addWord((int)(segmentEnd - textFixed), 1, WordType.Chinese);
                        }
                        ++segmentEnd;
                    }
                }
            }while (start != end);
            TRIEGRAPHEND:
            // Rewind for pass 2.
            start = textFixed;
        }
        // Pass 2 (character classes): Chinese characters, letter/number/keep runs and other-letter runs.
        do
        {
            type = charTypeData[*start];
            if ((type &= ((byte)WordType.Chinese | (byte)WordType.OtherLetter | (byte)WordType.Letter | (byte)WordType.Number | (byte)WordType.Keep)) == 0)
            {
                // Sentinel: presumably '0' is classified as a number, so the skip loop below always stops at end - TODO confirm.
                *end = '0';
                // Skip characters that belong to none of the word classes.
                do
                {
                    type = charTypeData[*++start];
                    if ((type &= ((byte)WordType.Chinese | (byte)WordType.OtherLetter | (byte)WordType.Letter | (byte)WordType.Number | (byte)WordType.Keep)) != 0)
                    {
                        // Stopped on the sentinel slot: scanning is finished.
                        if (start == end)
                        {
                            goto END;
                        }
                        goto OTHER;
                    }
                }while (true);
            }
            OTHER:
            // Replace the sentinel with a space; presumably a space belongs to none of the word classes, so the run scans below stop at end - TODO confirm.
            *end = ' ';
            if ((type & (byte)WordType.Chinese) != 0)
            {
                // Run of Chinese characters: each one without the TrieGraph bit becomes a single-character word.
                // NOTE(review): for the first character of the run, type was already masked above; whether the
                // TrieGraph bit survives that mask depends on the WordType bit layout, which is not visible here.
                do
                {
                    if ((type & (byte)WordType.TrieGraph) == 0)
                    {
                        addWord((int)(start - textFixed), 1, WordType.Chinese);
                    }
                }while (((type = charTypeData[*++start]) & (byte)WordType.Chinese) != 0);
            }
            else
            {
                char *segment = start;
                if ((type & (byte)WordType.OtherLetter) == 0)
                {
                    // Run of Letter / Number / Keep characters. word marks the start of the current sub-run of one
                    // class value; wordType accumulates (ORs) every class value seen in the whole run.
                    char *word = start;
                    wordType = type;
                    for (nextType = charTypeData[*++start]; (nextType &= ((byte)WordType.Letter | (byte)WordType.Number | (byte)WordType.Keep)) != 0; nextType = charTypeData[*++start])
                    {
                        if (type != nextType)
                        {
                            // Class changed: emit the finished sub-run unless its class value is exactly Keep.
                            if (type != (byte)WordType.Keep)
                            {
                                addWord((int)(word - textFixed), (int)(start - word), (WordType)type);
                            }
                            wordType |= nextType;
                            type = nextType;
                            word = start;
                        }
                    }
                    // Emit the trailing sub-run only when the run was actually split and its class is not exactly Keep.
                    if (word != segment && type != (byte)WordType.Keep)
                    {
                        addWord((int)(word - textFixed), (int)(start - word), (WordType)type);
                    }
                    // Always emit the whole run with the combined class bits.
                    addWord((int)(segment - textFixed), (int)(start - segment), (WordType)wordType);
                }
                else
                {
                    // Run of OtherLetter characters: emitted as one word.
                    while ((charTypeData[*++start] & (byte)WordType.OtherLetter) != 0)
                    {
                        ;
                    }
                    addWord((int)(segment - textFixed), (int)(start - segment), WordType.OtherLetter);
                }
            }
        }while (start != end);
        END:
        // Build result from the collected words.
        if (words.Length != 0)
        {
            // The backing array is enumerated directly below, so count stops each loop after words.Length entries.
            int count = words.Length, textLength = text.Length;
            if ((searcher.flags & SearchFlags.ResultIndexs) == 0)
            {
                // Start positions are not recorded: store only the word type and text length per word.
                foreach (KeyValue <SubString, WordType> word in words.Array)
                {
                    result[word.Key] = new ResultIndexLeftArray { WordType = word.Value, TextLength = textLength };
                    if (--count == 0)
                    {
                        break;
                    }
                }
            }
            else
            {
                // Start positions are recorded: every occurrence's start index is appended to the word's entry.
                ResultIndexLeftArray indexs;
                foreach (KeyValue <SubString, WordType> word in words.Array)
                {
                    HashString wordKey = word.Key;
                    if (result.TryGetValue(wordKey, out indexs))
                    {
                        indexs.Indexs.Add(word.Key.Start);
                        // The entry is stored back after modification.
                        result[wordKey] = indexs;
                    }
                    else
                    {
                        indexs.Set(textLength, word.Value);
                        // Take an array from indexArrays when one is available and install it with length 0.
                        if (indexArrays.Length != 0)
                        {
                            indexs.Indexs.Set(indexArrays.UnsafePopOnly(), 0);
                        }
                        indexs.Indexs.Add(word.Key.Start);
                        result.Add(wordKey, indexs);
                    }
                    if (--count == 0)
                    {
                        break;
                    }
                }
                // Sort the recorded start indexes of every word.
                foreach (ResultIndexLeftArray indexArray in result.Values)
                {
                    indexArray.Indexs.sort();
                }
            }
        }
    }
}