/// <summary>
/// Splits the text into word substrings (word segmentation).
/// </summary>
/// <param name="text">Text to segment. Its character buffer is pinned and written to by this method.</param>
/// <param name="length">Number of characters of <paramref name="text"/> to process, starting at index 0.</param>
/// <returns>The list of word substrings, or null when no word was produced.</returns>
/// <remarks>
/// The method writes a sentinel character at index <paramref name="length"/> of the pinned buffer
/// (one position past the last processed character) and also hands the buffer to TmphSimplified.Format.
/// NOTE(review): when length equals text.Length the sentinel lands on the string's terminator slot
/// and is not restored here - confirm callers expect that.
/// </remarks>
private unsafe TmphList<TmphSubString> getWords(string text, int length)
{
    fixed (char* textFixed = text)
    {
        // Receives the raw buffer; presumably normalises the characters in place - TODO confirm.
        TmphSimplified.Format(textFixed, length);
        // ceil(length / 8) bytes; presumably one bit per character position for matchMap.
        int count = (length + 7) >> 3;
        byte* match = stackalloc byte[count];
        // NOTE(review): the third argument (0) presumably is the initial fill value - confirm against TmphFixedMap.
        TmphFixedMap matchMap = new TmphFixedMap(match, count, 0);
        // Reuse a pooled result list when one is available; make sure it starts out empty.
        TmphList<TmphSubString> words = TmphTypePool<TmphList<TmphSubString>>.Pop();
        if (words == null) words = new TmphList<TmphSubString>();
        else if (words.Count != 0) words.Clear();
        // Scratch list that receives the dictionary matches of each ideograph run.
        TmphList<TmphKeyValue<int, int>> matchs = TmphTypePool<TmphList<TmphKeyValue<int, int>>>.Pop() ?? new TmphList<TmphKeyValue<int, int>>();
        // Lookup table indexed by character code; presumably covers every char value - TODO confirm.
        byte* charTypes = charTypePointer.Byte;
        // No increment clause: every branch below advances start itself.
        for (char* start = textFixed, end = textFixed + length; start != end;)
        {
            if (*start == ' ')
            {
                // Sentinel: a non-space at end guarantees the space-skipping scan stops there.
                *end = '?';
                while (*++start == ' ') ;
            }
            else
            {
                // Sentinel: the scans below never compare against end, so they rely on this space to stop.
                // NOTE(review): this assumes charTypes[' '] is neither OtherLetter nor >= Letter - confirm.
                *end = ' ';
                char* segment = start;
                // Unsigned-subtraction range test: true only for chars in 0x4E00..0x9FA5 (CJK ideographs).
                if ((uint)(*start - 0x4E00) <= 0X9FA5 - 0x4E00)
                {
                    // Advance to the first character outside the ideograph range.
                    while ((uint)(*++start - 0x4E00) <= 0X9FA5 - 0x4E00) ;
                    // From here on, length is reused as the length of the current ideograph run.
                    if ((length = (int)(start - segment)) == 1)
                    {
                        // A lone ideograph is a word by itself.
                        words.Add(TmphSubString.Unsafe(text, (int)(segment - textFixed), 1));
                    }
                    else
                    {
                        int startIndex = (int)(segment - textFixed);
                        matchs.Empty();
                        // Collect dictionary matches inside the run; each entry is used below as (start index, length).
                        wordTrieGraph.LeftRightMatchs(TmphSubString.Unsafe(text, startIndex, length), matchs);
                        if ((count = matchs.Count) != 0)
                        {
                            // Iterates the backing array directly, so count is used to stop after matchs.Count entries.
                            foreach (TmphKeyValue<int, int> value in matchs.Unsafer.Array)
                            {
                                words.Add(TmphSubString.Unsafe(text, value.Key, value.Value));
                                // Presumably flags the matched positions in the map - TODO confirm Set(start, count) semantics.
                                matchMap.Set(value.Key, value.Value);
                                if (--count == 0) break;
                            }
                        }
                        // Walk every position of the run: a flagged position closes the span that began at
                        // startIndex (added unless its length is exactly 1) and becomes the new startIndex;
                        // an unflagged position is added as a one-character word.
                        // NOTE(review): if the first position is flagged, count is 0 and a zero-length
                        // substring is added - confirm this is intended.
                        int index = startIndex;
                        for (int endIndex = startIndex + length; index != endIndex; ++index)
                        {
                            if (matchMap.Get(index))
                            {
                                if ((count = index - startIndex) != 1)
                                {
                                    words.Add(TmphSubString.Unsafe(text, startIndex, count));
                                }
                                startIndex = index;
                            }
                            else words.Add(TmphSubString.Unsafe(text, index, 1));
                        }
                        // Add the trailing span after the last startIndex when it is longer than one character.
                        if ((index -= startIndex) > 1) words.Add(TmphSubString.Unsafe(text, startIndex, index));
                    }
                }
                else
                {
                    byte type = charTypes[*start];
                    if (type == (byte)TmphCharType.OtherLetter)
                    {
                        // Consume the whole run of OtherLetter characters.
                        while (charTypes[*++start] == (byte)TmphCharType.OtherLetter) ;
                    }
                    else
                    {
                        // Scan while the type code stays >= Letter; whenever the type changes, add the
                        // sub-run collected so far (unless its type was Keep) and start a new sub-run.
                        char* word = start;
                        for (byte newType = charTypes[*++start]; newType >= (byte)TmphCharType.Letter; newType = charTypes[*++start])
                        {
                            if (type != newType)
                            {
                                if (type != (byte)TmphCharType.Keep)
                                {
                                    words.Add(TmphSubString.Unsafe(text, (int)(word - textFixed), (int)(start - word)));
                                }
                                type = newType;
                                word = start;
                            }
                        }
                    }
                    // The complete segment is always added, in addition to any sub-runs added above.
                    words.Add(TmphSubString.Unsafe(text, (int)(segment - textFixed), (int)(start - segment)));
                }
            }
        }
        // Hand the scratch list back to the pool.
        TmphTypePool<TmphList<TmphKeyValue<int, int>>>.Push(ref matchs);
        if ((count = words.Count) == 0)
        {
            // Nothing was produced: recycle the result list and report null.
            TmphTypePool<TmphList<TmphSubString>>.Push(ref words);
            return null;
        }
        return words;
    }
}
/// <summary>
/// Unpacks the stored column definitions and row values into a DataTable.
/// </summary>
/// <param name="table">Table that receives the columns and rows.</param>
/// <param name="TmphBuilder">Data object unpacker that supplies every non-null cell value.</param>
/// <remarks>
/// The row counter field is counted down while rows are appended, so it is 0 after a complete run.
/// </remarks>
private unsafe void Get(DataTable table, TmphDataReader TmphBuilder)
{
    int columnCount = _columnNames.Length;
    DataColumn[] tableColumns = new DataColumn[columnCount];
    fixed (byte* typeCodes = _columnTypes)
    {
        // One column per stored name; a type code outside the Types table falls back to object.
        int column = 0;
        foreach (string name in _columnNames)
        {
            byte typeCode = typeCodes[column];
            var columnType = typeCode < Types.Length ? Types[typeCode] : typeof(object);
            tableColumns[column] = new DataColumn(name, columnType);
            ++column;
        }
        table.Columns.AddRange(tableColumns);

        fixed (byte* nullFixed = _dbNull)
        {
            TmphFixedMap nullMap = new TmphFixedMap(nullFixed);
            // Running cell position across all rows; the null map is consulted once per cell.
            int cell = 0;
            while (_rowCount != 0)
            {
                object[] rowValues = new object[columnCount];
                for (column = 0; column < columnCount; ++column)
                {
                    // The unpacker is only asked for a value when the cell is not flagged as null.
                    if (nullMap.Get(cell++))
                    {
                        rowValues[column] = DBNull.Value;
                    }
                    else
                    {
                        rowValues[column] = TmphBuilder.Get(typeCodes[column]);
                    }
                }

                DataRow row = table.NewRow();
                row.ItemArray = rowValues;
                table.Rows.Add(row);
                --_rowCount;
            }
        }
    }
}