/// <summary>
/// Converts text to HTML in place: when the text needs encoding, <paramref name="value"/> is
/// re-pointed at a newly allocated encoded string; otherwise it is left untouched.
/// </summary>
/// <param name="value">Text value; replaced by the encoded text when encoding is required</param>
public unsafe void ToHtml(ref subString value)
{
    int sourceLength = value.Length;
    if (sourceLength == 0)
    {
        // Nothing to encode.
        return;
    }
    fixed (char* pinned = value.value)
    {
        char* first = pinned + value.StartIndex;
        char* last = first + sourceLength;
        int escapeCount = encodeCount(first, last);
        if (escapeCount == 0)
        {
            // encodeCount reported nothing to encode, so the original sub-string is kept.
            return;
        }
        // The output is sized as the source length plus four extra characters per counted item.
        // NOTE(review): presumably each encoded character expands to five characters - confirm against toHtml.
        int encodedLength = sourceLength + (escapeCount << 2);
        string encoded = fastCSharp.String.FastAllocateString(encodedLength);
        fixed (char* target = encoded)
        {
            toHtml(first, last, target);
        }
        value.UnsafeSet(encoded, 0, encoded.Length);
    }
}
/// <summary>
/// Points the String member at the whole of the given text and sets Type to type.String.
/// </summary>
/// <param name="value">Text to store; its Length is read, so it must not be null</param>
internal void SetString(string value)
{
    int length = value.Length;
    String.UnsafeSet(value, 0, length);
    Type = type.String;
}
/// <summary>
/// Text word segmentation: scans the first <paramref name="length"/> characters of the text and collects the words found.
/// </summary>
/// <param name="text">Text to segment</param>
/// <param name="length">Text length (number of characters to scan)</param>
/// <returns>Segmentation result, or null when no word was produced</returns>
private unsafe list <subString> getWords(string text, int length)
{
    fixed(char *textFixed = text)
    {
        // NOTE(review): presumably normalizes the characters in place before scanning - confirm against simplified.Format.
        simplified.Format(textFixed, length);
        // Byte count for a map indexed by character position (length bits rounded up to whole bytes).
        int count = (length + 7) >> 3;
        byte * match = stackalloc byte[count];
        // NOTE(review): the third argument looks like an initial fill value for the map - confirm against fixedMap.
        fixedMap matchMap = new fixedMap(match, count, 0);
        // Reuse a pooled result list when one is available, making sure it starts empty.
        list <subString> words = typePool <list <subString> > .Pop();
        if (words == null) { words = new list <subString>(); }
        else if (words.Count != 0) { words.Clear(); }
        // Scratch list that receives the trie-graph matches for each run handled below.
        list <keyValue <int, int> > matchs = typePool <list <keyValue <int, int> > > .Pop() ?? new list <keyValue <int, int> >();
        // Lookup table indexed by character value that yields a charType stored as a byte.
        byte * charTypes = charTypePointer.Byte;
        subString matchWord = default(subString);
        // end is fixed here, so later reassignment of length does not move the loop bound.
        for (char *start = textFixed, end = textFixed + length; start != end;)
        {
            if (*start == ' ')
            {
                // Sentinel: a non-space character at *end stops the space-skipping scan at the end of the range.
                // NOTE(review): this writes to textFixed[length] and the slot is never restored in this method.
                *end = '?';
                while (*++start == ' ') { ; }
            }
            else
            {
                // Sentinel: a space at *end lies outside 0x4E00-0x9FA5, so the range scan below stops there.
                // NOTE(review): the charTypes scans rely on the type of ' ' ending them as well - confirm against the table.
                * end = ' ';
                char *segment = start;
                if ((uint)(*start - 0x4E00) <= 0X9FA5 - 0x4E00)
                {
                    // Consume the whole run of consecutive characters inside 0x4E00-0x9FA5.
                    while ((uint)(*++start - 0x4E00) <= 0X9FA5 - 0x4E00) { ; }
                    // The length parameter is reused from here on as the length of the current run.
                    if ((length = (int)(start - segment)) == 1)
                    {
                        // A single character in the range is a word on its own.
                        words.Add(subString.Unsafe(text, (int)(segment - textFixed), 1));
                    }
                    else
                    {
                        int startIndex = (int)(segment - textFixed);
                        matchs.Empty();
                        matchWord.UnsafeSet(text, startIndex, length);
                        // Collect the dictionary matches for this run into matchs.
                        wordTrieGraph.LeftRightMatchs(ref matchWord, matchs);
                        if ((count = matchs.Count) != 0)
                        {
                            // count limits the walk to matchs.Count entries; presumably UnsafeArray can be longer than Count.
                            foreach (keyValue <int, int> value in matchs.UnsafeArray)
                            {
                                // Each match is emitted as a word (Key and Value are passed as start index and length)
                                // and is recorded in the map.
                                words.Add(subString.Unsafe(text, value.Key, value.Value));
                                matchMap.Set(value.Key, value.Value);
                                if (--count == 0) { break; }
                            }
                        }
                        // Walk every position of the run and emit additional pieces based on the map.
                        int index = startIndex;
                        for (int endIndex = startIndex + length; index != endIndex; ++index)
                        {
                            if (matchMap.Get(index))
                            {
                                // Marked position: emit the span from startIndex up to index unless it is exactly one character,
                                // then restart the span at index.
                                // NOTE(review): when index == startIndex this adds a zero-length word - confirm this is intended.
                                if ((count = index - startIndex) != 1) { words.Add(subString.Unsafe(text, startIndex, count)); }
                                startIndex = index;
                            }
                            else
                            {
                                // Unmarked position: emit the single character as a word.
                                words.Add(subString.Unsafe(text, index, 1));
                            }
                        }
                        // Emit the trailing span when it is longer than one character.
                        if ((index -= startIndex) > 1) { words.Add(subString.Unsafe(text, startIndex, index)); }
                    }
                }
                else
                {
                    byte type = charTypes[*start];
                    if (type == (byte)charType.OtherLetter)
                    {
                        // Consume the whole run of OtherLetter characters.
                        while (charTypes[*++start] == (byte)charType.OtherLetter) { ; }
                    }
                    else
                    {
                        // Consume characters while their type is at least charType.Letter. Each time the type changes,
                        // the piece collected so far is emitted, unless its type is charType.Keep.
                        char *word = start;
                        for (byte newType = charTypes[*++start]; newType >= (byte)charType.Letter; newType = charTypes[*++start])
                        {
                            if (type != newType)
                            {
                                if (type != (byte)charType.Keep) { words.Add(subString.Unsafe(text, (int)(word - textFixed), (int)(start - word))); }
                                type = newType;
                                word = start;
                            }
                        }
                    }
                    // The whole segment is always emitted as a word, in addition to any pieces emitted above.
                    words.Add(subString.Unsafe(text, (int)(segment - textFixed), (int)(start - segment)));
                }
            }
        }
        // Return the scratch match list to its pool.
        typePool <list <keyValue <int, int> > > .PushNotNull(matchs);
        if ((count = words.Count) == 0)
        {
            // Nothing found: return the result list to its pool and report null.
            typePool <list <subString> > .PushNotNull(words);
            return(null);
        }
        return(words);
    }
}