/// <summary>
/// Creates a new <c>MiniList&lt;object&gt;</c> and appends every element of <c>Objects</c> to it.
/// </summary>
public void MiniList()
{
    var target = new MiniList<object>();

    foreach (var element in Objects)
    {
        target.Add(element);
    }
}
/// <summary>
/// Walks <paramref name="s"/> from <paramref name="startIndex"/> (inclusive) to
/// <paramref name="endIndex"/> (exclusive) and passes each value read by
/// <c>ToUtf16Int</c> to <c>DecompCore</c>, which appends to <paramref name="dest"/>.
/// </summary>
/// <param name="s">Source string.</param>
/// <param name="startIndex">Index of the first UTF-16 code unit to process.</param>
/// <param name="endIndex">Index at which processing stops.</param>
/// <param name="dest">List that receives the decomposed output.</param>
private static void DecomposeInRange(string s, int startIndex, int endIndex, ref MiniList<char> dest)
{
    for (var position = startIndex; position < endIndex;)
    {
        bool consumedPair;
        var value = ToUtf16Int(s, position, out consumedPair);
        DecompCore(value, ref dest);

        // A surrogate pair occupies two UTF-16 code units, anything else one.
        if (consumedPair)
        {
            position += 2;
        }
        else
        {
            position += 1;
        }
    }
}
/// <summary>
/// Builds the composed form of <paramref name="s"/> by alternately decomposing/composing
/// the segments that need work and copying the segments that do not.
/// </summary>
/// <param name="s">The string to process.</param>
/// <param name="result">Receives the output when the method returns <c>true</c>; an empty list otherwise.</param>
/// <returns>
/// <c>true</c> if <paramref name="result"/> holds the result;
/// <c>false</c> if the input is already normalized.
/// </returns>
public static bool Compose(string s, out MiniList<char> result)
{
    // Quick check: -1 means there is nothing to rewrite.
    bool startsWithSurrogatePair;
    var segmentStart = IndexOfLastNormalizedChar(s, 0, out startsWithSurrogatePair);
    if (segmentStart == -1)
    {
        result = new MiniList<char>();
        return false;
    }

    // The real work starts here.
    // NOTE(review): StringToMiniList presumably copies the first segmentStart chars of s - confirm.
    result = StringToMiniList(s, segmentStart);

    do
    {
        var firstCharLength = startsWithSurrogatePair ? 2 : 1;
        var segmentEnd = FindNextNfcQcYes(s, segmentStart + firstCharLength);

        // Only the part appended by this round's decomposition is composed.
        var composeFrom = result.Count;
        DecomposeInRange(s, segmentStart, segmentEnd, ref result);
        ComposeInRange(ref result, composeFrom);

        if (segmentEnd == s.Length)
        {
            break;
        }

        segmentStart = IndexOfLastNormalizedChar(s, segmentEnd + 1, out startsWithSurrogatePair);

        // Copy the untouched run up to the next segment (or to the end of the string) verbatim.
        var copyEnd = segmentStart == -1 ? s.Length : segmentStart;
        var copyLength = copyEnd - segmentEnd;
        if (copyLength > 0)
        {
            result.EnsureCapacity(copyLength);
            s.CopyTo(segmentEnd, result.InnerArray, result.Count, copyLength);
            result.Count += copyLength;
        }
    }
    while (segmentStart != -1);

    return true;
}
/// <summary>
/// Converts a list of integer code values into a string. Values up to
/// <c>char.MaxValue</c> are appended as a single <c>char</c>; larger values are
/// written as a high/low surrogate pair.
/// </summary>
/// <param name="miniList">The code values to convert.</param>
/// <returns>The resulting UTF-16 string.</returns>
static string ToString(MiniList<int> miniList)
{
    var builder = new StringBuilder(miniList.Count);
    var index = 0;

    while (index < miniList.Count)
    {
        var codePoint = miniList[index++];

        if (codePoint > char.MaxValue)
        {
            // offset is non-negative here, so shift/mask equal divide/modulo by 0x400.
            var offset = codePoint - 0x10000;
            builder.Append((char)((offset >> 10) + 0xD800));
            builder.Append((char)((offset & 0x3FF) + 0xDC00));
        }
        else
        {
            builder.Append((char)codePoint);
        }
    }

    return builder.ToString();
}
/// <summary>
/// Scans <paramref name="text"/> for URL-like spans and appends one
/// <c>EntityInfo</c> of type <c>EntityType.Url</c> per match to <paramref name="result"/>.
/// For every '.' found, the method scans backwards for the host start (and an optional
/// <c>http://</c> / <c>https://</c> scheme), forwards for the host end, validates the TLD
/// through <paramref name="tldDic"/>, and then consumes an optional port, path and query.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <param name="urlWithoutProtocol">When <c>false</c>, candidates without an http(s) scheme are skipped.</param>
/// <param name="tldDic">TLD table keyed by the hash computed inline in this method.</param>
/// <param name="longestTldLength">Upper bound on the TLD length that is hashed.</param>
/// <param name="shortestTldLength">Labels shorter than this are not tried as a TLD.</param>
/// <param name="result">List that receives the extracted entities.</param>
public static void Extract(string text, bool urlWithoutProtocol, Dictionary<int, TldInfo> tldDic, int longestTldLength, int shortestTldLength, List<EntityInfo> result)
{
    var dots = new MiniList<int>();
    var hashCodes = new MiniList<int>();
    var startIndex = 0;

Start:
    if (startIndex >= text.Length - 2)
    {
        return;
    }

    var dotIndex = text.IndexOf('.', startIndex);
    if (dotIndex == -1 || dotIndex == text.Length - 1)
    {
        return;
    }

    if (dotIndex == startIndex)
    {
        // A '.' right at the start position cannot belong to a valid URL.
        goto GoToNextToDot;
    }

    // Where dotIndex points:
    //   www.(<-)twitter.com/
    //   twitter.(<-)com/

    // Give up when the character before or after the '.' is '-' or '_'.
    var x = text[dotIndex - 1];
    if (x == '-' || x == '_')
    {
        goto GoToNextToDot;
    }

    x = text[dotIndex + 1];
    if (x == '-' || x == '_')
    {
        goto GoToNextToDot;
    }

    // Search backwards until a preceding character is reached.
    var precedingIndex = -1;
    var lastUnicodeCharIndex = -1;
    var hasScheme = false;

    for (var i = dotIndex - 1; i >= startIndex; i--)
    {
        var c = text[i];

        if (c == '/')
        {
            // Give up when the host starts with '-' or '_'.
            x = text[i + 1];
            if (x == '-' || x == '_')
            {
                goto GoToNextToDot;
            }

            // Scheme detection: reads "//:ptth" or "//:sptth" backwards from the slash.
            if (i >= 6)
            {
                var j = i - 1;
                if (text[j--] == '/' && text[j--] == ':')
                {
                    switch (ToLower(text[j--]))
                    {
                        case 's':
                            if (i >= 7 && ToLower(text[j--]) == 'p')
                            {
                                goto case 'p';
                            }
                            break;
                        case 'p':
                            if (ToLower(text[j--]) == 't' && ToLower(text[j--]) == 't' && ToLower(text[j--]) == 'h')
                            {
                                if (j < 0 || IsPrecedingChar(text[j]))
                                {
                                    precedingIndex = j;
                                    hasScheme = true;
                                    goto BreakSchemeCheck;
                                }
                            }
                            break;
                    }
                }
            }

            goto GoToNextToDot;
        }

        if (!IsValidDomainChar(c))
        {
            if (IsPrecedingChar(c))
            {
                precedingIndex = i;
                break;
            }

            // Not a preceding character, so the candidate is invalid.
            goto GoToNextToDot;
        }

        if (lastUnicodeCharIndex == -1 && IsUnicodeDomainChar(c))
        {
            lastUnicodeCharIndex = i;
        }
    }

    if (!urlWithoutProtocol)
    {
        goto GoToNextToDot;
    }

    if (lastUnicodeCharIndex != -1)
    {
        if (lastUnicodeCharIndex != dotIndex - 1 && IsPrecedingChar(text[lastUnicodeCharIndex]))
        {
            // Rescue the candidate by excluding the Unicode characters.
            precedingIndex = lastUnicodeCharIndex;
            lastUnicodeCharIndex = -1;
        }
        else
        {
            goto GoToNextToDot;
        }
    }

    x = text[precedingIndex + 1];
    if ((precedingIndex == -1 && startIndex != 0) || x == '-' || x == '_')
    {
        goto GoToNextToDot;
    }

BreakSchemeCheck:
    // Read the host part through to its end.
    dots.Clear();
    dots.Add(dotIndex + 1);
    var hasUnicodeCharAfterDot = false;
    var nextIndex = text.Length;

    for (var i = dotIndex + 1; i < text.Length; i++)
    {
        var c = text[i];

        if (c == '.')
        {
            // Stop when the '.' is the last character of text,
            // or when there is no scheme but Unicode characters were seen.
            if (i == text.Length - 1 || (!hasScheme && hasUnicodeCharAfterDot))
            {
                nextIndex = i;
                break;
            }

            // Stop when the character before or after the '.' is '-' or '_'.
            x = text[i - 1];
            if (x == '-' || x == '_')
            {
                nextIndex = i - 1;
                break;
            }

            x = text[i + 1];
            if (x == '-' || x == '_')
            {
                nextIndex = i;
                break;
            }

            dots.Add(i + 1);
            continue;
        }

        if (!IsValidDomainChar(c))
        {
            nextIndex = i;
            break;
        }

        if (!hasUnicodeCharAfterDot)
        {
            hasUnicodeCharAfterDot = IsUnicodeDomainChar(c);
        }
    }

    // TLD validation
    TldInfo tldInfo;
    int dotCount;

    for (var i = dots.Count - 1; i >= 0; i--)
    {
        var dotIndexPlusOne = dots[i];
        var len = nextIndex - dotIndexPlusOne;
        if (len < shortestTldLength)
        {
            continue;
        }

        if (len > longestTldLength)
        {
            len = longestTldLength;
        }

        nextIndex = dotIndexPlusOne + len;

        // Compute the hash of every prefix on the spot to reduce the number of loops.
        hashCodes.Clear();
        var hash1 = 5381;
        var hash2 = hash1;
        for (var j = dotIndexPlusOne; j < nextIndex;)
        {
            hash1 = ((hash1 << 5) + hash1) ^ ToLower(text[j++]);
            hashCodes.Add(hash1 + hash2 * 1566083941);
            if (j >= nextIndex)
            {
                break;
            }

            hash2 = ((hash2 << 5) + hash2) ^ ToLower(text[j++]);
            hashCodes.Add(hash1 + hash2 * 1566083941);
        }

        // Try the longest prefix first.
        for (var j = hashCodes.Count - 1; j >= 0; j--)
        {
            nextIndex = dotIndexPlusOne + j + 1;
            if ((nextIndex == text.Length || !IsAlnumAt(text[nextIndex]))
                && tldDic.TryGetValue(hashCodes[j], out tldInfo)
                && nextIndex - dotIndexPlusOne == tldInfo.Length) // cheap hash-collision check
            {
                dotCount = i + 1;
                goto TldDecided;
            }
        }
    }

    goto GoToNextToDot;

TldDecided:
    // A ccTLD without a subdomain requires a scheme.
    if (!hasScheme && tldInfo.Type == TldType.CcTld
        && (dotCount == 1 && (nextIndex >= text.Length || text[nextIndex] != '/')))
    {
        goto GoToNextIndex;
    }

    // '_' is allowed in a subdomain but not in the domain itself.
    for (var i = dots.Last - 2; i > precedingIndex; i--)
    {
        var c = text[i];
        if (c == '.' || c == '/')
        {
            break;
        }

        if (c == '_')
        {
            goto GoToNextIndex;
        }
    }

    var urlStartIndex = precedingIndex + 1;

    if (nextIndex >= text.Length)
    {
        goto AddAndGoNext;
    }

    // Port number
    if (text[nextIndex] == ':')
    {
        var colonIndex = nextIndex;
        var portNumberLength = 0;
        for (nextIndex++; nextIndex < text.Length; nextIndex++)
        {
            var c = text[nextIndex];
            if (c <= '9' && c >= '0')
            {
                portNumberLength++;
            }
            else
            {
                break;
            }
        }

        if (portNumberLength == 0)
        {
            // No digits follow the ':' (this includes a ':' that is the last character
            // of text), so the URL ends just before the colon.
            nextIndex = colonIndex;
            result.Add(new EntityInfo(urlStartIndex, nextIndex - urlStartIndex, EntityType.Url));
            goto GoToNextIndex;
        }
    }

    if (nextIndex >= text.Length)
    {
        goto AddAndGoNext;
    }

    // Path
    if (text[nextIndex] == '/')
    {
        // https?://t.co/xxxxxxxxxx gets special treatment: only alphanumerics are consumed.
        var len = nextIndex - urlStartIndex;
        nextIndex++;
        if (hasScheme && (len == 11 || len == 12)
            && ToLower(text[nextIndex - 2]) == 'o'
            && ToLower(text[nextIndex - 3]) == 'c'
            && text[nextIndex - 4] == '.'
            && ToLower(text[nextIndex - 5]) == 't'
            && text[nextIndex - 6] == '/'
            && nextIndex < text.Length
            && IsAlnum(text[nextIndex]))
        {
            nextIndex++;
            for (; nextIndex < text.Length; nextIndex++)
            {
                if (!IsAlnum(text[nextIndex]))
                {
                    break;
                }
            }

            goto AddAndGoNext;
        }

        nextIndex += EatPath(text, nextIndex);
    }

    if (nextIndex >= text.Length)
    {
        goto AddAndGoNext;
    }

    // Query
    if (text[nextIndex] == '?')
    {
        nextIndex++;
        nextIndex += EatQuery(text, nextIndex);
    }

AddAndGoNext:
    result.Add(new EntityInfo(urlStartIndex, nextIndex - urlStartIndex, EntityType.Url));

GoToNextIndex:
    startIndex = nextIndex;
    goto Start;

GoToNextToDot:
    startIndex = dotIndex + 1;
    goto Start;
}
/// <summary>
/// Creates a string from the first <c>Count</c> characters of the list's backing array.
/// </summary>
/// <param name="miniList">The character list to convert.</param>
/// <returns>A new string containing the list's characters.</returns>
static string ToString(MiniList<char> miniList)
{
    var buffer = miniList.InnerArray;
    var length = miniList.Count;
    return new string(buffer, 0, length);
}
/// <summary>
/// Stores the dot matrix and allocates the working buffers: a match list created with
/// <c>width * height</c> as its constructor argument and one dirty flag per column.
/// </summary>
/// <param name="matrix">The grid of dot controllers.</param>
/// <param name="width">Number of columns; sizes the dirty-column flags.</param>
/// <param name="height">Number of rows.</param>
public Matches(DotController[,] matrix, int width, int height)
{
    var cellCount = width * height;

    this.matrix = matrix;
    matches = new MiniList<DotController>(cellCount);
    dirtyCol = new bool[width];
}
/// <summary>
/// Composes, in place, the characters of <paramref name="list"/> from
/// <paramref name="startIndex"/> to the end of the list, then shrinks
/// <c>list.Count</c> to the composed length.
/// </summary>
/// <remarks>
/// Characters are handled as <c>uint</c> values; a surrogate pair is packed as
/// (high surrogate &lt;&lt; 16) | low surrogate, which is how the composed value is
/// unpacked below. The composition table key is (starter &lt;&lt; 32) | current character.
/// </remarks>
/// <param name="list">The list to rewrite; its <c>Count</c> is updated.</param>
/// <param name="startIndex">Index of the first UTF-16 code unit to compose.</param>
private static void ComposeInRange(ref MiniList<char> list, int startIndex)
{
    bool isLastSurrogatePair;
    uint last = ToUtf16Int(list.InnerArray, startIndex, out isLastSurrogatePair);
    var starterIndex = startIndex;
    var starter = ((ulong)last) << 32;
    var isStarterSurrogatePair = isLastSurrogatePair;
    var i = startIndex + (isLastSurrogatePair ? 2 : 1); // read position
    var insertIndex = i;                                // write position (always <= i)
    var lastCcc = 0;

    while (i < list.Count)
    {
        var hi = list[i];
        var isSurrogatePair = IsHighSurrogate(hi) && i + 1 < list.Count && char.IsLowSurrogate(list[i + 1]);
        uint c;
        if (isSurrogatePair)
        {
            c = ToUtf16Int(hi, list[i + 1]);
            i += 2;
        }
        else
        {
            c = hi;
            i++;
        }

        // Hangul
        if (!isLastSurrogatePair && !isSurrogatePair) // (original author's note: is this condition right?)
        {
            // L + V -> LV syllable
            var LIndex = last - LBase;
            if (LIndex >= 0 && LIndex < LCount)
            {
                var VIndex = c - VBase;
                if (VIndex >= 0 && VIndex < VCount)
                {
                    last = SBase + (LIndex * VCount + VIndex) * TCount;
                    list[insertIndex - 1] = (char)last;
                    lastCcc = 0;
                    continue;
                }
            }

            // LV + T -> LVT syllable
            var SIndex = last - SBase;
            if (SIndex >= 0 && SIndex < SCount && (SIndex % TCount) == 0)
            {
                var TIndex = c - TBase;
                if (0 < TIndex && TIndex < TCount)
                {
                    last += TIndex;
                    list[insertIndex - 1] = (char)last;
                    lastCcc = 0;
                    continue;
                }
            }
        }
        // End of Hangul

        var ccc = GetCanonicalCombiningClass(c);
        if (ccc != 0 && lastCcc == ccc)
        {
            // Blocked: keep the character as is.
            list[insertIndex++] = hi;
            if (isSurrogatePair)
            {
                list[insertIndex++] = (char)c;
            }

            last = c;
            isLastSurrogatePair = isSurrogatePair;
            continue;
        }

        var key = starter | c;
        uint composed;
        if ((ccc != 0 || (ccc == 0 && lastCcc == 0)) && LookupCompositionTable(key, out composed))
        {
            if (composed <= char.MaxValue)
            {
                if (isStarterSurrogatePair)
                {
                    // The starter shrinks from two code units to one: close the gap left
                    // by its low surrogate by shifting the following units left by one.
                    // insertIndex must be decremented exactly once; decrementing it in the
                    // loop condition would shrink it on every iteration and drop units.
                    Debug.Assert(insertIndex < i);
                    --insertIndex;
                    for (var j = starterIndex + 1; j < insertIndex; j++)
                    {
                        list[j] = list[j + 1];
                    }
                }

                list[starterIndex] = (char)composed;
                isStarterSurrogatePair = false;
            }
            else
            {
                if (!isStarterSurrogatePair)
                {
                    // The starter grows from one code unit to two: make room for the
                    // low surrogate by shifting the following units right by one.
                    Debug.Assert(insertIndex < i);
                    var starterLoIndex = starterIndex + 1;
                    for (var j = insertIndex; j > starterLoIndex; j--)
                    {
                        list[j] = list[j - 1];
                    }

                    insertIndex++;
                }

                list[starterIndex] = (char)(composed >> 16);
                list[starterIndex + 1] = (char)(composed & char.MaxValue);
                isStarterSurrogatePair = true;
            }

            starter = ((ulong)composed) << 32;
            ccc = 0; // (original author's note: is this OK?)
        }
        else
        {
            if (ccc == 0)
            {
                // A character with combining class 0 becomes the new starter.
                starterIndex = insertIndex;
                starter = ((ulong)c) << 32;
                isStarterSurrogatePair = isSurrogatePair;
            }

            list[insertIndex++] = hi;
            if (isSurrogatePair)
            {
                list[insertIndex++] = (char)c;
            }
        }

        last = c;
        isLastSurrogatePair = isSurrogatePair;
        lastCcc = ccc;
    }

    list.Count = insertIndex;
}
/// <summary>
/// Recursively decomposes <paramref name="code"/> through the decomposition table and
/// inserts each resulting character into <paramref name="result"/>, moving it in front of
/// trailing characters whose canonical combining class is greater than its own.
/// </summary>
/// <remarks>
/// A value above <c>char.MaxValue</c> is written as two code units: the upper 16 bits
/// followed by the lower 16 bits.
/// </remarks>
/// <param name="code">The character value to decompose.</param>
/// <param name="result">The list that receives the output; its backing array may be replaced.</param>
private static void DecompCore(uint code, ref MiniList<char> result)
{
    // Hangul is not decomposed because it would be composed again anyway.
    // Hard-coded for Unicode 8.0:
    // shortcut for the ranges that are 10000 or more apart and for Hangul.
    if (!(code < 0x00C0 || (code > 0x1026 && (code < 0x1B06 || (code > 0x30FE && (code < 0xF900))))))
    {
        var i = LookupDecompositionTable(code);
        if (i != -1)
        {
            var first = DecompositionTableEntries[i];
            DecompCore(first, ref result);

            // A zero second entry means the decomposition has only one part.
            var second = DecompositionTableEntries[i + 1];
            if (second != 0)
            {
                DecompCore(second, ref result);
            }

            return;
        }
    }

    var insertIndex = result.Count;
    var isSurrogatePair = code > char.MaxValue;

    if (insertIndex > 0)
    {
        var ccc = GetCanonicalCombiningClass(code);
        if (ccc != 0)
        {
            // Walk backwards past every character with a larger combining class.
            var j = insertIndex - 1;
            while (true)
            {
                uint prev = result[j];
                var isPrevSurrogatePair = IsLowSurrogate(prev) && j > 0 && IsHighSurrogate(result[j - 1]);
                var prevCcc = GetCanonicalCombiningClass(isPrevSurrogatePair ? ToUtf16Int(result[--j], prev) : prev);
                if (prevCcc <= ccc)
                {
                    break;
                }

                insertIndex = j;
                if (j == 0)
                {
                    insertIndex = 0;
                    break;
                }

                j--;
            }
        }

        if (result.InnerArray.Length < result.Count + 2)
        {
            // Grow the buffer. Doubling alone is not enough when Count is 1
            // (1 * 2 < 1 + 2), so never allocate less than Count + 2.
            var newArray = new char[Math.Max(result.Count * 2, result.Count + 2)];
            if (insertIndex < result.Count)
            {
                // Copy the head, then the tail shifted right to leave a gap for the new character.
                Array.Copy(result.InnerArray, newArray, insertIndex);
                Array.Copy(result.InnerArray, insertIndex, newArray, insertIndex + (isSurrogatePair ? 2 : 1), result.Count - insertIndex);
            }
            else
            {
                Array.Copy(result.InnerArray, newArray, result.Count);
            }

            result.InnerArray = newArray;
        }
        else
        {
            if (insertIndex < result.Count)
            {
                // Shift the tail right in place to open the gap.
                Array.Copy(result.InnerArray, insertIndex, result.InnerArray, insertIndex + (isSurrogatePair ? 2 : 1), result.Count - insertIndex);
            }
        }
    }
    else
    {
        result.EnsureCapacity(2);
    }

    if (isSurrogatePair)
    {
        result.InnerArray[insertIndex] = (char)(code >> 16);
        result.InnerArray[insertIndex + 1] = (char)code;
        result.Count += 2;
    }
    else
    {
        result.InnerArray[insertIndex] = (char)code;
        result.Count++;
    }
}
/// <summary>
/// Creates an enumerator over <paramref name="miniList"/> with its position field set to -1.
/// </summary>
/// <param name="miniList">The list to enumerate.</param>
internal Enumerator(MiniList<T> miniList)
{
    this.x = -1;
    this.miniList = miniList;
}
/// <summary>
/// Clears the reference to the list held by this instance.
/// </summary>
public void Dispose()
{
    this.miniList = null;
}