/// <summary>
/// Reads the block data file named by <c>FileName</c> and returns the blocks it
/// describes, sorted in ascending order of their first code point.
/// </summary>
/// <remarks>
/// Every non-empty line must contain at least two ';'-separated fields: a code point
/// range (parsed by <c>ParseRange</c>) followed by a block name (parsed by
/// <c>ParseBlockName</c>). Blocks for which <c>GetSpecialBlockId</c> returns a
/// non-negative id are marked with <c>IsNotSupported = true</c>; each such id must
/// occur exactly once in the data. This method may be called only once per instance.
/// </remarks>
/// <returns>The parsed blocks, ordered by <c>FirstCodePoint</c>.</returns>
/// <exception cref="InvalidOperationException">The method has already been called on this instance.</exception>
/// <exception cref="FormatException">A line does not contain both a range field and a name field.</exception>
/// <exception cref="InvalidDataException">A special block occurs more than once, or not every special block occurs.</exception>
/// <exception cref="NotSupportedException">A special block id is greater than 31 and cannot be tracked in the 32-bit usage mask.</exception>
public UnicodeBlockInfo[] Read()
{
    if (this.hasReadBeenCalled)
    {
        throw new InvalidOperationException();
    }
    this.hasReadBeenCalled = true;

    this.m_specialBlockNames = new string[]
    {
        "CJK Unified Ideographs Extension A",
        "CJK Unified Ideographs",
        "Hangul Syllables",
        "CJK Unified Ideographs Extension B",
    };

    string allText = Utilities.ReadAllText(FileName, true, Encoding.UTF8);

    // Bit j is set once the special block with id j has been seen.
    int specialBlocks_usageFlags = 0;

    // Accept CRLF, LF and CR line endings. "\r\n" is listed first so that a CRLF pair
    // is consumed as one separator; empty entries are dropped in any case.
    string[] lineStrings = allText.Split(new string[] { "\r\n", "\n", "\r", }, StringSplitOptions.RemoveEmptyEntries);

    var blockList = new List<UnicodeBlockInfo>();
    for (int lineNo = 0; lineNo < lineStrings.Length; ++lineNo)
    {
        var lineStr = lineStrings[lineNo];
        var valueStrs = lineStr.Split(';');
        if (valueStrs.Length < 2)
        {
            // Report malformed data the same way ParseRange does instead of letting
            // valueStrs[1] fail with an IndexOutOfRangeException.
            throw new FormatException("Malformed block entry \"" + lineStr + "\"");
        }

        var block = new UnicodeBlockInfo();
        ParseRange(valueStrs[0], block);
        block.Name = ParseBlockName(valueStrs[1]).ToNullIfEmpty();

        int j = this.GetSpecialBlockId(block.Name);
        if (0 <= j)
        {
            if (31 < j)
            {
                // The usage mask is a 32-bit int, so only ids 0..31 can be represented.
                throw new NotSupportedException();
            }
            if ((specialBlocks_usageFlags & (1 << j)) != 0)
            {
                throw new InvalidDataException("Data contains multiple blocks with a special block name \"" + block.Name + "\"");
            }
            specialBlocks_usageFlags |= (1 << j);
            block.IsNotSupported = true;
        }

        blockList.Add(block);
    }

    // Expected mask has the low m_specialBlockNames.Length bits set; with exactly 32
    // names that is all bits (-1), which "1 << 32" could not produce.
    if (specialBlocks_usageFlags != (m_specialBlockNames.Length == 32 ? -1 : (1 << m_specialBlockNames.Length) - 1))
    {
        throw new InvalidDataException("Data does not contain all special blocks.");
    }

    blockList.Sort((b1, b2) => b1.FirstCodePoint.CompareTo(b2.FirstCodePoint));
    return blockList.ToArray();
}
/// <summary>
/// Returns the block whose range contains <paramref name="c"/>, or <c>null</c> when no
/// block contains it.
/// </summary>
/// <param name="blocks">Blocks to search; must be sorted ascending by <c>FirstCodePoint</c>, as the binary search requires.</param>
/// <param name="c">The code point to look up.</param>
/// <returns>The containing block, or <c>null</c> if <paramref name="c"/> lies before the first block, after the candidate block's last code point, or <paramref name="blocks"/> is empty.</returns>
private static UnicodeBlockInfo Find(UnicodeBlockInfo[] blocks, CodePoint c)
{
    int i = BinarySearch(blocks, c);
    if (i < 0)
    {
        // No block starts exactly at c. ~i is the index of the first block that starts
        // after c, so the only candidate is the block immediately before it.
        i = ~i - 1;
        if (i < 0)
        {
            // c precedes every block (or there are no blocks at all).
            return null;
        }
    }

    var block = blocks[i];
    if (block.LastCodePoint < c)
    {
        // c falls in the gap after the candidate block.
        return null;
    }
    return block;
}
/// <summary>
/// Binary-searches <paramref name="blocks"/> for a block whose <c>FirstCodePoint</c>
/// equals <paramref name="c"/>.
/// </summary>
/// <param name="blocks">Blocks to search; expected to be ordered ascending by <c>FirstCodePoint</c>.</param>
/// <param name="c">The code point to search for.</param>
/// <returns>
/// The index of the matching block, or the bitwise complement of the index at which the
/// search ended (a negative number) when no block starts at <paramref name="c"/>.
/// </returns>
private static int BinarySearch(UnicodeBlockInfo[] blocks, CodePoint c)
{
    int low = 0;
    int high = blocks.Length - 1;

    while (low <= high)
    {
        // Midpoint computed from the span width so that low + high cannot overflow.
        int middle = low + (high - low) / 2;
        var candidateStart = blocks[middle].FirstCodePoint;

        if (candidateStart < c)
        {
            low = middle + 1;
            continue;
        }

        if (c < candidateStart)
        {
            high = middle - 1;
            continue;
        }

        return middle;
    }

    return ~low;
}
/// <summary>
/// Parses a code point range of the form <c>HHHH..HHHH</c> (hexadecimal) and stores its
/// bounds in <paramref name="block"/>.
/// </summary>
/// <param name="s">The range text: two hexadecimal numbers separated by "..".</param>
/// <param name="block">The block whose <c>FirstCodePoint</c> and <c>LastCodePoint</c> are set.</param>
/// <exception cref="FormatException">
/// <paramref name="s"/> does not consist of exactly two hexadecimal numbers, or the range
/// is negative, reversed, or extends beyond U+10FFFF.
/// </exception>
private static void ParseRange(string s, UnicodeBlockInfo block)
{
    var hexNums = s.Split(new string[] { "..", }, StringSplitOptions.None);
    if (hexNums.Length != 2)
    {
        throw new FormatException();
    }

    int min, max;
    try
    {
        min = Convert.ToInt32(hexNums[0], 16);
        max = Convert.ToInt32(hexNums[1], 16);
    }
    catch (OverflowException)
    {
        throw new FormatException();
    }
    catch (ArgumentException)
    {
        // Convert.ToInt32(string, 16) signals an empty operand with
        // ArgumentOutOfRangeException and a leading minus sign with ArgumentException;
        // both are simply malformed input here.
        throw new FormatException();
    }

    // Base-16 conversion interprets 8-digit values such as "FFFFFFFF" as two's
    // complement, so a negative lower bound has to be rejected explicitly.
    if (min < 0 || min > max || 0x10FFFF < max)
    {
        throw new FormatException();
    }

    block.FirstCodePoint = new CodePoint(min);
    block.LastCodePoint = new CodePoint(max);
}