예제 #1
0
        /// <summary>Reads one serialized Unicode character data entry from <paramref name="reader"/>.</summary>
        /// <remarks>
        /// The entry starts with a 16-bit <see cref="UcdFields"/> mask. Each optional field is read only when its
        /// flag is set, in the exact order of the statements below; absent fields receive the defaults shown.
        /// </remarks>
        /// <param name="reader">The reader positioned at the first byte of the entry.</param>
        /// <param name="nameBuffer">Scratch buffer for the raw UTF-8 name bytes. Names of up to 128 bytes are written to it.</param>
        /// <param name="value">Receives the decoded entry.</param>
        /// <exception cref="EndOfStreamException">The stream ended before the entry was complete.</exception>
        /// <exception cref="InvalidDataException">A name that follows an explicit length byte is longer than 128 bytes.</exception>
        private static void ReadUnicodeCharacterDataEntry(BinaryReader reader, byte[] nameBuffer, out UnicodeCharacterData value)
        {
            var fields = (UcdFields)reader.ReadUInt16();

            // A range is stored as two code points, a single character as one.
            var codePointRange = (fields & UcdFields.CodePointRange) != 0 ? new UnicodeCodePointRange(ReadCodePoint(reader), ReadCodePoint(reader)) : new UnicodeCodePointRange(ReadCodePoint(reader));

            string name = null;

            UnicodeNameAlias[] nameAliases = UnicodeNameAlias.EmptyArray;

            // Read all the official names of the character.
            if ((fields & UcdFields.Name) != 0)
            {
                // The two high bits of the header byte select the layout of the name data.
                int  length = reader.ReadByte();
                byte @case  = (byte)(length & 0xC0);

                if (@case < 0x80)                   // Handles the case where only the name is present.
                {
                    // High bit clear: the low seven bits hold (name length - 1), so the length is 1..128.
                    length = (length & 0x7F) + 1;
                    name   = ReadUtf8Name(reader, nameBuffer, length);
                }
                else
                {
                    // High bit set: the low six bits hold (alias count - 1).
                    nameAliases = new UnicodeNameAlias[(length & 0x3F) + 1];

                    // Bit 0x40 signals that a name precedes the aliases, with its own (length - 1) byte.
                    if ((@case & 0x40) != 0)
                    {
                        length = reader.ReadByte() + 1;
                        if (length > 128)
                        {
                            throw new InvalidDataException("Did not expect names longer than 128 bytes.");
                        }
                        name = ReadUtf8Name(reader, nameBuffer, length);
                    }

                    for (int i = 0; i < nameAliases.Length; ++i)
                    {
                        nameAliases[i] = new UnicodeNameAlias(reader.ReadString(), (UnicodeNameAliasKind)(reader.ReadByte()));
                    }
                }
            }

            var category = (fields & UcdFields.Category) != 0 ? (UnicodeCategory)reader.ReadByte() : UnicodeCategory.OtherNotAssigned;
            var canonicalCombiningClass = (fields & UcdFields.CanonicalCombiningClass) != 0 ? (CanonicalCombiningClass)reader.ReadByte() : CanonicalCombiningClass.NotReordered;
            var bidirectionalClass      = (fields & UcdFields.BidirectionalClass) != 0 ? (BidirectionalClass)reader.ReadByte() : 0;
            // The decomposition flag covers two consecutive values: the formatting tag byte, then the mapping string.
            CompatibilityFormattingTag decompositionType = (fields & UcdFields.DecompositionMapping) != 0 ? (CompatibilityFormattingTag)reader.ReadByte() : CompatibilityFormattingTag.Canonical;
            string decompositionMapping        = (fields & UcdFields.DecompositionMapping) != 0 ? reader.ReadString() : null;
            // The numeric type is stored inside the field mask itself rather than as separate data.
            var    numericType                 = (UnicodeNumericType)((int)(fields & UcdFields.NumericNumeric) >> 6);
            UnicodeRationalNumber numericValue = numericType != UnicodeNumericType.None ?
                                                 new UnicodeRationalNumber(reader.ReadInt64(), reader.ReadByte()) :
                                                 default(UnicodeRationalNumber);
            string oldName = (fields & UcdFields.OldName) != 0 ? reader.ReadString() : null;
            string simpleUpperCaseMapping = (fields & UcdFields.SimpleUpperCaseMapping) != 0 ? reader.ReadString() : null;
            string simpleLowerCaseMapping = (fields & UcdFields.SimpleLowerCaseMapping) != 0 ? reader.ReadString() : null;
            string simpleTitleCaseMapping = (fields & UcdFields.SimpleTitleCaseMapping) != 0 ? reader.ReadString() : null;
            ContributoryProperties contributoryProperties = (fields & UcdFields.ContributoryProperties) != 0 ? (ContributoryProperties)reader.ReadInt32() : 0;
            int corePropertiesAndEmojiProperties          = (fields & UcdFields.CorePropertiesAndEmojiProperties) != 0 ? ReadInt24(reader) : 0;

            // The cross-reference count is stored as (count - 1) in a single byte.
            int[] crossReferences = (fields & UcdFields.CrossRerefences) != 0 ? new int[reader.ReadByte() + 1] : null;

            if (crossReferences != null)
            {
                for (int i = 0; i < crossReferences.Length; ++i)
                {
                    crossReferences[i] = ReadCodePoint(reader);
                }
            }

            value = new UnicodeCharacterData
                    (
                codePointRange,
                name,
                nameAliases,
                category,
                canonicalCombiningClass,
                bidirectionalClass,
                decompositionType,
                decompositionMapping,
                numericType,
                numericValue,
                (fields & UcdFields.BidirectionalMirrored) != 0,
                oldName,
                simpleUpperCaseMapping,
                simpleLowerCaseMapping,
                simpleTitleCaseMapping,
                contributoryProperties,
                corePropertiesAndEmojiProperties,
                crossReferences
                    );
        }

        /// <summary>Reads exactly <paramref name="length"/> bytes into <paramref name="buffer"/> and decodes them as UTF-8.</summary>
        /// <remarks>
        /// A single read call may return fewer bytes than requested without the stream being exhausted, so the read
        /// is repeated until the requested count is reached. Only a read that returns no data is treated as end of stream.
        /// </remarks>
        /// <param name="reader">The reader to pull the bytes from.</param>
        /// <param name="buffer">Destination buffer; the bytes are stored from index 0.</param>
        /// <param name="length">The number of bytes that make up the name.</param>
        /// <returns>The decoded name.</returns>
        /// <exception cref="EndOfStreamException">The stream ended before <paramref name="length"/> bytes were read.</exception>
        private static string ReadUtf8Name(BinaryReader reader, byte[] buffer, int length)
        {
            int offset = 0;

            while (offset < length)
            {
                int read = reader.Read(buffer, offset, length - offset);

                if (read <= 0)
                {
                    throw new EndOfStreamException();
                }

                offset += read;
            }

            return Encoding.UTF8.GetString(buffer, 0, length);
        }
예제 #2
0
        /// <summary>Deserializes a Unicode database from <paramref name="stream"/> into the supplied output parameters.</summary>
        /// <remarks>
        /// The data is read in this order: the three byte signature "UCD", a format version byte (which must be 2),
        /// the Unicode version, the character entries, the blocks, the CJK radicals and the Unihan entries.
        /// The <see cref="BinaryReader"/> wrapping <paramref name="stream"/> is disposed before returning, which also closes the stream.
        /// </remarks>
        /// <param name="stream">The stream containing the serialized database.</param>
        /// <param name="unicodeVersion">Receives the Unicode version stored in the file.</param>
        /// <param name="unicodeCharacterData">Receives the character entries, in file order.</param>
        /// <param name="unihanCharacterData">Receives the Unihan entries, in file order.</param>
        /// <param name="radicals">Receives the CJK radical entries, in file order.</param>
        /// <param name="blocks">Receives the block entries, in file order.</param>
        /// <param name="maxContiguousIndex">
        /// Receives the index of the last entry of the leading run of entries whose code point range contains their own index,
        /// or 0 when there is no such entry.
        /// </param>
        /// <exception cref="InvalidDataException">The signature is not "UCD" or the format version is not 2.</exception>
        internal static void ReadFromStream(Stream stream, out Version unicodeVersion, out UnicodeCharacterData[] unicodeCharacterData, out UnihanCharacterData[] unihanCharacterData, out CjkRadicalData[] radicals, out UnicodeBlock[] blocks, out int maxContiguousIndex)
        {
            using (var reader = new BinaryReader(stream, Encoding.UTF8))
            {
                // All three signature bytes are consumed before any of them is compared.
                byte signature0 = reader.ReadByte();
                byte signature1 = reader.ReadByte();
                byte signature2 = reader.ReadByte();

                if (signature0 != 'U' || signature1 != 'C' || signature2 != 'D')
                {
                    throw new InvalidDataException();
                }

                // Only version 2 of the file format is accepted.
                if (reader.ReadByte() != 2)
                {
                    throw new InvalidDataException();
                }

                int major = reader.ReadUInt16();
                int minor = reader.ReadByte();
                int build = reader.ReadByte();
                var version = new Version(major, minor, build);

                // The entry count is stored in the same encoding as a code point.
                var characters     = new UnicodeCharacterData[ReadCodePoint(reader)];
                var nameBuffer     = new byte[128];
                int lastContiguous = 0;
                int index          = 0;
                bool contiguous    = true;

                // First phase: read entries while each one still covers its own index.
                while (contiguous && index < characters.Length)
                {
                    ReadUnicodeCharacterDataEntry(reader, nameBuffer, out characters[index]);

                    contiguous = characters[index].CodePointRange.Contains(index);
                    if (contiguous)
                    {
                        lastContiguous = index;
                    }

                    ++index;
                }

                maxContiguousIndex = lastContiguous;

                // Second phase: the remaining entries are read without any further index check.
                while (index < characters.Length)
                {
                    ReadUnicodeCharacterDataEntry(reader, nameBuffer, out characters[index]);
                    ++index;
                }

                var blockList = new UnicodeBlock[reader.ReadUInt16()];

                for (int b = 0; b < blockList.Length; ++b)
                {
                    ReadBlockEntry(reader, out blockList[b]);
                }

                var radicalList = new CjkRadicalData[reader.ReadByte()];

                for (int r = 0; r < radicalList.Length; ++r)
                {
                    ReadCjkRadicalInfo(reader, out radicalList[r]);
                }

                var unihanList = new UnihanCharacterData[ReadCodePoint(reader)];

                for (int u = 0; u < unihanList.Length; ++u)
                {
                    ReadUnihanCharacterDataEntry(reader, out unihanList[u]);
                }

                // Publish the results only once everything has been read successfully.
                unicodeVersion       = version;
                unicodeCharacterData = characters;
                unihanCharacterData  = unihanList;
                radicals             = radicalList;
                blocks               = blockList;
            }
        }
예제 #3
0
        /// <summary>Deserializes a Unicode database from <paramref name="stream"/>.</summary>
        /// <remarks>
        /// The data is read in this order: the three byte signature "UCD", a format version byte (which must be 2),
        /// the Unicode version, the character entries, the blocks, the CJK radicals and the Unihan entries.
        /// The <see cref="BinaryReader"/> wrapping <paramref name="stream"/> is disposed before returning, which also closes the stream.
        /// </remarks>
        /// <param name="stream">The stream containing the serialized database.</param>
        /// <returns>
        /// A <see cref="UnicodeData"/> built from the decoded version, character entries, Unihan entries, blocks, radicals,
        /// and the index of the last entry of the leading run of entries whose code point range contains their own index.
        /// </returns>
        /// <exception cref="InvalidDataException">The signature is not "UCD" or the format version is not 2.</exception>
        public static UnicodeData ReadFromStream(Stream stream)
        {
            using (var reader = new BinaryReader(stream, Encoding.UTF8))
            {
                // All three signature bytes are consumed before any of them is compared.
                byte signature0 = reader.ReadByte();
                byte signature1 = reader.ReadByte();
                byte signature2 = reader.ReadByte();

                if (signature0 != 'U' || signature1 != 'C' || signature2 != 'D')
                {
                    throw new InvalidDataException();
                }

                // Only version 2 of the file format is accepted.
                if (reader.ReadByte() != 2)
                {
                    throw new InvalidDataException();
                }

                int major = reader.ReadUInt16();
                int minor = reader.ReadByte();
                int build = reader.ReadByte();
                var version = new Version(major, minor, build);

                // The entry count is stored in the same encoding as a code point.
                var characters     = new UnicodeCharacterData[ReadCodePoint(reader)];
                var nameBuffer     = new byte[128];
                int lastContiguous = 0;
                bool contiguous    = true;

                for (int index = 0; index < characters.Length; ++index)
                {
                    ReadUnicodeCharacterDataEntry(reader, nameBuffer, out characters[index]);

                    // The index check stops for good at the first entry that does not cover its own index.
                    if (contiguous)
                    {
                        if (characters[index].CodePointRange.Contains(index))
                        {
                            lastContiguous = index;
                        }
                        else
                        {
                            contiguous = false;
                        }
                    }
                }

                var blockList = new UnicodeBlock[reader.ReadUInt16()];

                for (int b = 0; b < blockList.Length; ++b)
                {
                    ReadBlockEntry(reader, out blockList[b]);
                }

                var radicalList = new CjkRadicalData[reader.ReadByte()];

                for (int r = 0; r < radicalList.Length; ++r)
                {
                    ReadCjkRadicalInfo(reader, out radicalList[r]);
                }

                var unihanList = new UnihanCharacterData[ReadCodePoint(reader)];

                for (int u = 0; u < unihanList.Length; ++u)
                {
                    ReadUnihanCharacterDataEntry(reader, out unihanList[u]);
                }

                return new UnicodeData(version, characters, unihanList, blockList, radicalList, lastContiguous);
            }
        }