/// <summary>
        /// Parses a decomposition mapping: an optional <c>&lt;tag&gt;</c> prefix followed by
        /// space-separated code points, each read by <c>HexCodePoint.Parse</c>.
        /// </summary>
        /// <param name="s">The text to parse. May be <c>null</c> or empty.</param>
        /// <returns>
        /// The default <see cref="CharacterDecompositionMapping"/> when <paramref name="s"/> is null or empty;
        /// otherwise a mapping holding the parsed tag (<c>Canonical</c> when no tag is present) and the
        /// UTF-16 encoding of the parsed code points.
        /// </returns>
        /// <exception cref="FormatException">
        /// The tag is not terminated by <c>'&gt;'</c>, the tag name is not recognized, or a code point
        /// is greater than U+10FFFF.
        /// </exception>
        /// <remarks>
        /// Parsing stops silently once the internal 36-character buffer can no longer hold a full
        /// surrogate pair; any remaining input is ignored.
        /// </remarks>
        public static unsafe CharacterDecompositionMapping Parse(string s)
        {
            // From the Unicode docs, a decomposition cannot have more than 18 code points.
            const int MaxCodePointCount = 18;
            // Each code point needs at most two UTF-16 code units.
            const int BufferLength = MaxCodePointCount * 2;

            if (string.IsNullOrEmpty(s))
            {
                return default(CharacterDecompositionMapping);
            }

            CompatibilityFormattingTag tag = CompatibilityFormattingTag.Canonical;
            int index = 0;

            if (s[0] == '<')
            {
                int closingIndex = s.IndexOf('>');

                // A missing '>' is reported as a format error; passing a negative length to
                // Substring would otherwise surface as an ArgumentOutOfRangeException.
                if (closingIndex < 0
                    || !EnumHelper<CompatibilityFormattingTag>.TryGetNamedValue(s.Substring(1, closingIndex - 1), out tag))
                {
                    throw new FormatException();
                }

                // Resume right after the closing '>'.
                index = closingIndex + 1;
            }

            char* buffer    = stackalloc char[BufferLength];
            int   charIndex = 0;

            // Keep going only while two slots remain, so that a surrogate pair always fits.
            while (index < s.Length && charIndex < BufferLength - 1)
            {
                if (s[index] == ' ')
                {
                    ++index;
                    continue;
                }

                // NOTE(review): HexCodePoint.Parse is presumed to advance index past what it consumed — confirm.
                int codePoint = HexCodePoint.Parse(s, ref index);

                if (codePoint < 0x10000)
                {
                    buffer[charIndex++] = (char)codePoint;
                }
                else if (codePoint <= 0x10FFFF)
                {
                    // Encode a supplementary code point as a high/low surrogate pair.
                    // U+10FFFF is the last valid code point, hence the inclusive bound.
                    codePoint          -= 0x10000;
                    buffer[charIndex++] = (char)((codePoint >> 10) + 0xD800);
                    buffer[charIndex++] = (char)((codePoint & 0x3FF) + 0xDC00);
                }
                else
                {
                    throw new FormatException("The code point was outside of the allowed range.");
                }
            }

            return new CharacterDecompositionMapping(tag, new string(buffer, 0, charIndex));
        }
Esempio n. 2
0
        /// <summary>
        /// Moves the underlying reader to its next line and loads that line's three fields
        /// (code point, property name, property value) into this instance.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a line was read; <c>false</c> when the underlying reader had no further line,
        /// in which case the code point is reset to 0 and the property name and value to <c>null</c>.
        /// </returns>
        public bool Read()
        {
            if (!reader.MoveToNextLine())
            {
                // Nothing left to read: clear the state left over from the previous line.
                codePoint     = 0;
                propertyName  = null;
                propertyValue = null;

                return false;
            }

            // Fields are consumed in order: code point, then property name, then property value.
            codePoint     = HexCodePoint.ParsePrefixed(reader.ReadField());
            propertyName  = reader.ReadField();
            propertyValue = reader.ReadField();

            return true;
        }
        /// <summary>
        /// Reads the Unihan variants data file and stores the simplified and traditional variants
        /// on the matching entries of <paramref name="builder"/>. All other properties are skipped.
        /// </summary>
        /// <param name="unihanDataSource">The source from which the variants data file is opened.</param>
        /// <param name="builder">The builder whose Unihan entries receive the variant values.</param>
        /// <returns>A task that completes once the whole file has been processed.</returns>
        private static async Task ProcessUnihanVariants(IDataSource unihanDataSource, UnicodeInfoBuilder builder)
        {
            var dataFile = await unihanDataSource.OpenDataFileAsync(UnihanVariantsFileName).ConfigureAwait(false);

            using (var reader = new UnihanDataFileReader(dataFile))
            {
                while (reader.Read())
                {
                    var property = reader.PropertyName;

                    bool isSimplified  = property == UnihanProperty.kSimplifiedVariant;
                    bool isTraditional = property == UnihanProperty.kTraditionalVariant;

                    // Ignore unhandled properties for now; this check runs before the builder
                    // is asked for an entry.
                    if (!isSimplified && !isTraditional)
                    {
                        continue;
                    }

                    var    entry   = builder.GetUnihan(reader.CodePoint);
                    string variant = char.ConvertFromUtf32(HexCodePoint.ParsePrefixed(reader.PropertyValue));

                    if (isSimplified)
                    {
                        entry.SimplifiedVariant = variant;
                    }
                    else
                    {
                        entry.TraditionalVariant = variant;
                    }
                }
            }
        }