コード例 #1
0
        /// <summary>
        /// Reads the data file identified by <c>NameAliasesFileName</c> and adds every name alias it lists
        /// to the matching character entry obtained from <paramref name="builder"/>.
        /// </summary>
        /// <param name="ucdSource">Source from which the data file is opened.</param>
        /// <param name="builder">Builder whose character entries receive the parsed aliases.</param>
        /// <returns>A task that completes once the whole file has been processed.</returns>
        /// <exception cref="InvalidDataException">
        /// An alias kind is not a named value of <see cref="UnicodeNameAliasKind"/>, or the builder has no
        /// entry for a code point listed in the file.
        /// </exception>
        private static async Task ProcessNameAliasesFile(IDataSource ucdSource, UnicodeInfoBuilder builder)
        {
            using (var reader = new UnicodeDataFileReader(await ucdSource.OpenDataFileAsync(NameAliasesFileName).ConfigureAwait(false), ';'))
            {
                while (reader.MoveToNextLine())
                {
                    // Three fields are read per line, in order: hexadecimal code point, alias name, alias kind.
                    // The file is machine-readable, so the number is parsed with the invariant culture.
                    int codePoint = int.Parse(reader.ReadField(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                    var ucd = builder.GetUcd(codePoint);

                    string name = reader.ReadField();
                    string kindName = reader.ReadField();

                    if (!EnumHelper<UnicodeNameAliasKind>.TryGetNamedValue(kindName, out var kind))
                    {
                        throw new InvalidDataException("Unrecognized name alias: " + kindName + ".");
                    }

                    // GetUcd may yield no entry for a code point; report that as invalid data
                    // instead of letting it surface as a NullReferenceException.
                    if (ucd == null)
                    {
                        throw new InvalidDataException("No character data is defined for U+" + codePoint.ToString("X4") + ", which has a name alias.");
                    }

                    ucd.NameAliases.Add(new UnicodeNameAlias(name, kind));
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Reads the data file identified by <c>NamesListFileName</c> line by line and records the
        /// cross-references ("x" lines) found under each character entry into the matching entry
        /// obtained from <paramref name="builder"/>.
        /// </summary>
        /// <remarks>
        /// An empty line ends the current entry. A line starting with a hexadecimal digit starts a new
        /// entry. Inside an entry, a tab-indented line longer than three characters is examined for a
        /// cross-reference and otherwise ignored. Any other line starting with '@', ';' or a tab ends the
        /// current entry.
        /// </remarks>
        /// <param name="ucdSource">Source from which the data file is opened.</param>
        /// <param name="builder">Builder whose character entries receive the cross-references.</param>
        /// <returns>A task that completes once the whole file has been processed.</returns>
        /// <exception cref="InvalidDataException">
        /// A cross-reference line is malformed, an entry line has no tab after its code point, an entry
        /// targets a code point range that is not flagged as non-character, or a line starts with an
        /// unrecognized character.
        /// </exception>
        private static async Task ProcessNamesListFile(IDataSource ucdSource, UnicodeInfoBuilder builder)
        {
            using (var reader = new StreamReader(await ucdSource.OpenDataFileAsync(NamesListFileName).ConfigureAwait(false), Encoding.UTF8, false))
            {
                string line;
                UnicodeCharacterDataBuilder characterData = null;

                while ((line = reader.ReadLine()) != null)
                {
                    if (line.Length == 0)
                    {
                        characterData = null;
                        continue;
                    }

                    // Tab-indented lines belong to the current entry; only cross-references ("x") are of interest.
                    if (characterData != null && line.Length > 3 && line[0] == '\t')
                    {
                        if (line[1] == 'x')
                        {
                            // We should get at least 7 characters for a valid line: <tab> "x" <space> [0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]
                            if (line.Length < 7)
                            {
                                characterData = null;
                                continue;
                            }

                            characterData.CrossRerefences.Add(ParseNamesListCrossReference(line));
                        }
                        continue;
                    }

                    if (line[0].IsHexDigit())
                    {
                        // The code point is everything up to the first tab; a missing tab is malformed data.
                        int tabIndex = line.IndexOf('\t');
                        if (tabIndex < 0)
                        {
                            throw new InvalidDataException("Expected a tab after the code point in NamesList.txt: " + line);
                        }

                        int codePoint = int.Parse(line.Substring(0, tabIndex), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                        // This may return null, but for now, we will ignore code points that are not defined in UnicodeData.txt.
                        characterData = builder.GetUcd(codePoint);
                        // There should be no NamesList.txt entries for code points defined in a range.
                        if (characterData != null && !characterData.CodePointRange.IsSingleCodePoint)
                        {
                            // The only exception to this rule will be when we added the "Noncharacter_Code_Point" property to a few ranges, and we will ignore those.
                            if ((characterData.ContributoryProperties & ContributoryProperties.NonCharacterCodePoint) != 0)
                            {
                                characterData = null;
                            }
                            else
                            {
                                throw new InvalidDataException("Did not expect an NamesList.txt entry for U+" + codePoint.ToString("X4") + ".");
                            }
                        }
                        continue;
                    }

                    switch (line[0])
                    {
                    case '@':
                    case ';':
                    case '\t':
                        characterData = null;
                        break;

                    default:
                        throw new InvalidDataException("Unrecognized data in NamesList.txt.");
                    }
                }
            }
        }

        /// <summary>
        /// Extracts the referenced code point from a NamesList.txt cross-reference line.
        /// </summary>
        /// <param name="line">
        /// A line starting with a tab followed by "x". The caller must ensure it is at least 7 characters long.
        /// </param>
        /// <returns>The code point parsed from the hexadecimal digits of the cross-reference.</returns>
        /// <exception cref="InvalidDataException">The line does not match either accepted cross-reference form.</exception>
        private static int ParseNamesListCrossReference(string line)
        {
            if (line[2] != ' ')
            {
                throw new InvalidDataException("Malformed cross-reference in NamesList.txt: " + line);
            }

            // Bare form: the hexadecimal code point starts at index 3 and runs to the next space or the end of the line.
            if (line[3].IsHexDigit())
            {
                int end = line.IndexOf(' ', 3);
                if (end < 0)
                {
                    end = line.Length;
                }

                return int.Parse(line.Substring(3, end - 3), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
            }

            // Parenthesized form: the code point sits between a "- " separator ("> - " when the text starts with '<') and ')'.
            if (line[3] == '(')
            {
                string separator = line[4] == '<' ? "> - " : "- ";

                // Ordinal search: this is a machine-readable file, so the current culture must not influence matching.
                int separatorIndex = line.IndexOf(separator, 4, System.StringComparison.Ordinal);
                if (separatorIndex < 0)
                {
                    throw new InvalidDataException("Malformed cross-reference in NamesList.txt: " + line);
                }

                int start = separatorIndex + separator.Length;
                int close = line.IndexOf(')', start);
                if (close < 0)
                {
                    throw new InvalidDataException("Malformed cross-reference in NamesList.txt: " + line);
                }

                return int.Parse(line.Substring(start, close - start), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
            }

            throw new InvalidDataException("Malformed cross-reference in NamesList.txt: " + line);
        }