public void RegexStateMachine()
        {
            // Diagnostic run: builds the regex lexer state machine, compiles it, invokes it twice and
            // writes the tick count of every phase (plus a readable dump) to the test output.

            // Warm-up: touch the Unicode-related classes up front so their initialization cost is
            // reported on its own line instead of being folded into the measurements below.
            var warmupWatch = Stopwatch.StartNew();
            var charSets    = new UnicodeCharSetProvider();

            charSets.GetClassSet(CharSetClass.Digit);
            charSets.GetClassSet(CharSetClass.Dot);
            charSets.GetClassSet(CharSetClass.Space);
            charSets.GetClassSet(CharSetClass.Word);
            UnicodeRanges.FromUnicodeName("InCombining_Diacritical_Marks");
            warmupWatch.Stop();
            this.output.WriteLine($"Unicode init ms: {warmupWatch.ElapsedMilliseconds}");

            // Measured section: a fresh stopwatch, sampled after each phase.
            var phaseWatch = Stopwatch.StartNew();
            RegexLexer.CreateStateMachine(out var stateMachine, out var startStateId);
            var ticksAfterBuild   = phaseWatch.ElapsedTicks;
            var step              = stateMachine.Compile();
            var ticksAfterCompile = phaseWatch.ElapsedTicks;
            var currentState      = new Id<DfaState<LetterId>>(startStateId);

            // The first invocation is the one reported as "JIT ticks".
            step(ref currentState, ' ');
            var ticksAfterFirstCall = phaseWatch.ElapsedTicks;

            // The second invocation is the one reported as "Exec ticks".
            step(ref currentState, ' ');
            phaseWatch.Stop();

            // The stopwatch is stopped, so both readings below refer to the same instant.
            var ticksTotal = phaseWatch.ElapsedTicks;

            this.output.WriteLine($"Build ticks: {ticksAfterBuild}");
            this.output.WriteLine($"Compile ticks: {ticksAfterCompile - ticksAfterBuild}");
            this.output.WriteLine($"JIT ticks: {ticksAfterFirstCall - ticksAfterCompile}");
            this.output.WriteLine($"Exec ticks: {ticksTotal - ticksAfterFirstCall}");
            this.output.WriteLine($"Total ms: {phaseWatch.ElapsedMilliseconds}");
            this.output.WriteLine(stateMachine.ToReadableString());
        }
Beispiel #2
0
        public void CharacterInSet(string ch, bool inSet, string unicodeName)
        {
            // Checks that the character described by `ch` is (or is not, per `inSet`) a member of the
            // range set returned by UnicodeRanges.FromUnicodeName(unicodeName).
            //   ch          - either a regex escape (leading backslash) or a string of exactly one char
            //   inSet       - the expected result of Contains
            //   unicodeName - the name handed to UnicodeRanges.FromUnicodeName
            Codepoint parsedChar;

            // The backslash is a syntactic marker, so test the first UTF-16 unit ordinally rather than
            // with the culture-sensitive string.StartsWith(string) overload.
            if (ch.Length > 0 && ch[0] == '\\')
            {
                // An escape must resolve to a static set holding exactly one code point.
                var escaped = (RangeSetHandle.Static)RegexMatchSet.ParseEscape(ch);

                Assert.True(escaped.TryGetSingle(out parsedChar));
            }
            else
            {
                // Literal input: Single() throws unless the string is exactly one char long.
                parsedChar = ch.Single();
            }
            Assert.Equal(inSet, UnicodeRanges.FromUnicodeName(unicodeName).Contains(parsedChar));
        }
        /// <summary>
        /// Translates a single regex escape sequence into the range set handle it stands for.
        /// </summary>
        /// <param name="escape">The escape text; it has to match the <c>rxEscape</c> pattern.</param>
        /// <returns>
        /// A <see cref="RangeSetHandle.Static"/> for named Unicode sets (<c>name</c> group), hex code points
        /// (<c>hex</c> group), the control shorthands <c>0 r n t a e f v</c> and any escaped character that is
        /// neither a letter nor a digit; a <see cref="RangeSetHandle.Class"/> for <c>d D w W s S</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="escape"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="escape"/> does not match <c>rxEscape</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The escaped character is a letter or digit that has no meaning assigned here.
        /// </exception>
        public static RangeSetHandle ParseEscape(string escape)
        {
            // Fail with the caller's parameter name instead of letting Regex.Match report "input".
            if (escape == null)
            {
                throw new ArgumentNullException(nameof(escape));
            }

            var match = rxEscape.Match(escape);

            if (!match.Success)
            {
                throw new ArgumentException("Escape is invalid", nameof(escape));
            }

            var nameGroup = match.Groups["name"];

            if (nameGroup.Success)
            {
                // The flag is true when the "c" group is an upper-case "P"
                // (presumably the negated form; confirm against RangeSetHandle.Static).
                return new RangeSetHandle.Static(UnicodeRanges.FromUnicodeName(nameGroup.Value), match.Groups["c"].Value == "P");
            }

            var hexGroup = match.Groups["hex"];

            if (hexGroup.Success)
            {
                return new RangeSetHandle.Static(Codepoint.Parse(hexGroup.Value));
            }

            var c = match.Groups["c"].Value[0];

            switch (c)
            {
                // Control-character shorthands.
                case '0':
                    return new RangeSetHandle.Static('\0');

                case 'r':
                    return new RangeSetHandle.Static('\r');

                case 'n':
                    return new RangeSetHandle.Static('\n');

                case 't':
                    return new RangeSetHandle.Static('\t');

                case 'a':
                    return new RangeSetHandle.Static('\x07');

                case 'e':
                    return new RangeSetHandle.Static('\x1B');

                case 'f':
                    return new RangeSetHandle.Static('\x0C');

                case 'v':
                    return new RangeSetHandle.Static('\x0B');

                // Character classes: lower case passes false, upper case passes true as the second
                // argument (presumably the negation flag; confirm against RangeSetHandle.Class).
                case 'd':
                    return new RangeSetHandle.Class(CharSetClass.Digit, false);

                case 'D':
                    return new RangeSetHandle.Class(CharSetClass.Digit, true);

                case 'w':
                    return new RangeSetHandle.Class(CharSetClass.Word, false);

                case 'W':
                    return new RangeSetHandle.Class(CharSetClass.Word, true);

                case 's':
                    return new RangeSetHandle.Class(CharSetClass.Space, false);

                case 'S':
                    return new RangeSetHandle.Class(CharSetClass.Space, true);

                default:
                    // Remaining letters and digits are rejected; anything else is taken literally.
                    if (char.IsLetterOrDigit(c))
                    {
                        throw new ArgumentOutOfRangeException(nameof(escape), "Invalid escape character " + c);
                    }
                    return new RangeSetHandle.Static(c);
            }
        }
 /// <summary>
 /// Creates the match set for a named Unicode range, labelled with its <c>\p{name}</c> escape text
 /// (or <c>\P{name}</c> when <paramref name="negate"/> is <c>true</c>).
 /// </summary>
 /// <param name="name">The name handed to <c>UnicodeRanges.FromUnicodeName</c>.</param>
 /// <param name="negate">Selects the upper-case <c>\P</c> form and is forwarded to the range set handle.</param>
 /// <returns>A new <see cref="RegexMatchSet"/> built from the escape text and the range set handle.</returns>
 public static RegexMatchSet FromUnicode(string name, bool negate = false)
 {
     // Assemble the escape text first, then the handle - the same order the original expression used.
     var escapePrefix = negate ? @"\P" : @"\p";
     var escapeText   = escapePrefix + "{" + name + "}";
     var rangeHandle  = new RangeSetHandle.Static(UnicodeRanges.FromUnicodeName(name), negate);

     return new RegexMatchSet(escapeText, rangeHandle);
 }
Beispiel #5
0
        private bool CheckUnicodeRanges(Validator v, OTFont fontOwner)
        {
            bool bRet = true;
            bool bOk = true;

            if (version == 0)
            {
                if (ulUnicodeRange1 != 0)
                {
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "Range[0] was undefined. All bits must be 0.");
                    bOk = false;
                }
                if (ulUnicodeRange2 != 0)
                {
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "Range[1] was undefined. All bits must be 0.");
                    bOk = false;
                }
                if (ulUnicodeRange3 != 0)
                {
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "Range[2] was undefined. All bits must be 0.");
                    bOk = false;
                }
                if (ulUnicodeRange4 != 0)
                {
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "Range[3] was undefined. All bits must be 0.");
                    bOk = false;
                }

                if (bOk)
                {
                    v.Pass(P.OS_2_P_UnicodeRanges, m_tag);
                }
                else
                {
                    bRet = false;
                }

                return bRet;
            }

            // count the number of entries in each unicode range in the cmap subtable
            

            UnicodeRanges ur = new UnicodeRanges();
            for (uint c = 0; c < 0xffff; c++)
            {
                // check if c is mapped to a glyph
                uint iGlyph = fontOwner.FastMapUnicodeToGlyphID((char)c);
                if (iGlyph != 0)
                {
                    UnicodeRanges.Range r = ur.GetRange(c);
                    if (r != null)
                    {
                        r.count++;
                    }
                }
            }


            uint nCharsInRange;


            // bit 0
            uint BASIC_LATIN_LOW                     = 0x0020; 
            nCharsInRange = ur.GetRange(BASIC_LATIN_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000001, nCharsInRange, "Basic Latin");

            // bit 1
            uint LATIN_1_SUPPLEMENT_LOW                = 0x00A0; 
            nCharsInRange = ur.GetRange(LATIN_1_SUPPLEMENT_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000002, nCharsInRange, "Latin-1 Supplement");

            // bit 2
            uint LATIN_EXTENDED_A_LOW                = 0x0100; 
            nCharsInRange = ur.GetRange(LATIN_EXTENDED_A_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000004, nCharsInRange, "Latin Extended-A");

            // bit 3
            uint LATIN_EXTENDED_B_LOW                = 0x0180; 
            nCharsInRange = ur.GetRange(LATIN_EXTENDED_B_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000008, nCharsInRange, "Latin Extended-B");

            // bit 4
            uint IPA_EXTENSIONS_LOW                    = 0x0250; 
            nCharsInRange = ur.GetRange(IPA_EXTENSIONS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000010, nCharsInRange, "IPA Extensions");

            // bit 5
            uint SPACING_MODIFIER_LETTERS_LOW        = 0x02B0; 
            nCharsInRange = ur.GetRange(SPACING_MODIFIER_LETTERS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000020, nCharsInRange, "Spacing Modifier Letters");

            // bit 6
            uint COMBINING_DIACRITICAL_MARKS_LOW     = 0x0300; 
            nCharsInRange = ur.GetRange(COMBINING_DIACRITICAL_MARKS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000040, nCharsInRange, "Combining Diacritical Marks");

            // bit 7
            uint GREEK_LOW                             = 0x0370; 
            nCharsInRange = ur.GetRange(GREEK_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000080, nCharsInRange, "Greek");
            
            // bit 8 reserved
            if ((ulUnicodeRange1 & 0x00000100) != 0)
            {
                bOk = false;
                v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #8");
            }

            // bit 9
            uint CYRILLIC_LOW                        = 0x0400; 
            uint CYRILLIC_SUPPLEMENTARY_LOW         = 0x0500;
            nCharsInRange = ur.GetRange(CYRILLIC_LOW).count
                          + ur.GetRange(CYRILLIC_SUPPLEMENTARY_LOW).count;
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange1, 0x00000200, nCharsInRange, "Cyrillic, Cyrillic Supplementary");
            
            // bit 10
            uint ARMENIAN_LOW                        = 0x0530; 
            nCharsInRange = ur.GetRange(ARMENIAN_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000400, nCharsInRange, "Armenian");
            
            // bit 11
            uint HEBREW_LOW                            = 0x0590; 
            nCharsInRange = ur.GetRange(HEBREW_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000800, nCharsInRange, "Hebrew");
            
            if (version > 1)
            {
                // bit 12 reserved
                if ((ulUnicodeRange1 & 0x00001000) != 0)
                {
                    bOk = false;
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #12");
                }
            }

            // bit 13
            uint ARABIC_LOW                            = 0x0600; 
            nCharsInRange = ur.GetRange(ARABIC_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00002000, nCharsInRange, "Arabic");
            
            if (version > 1)
            {
                // bit 14 reserved
                if ((ulUnicodeRange1 & 0x00004000) != 0)
                {
                    bOk = false;
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #14");
                }
            }

            // bit 15
            uint DEVANAGARI_LOW                        = 0x0900; 
            nCharsInRange = ur.GetRange(DEVANAGARI_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00008000, nCharsInRange, "Devanagari");
            
            // bit 16
            uint BENGALI_LOW                         = 0x0980; 
            nCharsInRange = ur.GetRange(BENGALI_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00010000, nCharsInRange, "Bengali");
            
            // bit 17
            uint GURMUKHI_LOW                        = 0x0A00; 
            nCharsInRange = ur.GetRange(GURMUKHI_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00020000, nCharsInRange, "Gurmukhi");
            
            // bit 18
            uint GUJARATI_LOW                        = 0x0A80; 
            nCharsInRange = ur.GetRange(GUJARATI_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00040000, nCharsInRange, "Gujarati");
            
            // bit 19
            uint ORIYA_LOW                            = 0x0B00; 
            nCharsInRange = ur.GetRange(ORIYA_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00080000, nCharsInRange, "Oriya");
            
            // bit 20
            uint TAMIL_LOW                            = 0x0B80; 
            nCharsInRange = ur.GetRange(TAMIL_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00100000, nCharsInRange, "Tamil");
            
            // bit 21
            uint TELUGU_LOW                            = 0x0C00; 
            nCharsInRange = ur.GetRange(TELUGU_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00200000, nCharsInRange, "Telugu");
            
            // bit 22
            uint KANNADA_LOW                         = 0x0C80; 
            nCharsInRange = ur.GetRange(KANNADA_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00400000, nCharsInRange, "Kannada");
            
            // bit 23
            uint MALAYALAM_LOW                        = 0x0D00; 
            nCharsInRange = ur.GetRange(MALAYALAM_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00800000, nCharsInRange, "Malayalam");
            
            // bit 24
            uint THAI_LOW                            = 0x0E00; 
            nCharsInRange = ur.GetRange(THAI_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x01000000, nCharsInRange, "Thai");
            
            // bit 25
            uint LAO_LOW                             = 0x0E80; 
            nCharsInRange = ur.GetRange(LAO_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x02000000, nCharsInRange, "Lao");
            
            // bit 26
            uint GEORGIAN_LOW                        = 0x10A0; 
            nCharsInRange = ur.GetRange(GEORGIAN_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x04000000, nCharsInRange, "Georgian");
            
            if (version > 1)
            {
                // bit 27 reserved
                if ((ulUnicodeRange1 & 0x08000000) != 0)
                {
                    bOk = false;
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #27");
                }
            }
            
            // bit 28
            uint HANGUL_JAMO_LOW                     = 0x1100; 
            nCharsInRange = ur.GetRange(HANGUL_JAMO_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x10000000, nCharsInRange, "Hangul Jamo");
            
            // bit 29
            uint LATIN_EXTENDED_ADDITIONAL_LOW        = 0x1E00; 
            nCharsInRange = ur.GetRange(LATIN_EXTENDED_ADDITIONAL_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x20000000, nCharsInRange, "Latin Extended Additional");
            
            // bit 30
            uint GREEK_EXTENDED_LOW                    = 0x1F00; 
            nCharsInRange = ur.GetRange(GREEK_EXTENDED_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x40000000, nCharsInRange, "Greek Extended");
            
            // bit 31
            uint GENERAL_PUNCTUATION_LOW             = 0x2000; 
            nCharsInRange = ur.GetRange(GENERAL_PUNCTUATION_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x80000000, nCharsInRange, "General Punctuation");


            // bit 32
            uint SUPER_SUB_SCRIPTS_LOW                = 0x2070; 
            nCharsInRange = ur.GetRange(SUPER_SUB_SCRIPTS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000001, nCharsInRange, "Superscripts and Subscripts");
            
            // bit 33
            uint CURRENCY_SYMBOLS_LOW                = 0x20A0; 
            nCharsInRange = ur.GetRange(CURRENCY_SYMBOLS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000002, nCharsInRange, "Currency Symbols");
            
            // bit 34
            uint SYMBOL_COMBINING_MARKS_LOW            = 0x20D0; 
            nCharsInRange = ur.GetRange(SYMBOL_COMBINING_MARKS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000004, nCharsInRange, "Combining Diacritical marks for symbols");
            
            // bit 35
            uint LETTERLIKE_SYMBOLS_LOW                = 0x2100; 
            nCharsInRange = ur.GetRange(LETTERLIKE_SYMBOLS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000008, nCharsInRange, "Letterlike Symbols");
            
            // bit 36
            uint NUMBER_FORMS_LOW                    = 0x2150; 
            nCharsInRange = ur.GetRange(NUMBER_FORMS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000010, nCharsInRange, "Number Forms");
            
            // bit 37
            uint ARROWS_LOW                            = 0x2190; 
            uint SUPPLEMENTAL_ARROWS_A_LOW            = 0x27F0;
            uint SUPPLEMENTAL_ARROWS_B_LOW            = 0x2900;
            nCharsInRange = ur.GetRange(ARROWS_LOW).count
                          + ur.GetRange(SUPPLEMENTAL_ARROWS_A_LOW).count
                          + ur.GetRange(SUPPLEMENTAL_ARROWS_B_LOW).count; 
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange2, 0x00000020, nCharsInRange, "Arrows, Supplementary Arrows A, Supplementary Arrows B");

            // bit 38
            uint MATH_OPERATORS_LOW                    = 0x2200; 
            uint SUPPLEMENTAL_MATH_OPERATORS_LOW    = 0x2A00;
            uint MISC_MATH_SYMBOLS_A_LOW            = 0x27C0;
            uint MISC_MATH_SYMBOLS_B_LOW            = 0x2980;
            nCharsInRange = ur.GetRange(MATH_OPERATORS_LOW).count
                          + ur.GetRange(SUPPLEMENTAL_MATH_OPERATORS_LOW).count
                          + ur.GetRange(MISC_MATH_SYMBOLS_A_LOW).count
                          + ur.GetRange(MISC_MATH_SYMBOLS_B_LOW).count;
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange2, 0x00000040, nCharsInRange, "Mathematical Operators, Supplemental Mathematical Operators, Mathematical Symbols A, Mathematical Symbols B");
            
            // bit 39
            uint MISC_TECHNICAL_LOW                    = 0x2300; 
            nCharsInRange = ur.GetRange(MISC_TECHNICAL_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000080, nCharsInRange, "Miscellaneous Technical");
            
            // bit 40
            uint CONTROL_PICTURES_LOW                = 0x2400; 
            nCharsInRange = ur.GetRange(CONTROL_PICTURES_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000100, nCharsInRange, "Control Pictures");
            
            // bit 41
            uint OCR_LOW                             = 0x2440; 
            nCharsInRange = ur.GetRange(OCR_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000200, nCharsInRange, "Optical Character Recognition");
            
            // bit 42
            uint ENCLOSED_ALPHANUMERICS_LOW            = 0x2460; 
            nCharsInRange = ur.GetRange(ENCLOSED_ALPHANUMERICS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000400, nCharsInRange, "Enclosed Alphanumerics");
            
            // bit 43
            uint BOX_DRAWING_LOW                     = 0x2500; 
            nCharsInRange = ur.GetRange(BOX_DRAWING_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000800, nCharsInRange, "Box Drawing");
            
            // bit 44
            uint BLOCK_ELEMENTS_LOW                    = 0x2580; 
            nCharsInRange = ur.GetRange(BLOCK_ELEMENTS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00001000, nCharsInRange, "Block Elements");
            
            // bit 45
            uint GEOMETRIC_SHAPES_LOW                = 0x25A0; 
            nCharsInRange = ur.GetRange(GEOMETRIC_SHAPES_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00002000, nCharsInRange, "Geometric Shapes");
            
            // bit 46
            uint MISC_SYMBOLS_LOW                    = 0x2600; 
            nCharsInRange = ur.GetRange(MISC_SYMBOLS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00004000, nCharsInRange, "Miscellaneous Symbols");
            
            // bit 47
            uint DINGBATS_LOW                        = 0x2700; 
            nCharsInRange = ur.GetRange(DINGBATS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00008000, nCharsInRange, "Dingbats");
            
            // bit 48
            uint CJK_SYMBOLS_PUNCTUATION_LOW         = 0x3000; 
            nCharsInRange = ur.GetRange(CJK_SYMBOLS_PUNCTUATION_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00010000, nCharsInRange, "CJK Symbols and Punctuation");
            
            // bit 49
            uint HIRAGANA_LOW                        = 0x3040; 
            nCharsInRange = ur.GetRange(HIRAGANA_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00020000, nCharsInRange, "Hiragana");
            
            // bit 50
            uint KATAKANA_LOW                        = 0x30A0; 
            nCharsInRange = ur.GetRange(KATAKANA_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00040000, nCharsInRange, "Katakana");
            
            // bit 51
            uint BOPOMOFO_LOW                        = 0x3100; 
            uint BOPOMOFO_EXTENDED_LOW                = 0x31A0;
            nCharsInRange = ur.GetRange(BOPOMOFO_LOW).count
                          + ur.GetRange(BOPOMOFO_EXTENDED_LOW).count; 
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange2, 0x00080000, nCharsInRange, "Bopomofo, Bopomofo Extended");
            
            // bit 52
            uint HANGUL_COMPAT_JAMO_LOW                = 0x3130; 
            nCharsInRange = ur.GetRange(HANGUL_COMPAT_JAMO_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00100000, nCharsInRange, "Hangul Compatibility Jamo");
            
            // bit 53 !!! OT Spec 1.3 says bit 53 is CJK Misc !!!
            //        !!! Since there's no unicode range name !!!
            //        !!! that maps nicely to CJK Misc,       !!!
            //        !!! I naturally just ignore it.         !!!
            /*
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00200000, nCharsInRange, "CJK Miscellaneous");
            */
            
            // bit 54
            uint ENCLOSED_CJK_LETTERS_MONTHS_LOW     = 0x3200; 
            nCharsInRange = ur.GetRange(ENCLOSED_CJK_LETTERS_MONTHS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00400000, nCharsInRange, "Enclosed CJK Letters and Months");
            
            // bit 55
            uint CJK_COMPATIBILITY_LOW                = 0x3300; 
            nCharsInRange = ur.GetRange(CJK_COMPATIBILITY_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00800000, nCharsInRange, "CJK Compatibility");
            
            // bit 56
            uint HANGUL_LOW                            = 0xAC00; 
            nCharsInRange = ur.GetRange(HANGUL_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x01000000, nCharsInRange, "Hangul");
            
            // bit 57 surrogates
            if (version > 1)
            {
                bool bSurrogatesCmapPresent = false;
                Table_cmap cmapTable = (Table_cmap)fontOwner.GetTable("cmap");
                if (cmapTable != null)
                {
                    Table_cmap.EncodingTableEntry eteUniSurrogates = cmapTable.GetEncodingTableEntry(3,10);
                    if (eteUniSurrogates != null)
                        bSurrogatesCmapPresent = true;
                }

                if ((ulUnicodeRange2 & 0x02000000) == 0)
                {
                    if (bSurrogatesCmapPresent)
                    {
                        v.Error(T.T_NULL, E.OS_2_E_SurrogatesBitClear, m_tag);
                        bOk = false;
                    }
                }
                else
                {
                    if (!bSurrogatesCmapPresent)
                    {
                        v.Error(T.T_NULL, E.OS_2_E_SurrogatesBitSet, m_tag);
                        bOk = false;
                    }
                }
            }

            // bit 58 reserved
            if ((ulUnicodeRange2 & 0x04000000) != 0)
            {
                bOk = false;
                v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #58");
            }

            // bit 59
            uint CJK_UNIFIED_IDEOGRAPHS_LOW            = 0x4E00; 
            uint CJK_RADICALS_SUPPLEMENT_LOW        = 0x2E80;
            uint KANGXI_RADICALS_LOW                = 0x2F00;
            uint IDEOGRAPHIC_DESCRIPTION_CHARS_LOW    = 0x2FF0;
            uint CJK_UNIFIED_IDEOGRAPHS_EXT_A_LOW   = 0x3400;
            uint CJK_UNIFIED_IDEOGRAPHS_EXT_B_LOW   = 0x20000;
            uint KANBUN_LOW                         = 0x3190;
            nCharsInRange = ur.GetRange(CJK_UNIFIED_IDEOGRAPHS_LOW).count
                          + ur.GetRange(CJK_RADICALS_SUPPLEMENT_LOW).count
                          + ur.GetRange(KANGXI_RADICALS_LOW).count
                          + ur.GetRange(IDEOGRAPHIC_DESCRIPTION_CHARS_LOW).count
                          + ur.GetRange(CJK_UNIFIED_IDEOGRAPHS_EXT_A_LOW).count
                          + ur.GetRange(CJK_UNIFIED_IDEOGRAPHS_EXT_B_LOW).count
                          + ur.GetRange(KANBUN_LOW).count; 
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange2, 0x08000000, nCharsInRange, "CJK Unified Ideographs, CJK Radicals Supplement, Kangxi Radicals, Ideographic Description Chars, CJK Unified Ideographs Extended A, CJK Unified Ideographs Extended B, Kanbun");
            
            // bit 60
            uint PRIVATE_USE_AREA_LOW                = 0xE000; 
            if (!fontOwner.ContainsMsSymbolEncodedCmap())
            {
                nCharsInRange = ur.GetRange(PRIVATE_USE_AREA_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x10000000, nCharsInRange, "Private Use Area");
            }
            
            // bit 61
            uint CJK_COMPATIBILITY_IDEOGRAPHS_LOW    = 0xF900; 
            uint CJK_COMPATIBILITY_IDEO_SUPP_LOW    = 0x2F800;
            nCharsInRange = ur.GetRange(CJK_COMPATIBILITY_IDEOGRAPHS_LOW).count
                          + ur.GetRange(CJK_COMPATIBILITY_IDEO_SUPP_LOW).count; 
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange2, 0x20000000, nCharsInRange, "CJK Compatibility Ideographs, CJK Compatibility Ideographs Supplement");
            
            // bit 62
            uint ALPHABETIC_PRESENTATION_FORMS_LOW    = 0xFB00; 
            nCharsInRange = ur.GetRange(ALPHABETIC_PRESENTATION_FORMS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x40000000, nCharsInRange, "Alphabetic Presentation Forms");
            
            // bit 63
            uint ARABIC_PRESENTATION_FORMS_A_LOW     = 0xFB50; 
            nCharsInRange = ur.GetRange(ARABIC_PRESENTATION_FORMS_A_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x80000000, nCharsInRange, "Arabic Presentation Forms-A");
            

            
            // bit 64
            uint COMBINING_HALF_MARKS_LOW            = 0xFE20; 
            nCharsInRange = ur.GetRange(COMBINING_HALF_MARKS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000001, nCharsInRange, "Combining Half Marks");
            
            // bit 65
            uint CJK_COMPATIBILITY_FORMS_LOW         = 0xFE30; 
            nCharsInRange = ur.GetRange(CJK_COMPATIBILITY_FORMS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000002, nCharsInRange, "CJK Compatibility Forms");
            
            // bit 66
            uint SMALL_FORM_VARIANTS_LOW             = 0xFE50; 
            nCharsInRange = ur.GetRange(SMALL_FORM_VARIANTS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000004, nCharsInRange, "Small Form Variants");
            
            // bit 67
            uint ARABIC_PRESENTATION_FORMS_B_LOW     = 0xFE70; 
            nCharsInRange = ur.GetRange(ARABIC_PRESENTATION_FORMS_B_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000008, nCharsInRange, "Arabic Presentation Forms-B");
            
            // bit 68
            uint HALFWIDTH_FULLWIDTH_FORMS_LOW        = 0xFF00; 
            nCharsInRange = ur.GetRange(HALFWIDTH_FULLWIDTH_FORMS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000010, nCharsInRange, "Halfwidth and Fullwidth Forms");
            
            // bit 69
            uint SPECIALS_LOW                        = 0xFFF0; 
            nCharsInRange = ur.GetRange(SPECIALS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000020, nCharsInRange, "Specials");

            if (version < 2)
            {
                for (int bitpos = 6; bitpos < 32; bitpos++)
                {
                    if ((ulUnicodeRange3 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (64+bitpos)); }
                }

                for (int bitpos = 0; bitpos < 32; bitpos++)
                {
                    if ((ulUnicodeRange4 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (96+bitpos)); }
                }
            }
            else
            {
                // bit 70
                uint TIBETAN_LOW                        = 0x0F00; 
                nCharsInRange = ur.GetRange(TIBETAN_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000040, nCharsInRange, "Tibetan");
                
                // bit 71
                uint SYRIAC_LOW                            = 0x0700; 
                nCharsInRange = ur.GetRange(SYRIAC_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000080, nCharsInRange, "Syriac");
                
                // bit 72
                uint THAANA_LOW                            = 0x0780; 
                nCharsInRange = ur.GetRange(THAANA_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000100, nCharsInRange, "Thaana");
                
                // bit 73
                uint SINHALA_LOW                        = 0x0D80; 
                nCharsInRange = ur.GetRange(SINHALA_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000200, nCharsInRange, "Sinhala");
                
                // bit 74
                uint MYANMAR_LOW                        = 0x1000; 
                nCharsInRange = ur.GetRange(MYANMAR_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000400, nCharsInRange, "Myanmar");
                
                // bit 75
                uint ETHIOPIC_LOW                        = 0x1200; 
                nCharsInRange = ur.GetRange(ETHIOPIC_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000800, nCharsInRange, "Ethiopic");
                
                // bit 76
                uint CHEROKEE_LOW                        = 0x13A0; 
                nCharsInRange = ur.GetRange(CHEROKEE_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00001000, nCharsInRange, "Cherokee");
                
                // bit 77
                uint UNIFIED_CANADIAN_AB_SYL_LOW        = 0x1400; 
                nCharsInRange = ur.GetRange(UNIFIED_CANADIAN_AB_SYL_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00002000, nCharsInRange, "Unified Canadian Syllabics");
                
                // bit 78
                uint OGHAM_LOW                            = 0x1680; 
                nCharsInRange = ur.GetRange(OGHAM_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00004000, nCharsInRange, "Ogham");
                
                // bit 79
                uint RUNIC_LOW                            = 0x16A0; 
                nCharsInRange = ur.GetRange(RUNIC_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00008000, nCharsInRange, "Runic");
                
                // bit 80
                uint KHMER_LOW                            = 0x1780; 
                nCharsInRange = ur.GetRange(KHMER_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00010000, nCharsInRange, "Khmer");
                
                // bit 81
                uint MONGOLIAN_LOW                        = 0x1800; 
                nCharsInRange = ur.GetRange(MONGOLIAN_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00020000, nCharsInRange, "Mongolian");
                
                // bit 82
                uint BRAILLE_PATTERNS_LOW                = 0x2800; 
                nCharsInRange = ur.GetRange(BRAILLE_PATTERNS_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00040000, nCharsInRange, "Braille");
                
                // bit 83
                uint YI_LOW                                = 0xA000; 
                uint YI_RADICALS_LOW                    = 0xA490;
                nCharsInRange = ur.GetRange(YI_LOW).count 
                              + ur.GetRange(YI_RADICALS_LOW).count;
                bOk &= VerifyUnicodeRanges(v, ulUnicodeRange3, 0x00080000, nCharsInRange, "Yi, Yi Radicals");
                

                if (version < 3)
                {
                    for (int bitpos = 20; bitpos < 32; bitpos++)
                    {
                        if ((ulUnicodeRange3 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (64+bitpos)); }
                    }

                    for (int bitpos = 0; bitpos < 32; bitpos++)
                    {
                        if ((ulUnicodeRange4 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (96+bitpos)); }
                    }
                }
                else
                {
                    // bit 84
                    uint TAGALOG_LOW                    = 0x1700;
                    uint HANUNOO_LOW                    = 0x1720;
                    uint BUHID_LOW                      = 0x1740;
                    uint TAGBANWA_LOW                   = 0x1760;
                    nCharsInRange = ur.GetRange(TAGALOG_LOW).count
                                  + ur.GetRange(HANUNOO_LOW).count
                                  + ur.GetRange(BUHID_LOW).count
                                  + ur.GetRange(TAGBANWA_LOW).count;
                    bOk &= VerifyUnicodeRanges(v, ulUnicodeRange3, 0x00100000, nCharsInRange, "Tagalog, Hanunoo, Buhid, Tagbanwa");

                    // bit 85
                    uint OLD_ITALIC_LOW                 = 0x10300;
                    nCharsInRange = ur.GetRange(OLD_ITALIC_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00200000, nCharsInRange, "Old Italic");

                    // bit 86
                    uint GOTHIC_LOW                     = 0x10330;
                    nCharsInRange = ur.GetRange(GOTHIC_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00400000, nCharsInRange, "Gothic");

                    // bit 87
                    uint DESERET_LOW                    = 0x10400;
                    nCharsInRange = ur.GetRange(DESERET_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00800000, nCharsInRange, "Deseret");

                    // bit 88
                    uint BYZANTINE_MUSICAL_SYMBOLS_LOW  = 0x1D000;
                    uint MUSICAL_SYMBOLS_LOW            = 0x1D100;
                    nCharsInRange = ur.GetRange(BYZANTINE_MUSICAL_SYMBOLS_LOW).count
                                  + ur.GetRange(MUSICAL_SYMBOLS_LOW).count;
                    bOk &= VerifyUnicodeRanges(v, ulUnicodeRange3, 0x01000000, nCharsInRange, "Byzantine Musical Symbols, Musical Symbols");

                    // bit 89
                    uint MATHEMATICAL_ALPHANUMERIC_LOW  = 0x1D400;
                    nCharsInRange = ur.GetRange(MATHEMATICAL_ALPHANUMERIC_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x02000000, nCharsInRange, "Mathematical Alphanumeric Symbols");

                    // bit 90
                    uint PRIVATE_USE_15_LOW             = 0xFFF80;
                    uint PRIVATE_USE_16_LOW             = 0x10FF80;
                    nCharsInRange = ur.GetRange(PRIVATE_USE_15_LOW).count
                                  + ur.GetRange(PRIVATE_USE_16_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x04000000, nCharsInRange, "Private Use (Plane 15), Private Use (Plane 16)");

                    // bit 91
                    uint VARIATION_SELECTORS_LOW         = 0xE0100;
                    nCharsInRange = ur.GetRange(VARIATION_SELECTORS_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x08000000, nCharsInRange, "Variation Selectors");

                    // bit 92
                    uint TAGS_LOW                       = 0xE0000;
                    nCharsInRange = ur.GetRange(TAGS_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x10000000, nCharsInRange, "Tags");

                    for (int bitpos = 29; bitpos < 32; bitpos++)
                    {
                        if ((ulUnicodeRange3 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (64+bitpos)); }
                    }

                    for (int bitpos = 0; bitpos < 32; bitpos++)
                    {
                        if ((ulUnicodeRange4 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (96+bitpos)); }
                    }
                }
            }

            if (bOk)
            {
                v.Pass(P.OS_2_P_UnicodeRanges, m_tag);
            }
            else
            {
                bRet = false;
            }

            return bRet;
        }
Beispiel #6
0
        private bool CheckUnicodeRanges(Validator v, OTFont fontOwner)
        {
            bool bRet = true;
            bool bOk = true;

            if (version == 0)
            {
                if (ulUnicodeRange1 != 0)
                {
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "Range[0] was undefined. All bits must be 0.");
                    bOk = false;
                }
                if (ulUnicodeRange2 != 0)
                {
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "Range[1] was undefined. All bits must be 0.");
                    bOk = false;
                }
                if (ulUnicodeRange3 != 0)
                {
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "Range[2] was undefined. All bits must be 0.");
                    bOk = false;
                }
                if (ulUnicodeRange4 != 0)
                {
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "Range[3] was undefined. All bits must be 0.");
                    bOk = false;
                }

                if (bOk)
                {
                    v.Pass(P.OS_2_P_UnicodeRanges, m_tag);
                }
                else
                {
                    bRet = false;
                }

                return bRet;
            }

            // count the number of entries in each unicode range in the cmap subtable

            UnicodeRanges ur = new UnicodeRanges();
            for (uint c = 0; c < 0xffff; c++)
            {
                // check if c is mapped to a glyph
                uint iGlyph = fontOwner.FastMapUnicodeToGlyphID((char)c);
                if (iGlyph != 0)
                {
                    UnicodeRanges.Range r = ur.GetRange(c);
                    if (r != null)
                    {
                        r.count++;
                    }
                }
            }

            uint nCharsInRange;

            // bit 0
            uint BASIC_LATIN_LOW                     = 0x0020;
            nCharsInRange = ur.GetRange(BASIC_LATIN_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000001, nCharsInRange, "Basic Latin");

            // bit 1
            uint LATIN_1_SUPPLEMENT_LOW                = 0x00A0;
            nCharsInRange = ur.GetRange(LATIN_1_SUPPLEMENT_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000002, nCharsInRange, "Latin-1 Supplement");

            // bit 2
            uint LATIN_EXTENDED_A_LOW                = 0x0100;
            nCharsInRange = ur.GetRange(LATIN_EXTENDED_A_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000004, nCharsInRange, "Latin Extended-A");

            // bit 3
            uint LATIN_EXTENDED_B_LOW                = 0x0180;
            nCharsInRange = ur.GetRange(LATIN_EXTENDED_B_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000008, nCharsInRange, "Latin Extended-B");

            // bit 4
            uint IPA_EXTENSIONS_LOW                    = 0x0250;
            uint Phonetic_Extensions                   = 0x1D00;
            uint Phonetic_Extensions_Supplement        = 0x1D80;
            nCharsInRange = ur.GetRange(IPA_EXTENSIONS_LOW).count
                + ur.GetRange(Phonetic_Extensions).count
                + ur.GetRange(Phonetic_Extensions_Supplement).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000010, nCharsInRange, "IPA Extensions");

            // bit 5
            uint SPACING_MODIFIER_LETTERS_LOW        = 0x02B0;
            uint Modifier_Tone_Letters               = 0xA700;
            nCharsInRange = ur.GetRange(SPACING_MODIFIER_LETTERS_LOW).count
                + ur.GetRange(Modifier_Tone_Letters).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000020, nCharsInRange, "Spacing Modifier Letters");

            // bit 6
            uint COMBINING_DIACRITICAL_MARKS_LOW     = 0x0300;
            uint Combining_Diacritical_Marks_Supplement = 0x1DC0;
            nCharsInRange = ur.GetRange(COMBINING_DIACRITICAL_MARKS_LOW).count
                + ur.GetRange(Combining_Diacritical_Marks_Supplement).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000040, nCharsInRange, "Combining Diacritical Marks");

            // bit 7
            uint GREEK_LOW                             = 0x0370;
            nCharsInRange = ur.GetRange(GREEK_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000080, nCharsInRange, "Greek");

            // v1: Greek Symbols and Coptic
            // v2/v3: Reserved for Unicode SubRanges
            // v4: Coptic                                  2C80-2CFF
            if (version > 1 && version < 4)
            {
                // bit 8 reserved
                if ((ulUnicodeRange1 & 0x00000100) != 0)
                {
                    bOk = false;
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #8");
                }
            }
            // TODO: v4

            // bit 9
            uint CYRILLIC_LOW                        = 0x0400;
            uint CYRILLIC_SUPPLEMENTARY_LOW         = 0x0500;
            uint Cyrillic_Extended_A                = 0x2DE0;
            uint Cyrillic_Extended_B                = 0xA640;
            nCharsInRange = ur.GetRange(CYRILLIC_LOW).count
                          + ur.GetRange(CYRILLIC_SUPPLEMENTARY_LOW).count
                + ur.GetRange(Cyrillic_Extended_A).count
                + ur.GetRange(Cyrillic_Extended_B).count;
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange1, 0x00000200, nCharsInRange, "Cyrillic, Cyrillic Supplementary");

            // bit 10
            uint ARMENIAN_LOW                        = 0x0530;
            nCharsInRange = ur.GetRange(ARMENIAN_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000400, nCharsInRange, "Armenian");

            // bit 11
            uint HEBREW_LOW                            = 0x0590;
            nCharsInRange = ur.GetRange(HEBREW_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00000800, nCharsInRange, "Hebrew");

            // v1: Hebrew Extended (A and B blocks combined)
            // v2/v3: Reserved for Unicode SubRanges
            // v4: Vai                                     A500-A63F
            if (version > 1 && version < 4)
            {
                // bit 12 reserved
                if ((ulUnicodeRange1 & 0x00001000) != 0)
                {
                    bOk = false;
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #12");
                }
            }
            // TODO: v4

            // bit 13
            uint ARABIC_LOW                            = 0x0600;
            uint Arabic_Supplement                     = 0x0750;
            nCharsInRange = ur.GetRange(ARABIC_LOW).count
                + ur.GetRange(Arabic_Supplement).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00002000, nCharsInRange, "Arabic");

            // v1: Arabic Extended
            // v2/v3: Reserved for Unicode SubRanges
            // v4: NKo                                     07C0-07FF
            if (version > 1 && version < 4)
            {
                // bit 14 reserved
                if ((ulUnicodeRange1 & 0x00004000) != 0)
                {
                    bOk = false;
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #14");
                }
            }
            // TODO: v4

            // bit 15
            uint DEVANAGARI_LOW                        = 0x0900;
            nCharsInRange = ur.GetRange(DEVANAGARI_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00008000, nCharsInRange, "Devanagari");

            // bit 16
            uint BENGALI_LOW                         = 0x0980;
            nCharsInRange = ur.GetRange(BENGALI_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00010000, nCharsInRange, "Bengali");

            // bit 17
            uint GURMUKHI_LOW                        = 0x0A00;
            nCharsInRange = ur.GetRange(GURMUKHI_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00020000, nCharsInRange, "Gurmukhi");

            // bit 18
            uint GUJARATI_LOW                        = 0x0A80;
            nCharsInRange = ur.GetRange(GUJARATI_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00040000, nCharsInRange, "Gujarati");

            // bit 19
            uint ORIYA_LOW                            = 0x0B00;
            nCharsInRange = ur.GetRange(ORIYA_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00080000, nCharsInRange, "Oriya");

            // bit 20
            uint TAMIL_LOW                            = 0x0B80;
            nCharsInRange = ur.GetRange(TAMIL_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00100000, nCharsInRange, "Tamil");

            // bit 21
            uint TELUGU_LOW                            = 0x0C00;
            nCharsInRange = ur.GetRange(TELUGU_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00200000, nCharsInRange, "Telugu");

            // bit 22
            uint KANNADA_LOW                         = 0x0C80;
            nCharsInRange = ur.GetRange(KANNADA_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00400000, nCharsInRange, "Kannada");

            // bit 23
            uint MALAYALAM_LOW                        = 0x0D00;
            nCharsInRange = ur.GetRange(MALAYALAM_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x00800000, nCharsInRange, "Malayalam");

            // bit 24
            uint THAI_LOW                            = 0x0E00;
            nCharsInRange = ur.GetRange(THAI_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x01000000, nCharsInRange, "Thai");

            // bit 25
            uint LAO_LOW                             = 0x0E80;
            nCharsInRange = ur.GetRange(LAO_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x02000000, nCharsInRange, "Lao");

            // v3: Georgian
            // v4: Georgian                                10A0-10FF
            //     Georgian Supplement                     2D00-2D2F
            // TODO: v4
            // bit 26
            uint GEORGIAN_LOW                        = 0x10A0;
            uint Georgian_Supplement                 = 0x2D00;
            nCharsInRange = ur.GetRange(GEORGIAN_LOW).count
                + ur.GetRange(Georgian_Supplement).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x04000000, nCharsInRange, "Georgian");

            // v1: Georgian Extended
            // v2/v3: Reserved for Unicode SubRanges
            // v4: Balinese                                1B00-1B7F
            if (version > 1 && version < 4)
            {
                // bit 27 reserved
                if ((ulUnicodeRange1 & 0x08000000) != 0)
                {
                    bOk = false;
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #27");
                }
            }
            // TODO: v4

            // bit 28
            uint HANGUL_JAMO_LOW                     = 0x1100;
            nCharsInRange = ur.GetRange(HANGUL_JAMO_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x10000000, nCharsInRange, "Hangul Jamo");

            // bit 29
            uint LATIN_EXTENDED_ADDITIONAL_LOW        = 0x1E00;
            uint Latin_Extended_C                     = 0x2C60;
            uint Latin_Extended_D                     = 0xA720;
            nCharsInRange = ur.GetRange(LATIN_EXTENDED_ADDITIONAL_LOW).count
                + ur.GetRange(Latin_Extended_C).count
                + ur.GetRange(Latin_Extended_D).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x20000000, nCharsInRange, "Latin Extended Additional");

            // bit 30
            uint GREEK_EXTENDED_LOW                    = 0x1F00;
            nCharsInRange = ur.GetRange(GREEK_EXTENDED_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x40000000, nCharsInRange, "Greek Extended");

            // bit 31
            uint GENERAL_PUNCTUATION_LOW             = 0x2000;
            uint Supplemental_Punctuation            = 0x2E00;
            nCharsInRange = ur.GetRange(GENERAL_PUNCTUATION_LOW).count
                + ur.GetRange(Supplemental_Punctuation).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange1, 0x80000000, nCharsInRange, "General Punctuation");

            // bit 32
            uint SUPER_SUB_SCRIPTS_LOW                = 0x2070;
            nCharsInRange = ur.GetRange(SUPER_SUB_SCRIPTS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000001, nCharsInRange, "Superscripts and Subscripts");

            // bit 33
            uint CURRENCY_SYMBOLS_LOW                = 0x20A0;
            nCharsInRange = ur.GetRange(CURRENCY_SYMBOLS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000002, nCharsInRange, "Currency Symbols");

            // bit 34
            uint SYMBOL_COMBINING_MARKS_LOW            = 0x20D0;
            nCharsInRange = ur.GetRange(SYMBOL_COMBINING_MARKS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000004, nCharsInRange, "Combining Diacritical marks for symbols");

            // bit 35
            uint LETTERLIKE_SYMBOLS_LOW                = 0x2100;
            nCharsInRange = ur.GetRange(LETTERLIKE_SYMBOLS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000008, nCharsInRange, "Letterlike Symbols");

            // bit 36
            uint NUMBER_FORMS_LOW                    = 0x2150;
            nCharsInRange = ur.GetRange(NUMBER_FORMS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000010, nCharsInRange, "Number Forms");

            // bit 37
            uint ARROWS_LOW                            = 0x2190;
            uint SUPPLEMENTAL_ARROWS_A_LOW            = 0x27F0;
            uint SUPPLEMENTAL_ARROWS_B_LOW            = 0x2900;
            uint Miscellaneous_Symbols_and_Arrows     = 0x2B00;
            nCharsInRange = ur.GetRange(ARROWS_LOW).count
                          + ur.GetRange(SUPPLEMENTAL_ARROWS_A_LOW).count
                          + ur.GetRange(SUPPLEMENTAL_ARROWS_B_LOW).count
                + ur.GetRange(Miscellaneous_Symbols_and_Arrows).count;
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange2, 0x00000020, nCharsInRange, "Arrows, Supplementary Arrows A, Supplementary Arrows B");

            // bit 38
            uint MATH_OPERATORS_LOW                    = 0x2200;
            uint SUPPLEMENTAL_MATH_OPERATORS_LOW    = 0x2A00;
            uint MISC_MATH_SYMBOLS_A_LOW            = 0x27C0;
            uint MISC_MATH_SYMBOLS_B_LOW            = 0x2980;
            nCharsInRange = ur.GetRange(MATH_OPERATORS_LOW).count
                          + ur.GetRange(SUPPLEMENTAL_MATH_OPERATORS_LOW).count
                          + ur.GetRange(MISC_MATH_SYMBOLS_A_LOW).count
                          + ur.GetRange(MISC_MATH_SYMBOLS_B_LOW).count;
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange2, 0x00000040, nCharsInRange, "Mathematical Operators, Supplemental Mathematical Operators, Mathematical Symbols A, Mathematical Symbols B");

            // bit 39
            uint MISC_TECHNICAL_LOW                    = 0x2300;
            nCharsInRange = ur.GetRange(MISC_TECHNICAL_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000080, nCharsInRange, "Miscellaneous Technical");

            // bit 40
            uint CONTROL_PICTURES_LOW                = 0x2400;
            nCharsInRange = ur.GetRange(CONTROL_PICTURES_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000100, nCharsInRange, "Control Pictures");

            // bit 41
            uint OCR_LOW                             = 0x2440;
            nCharsInRange = ur.GetRange(OCR_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000200, nCharsInRange, "Optical Character Recognition");

            // bit 42
            uint ENCLOSED_ALPHANUMERICS_LOW            = 0x2460;
            nCharsInRange = ur.GetRange(ENCLOSED_ALPHANUMERICS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000400, nCharsInRange, "Enclosed Alphanumerics");

            // bit 43
            uint BOX_DRAWING_LOW                     = 0x2500;
            nCharsInRange = ur.GetRange(BOX_DRAWING_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00000800, nCharsInRange, "Box Drawing");

            // bit 44
            uint BLOCK_ELEMENTS_LOW                    = 0x2580;
            nCharsInRange = ur.GetRange(BLOCK_ELEMENTS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00001000, nCharsInRange, "Block Elements");

            // bit 45
            uint GEOMETRIC_SHAPES_LOW                = 0x25A0;
            nCharsInRange = ur.GetRange(GEOMETRIC_SHAPES_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00002000, nCharsInRange, "Geometric Shapes");

            // bit 46
            uint MISC_SYMBOLS_LOW                    = 0x2600;
            nCharsInRange = ur.GetRange(MISC_SYMBOLS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00004000, nCharsInRange, "Miscellaneous Symbols");

            // bit 47
            uint DINGBATS_LOW                        = 0x2700;
            nCharsInRange = ur.GetRange(DINGBATS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00008000, nCharsInRange, "Dingbats");

            // bit 48
            uint CJK_SYMBOLS_PUNCTUATION_LOW         = 0x3000;
            nCharsInRange = ur.GetRange(CJK_SYMBOLS_PUNCTUATION_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00010000, nCharsInRange, "CJK Symbols and Punctuation");

            // bit 49
            uint HIRAGANA_LOW                        = 0x3040;
            nCharsInRange = ur.GetRange(HIRAGANA_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00020000, nCharsInRange, "Hiragana");

            // bit 50
            uint KATAKANA_LOW                        = 0x30A0;
            uint Katakana_Phonetic_Extensions        = 0x31F0;
            nCharsInRange = ur.GetRange(KATAKANA_LOW).count
                + ur.GetRange(Katakana_Phonetic_Extensions).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00040000, nCharsInRange, "Katakana");

            // bit 51
            uint BOPOMOFO_LOW                        = 0x3100;
            uint BOPOMOFO_EXTENDED_LOW                = 0x31A0;
            nCharsInRange = ur.GetRange(BOPOMOFO_LOW).count
                          + ur.GetRange(BOPOMOFO_EXTENDED_LOW).count;
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange2, 0x00080000, nCharsInRange, "Bopomofo, Bopomofo Extended");

            // bit 52
            uint HANGUL_COMPAT_JAMO_LOW                = 0x3130;
            nCharsInRange = ur.GetRange(HANGUL_COMPAT_JAMO_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00100000, nCharsInRange, "Hangul Compatibility Jamo");

            // v2: CJK Miscellaneous
            // v3: Reserved for Unicode SubRanges
            // v4: Phags-pa                                A840-A87F
            // bit 53 !!! OT Spec 1.3 says bit 53 is CJK Misc !!!
            //        !!! Since there's no unicode range name !!!
            //        !!! that maps nicely to CJK Misc,       !!!
            //        !!! I naturally just ignore it.         !!!
            /*
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00200000, nCharsInRange, "CJK Miscellaneous");
            */
            // TODO: v4

            // bit 54
            uint ENCLOSED_CJK_LETTERS_MONTHS_LOW     = 0x3200;
            nCharsInRange = ur.GetRange(ENCLOSED_CJK_LETTERS_MONTHS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00400000, nCharsInRange, "Enclosed CJK Letters and Months");

            // bit 55
            uint CJK_COMPATIBILITY_LOW                = 0x3300;
            nCharsInRange = ur.GetRange(CJK_COMPATIBILITY_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x00800000, nCharsInRange, "CJK Compatibility");

            // bit 56
            uint HANGUL_LOW                            = 0xAC00;
            nCharsInRange = ur.GetRange(HANGUL_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x01000000, nCharsInRange, "Hangul");

            // v1: Reserved for Unicode SubRanges
            // v2: Surrogates
            // v3: Non-Plane 0 *
            // v4: Non-Plane 0 *                           D800-DFFF
            // bit 57 surrogates
            if (version > 1)
            {
                bool bSurrogatesCmapPresent = false;
                Table_cmap cmapTable = (Table_cmap)fontOwner.GetTable("cmap");
                if (cmapTable != null)
                {
                    Table_cmap.EncodingTableEntry eteUniSurrogates = cmapTable.GetEncodingTableEntry(3,10);
                    if (eteUniSurrogates != null)
                        bSurrogatesCmapPresent = true;
                }

                if ((ulUnicodeRange2 & 0x02000000) == 0)
                {
                    if (bSurrogatesCmapPresent)
                    {
                        v.Error(T.T_NULL, E.OS_2_E_SurrogatesBitClear, m_tag);
                        bOk = false;
                    }
                }
                else
                {
                    if (!bSurrogatesCmapPresent)
                    {
                        v.Error(T.T_NULL, E.OS_2_E_SurrogatesBitSet, m_tag);
                        bOk = false;
                    }
                }
            }

            // v1/v2/v3: Reserved for Unicode SubRanges
            // v4: Phoenician                              10900-1091F
            if (version < 4)
            {
                // bit 58 reserved
                if ((ulUnicodeRange2 & 0x04000000) != 0)
                {
                    bOk = false;
                    v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #58");
                }
            }
            // TODO: v4

            // bit 59
            uint CJK_UNIFIED_IDEOGRAPHS_LOW            = 0x4E00;
            uint CJK_RADICALS_SUPPLEMENT_LOW        = 0x2E80;
            uint KANGXI_RADICALS_LOW                = 0x2F00;
            uint IDEOGRAPHIC_DESCRIPTION_CHARS_LOW    = 0x2FF0;
            uint CJK_UNIFIED_IDEOGRAPHS_EXT_A_LOW   = 0x3400;
            uint CJK_UNIFIED_IDEOGRAPHS_EXT_B_LOW   = 0x20000;
            uint KANBUN_LOW                         = 0x3190;
            nCharsInRange = ur.GetRange(CJK_UNIFIED_IDEOGRAPHS_LOW).count
                          + ur.GetRange(CJK_RADICALS_SUPPLEMENT_LOW).count
                          + ur.GetRange(KANGXI_RADICALS_LOW).count
                          + ur.GetRange(IDEOGRAPHIC_DESCRIPTION_CHARS_LOW).count
                          + ur.GetRange(CJK_UNIFIED_IDEOGRAPHS_EXT_A_LOW).count
                          + ur.GetRange(CJK_UNIFIED_IDEOGRAPHS_EXT_B_LOW).count
                          + ur.GetRange(KANBUN_LOW).count;
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange2, 0x08000000, nCharsInRange, "CJK Unified Ideographs, CJK Radicals Supplement, Kangxi Radicals, Ideographic Description Chars, CJK Unified Ideographs Extended A, CJK Unified Ideographs Extended B, Kanbun");

            // bit 60
            uint PRIVATE_USE_AREA_LOW                = 0xE000;
            if (!fontOwner.ContainsMsSymbolEncodedCmap())
            {
                nCharsInRange = ur.GetRange(PRIVATE_USE_AREA_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x10000000, nCharsInRange, "Private Use Area");
            }

            // bit 61
            uint CJK_Strokes                         = 0x31C0;
            uint CJK_COMPATIBILITY_IDEOGRAPHS_LOW    = 0xF900;
            uint CJK_COMPATIBILITY_IDEO_SUPP_LOW    = 0x2F800;
            nCharsInRange = ur.GetRange(CJK_Strokes).count
                + ur.GetRange(CJK_COMPATIBILITY_IDEOGRAPHS_LOW).count
                          + ur.GetRange(CJK_COMPATIBILITY_IDEO_SUPP_LOW).count;
            bOk &= VerifyUnicodeRanges(v, ulUnicodeRange2, 0x20000000, nCharsInRange, "CJK Compatibility Ideographs, CJK Compatibility Ideographs Supplement");

            // bit 62
            uint ALPHABETIC_PRESENTATION_FORMS_LOW    = 0xFB00;
            nCharsInRange = ur.GetRange(ALPHABETIC_PRESENTATION_FORMS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x40000000, nCharsInRange, "Alphabetic Presentation Forms");

            // bit 63
            uint ARABIC_PRESENTATION_FORMS_A_LOW     = 0xFB50;
            nCharsInRange = ur.GetRange(ARABIC_PRESENTATION_FORMS_A_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange2, 0x80000000, nCharsInRange, "Arabic Presentation Forms-A");

            // bit 64
            uint COMBINING_HALF_MARKS_LOW            = 0xFE20;
            nCharsInRange = ur.GetRange(COMBINING_HALF_MARKS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000001, nCharsInRange, "Combining Half Marks");

            // bit 65
            uint Vertical_Forms                      = 0xFE10;
            uint CJK_COMPATIBILITY_FORMS_LOW         = 0xFE30;
            nCharsInRange = ur.GetRange(Vertical_Forms).count
                + ur.GetRange(CJK_COMPATIBILITY_FORMS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000002, nCharsInRange, "CJK Compatibility Forms");

            // bit 66
            uint SMALL_FORM_VARIANTS_LOW             = 0xFE50;
            nCharsInRange = ur.GetRange(SMALL_FORM_VARIANTS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000004, nCharsInRange, "Small Form Variants");

            // bit 67
            uint ARABIC_PRESENTATION_FORMS_B_LOW     = 0xFE70;
            nCharsInRange = ur.GetRange(ARABIC_PRESENTATION_FORMS_B_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000008, nCharsInRange, "Arabic Presentation Forms-B");

            // bit 68
            uint HALFWIDTH_FULLWIDTH_FORMS_LOW        = 0xFF00;
            nCharsInRange = ur.GetRange(HALFWIDTH_FULLWIDTH_FORMS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000010, nCharsInRange, "Halfwidth and Fullwidth Forms");

            // bit 69
            uint SPECIALS_LOW                        = 0xFFF0;
            nCharsInRange = ur.GetRange(SPECIALS_LOW).count;
            bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000020, nCharsInRange, "Specials");

            // v1: 70-127 Reserved for Unicode SubRanges
            if (version < 2)
            {
                for (int bitpos = 6; bitpos < 32; bitpos++)
                {
                    if ((ulUnicodeRange3 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (64+bitpos)); }
                }

                for (int bitpos = 0; bitpos < 32; bitpos++)
                {
                    if ((ulUnicodeRange4 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (96+bitpos)); }
                }
            }
            else
            {
                // bit 70
                uint TIBETAN_LOW                        = 0x0F00;
                nCharsInRange = ur.GetRange(TIBETAN_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000040, nCharsInRange, "Tibetan");

                // bit 71
                uint SYRIAC_LOW                            = 0x0700;
                nCharsInRange = ur.GetRange(SYRIAC_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000080, nCharsInRange, "Syriac");

                // bit 72
                uint THAANA_LOW                            = 0x0780;
                nCharsInRange = ur.GetRange(THAANA_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000100, nCharsInRange, "Thaana");

                // bit 73
                uint SINHALA_LOW                        = 0x0D80;
                nCharsInRange = ur.GetRange(SINHALA_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000200, nCharsInRange, "Sinhala");

                // bit 74
                uint MYANMAR_LOW                        = 0x1000;
                nCharsInRange = ur.GetRange(MYANMAR_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000400, nCharsInRange, "Myanmar");

                // bit 75
                uint ETHIOPIC_LOW                        = 0x1200;
                uint Ethiopic_Supplement                 = 0x1380;
                uint Ethiopic_Extended                   = 0x2D80;
                nCharsInRange = ur.GetRange(ETHIOPIC_LOW).count
                    + ur.GetRange(Ethiopic_Supplement).count
                    + ur.GetRange(Ethiopic_Extended).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00000800, nCharsInRange, "Ethiopic");

                // bit 76
                uint CHEROKEE_LOW                        = 0x13A0;
                nCharsInRange = ur.GetRange(CHEROKEE_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00001000, nCharsInRange, "Cherokee");

                // bit 77
                uint UNIFIED_CANADIAN_AB_SYL_LOW        = 0x1400;
                nCharsInRange = ur.GetRange(UNIFIED_CANADIAN_AB_SYL_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00002000, nCharsInRange, "Unified Canadian Syllabics");

                // bit 78
                uint OGHAM_LOW                            = 0x1680;
                nCharsInRange = ur.GetRange(OGHAM_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00004000, nCharsInRange, "Ogham");

                // bit 79
                uint RUNIC_LOW                            = 0x16A0;
                nCharsInRange = ur.GetRange(RUNIC_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00008000, nCharsInRange, "Runic");

                // bit 80
                uint KHMER_LOW                            = 0x1780;
                uint Khmer_Symbols                        = 0x19E0;
                nCharsInRange = ur.GetRange(KHMER_LOW).count
                    + ur.GetRange(Khmer_Symbols).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00010000, nCharsInRange, "Khmer");

                // bit 81
                uint MONGOLIAN_LOW                        = 0x1800;
                nCharsInRange = ur.GetRange(MONGOLIAN_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00020000, nCharsInRange, "Mongolian");

                // bit 82
                uint BRAILLE_PATTERNS_LOW                = 0x2800;
                nCharsInRange = ur.GetRange(BRAILLE_PATTERNS_LOW).count;
                bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00040000, nCharsInRange, "Braille");

                // bit 83
                uint YI_LOW                                = 0xA000;
                uint YI_RADICALS_LOW                    = 0xA490;
                nCharsInRange = ur.GetRange(YI_LOW).count
                              + ur.GetRange(YI_RADICALS_LOW).count;
                bOk &= VerifyUnicodeRanges(v, ulUnicodeRange3, 0x00080000, nCharsInRange, "Yi, Yi Radicals");

                // v2: 84-127 Reserved for Unicode SubRanges
                if (version < 3)
                {
                    for (int bitpos = 20; bitpos < 32; bitpos++)
                    {
                        if ((ulUnicodeRange3 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (64+bitpos)); }
                    }

                    for (int bitpos = 0; bitpos < 32; bitpos++)
                    {
                        if ((ulUnicodeRange4 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (96+bitpos)); }
                    }
                }
                else
                {
                    // bit 84
                    uint TAGALOG_LOW                    = 0x1700;
                    uint HANUNOO_LOW                    = 0x1720;
                    uint BUHID_LOW                      = 0x1740;
                    uint TAGBANWA_LOW                   = 0x1760;
                    nCharsInRange = ur.GetRange(TAGALOG_LOW).count
                                  + ur.GetRange(HANUNOO_LOW).count
                                  + ur.GetRange(BUHID_LOW).count
                                  + ur.GetRange(TAGBANWA_LOW).count;
                    bOk &= VerifyUnicodeRanges(v, ulUnicodeRange3, 0x00100000, nCharsInRange, "Tagalog, Hanunoo, Buhid, Tagbanwa");

                    // bit 85
                    uint OLD_ITALIC_LOW                 = 0x10300;
                    nCharsInRange = ur.GetRange(OLD_ITALIC_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00200000, nCharsInRange, "Old Italic");

                    // bit 86
                    uint GOTHIC_LOW                     = 0x10330;
                    nCharsInRange = ur.GetRange(GOTHIC_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00400000, nCharsInRange, "Gothic");

                    // bit 87
                    uint DESERET_LOW                    = 0x10400;
                    nCharsInRange = ur.GetRange(DESERET_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x00800000, nCharsInRange, "Deseret");

                    // bit 88
                    uint BYZANTINE_MUSICAL_SYMBOLS_LOW  = 0x1D000;
                    uint MUSICAL_SYMBOLS_LOW            = 0x1D100;
                    uint Ancient_Greek_Musical_Notation = 0x1D200;
                    nCharsInRange = ur.GetRange(BYZANTINE_MUSICAL_SYMBOLS_LOW).count
                                  + ur.GetRange(MUSICAL_SYMBOLS_LOW).count
                        + ur.GetRange(Ancient_Greek_Musical_Notation).count;
                    bOk &= VerifyUnicodeRanges(v, ulUnicodeRange3, 0x01000000, nCharsInRange, "Byzantine Musical Symbols, Musical Symbols");

                    // bit 89
                    uint MATHEMATICAL_ALPHANUMERIC_LOW  = 0x1D400;
                    nCharsInRange = ur.GetRange(MATHEMATICAL_ALPHANUMERIC_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x02000000, nCharsInRange, "Mathematical Alphanumeric Symbols");

                    // bit 90
                    uint PRIVATE_USE_15_LOW             = 0xFFF80;
                    uint PRIVATE_USE_16_LOW             = 0x10FF80;
                    nCharsInRange = ur.GetRange(PRIVATE_USE_15_LOW).count
                                  + ur.GetRange(PRIVATE_USE_16_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x04000000, nCharsInRange, "Private Use (Plane 15), Private Use (Plane 16)");

                    // bit 91
                    uint Variation_Selectors             = 0xFE00;
                    uint VARIATION_SELECTORS_SUPP         = 0xE0100;
                    nCharsInRange = ur.GetRange(Variation_Selectors).count
                        + ur.GetRange(VARIATION_SELECTORS_SUPP).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x08000000, nCharsInRange, "Variation Selectors");

                    // bit 92
                    uint TAGS_LOW                       = 0xE0000;
                    nCharsInRange = ur.GetRange(TAGS_LOW).count;
                    bOk &= VerifyUnicodeRange(v, ulUnicodeRange3, 0x10000000, nCharsInRange, "Tags");

                    // v3: 93-127 Reserved for Unicode SubRanges
                    if (version < 4)
                    {
                        for (int bitpos = 29; bitpos < 32; bitpos++)
                        {
                            if ((ulUnicodeRange3 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (64+bitpos)); }
                        }

                        for (int bitpos = 0; bitpos < 32; bitpos++)
                        {
                            if ((ulUnicodeRange4 & (1<<bitpos)) != 0) { bOk = false; v.Error(T.T_NULL, E.OS_2_E_ReservedBitSet_Unicode, m_tag, "bit #" + (96+bitpos)); }
                        }
                    }
                    else
                    {
                    // v4 addition

                    /*
                      93      Limbu                                   1900-194F
                      94      Tai Le                                  1950-197F
                      95      New Tai Lue                             1980-19DF
                      96      Buginese                                1A00-1A1F
                      97      Glagolitic                              2C00-2C5F
                      98      Tifinagh                                2D30-2D7F
                      99      Yijing Hexagram Symbols                 4DC0-4DFF
                      100     Syloti Nagri                            A800-A82F
                      101     Linear B Syllabary                      10000-1007F
                      Linear B Ideograms                      10080-100FF
                      Aegean Numbers                          10100-1013F
                      102     Ancient Greek Numbers                   10140-1018F
                      103     Ugaritic                                10380-1039F
                      104     Old Persian                             103A0-103DF
                      105     Shavian                                 10450-1047F
                      106     Osmanya                                 10480-104AF
                      107     Cypriot Syllabary                       10800-1083F
                      108     Kharoshthi                              10A00-10A5F
                      109     Tai Xuan Jing Symbols                   1D300-1D35F
                      110     Cuneiform                               12000-123FF
                      Cuneiform Numbers and Punctuation       12400-1247F
                      111     Counting Rod Numerals                   1D360-1D37F
                      112     Sundanese                               1B80-1BBF
                      113     Lepcha                                  1C00-1C4F
                      114     Ol Chiki                                1C50-1C7F
                      115     Saurashtra                              A880-A8DF
                      116     Kayah Li                                A900-A92F
                      117     Rejang                                  A930-A95F
                      118     Cham                                    AA00-AA5F
                      119     Ancient Symbols                         10190-101CF
                      120     Phaistos Disc                           101D0-101FF
                      121     Carian                                  102A0-102DF
                      Lycian                                  10280-1029F
                      Lydian                                  10920-1093F
                      122     Domino Tiles                            1F030-1F09F
                      Mahjong Tiles                           1F000-1F02F
                      123-127 Reserved for process-internal usage
                     */

                        // TODO: v4 addition
                    }
                }
            }

            if (bOk)
            {
                v.Pass(P.OS_2_P_UnicodeRanges, m_tag);
            }
            else
            {
                bRet = false;
            }

            return bRet;
        }
Beispiel #7
0
            /// <summary>
            /// Builds the three artefacts this grammar exposes: <c>Resolve</c> (a getter created from the
            /// symbol-to-display-name dictionary), the compiled lexer state machine with its start state
            /// (<c>DfaStateMachine</c> / <c>DfaStartState</c>), and <c>LalrTable</c> (computed from the
            /// production list below).
            /// </summary>
            public GrmGrammar()
            {
                // Display names for every symbol. The dictionary is viewed through IReadOnlyDictionary so that
                // CreateGetter() is resolved against that interface type.
                this.Resolve = ((IReadOnlyDictionary <SymbolId, string>) new Dictionary <SymbolId, string>()
                {
                    { SymbolId.Eof, "(EOF)" },
                    { SymUnknown, "(Unknown)" },
                    { SymWhitespace, "(Whitespace)" },
                    { SymNewline, "(Newline)" },
                    { SymLineComment, "(LineComment)" },
                    { SymBlockComment, "(BlockComment)" },
                    { SymParameterName, "ParameterName" },
                    { SymNonterminal, "Nonterminal" },
                    { SymTerminal, "Terminal" },
                    { SymAssign, "=" },
                    { SymDefine, "::=" },
                    { SymQuestion, "?" },
                    { SymStar, "*" },
                    { SymParensOpen, "(" },
                    { SymParensClose, ")" },
                    { SymPlus, "+" },
                    { SymMinus, "-" },
                    { SymOr, "|" },
                    { SymSetLiteral, "SetLiteral" },
                    { SymSetName, "SetName" },
                    { SymInit, "<Init>" },
                    { SymGrammar, "<Grammar>" },
                    { SymContent, "<Content>" },
                    { SymDefinition, "<Definition>" },
                    { SymNlOpt, "<NlOpt>" },
                    { SymNl, "<Nl>" },
                    { SymParameter, "<Parameter>" },
                    { SymParameterBody, "<ParameterBody>" },
                    { SymParameterItems, "<ParameterItems>" },
                    { SymParameterItem, "<ParameterItem>" },
                    { SymSetDecl, "<SetDecl>" },
                    { SymSetExp, "<SetExp>" },
                    { SymSetItem, "<SetItem>" },
                    { SymTerminalDecl, "<TerminalDecl>" },
                    { SymTerminalName, "<TerminalName>" },
                    { SymRegExp, "<RegExp>" },
                    { SymRegExpSeq, "<RegExpSeq>" },
                    { SymRegExpItem, "<RegExpItem>" },
                    { SymRegExp2, "<RegExp2>" },
                    { SymKleeneOpt, "<KleeneOpt>" },
                    { SymRuleDecl, "<RuleDecl>" },
                    { SymHandles, "<Handles>" },
                    { SymHandle, "<Handle>" },
                    { SymSymbol, "<Symbol>" },
                })
                               .CreateGetter();

                // NOTE(review): the meaning of the two boolean mapper flags is not visible here - confirm
                // against UnicodeUtf16Mapper before changing them.
                var mapper              = new UnicodeUtf16Mapper(false, false);

                // Shared base sets: "printable" is the valid BMP minus control characters and combining
                // diacritical marks; "alphanumeric" is letters plus numbers.
                var charsetPrintable    = Codepoints.ValidBmp - UnicodeRanges.FromUnicodeCategory(UnicodeCategory.Control) - UnicodeRanges.InCombiningDiacriticalMarks;
                var charsetAlphanumeric = UnicodeRanges.Letter | UnicodeRanges.Number;

                // Named character sets that the terminal patterns below reference as {Name}.
                var charset             = new UnicodeCharSetProvider(new Dictionary <string, RangeSet <Codepoint> >()
                {
                    { "Parameter Ch", charsetPrintable - (Codepoint)'\'' - (Codepoint)'"' },
                    { "Nonterminal Ch", charsetAlphanumeric | '_' | '-' | '.' | ' ' },
                    { "Terminal Ch", charsetAlphanumeric | '_' | '-' | '.' },
                    { "Literal Ch", charsetPrintable - (Codepoint)'\'' },
                    { "Set Literal Ch", charsetPrintable - (Codepoint)'[' - (Codepoint)']' - (Codepoint)'\'' },
                    { "Set Name Ch", charsetPrintable - (Codepoint)'{' - (Codepoint)'}' },
                    { "Whitespace Ch", UnicodeRanges.SpaceSeparator | '\t' | '\v' }
                });

                // Terminal definitions (symbol, pattern). The entry order is kept as-is.
                // NOTE(review): presumably the order can matter when two patterns accept the same input
                // (e.g. line comment vs. single-line block comment) - confirm against LexerBuilder.
                this.DfaStateMachine = new LexerBuilder <char>(mapper, Utf16Chars.EOF, charset)
                {
                    { SymParameterName, @"""{Parameter Ch}+""" },
                    { SymNonterminal, @"<{Nonterminal Ch}+>" },
                    { SymTerminal, @"{Terminal Ch}+|'{Literal Ch}*'" },
                    { SymSetLiteral, @"\[({Set Literal Ch}+|'{Literal Ch}*')+\]" },
                    { SymSetName, @"\{{Set Name Ch}+\}" },
                    { SymWhitespace, @"{Whitespace Ch}+" },
                    { SymNewline, @"\r\n?|\n\r?" },
                    { SymLineComment, @"![^\r\n]*" },
                    // Block comment "!* ... *!". Inside the body a run of asterisks must be followed by a
                    // character that is neither '*' nor '!', and the terminator accepts any number of
                    // asterisks before the closing '!'. Consuming asterisks pairwise ("\*[^!]") would reject
                    // comments ending in an even run such as "**!" and could run on to a later "*!".
                    { SymBlockComment, @"!\*([^\*]|\*+[^\*!])*\*+!" },
                    { SymAssign, @"=" },
                    { SymDefine, @"::=" },
                    { SymPlus, @"\+" },
                    { SymMinus, @"\-" },
                    { SymOr, @"\|" },
                    { SymQuestion, @"\?" },
                    { SymStar, @"\*" },
                    { SymParensOpen, @"\(" },
                    { SymParensClose, @"\)" }
                }
                .CreateStateMachine(out var dfaStartState)
                .Compile();
                this.DfaStartState = dfaStartState;

                // Productions for the parser table.
                // NOTE(review): each entry appears to be "produced symbol, then its right-hand-side symbols"
                // (a single-symbol entry being an empty production) - confirm against GrammarBuilder.Add.
                this.LalrTable     = new LalrTableGenerator(new GrammarBuilder(SymUnknown, SymInit, SymGrammar)
                {
                    { SymGrammar, SymNlOpt, SymContent },
                    { SymContent, SymContent, SymDefinition },
                    { SymContent, SymDefinition },
                    { SymDefinition, SymParameter },
                    { SymDefinition, SymSetDecl },
                    { SymDefinition, SymTerminalDecl },
                    { SymDefinition, SymRuleDecl },
                    { SymNlOpt, SymNewline, SymNlOpt },
                    { SymNlOpt },
                    { SymNl, SymNewline, SymNl },
                    { SymNl, SymNewline },
                    { SymParameter, SymParameterName, SymNlOpt, SymAssign, SymParameterBody, SymNl },
                    { SymParameterBody, SymParameterBody, SymNlOpt, SymOr, SymParameterItems },
                    { SymParameterBody, SymParameterItems },
                    { SymParameterItems, SymParameterItems, SymParameterItem },
                    { SymParameterItems, SymParameterItem },
                    { SymParameterItem, SymParameterName },
                    { SymParameterItem, SymTerminal },
                    { SymParameterItem, SymSetLiteral },
                    { SymParameterItem, SymSetName },
                    { SymParameterItem, SymNonterminal },
                    { SymSetDecl, SymSetName, SymNlOpt, SymAssign, SymSetExp, SymNl },
                    { SymSetExp, SymSetExp, SymNlOpt, SymPlus, SymSetItem },
                    { SymSetExp, SymSetExp, SymNlOpt, SymMinus, SymSetItem },
                    { SymSetExp, SymSetItem },
                    { SymSetItem, SymSetLiteral },
                    { SymSetItem, SymSetName },
                    { SymTerminalDecl, SymTerminalName, SymNlOpt, SymAssign, SymRegExp, SymNl },
                    { SymTerminalName, SymTerminalName, SymTerminal },
                    { SymTerminalName, SymTerminal },
                    { SymRegExp, SymRegExp, SymNlOpt, SymOr, SymRegExpSeq },
                    { SymRegExp, SymRegExpSeq },
                    { SymRegExpSeq, SymRegExpSeq, SymRegExpItem },
                    { SymRegExpSeq, SymRegExpItem },
                    { SymRegExpItem, SymSetLiteral, SymKleeneOpt },
                    { SymRegExpItem, SymSetName, SymKleeneOpt },
                    { SymRegExpItem, SymTerminal, SymKleeneOpt },
                    { SymRegExpItem, SymParensOpen, SymRegExp2, SymParensClose, SymKleeneOpt },
                    { SymRegExp2, SymRegExp2, SymOr, SymRegExpSeq },
                    { SymRegExp2, SymRegExpSeq },
                    { SymKleeneOpt, SymPlus },
                    { SymKleeneOpt, SymQuestion },
                    { SymKleeneOpt, SymStar },
                    { SymKleeneOpt },
                    { SymRuleDecl, SymNonterminal, SymNlOpt, SymDefine, SymHandles, SymNl },
                    { SymHandles, SymHandles, SymNlOpt, SymOr, SymHandle },
                    { SymHandles, SymHandle },
                    { SymHandle, SymHandle, SymSymbol },
                    { SymHandle },
                    { SymSymbol, SymTerminal },
                    { SymSymbol, SymNonterminal }
                })
                                     .ComputeTable();
            }