예제 #1
0
        /// <summary>
        /// Captures the string form and the code value of every code point returned by
        /// <paramref name="loader"/>, leaving out code value 0 and the range 0xD800-0xDFFF.
        /// </summary>
        /// <param name="loader">Source of the code points; also kept in <c>_loader</c>.</param>
        public TestCompEq(UcdLoader loader)
        {
            _loader = loader;

            var texts  = new List<string>();
            var values = new List<int>();

            foreach (var point in loader.GetCodePoints())
            {
                int value = point.CodeValue;

                // Keep everything except code value 0 and the 0xD800-0xDFFF range.
                if (value != 0 && (value < 0xD800 || value > 0xDFFF))
                {
                    texts.Add(point.ToString());
                    values.Add(value);
                }
            }

            // The two arrays are parallel: index i of each describes the same code point.
            _allStrings = texts.ToArray();
            _allPoints  = values.ToArray();
        }
예제 #2
0
        /// <summary>
        /// Walks every block supplied by <paramref name="loader"/>, except the surrogate and
        /// private-use blocks, and calls <c>GetDecomposed</c> for each letter entry whose
        /// <c>DecomposingLength</c> is positive.
        /// </summary>
        /// <param name="loader">Source of the blocks and code points; also kept in <c>_loader</c>.</param>
        public ScannerMapBuilder(UcdLoader loader)
        {
            _loader = loader;

            foreach (UcdBlock block in loader.Blocks)
            {
                // Surrogate and private-use blocks are not processed.
                switch (block.Block)
                {
                case Block.HighSurrogates:
                case Block.HighPrivateUseSurrogates:
                case Block.LowSurrogates:
                case Block.PrivateUseArea:
                case Block.SupplementaryPrivateUseAreaA:
                case Block.SupplementaryPrivateUseAreaB:
                    continue;
                }

                _list.Clear();
                foreach (UnicodeEntry entry in loader.GetCodePoints(block))
                {
                    // Only the five letter categories are considered; anything else is skipped.
                    switch (entry.Category)
                    {
                    case UnicodeCharacterType.LetterUppercase:
                    case UnicodeCharacterType.LetterLowercase:
                    case UnicodeCharacterType.LetterTitlecase:
                    case UnicodeCharacterType.LetterModifier:
                    case UnicodeCharacterType.LetterOther:
                        break;

                    default:
                        continue;
                    }

                    if (entry.DecomposingLength > 0)
                    {
                        // NOTE(review): the returned value is not used anywhere in this constructor.
                        // The call is kept because GetDecomposed is defined elsewhere and may have
                        // side effects - confirm whether the result was meant to be stored.
                        GetDecomposed(entry.CodeValue, true);
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Splits the code points returned by <paramref name="loader"/> (code value 0 is skipped)
        /// into consecutive groups of ten. For every group both the text produced through
        /// <c>AppendCharTo</c> and the array of code values are kept.
        /// </summary>
        /// <param name="loader">Source of the code points; also kept in <c>_loader</c>.</param>
        public HashCodeTest(UcdLoader loader)
        {
            const int GroupSize = 10;

            _loader = loader;

            var texts   = new List<string>();
            var groups  = new List<int[]>();
            var pending = new List<int>();      // code values of the group being assembled
            var text    = new StringBuilder();  // text of the group being assembled

            foreach (var point in loader.GetCodePoints())
            {
                if (point.CodeValue == 0)
                {
                    continue;
                }

                point.AppendCharTo(text);
                pending.Add(point.CodeValue);

                // A full group is flushed and both accumulators are reset.
                if (pending.Count == GroupSize)
                {
                    texts.Add(text.ToString());
                    groups.Add(pending.ToArray());
                    text.Clear();
                    pending.Clear();
                }
            }

            // Flush the final, shorter group if any text is left over.
            if (text.Length > 0)
            {
                texts.Add(text.ToString());
                groups.Add(pending.ToArray());
            }

            _allStrings = texts.ToArray();
            _allPoints  = groups.ToArray();
        }
예제 #4
0
        /// <summary>
        /// Builds a code point to byte classification map and writes it to disk.
        /// </summary>
        /// <remarks>
        /// The map is first filled from the ranges returned by <c>_loader.LoadWordBreak()</c>; when a
        /// code point appears in more than one range the first value is kept. Explicit entries are
        /// then added for code point 160 and for tab. Finally every entry returned by
        /// <c>_loader.GetCodePoints(0, ushort.MaxValue)</c> that is still missing is classified by
        /// its category. Unless the <c>true1</c> symbol is defined, the map is transferred to an
        /// <c>InterleaveMap</c> and saved to <c>f:\_tests\Del\WordBreak2.bin</c>.
        /// </remarks>
        public void WriteBreakMap()
        {
            EnumRange<WordBreak>[] breaks = _loader.LoadWordBreak();
            var map = new Dictionary<int, byte>();

            // Step 1: seed the map from the word-break ranges (inclusive on both ends).
            foreach (var range in breaks)
            {
                for (int i = range.Begin; i <= range.End; i++)
                {
                    if (map.ContainsKey(i))
                    {
                        // Already assigned by an earlier range: keep the first value.
                        Debug.WriteLine("DF");
                        continue;
                    }

                    map[i] = (byte)range.Value;
                }
            }

            // Step 2: explicit entries. Add throws if either key is already present.
            map.Add(160, (byte)MoreWord.Connector);   // Non Breaking space
            map.Add('\t', (byte)WordBreak.WSegSpace); // Tab

            // Step 3: classify the remaining entries by category.
            foreach (var entry in _loader.GetCodePoints(0, ushort.MaxValue))
            {
                if (map.ContainsKey(entry.CodeValue))
                {
                    continue;
                }

                byte kind;
                switch (entry.Category)
                {
                case UnicodeCharacterType.OtherPrivateUse:
                    kind = (byte)MoreWord.Private;
                    break;

                case UnicodeCharacterType.OtherSurrogate:
                    kind = (byte)MoreWord.Surrogate;
                    break;

                case UnicodeCharacterType.OtherControl:
                    kind = (byte)MoreWord.Control;
                    break;

                case UnicodeCharacterType.LetterUppercase:
                case UnicodeCharacterType.LetterLowercase:
                case UnicodeCharacterType.LetterTitlecase:
                    kind = 52;
                    break;

                case UnicodeCharacterType.LetterOther:
                    kind = (byte)MoreWord.Ideograph;
                    break;

                case UnicodeCharacterType.NumberOther:
                case UnicodeCharacterType.LetterModifier:
                case UnicodeCharacterType.NumberDecimalDigit:
                case UnicodeCharacterType.MarkEnclosing:
                case UnicodeCharacterType.NumberLetter:
                case UnicodeCharacterType.SymbolMath:
                case UnicodeCharacterType.SymbolCurrency:
                case UnicodeCharacterType.SymbolModifier:
                case UnicodeCharacterType.SymbolOther:
                    kind = (byte)MoreWord.Symbol;
                    break;

                case UnicodeCharacterType.PunctuationConnector:
                case UnicodeCharacterType.PunctuationDash:
                case UnicodeCharacterType.PunctuationOpen:
                case UnicodeCharacterType.PunctuationClose:
                case UnicodeCharacterType.PunctuationInitialQuote:
                case UnicodeCharacterType.PunctuationFinalQuote:
                case UnicodeCharacterType.PunctuationOther:
                    kind = (byte)MoreWord.Punctuation;
                    break;

                case UnicodeCharacterType.OtherFormat:
                case UnicodeCharacterType.SeparatorSpace:
                    kind = (byte)MoreWord.WhiteSpace;
                    break;

                case UnicodeCharacterType.MarkSpacingCombining:
                case UnicodeCharacterType.SeparatorLine:
                case UnicodeCharacterType.SeparatorParagraph:
                    kind = (byte)MoreWord.LineSeparator;
                    break;

                default:
                    kind = 60;
                    break;
                }

                map.Add(entry.CodeValue, kind);
            }

            // Step 4: output. The text dump below is compiled only when "true1" is defined.
            #if true1
            using (var w = File.CreateText(@"f:\_tests\Del\WordBreak.txt")) {
                for (int i = 0; i <= ushort.MaxValue; i++)
                {
                    string text = $"{i:X4} ";
                    if (_loader.TryGetEntry(i, out UnicodeEntry entry))
                    {
                        if (IsPrintable(entry.Category))
                        {
                            text += entry + " ";
                        }

                        text += entry.Name + " ";
                    }
                    map.TryGetValue(i, out var value);
                    w.WriteLine(text + value);
                }
            }
#else
            var interleaved = new InterleaveMap();
            for (int code = 0; code <= ushort.MaxValue; code++)
            {
                // Code points without a map entry are not added.
                if (map.TryGetValue(code, out byte value))
                {
                    interleaved.Add((char)code, value);
                }
            }

            using (var stream = File.Create(@"f:\_tests\Del\WordBreak2.bin"))
            using (var writer = new BinaryWriter(stream))
            {
                interleaved.SaveByte(writer);
            }
            #endif
        }
예제 #5
0
        /// <summary>
        /// Builds the per-block entry tables stored in <c>_allBlocks</c> from the blocks supplied by
        /// <paramref name="loader"/>, leaving out the surrogate and private-use blocks.
        /// </summary>
        /// <remarks>
        /// Every code point contributes an entry holding its own string form. When its
        /// <c>DecomposingLength</c> is positive and the decomposition requested with
        /// <c>cascade = true</c> yields more than one code point, that text is added with the value 2.
        /// The decomposition requested with <c>cascade = false</c> is added with the value 1 when it
        /// exists and differs from the first one.
        /// </remarks>
        /// <param name="loader">Source of the blocks and code points; also kept in <c>_loader</c>.</param>
        public TableBuilder(UcdLoader loader)
        {
            _loader = loader;

            var entries = new List<CodeEntry>();
            var result  = new List<BlockEntry>();
            var parts   = new List<int>();       // scratch buffer reused by Decompose
            var text    = new StringBuilder();   // scratch buffer reused by Decompose

            // Returns the decomposition of codePoint as text, or null when it has at most one
            // code point or when any of its code points resolves to an entry with code value 0.
            string Decompose(int codePoint, bool cascade)
            {
                parts.Clear();
                loader.AddDecomposing(codePoint, parts, cascade);

                // Only multi-code-point decompositions are accepted.
                if (parts.Count > 1)
                {
                    text.Clear();
                    foreach (int part in parts)
                    {
                        var partEntry = loader[part];
                        if (partEntry.CodeValue == 0)
                        {
                            return null;
                        }
                        partEntry.AppendCharTo(text);
                    }
                    return text.ToString();
                }

                return null;
            }

            foreach (UcdBlock block in loader.Blocks)
            {
                // Surrogate and private-use blocks are not processed.
                switch (block.Block)
                {
                case Block.HighSurrogates:
                case Block.HighPrivateUseSurrogates:
                case Block.LowSurrogates:
                case Block.PrivateUseArea:
                case Block.SupplementaryPrivateUseAreaA:
                case Block.SupplementaryPrivateUseAreaB:
                    continue;
                }

                entries.Clear();
                foreach (UnicodeEntry entry in loader.GetCodePoints(block))
                {
                    entries.Add(new CodeEntry(entry.CodeValue, entry.ToString()));

                    if (entry.DecomposingLength <= 0)
                    {
                        continue;
                    }

                    string cascaded = Decompose(entry.CodeValue, true);
                    if (cascaded == null)
                    {
                        continue;
                    }
                    entries.Add(new CodeEntry(entry.CodeValue, cascaded, 2));

                    // The second form is recorded only when it differs from the first.
                    string direct = Decompose(entry.CodeValue, false);
                    if (direct != null && direct != cascaded)
                    {
                        entries.Add(new CodeEntry(entry.CodeValue, direct, 1));
                    }
                }

                result.Add(new BlockEntry { Block = block, Entries = entries.ToArray() });
            }

            _allBlocks = result.ToArray();
        }