/// <summary>
/// Captures every code point supplied by <paramref name="loader"/>, except code value 0
/// and the surrogate range 0xD800..0xDFFF, into two parallel arrays: the string form of
/// each entry (<c>_allStrings</c>) and its numeric code value (<c>_allPoints</c>).
/// </summary>
/// <param name="loader">Source of the code points; also kept in <c>_loader</c>.</param>
public TestCompEq(UcdLoader loader) {
    _loader = loader;

    var texts = new List<string>();
    var values = new List<int>();

    foreach (var point in loader.GetCodePoints()) {
        int value = point.CodeValue;

        // Keep everything that is neither zero nor inside the surrogate range.
        bool outsideSurrogates = value < 0xD800 || value > 0xDFFF;
        if (value != 0 && outsideSurrogates) {
            texts.Add(point.ToString());
            values.Add(value);
        }
    }

    _allStrings = texts.ToArray();
    _allPoints = values.ToArray();
}
/// <summary>
/// Walks every block exposed by <paramref name="loader"/>, skipping the surrogate and
/// private-use blocks, and calls <c>GetDecomposed</c> for each letter entry that reports a
/// non-zero decomposing length.
/// </summary>
/// <remarks>
/// NOTE(review): the per-entry <c>CodeEntry</c> value and the result of
/// <c>GetDecomposed</c> are never stored, and <c>_list</c> is only cleared here —
/// this constructor looks unfinished; confirm the intended behavior.
/// </remarks>
/// <param name="loader">Source of blocks and code points; also kept in <c>_loader</c>.</param>
public ScannerMapBuilder(UcdLoader loader) {
    _loader = loader;

    foreach (UcdBlock block in loader.Blocks) {
        var kind = block.Block;
        bool excluded =
            kind == Block.HighSurrogates ||
            kind == Block.HighPrivateUseSurrogates ||
            kind == Block.LowSurrogates ||
            kind == Block.PrivateUseArea ||
            kind == Block.SupplementaryPrivateUseAreaA ||
            kind == Block.SupplementaryPrivateUseAreaB;
        if (excluded) {
            continue;
        }

        _list.Clear();

        foreach (UnicodeEntry entry in loader.GetCodePoints(block)) {
            // Currently write-only: nothing reads this value afterwards.
            CodeEntry candidate;
            candidate.CodePoint = entry.CodeValue;

            // Only the five letter categories are processed further.
            var category = entry.Category;
            bool isLetter =
                category == UnicodeCharacterType.LetterUppercase ||
                category == UnicodeCharacterType.LetterLowercase ||
                category == UnicodeCharacterType.LetterTitlecase ||
                category == UnicodeCharacterType.LetterModifier ||
                category == UnicodeCharacterType.LetterOther;
            if (!isLetter) {
                continue;
            }

            if (entry.DecomposingLength > 0) {
                // The result is discarded; the call is kept in case it has side effects.
                int decomposed = GetDecomposed(entry.CodeValue, true);
            }
        }
    }
}
/// <summary>
/// Groups the code points supplied by <paramref name="loader"/> (code value 0 excluded)
/// into consecutive chunks of ten. Each chunk is stored twice: as the string built by
/// <c>AppendCharTo</c> (<c>_allStrings</c>) and as the array of its code values
/// (<c>_allPoints</c>). A final, shorter chunk is kept when it produced any text.
/// </summary>
/// <param name="loader">Source of the code points; also kept in <c>_loader</c>.</param>
public HashCodeTest(UcdLoader loader) {
    _loader = loader;

    const int chunkSize = 10;

    var chunkTexts = new List<string>();
    var chunkValues = new List<int[]>();
    var pendingText = new StringBuilder();
    var pendingValues = new List<int>();

    // Moves the pending chunk into the result lists and resets the scratch buffers.
    void Flush() {
        chunkTexts.Add(pendingText.ToString());
        chunkValues.Add(pendingValues.ToArray());
        pendingText.Clear();
        pendingValues.Clear();
    }

    foreach (var point in loader.GetCodePoints()) {
        if (point.CodeValue == 0) {
            continue;
        }

        point.AppendCharTo(pendingText);
        pendingValues.Add(point.CodeValue);

        // One value is added per accepted code point, so the count doubles as the chunk length.
        if (pendingValues.Count == chunkSize) {
            Flush();
        }
    }

    // Emit the trailing partial chunk, if it produced any characters.
    if (pendingText.Length > 0) {
        Flush();
    }

    _allStrings = chunkTexts.ToArray();
    _allPoints = chunkValues.ToArray();
}
/// <summary>
/// Builds a code-point-to-byte word-break map and writes it to disk.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Every code point covered by the ranges from <c>_loader.LoadWordBreak()</c> gets the
///    byte value of its range; when ranges overlap, the first one wins and "DF" is logged.
/// 2. Code points 160 and tab are added explicitly. <c>Dictionary.Add</c> throws if the
///    loaded ranges already contain either of them.
/// 3. Each entry returned by <c>_loader.GetCodePoints(0, ushort.MaxValue)</c> that is still
///    unmapped gets a value chosen from its Unicode category.
/// 4. With the <c>true1</c> symbol defined, a text dump is written; otherwise the map is
///    packed into an <c>InterleaveMap</c> and saved as a binary file. Both output paths
///    are hard-coded.
/// NOTE(review): the literal values 52 and 60 are not named anywhere in this method —
/// confirm what they stand for.
/// </remarks>
public void WriteBreakMap() {
    var classByCode = new Dictionary<int, byte>();

    // Step 1: seed from the loaded word-break ranges.
    EnumRange<WordBreak>[] ranges = _loader.LoadWordBreak();
    foreach (var range in ranges) {
        for (int code = range.Begin; code <= range.End; code++) {
            if (classByCode.ContainsKey(code)) {
                // An earlier range already claimed this code point; keep the earlier value.
                Debug.WriteLine("DF");
            } else {
                classByCode[code] = (byte)range.Value;
            }
        }
    }

    // Step 2: explicit entries.
    classByCode.Add(160, (byte)MoreWord.Connector); // Non Breaking space
    classByCode.Add('\t', (byte)WordBreak.WSegSpace); // Tab

    // Step 3: classify whatever is still unmapped by its Unicode category.
    foreach (var entry in _loader.GetCodePoints(0, ushort.MaxValue)) {
        if (classByCode.ContainsKey(entry.CodeValue)) {
            continue;
        }

        byte wordClass;
        switch (entry.Category) {
            case UnicodeCharacterType.OtherPrivateUse:
                wordClass = (byte)MoreWord.Private;
                break;

            case UnicodeCharacterType.OtherSurrogate:
                wordClass = (byte)MoreWord.Surrogate;
                break;

            case UnicodeCharacterType.OtherControl:
                wordClass = (byte)MoreWord.Control;
                break;

            case UnicodeCharacterType.LetterUppercase:
            case UnicodeCharacterType.LetterLowercase:
            case UnicodeCharacterType.LetterTitlecase:
                wordClass = 52;
                break;

            case UnicodeCharacterType.LetterOther:
                wordClass = (byte)MoreWord.Ideograph;
                break;

            case UnicodeCharacterType.NumberOther:
            case UnicodeCharacterType.LetterModifier:
            case UnicodeCharacterType.NumberDecimalDigit:
            case UnicodeCharacterType.MarkEnclosing:
            case UnicodeCharacterType.NumberLetter:
            case UnicodeCharacterType.SymbolMath:
            case UnicodeCharacterType.SymbolCurrency:
            case UnicodeCharacterType.SymbolModifier:
            case UnicodeCharacterType.SymbolOther:
                wordClass = (byte)MoreWord.Symbol;
                break;

            case UnicodeCharacterType.PunctuationConnector:
            case UnicodeCharacterType.PunctuationDash:
            case UnicodeCharacterType.PunctuationOpen:
            case UnicodeCharacterType.PunctuationClose:
            case UnicodeCharacterType.PunctuationInitialQuote:
            case UnicodeCharacterType.PunctuationFinalQuote:
            case UnicodeCharacterType.PunctuationOther:
                wordClass = (byte)MoreWord.Punctuation;
                break;

            case UnicodeCharacterType.OtherFormat:
            case UnicodeCharacterType.SeparatorSpace:
                wordClass = (byte)MoreWord.WhiteSpace;
                break;

            case UnicodeCharacterType.MarkSpacingCombining:
            case UnicodeCharacterType.SeparatorLine:
            case UnicodeCharacterType.SeparatorParagraph:
                wordClass = (byte)MoreWord.LineSeparator;
                break;

            default:
                wordClass = 60;
                break;
        }

        classByCode.Add(entry.CodeValue, wordClass);
    }

    // Step 4: write the result.
#if true1
    using (var writer = File.CreateText(@"f:\_tests\Del\WordBreak.txt")) {
        for (int code = 0; code <= ushort.MaxValue; code++) {
            string text = $"{code:X4} ";
            if (_loader.TryGetEntry(code, out UnicodeEntry entry)) {
                if (IsPrintable(entry.Category)) {
                    text += entry + " ";
                }
                text += entry.Name + " ";
            }
            // Unmapped code points print the default byte value (0).
            classByCode.TryGetValue(code, out var value);
            writer.WriteLine(text + value);
        }
    }
#else
    InterleaveMap packed = new InterleaveMap();
    for (int code = 0; code <= ushort.MaxValue; code++) {
        if (classByCode.TryGetValue(code, out byte value)) {
            packed.Add((char)code, value);
        }
    }

    using (var stream = File.Create(@"f:\_tests\Del\WordBreak2.bin"))
    using (var binary = new BinaryWriter(stream)) {
        packed.SaveByte(binary);
    }
#endif
}
/// <summary>
/// Builds the per-block entry tables. For every block exposed by <paramref name="loader"/>
/// (surrogate and private-use blocks are skipped) each code point contributes an entry with
/// its own string form, plus up to two entries holding its multi-code-point decompositions.
/// The result is stored in <c>_allBlocks</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the third <c>CodeEntry</c> constructor argument (2 for the
/// <c>cascade: true</c> decomposition, 1 for the <c>cascade: false</c> one) is not
/// explained in this method — confirm its meaning against <c>CodeEntry</c>.
/// </remarks>
/// <param name="loader">Source of blocks, code points and decompositions; also kept in <c>_loader</c>.</param>
public TableBuilder(UcdLoader loader) {
    _loader = loader;

    var entries = new List<CodeEntry>();
    var tables = new List<BlockEntry>();

    // Scratch buffers reused by every Decompose call.
    var parts = new List<int>();
    var text = new StringBuilder();

    // Returns the decomposition of codePoint as a string, or null when it has at most one
    // code point or when any part resolves to an entry whose code value is 0.
    string Decompose(int codePoint, bool cascade) {
        parts.Clear();
        loader.AddDecomposing(codePoint, parts, cascade);
        if (parts.Count <= 1) {
            // Only decompositions made of several code points are of interest.
            return null;
        }

        text.Clear();
        foreach (int part in parts) {
            var partEntry = loader[part];
            if (partEntry.CodeValue == 0) {
                return null;
            }
            partEntry.AppendCharTo(text);
        }
        return text.ToString();
    }

    foreach (UcdBlock block in loader.Blocks) {
        var kind = block.Block;
        bool excluded =
            kind == Block.HighSurrogates ||
            kind == Block.HighPrivateUseSurrogates ||
            kind == Block.LowSurrogates ||
            kind == Block.PrivateUseArea ||
            kind == Block.SupplementaryPrivateUseAreaA ||
            kind == Block.SupplementaryPrivateUseAreaB;
        if (excluded) {
            continue;
        }

        entries.Clear();
        foreach (UnicodeEntry entry in loader.GetCodePoints(block)) {
            entries.Add(new CodeEntry(entry.CodeValue, entry.ToString()));

            if (entry.DecomposingLength <= 0) {
                continue;
            }

            string cascaded = Decompose(entry.CodeValue, true);
            if (cascaded == null) {
                continue;
            }
            entries.Add(new CodeEntry(entry.CodeValue, cascaded, 2));

            // Add the non-cascaded form only when it exists and differs from the cascaded one.
            string direct = Decompose(entry.CodeValue, false);
            if (direct != null && direct != cascaded) {
                entries.Add(new CodeEntry(entry.CodeValue, direct, 1));
            }
        }

        tables.Add(new BlockEntry() { Block = block, Entries = entries.ToArray() });
    }

    _allBlocks = tables.ToArray();
}