// Checks that a range constructed from one code point exposes that code point as both
// its first and last bound, and reports itself as a single code point.
public void SingleCodePointRangeShouldHaveExpectedResults(int codePoint)
{
    var singleRange = new UnicodeCodePointRange(codePoint);

    var actualFirst = singleRange.FirstCodePoint;
    var actualLast = singleRange.LastCodePoint;
    var isSingle = singleRange.IsSingleCodePoint;

    Assert.Equal(codePoint, actualFirst);
    Assert.Equal(codePoint, actualLast);
    Assert.True(isSingle);
}
// Checks that a range constructed from two bounds exposes them unchanged and does not
// report itself as a single code point.
public void MultiCodePointRangeShouldHaveExpectedResults(int firstCodePoint, int lastCodePoint)
{
    var multiRange = new UnicodeCodePointRange(firstCodePoint, lastCodePoint);

    var actualFirst = multiRange.FirstCodePoint;
    var actualLast = multiRange.LastCodePoint;
    var isSingle = multiRange.IsSingleCodePoint;

    Assert.Equal(firstCodePoint, actualFirst);
    Assert.Equal(lastCodePoint, actualLast);
    Assert.False(isSingle);
}
/// <summary>
/// Reads the blocks data file (<c>BlocksFileName</c>) from <paramref name="ucdSource"/> and adds
/// one block entry to <paramref name="builder"/> for every data line.
/// </summary>
/// <param name="ucdSource">Data source from which the file is opened.</param>
/// <param name="builder">Builder that receives the parsed block entries.</param>
private static async Task ProcessBlocksFile(IDataSource ucdSource, UnicodeInfoBuilder builder)
{
    var blocksStream = await ucdSource.OpenDataFileAsync(BlocksFileName).ConfigureAwait(false);

    using (var reader = new UnicodeDataFileReader(blocksStream, ';'))
    {
        while (reader.MoveToNextLine())
        {
            // Field order matters: the code point range comes first, then the block name.
            var blockRange = UnicodeCodePointRange.Parse(reader.ReadField());
            var blockName = reader.ReadTrimmedField();

            builder.AddBlockEntry(new UnicodeBlock(blockRange, blockName));
        }
    }
}
/// <summary>
/// Reads the emoji data file (<c>EmojiDataFileName</c>) from <paramref name="emojiSource"/> and
/// applies each recognized emoji property to its code point range in <paramref name="builder"/>.
/// </summary>
/// <param name="emojiSource">Data source from which the file is opened.</param>
/// <param name="builder">Builder whose entries receive the emoji properties.</param>
private static async Task ProcessEmojiDataFile(IDataSource emojiSource, UnicodeInfoBuilder builder)
{
    var emojiStream = await emojiSource.OpenDataFileAsync(EmojiDataFileName).ConfigureAwait(false);

    using (var reader = new UnicodeDataFileReader(emojiStream, ';'))
    {
        while (reader.MoveToNextLine())
        {
            // The range is parsed before the property name is read, mirroring the field order of the file.
            var targetRange = UnicodeCodePointRange.Parse(reader.ReadTrimmedField());
            var propertyName = reader.ReadTrimmedField();

            // Lines whose property name does not map to an EmojiProperties value are skipped.
            if (!EnumHelper<EmojiProperties>.TryGetNamedValue(propertyName, out var property))
            {
                continue;
            }

            builder.SetProperties(property, targetRange);
        }
    }
}
/// <summary>
/// Adds <paramref name="property"/> to the emoji properties of every entry between the entries
/// located for the first and last code points of <paramref name="codePointRange"/>.
/// </summary>
/// <remarks>
/// When neither bound of the range is located (both lookups yield a negative index), a new entry
/// carrying only the emoji property is inserted for the whole range instead.
/// </remarks>
/// <param name="property">The emoji property flag(s) to set.</param>
/// <param name="codePointRange">The code point range the property applies to.</param>
/// <exception cref="InvalidOperationException">
/// Only one bound of the range was located, or the located entries extend beyond the requested range.
/// </exception>
public void SetProperties(EmojiProperties property, UnicodeCodePointRange codePointRange)
{
    int startIndex = FindUcdCodePoint(codePointRange.FirstCodePoint);
    int endIndex = FindUcdCodePoint(codePointRange.LastCodePoint);

    bool startFound = startIndex >= 0;
    bool endFound = endIndex >= 0;

    if (!startFound && !endFound)
    {
        var emojiOnlyEntry = new UnicodeCharacterDataBuilder(codePointRange) { EmojiProperties = property };
        Insert(emojiOnlyEntry);
        return;
    }

    // Both bounds must resolve to entries, and those entries must not stick out of the requested range.
    // The entries are only dereferenced once both indices are known to be valid.
    bool alignedWithEntries =
        startFound
        && endFound
        && ucdEntries[startIndex].CodePointRange.FirstCodePoint >= codePointRange.FirstCodePoint
        && ucdEntries[endIndex].CodePointRange.LastCodePoint <= codePointRange.LastCodePoint;

    if (!alignedWithEntries)
    {
        throw new InvalidOperationException("Unable to find code point for setting emoji property.");
    }

    // Walk the entries from startIndex up to and including endIndex.
    int index = startIndex;
    do
    {
        ucdEntries[index].EmojiProperties |= property;
    }
    while (index++ != endIndex);
}
/// <summary>
/// Reads the main character data file (<c>UnicodeDataFileName</c>) from <paramref name="ucdSource"/>
/// and inserts one <see cref="UnicodeCharacterDataBuilder"/> into <paramref name="builder"/> for each
/// code point, or for each code point range declared by a pair of "First"/"Last" lines.
/// </summary>
/// <param name="ucdSource">Data source from which the file is opened.</param>
/// <param name="builder">Builder that receives the parsed character data.</param>
/// <exception cref="InvalidDataException">
/// The file contains an unbalanced or interrupted "First"/"Last" range (including one left open at the
/// end of the file), an unknown name tag, an unrecognized Bidi_Class value, or inconsistent numeric fields.
/// </exception>
private static async Task ProcessUnicodeDataFile(IDataSource ucdSource, UnicodeInfoBuilder builder)
{
    using (var reader = new UnicodeDataFileReader(await ucdSource.OpenDataFileAsync(UnicodeDataFileName).ConfigureAwait(false), ';'))
    {
        // First code point of a range whose "<…, First>" line was read but whose "<…, Last>" line
        // has not been seen yet; -1 when no range is open.
        int rangeStartCodePoint = -1;

        while (reader.MoveToNextLine())
        {
            // The file is machine-readable data: parse with the invariant culture so that the result
            // never depends on the culture of the current thread.
            var codePoint = new UnicodeCodePointRange(int.Parse(reader.ReadField(), NumberStyles.HexNumber, CultureInfo.InvariantCulture));

            string name = reader.ReadField();

            if (!string.IsNullOrEmpty(name) && name[0] == '<' && name[name.Length - 1] == '>')
            {
                if (name.EndsWith(", First>", StringComparison.OrdinalIgnoreCase))
                {
                    if (rangeStartCodePoint >= 0)
                    {
                        throw new InvalidDataException("Invalid range data in UnicodeData.txt.");
                    }

                    // Remember the start of the range; the entry is emitted when the "Last" line is read.
                    rangeStartCodePoint = codePoint.FirstCodePoint;

                    continue;
                }
                else if (name.EndsWith(", Last>", StringComparison.OrdinalIgnoreCase))
                {
                    if (rangeStartCodePoint < 0)
                    {
                        throw new InvalidDataException("Invalid range data in UnicodeData.txt.");
                    }

                    codePoint = new UnicodeCodePointRange(rangeStartCodePoint, codePoint.LastCodePoint);

                    // Strip the leading '<' and the trailing ", Last>" (8 characters in total).
                    // Upper-case the name in order to respect the Unicode naming scheme. (Spec says all names are uppercase ASCII.)
                    name = name.Substring(1, name.Length - 8).ToUpperInvariant();

                    rangeStartCodePoint = -1;
                }
                else if (name == "<control>")
                {
                    // A range that has been opened must be closed by the very next line; a control
                    // character in between is as invalid as any other line.
                    if (rangeStartCodePoint >= 0)
                    {
                        throw new InvalidDataException("Invalid range data in UnicodeData.txt.");
                    }

                    // Ignore the name of the property for these code points, as it should really be empty by the spec.
                    // For control characters, we can derive a character label of the form <control-NNNN>, which is not the character name.
                    name = null;
                }
                else
                {
                    throw new InvalidDataException("Unexpected code point name tag: " + name + ".");
                }
            }
            else if (rangeStartCodePoint >= 0)
            {
                throw new InvalidDataException("Invalid range data in UnicodeData.txt.");
            }

            // NB: Fields 10 and 11 are deemed obsolete. Field 11 should always be empty, and will be ignored here.
            var characterData = new UnicodeCharacterDataBuilder(codePoint)
            {
                Name = NullIfEmpty(name),
                Category = UnicodeCategoryInfo.FromShortName(reader.ReadField()).Category,
                CanonicalCombiningClass = (CanonicalCombiningClass)byte.Parse(reader.ReadField(), CultureInfo.InvariantCulture),
            };

            if (EnumHelper<BidirectionalClass>.TryGetNamedValue(reader.ReadField(), out var bidirectionalClass))
            {
                characterData.BidirectionalClass = bidirectionalClass;
            }
            else
            {
                throw new InvalidDataException(string.Format(CultureInfo.InvariantCulture, "Missing Bidi_Class property for code point(s) {0}.", codePoint));
            }

            characterData.CharacterDecompositionMapping = CharacterDecompositionMapping.Parse(NullIfEmpty(reader.ReadField()));

            // Fields 6, 7 and 8 are read now (to stay in field order) and interpreted further below.
            string numericDecimalField = NullIfEmpty(reader.ReadField());
            string numericDigitField = NullIfEmpty(reader.ReadField());
            string numericNumericField = NullIfEmpty(reader.ReadField());

            characterData.BidirectionalMirrored = reader.ReadField() == "Y";
            characterData.OldName = NullIfEmpty(reader.ReadField());
            reader.SkipField(); // Field 11 is ignored.
            characterData.SimpleUpperCaseMapping = ParseSimpleCaseMapping(reader.ReadField());
            characterData.SimpleLowerCaseMapping = ParseSimpleCaseMapping(reader.ReadField());
            characterData.SimpleTitleCaseMapping = ParseSimpleCaseMapping(reader.ReadField());

            // Handle Numeric_Type & Numeric_Value:
            // If field 6 is set, fields 7 and 8 should have the same value, and Numeric_Type is Decimal.
            // If field 6 is not set but field 7 is set, field 8 should be set and have the same value. Then, the type is Digit.
            // If field 6 and 7 are not set, but field 8 is set, then Numeric_Type is Numeric.
            if (numericNumericField != null)
            {
                characterData.NumericValue = UnicodeRationalNumber.Parse(numericNumericField);

                if (numericDigitField != null)
                {
                    if (numericDigitField != numericNumericField)
                    {
                        throw new InvalidDataException("Invalid value for field 7 of code point " + characterData.CodePointRange.ToString() + ".");
                    }

                    if (numericDecimalField != null)
                    {
                        if (numericDecimalField != numericDigitField)
                        {
                            throw new InvalidDataException("Invalid value for field 6 of code point " + characterData.CodePointRange.ToString() + ".");
                        }

                        characterData.NumericType = UnicodeNumericType.Decimal;
                    }
                    else
                    {
                        characterData.NumericType = UnicodeNumericType.Digit;
                    }
                }
                else
                {
                    characterData.NumericType = UnicodeNumericType.Numeric;
                }
            }

            builder.Insert(characterData);
        }

        // A "<…, First>" line that is never followed by its "<…, Last>" line would otherwise be
        // dropped silently, losing the whole range.
        if (rangeStartCodePoint >= 0)
        {
            throw new InvalidDataException("Invalid range data in UnicodeData.txt.");
        }
    }
}