/// <summary>
/// Splits a private-use style tag (one whose first subtag is "x" or "X") into language, script,
/// region and variant components and hands them to the four-argument overload of this method.
/// </summary>
/// <remarks>
/// Tokens are classified in order: the leading "x" plus the token right after it (unless that token
/// is a script code) form the language; later tokens are recognised as script, region or registered
/// variant via <c>StandardSubtags</c>; everything else is collected as private use.
/// </remarks>
/// <param name="flexConformPrivateUseRfc5646Tag">The dash-separated tag to convert.</param>
/// <exception cref="ArgumentNullException">The tag is null.</exception>
/// <exception cref="ValidationException">
/// The tag does not start with "x"/"X", or a script or region subtag appears after a subtag that
/// should follow it.
/// </exception>
public void ConvertToPalasoConformPrivateUseRfc5646Tag(string flexConformPrivateUseRfc5646Tag)
{
	if (flexConformPrivateUseRfc5646Tag == null)
	{
		throw new ArgumentNullException("flexConformPrivateUseRfc5646Tag");
	}

	string language = String.Empty;
	string script = String.Empty;
	string region = String.Empty;
	string variant = String.Empty;
	string privateUse = String.Empty;

	var tokens = flexConformPrivateUseRfc5646Tag.Split(new[] { '-' });
	for (int position = 0; position < tokens.Length; ++position)
	{
		string currentToken = tokens[position];
		if (position == 0)
		{
			if (!currentToken.Equals("x", StringComparison.OrdinalIgnoreCase))
			{
				throw new ValidationException(String.Format("The rfctag {0} does not start with 'x-' or 'X-'.", flexConformPrivateUseRfc5646Tag));
			}
			language = currentToken;
		}
		else if (position == 1 && !StandardSubtags.IsValidIso15924ScriptCode(currentToken))
		{
			// The token directly after the "x" is treated as part of the language ("x-xyz").
			language = language + '-' + currentToken;
		}
		else if (StandardSubtags.IsValidIso15924ScriptCode(currentToken))
		{
			if (!String.IsNullOrEmpty(region) || !String.IsNullOrEmpty(variant))
			{
				throw new ValidationException(
					String.Format(
						"The rfctag '{0}' contains a misplaced Script subtag (i.e. it was preceded by a region or variant subtag.",
						flexConformPrivateUseRfc5646Tag));
			}
			script = currentToken;
		}
		else if (StandardSubtags.IsValidIso3166RegionCode(currentToken))
		{
			if (!String.IsNullOrEmpty(variant))
			{
				throw new ValidationException(
					String.Format(
						"The rfctag '{0}' contains a misplaced Region subtag (i.e. it was preceded by a variant subtag.",
						flexConformPrivateUseRfc5646Tag));
			}
			region = currentToken;
		}
		else if (StandardSubtags.IsValidRegisteredVariantCode(currentToken))
		{
			// Several registered variants must stay dash-separated; gluing them together
			// ("fonipa" + "1901" => "fonipa1901") would produce a single bogus subtag.
			variant = String.IsNullOrEmpty(variant) ? currentToken : variant + '-' + currentToken;
		}
		else
		{
			privateUse = String.IsNullOrEmpty(privateUse) ? currentToken : privateUse + '-' + currentToken;
		}
	}

	variant = IetfLanguageTag.ConcatenateVariantAndPrivateUse(variant, privateUse);
	ConvertToPalasoConformPrivateUseRfc5646Tag(language, script, region, variant);
}
///<summary>
/// Factory method that parses a valid RFC5646 tag given as a string.
///</summary>
///<remarks>
/// The input is split on '-'. The first token is the language unless it is the private-use
/// singleton "x" (matched case-insensitively, since language tags are not case sensitive).
/// Every token after an "x" is added to private use. Before the "x", a script code is only
/// accepted at position 1, a region code at position 1 or 2, and registered variants anywhere.
///</remarks>
///<param name="inputString">valid RFC5646 string</param>
///<returns>RFC5646Tag object</returns>
///<exception cref="ArgumentNullException"><paramref name="inputString"/> is null.</exception>
///<exception cref="ValidationException">A token could not be classified as any subtag.</exception>
public static Rfc5646Tag Parse(string inputString)
{
	if (inputString == null)
	{
		throw new ArgumentNullException("inputString");
	}

	var tokens = inputString.Split(new[] { '-' });
	var rfc5646Tag = new Rfc5646Tag();
	bool haveX = false;
	for (int position = 0; position < tokens.Length; ++position)
	{
		var token = tokens[position];
		// Accept "X" as well as "x" for the private-use singleton.
		if (token.Equals("x", StringComparison.OrdinalIgnoreCase))
		{
			haveX = true;
			continue;
		}
		if (haveX)
		{
			//This is the case for RfcTags consisting only of a private use subtag
			if (position == 1)
			{
				rfc5646Tag = new Rfc5646Tag(String.Empty, String.Empty, String.Empty, String.Empty, token);
				continue;
			}
			rfc5646Tag.AddToPrivateUse(token);
			continue;
		}
		if (position == 0)
		{
			rfc5646Tag.Language = token;
			continue;
		}
		if (position <= 1 && StandardSubtags.IsValidIso15924ScriptCode(token))
		{
			rfc5646Tag.Script = token;
			continue;
		}
		if (position <= 2 && StandardSubtags.IsValidIso3166RegionCode(token))
		{
			rfc5646Tag.Region = token;
			continue;
		}
		if (StandardSubtags.IsValidRegisteredVariantCode(token))
		{
			rfc5646Tag.AddToVariant(token);
			continue;
		}
		throw new ValidationException(String.Format("The RFC tag '{0}' could not be parsed.", inputString));
	}
	return rfc5646Tag;
}
/// <summary>
/// Validates the region subtag stored in <c>_region</c>. A null or empty region is accepted.
/// </summary>
/// <exception cref="ValidationException">
/// The region contains a dash or underscore, or is not recognised by
/// <c>StandardSubtags.IsValidIso3166RegionCode</c>.
/// </exception>
private void ValidateRegion()
{
	if (String.IsNullOrEmpty(_region))
	{
		return;
	}
	// The message promises that both separators are rejected, so test for both of them.
	if (_region.IndexOfAny(new[] { '-', '_' }) >= 0)
	{
		throw new ValidationException("The region tag may not contain dashes or underscores. I.e. there may only be a single ISO 3166 tag in this subtag");
	}
	if (!StandardSubtags.IsValidIso3166RegionCode(_region))
	{
		throw new ValidationException(String.Format("'{0}' is not a valid ISO-3166 region code.", _region));
	}
}
/// <summary>
/// Cleans the tag.
/// </summary>
/// <remarks>
/// Works in place on the language, script, region, variant and private-use subtags. In order it:
/// migrates a legacy ISO3 language code, moves position-sensitive first parts to private use,
/// repairs the audio (Zxxx) marker, applies a few hard-coded legacy language fixes, redistributes
/// parts found in the wrong subtag, pushes everything unrecognised into private use, normalises
/// the private-use parts, adds "qaa" when a language is required, and finally adds "fonipa"
/// when private use carries "etic"/"emic" without any variant. The order of the steps matters.
/// </remarks>
public void Clean()
{
	// Migrate legacy ISO3 language codes to IANA 2 letter language codes, if there's a match.
	// Do this before we look for valid codes, otherwise the 3-letter ones come up as invalid and
	// get moved to private use. However, only do this to languages not identified as private-use.
	if (!Language.StartsWith("x-", StringComparison.OrdinalIgnoreCase))
	{
		string migrateFrom = "";
		string migrateTo = "";
		foreach (string part in _languageSubTag.AllParts)
		{
			if (part.Equals("x", StringComparison.OrdinalIgnoreCase))
			{
				break; // don't migrate language code parts already explicitly marked private-use.
			}
			// Only the first part that has a differing (shorter) code is remembered for migration.
			if (string.IsNullOrEmpty(migrateFrom))
			{
				LanguageSubtag language;
				if (StandardSubtags.TryGetLanguageFromIso3Code(part, out language) && language.Code != language.Iso3Code)
				{
					migrateFrom = part;
					migrateTo = language.Code;
				}
			}
		}
		if (!String.IsNullOrEmpty(migrateFrom))
		{
			_languageSubTag.RemoveParts(migrateFrom);
			_languageSubTag.AddToSubtag(migrateTo);
		}
	}

	// The very next thing, before anything else gets moved to private use, is to move the parts whose position we
	// care about to the appropriate position in the private use section.
	// In the process we may remove anything non-alphanumeric, since otherwise we may move a marker that later
	// disappears (pathologically).
	MoveFirstPartToPrivateUseIfNecessary(_languageSubTag, StandardSubtags.IsValidIso639LanguageCode, "qaa", true);
	MoveFirstPartToPrivateUseIfNecessary(_scriptSubTag, StandardSubtags.IsValidIso15924ScriptCode, "Qaaa", false);
	MoveFirstPartToPrivateUseIfNecessary(_regionSubTag, StandardSubtags.IsValidIso3166RegionCode, "QM", false);

	// This fixes a bug where the LdmlAdaptorV1 was writing out Zxxx as part of the variant to mark an audio writing system
	if (_variantSubTag.Contains(WellKnownSubtags.AudioScript))
	{
		MoveTagsMatching(_variantSubTag, _scriptSubTag, tag => tag.Equals(WellKnownSubtags.AudioScript));
		_privateUseSubTag.AddToSubtag(WellKnownSubtags.AudioPrivateUse);
	}

	// Fixes various legacy problems.
	if (Language.Equals("cmn", StringComparison.OrdinalIgnoreCase))
	{
		Language = "zh";
	}
	if (Language.Equals("pes", StringComparison.OrdinalIgnoreCase))
	{
		Language = "fa";
	}
	if (Language.Equals("arb", StringComparison.OrdinalIgnoreCase))
	{
		Language = "ar";
	}
	// A bare "zh" (including one just produced from "cmn" above) gets the region CN.
	if (Language.Equals("zh", StringComparison.OrdinalIgnoreCase) && String.IsNullOrEmpty(Region))
	{
		Region = "CN";
	}

	// If the language tag contains an x- , then move the string behind the x- to private use
	MovePartsToPrivateUseIfNecessary(_languageSubTag);

	// Move script, region, and variant present in the language tag to their proper subtag.
	MoveTagsMatching(_languageSubTag, _scriptSubTag, StandardSubtags.IsValidIso15924ScriptCode, StandardSubtags.IsValidIso639LanguageCode);
	MoveTagsMatching(_languageSubTag, _regionSubTag, StandardSubtags.IsValidIso3166RegionCode, StandardSubtags.IsValidIso639LanguageCode);
	MoveTagsMatching(_languageSubTag, _variantSubTag, StandardSubtags.IsValidRegisteredVariantCode, StandardSubtags.IsValidIso639LanguageCode);

	// Move all other tags that don't belong to the private use subtag.

	// keep track of everything that we moved
	var tempSubTag = new SubTag();
	MoveTagsMatching(_languageSubTag, tempSubTag, tag => !StandardSubtags.IsValidIso639LanguageCode(tag));
	// place all the moved parts in private use.
	foreach (var part in tempSubTag.AllParts)
	{
		_privateUseSubTag.AddToSubtag(part);
		// if it looks like we moved a custom script set the subtag to mark that we've moved it
		// NOTE(review): EndsWith is called without a StringComparison, so the match is
		// culture-sensitive and case-sensitive - confirm that is intended.
		if (_scriptSubTag.IsEmpty && part.Length == 4 && //potential custom script tag
			!WellKnownSubtags.IpaPhonemicPrivateUse.EndsWith(part) &&
			!WellKnownSubtags.IpaPhoneticPrivateUse.EndsWith(part))
		{
			_scriptSubTag = new SubTag("Qaaa");
		}
	}

	// Anything in script/region/variant that is not a valid code for that subtag goes to private use.
	MoveTagsMatching(_scriptSubTag, _privateUseSubTag, tag => !StandardSubtags.IsValidIso15924ScriptCode(tag));
	MoveTagsMatching(_regionSubTag, _privateUseSubTag, tag => !StandardSubtags.IsValidIso3166RegionCode(tag));
	MoveTagsMatching(_variantSubTag, _privateUseSubTag, tag => !StandardSubtags.IsValidRegisteredVariantCode(tag));

	// Language, script and region keep only their first part; the rest goes to private use.
	_languageSubTag.KeepFirstAndMoveRemainderTo(_privateUseSubTag);
	_scriptSubTag.KeepFirstAndMoveRemainderTo(_privateUseSubTag);
	_regionSubTag.KeepFirstAndMoveRemainderTo(_privateUseSubTag);

	if (_privateUseSubTag.Contains(WellKnownSubtags.AudioPrivateUse))
	{
		// Move every tag that's not a Zxxx to private use
		// NOTE(review): the predicate actually selects script parts that private use does not
		// already contain, rather than testing for Zxxx directly - confirm this is equivalent here.
		if (!_scriptSubTag.IsEmpty && !_scriptSubTag.Contains(WellKnownSubtags.AudioScript))
		{
			MoveTagsMatching(_scriptSubTag, _privateUseSubTag, tag => !_privateUseSubTag.Contains(tag));
		}
		// If we don't have a Zxxx already, set it. This protects tags already present, but with unusual case
		if (!_scriptSubTag.Contains(WellKnownSubtags.AudioScript))
		{
			_scriptSubTag = new SubTag(WellKnownSubtags.AudioScript);
		}
	}

	// These two methods may produce duplicates that will subsequently be removed. Do we care? - TA 29/3/2011
	_privateUseSubTag.RemoveNonAlphaNumericCharacters();
	_privateUseSubTag.TruncatePartsToNumCharacters(8);

	_variantSubTag.RemoveDuplicates();
	_privateUseSubTag.RemoveDuplicates();

	// Any 'x' in the other tags will have arrived in the privateUse tag, so remove them.
	_privateUseSubTag.RemoveParts("x");

	// if language is empty, we need to add qaa, unless only a privateUse is present (e.g. x-blah is a valid rfc5646 tag)
	// i.e. add it when some of script/region/variant is set, or when every subtag is empty.
	if ((_languageSubTag.IsEmpty && (!_scriptSubTag.IsEmpty || !_regionSubTag.IsEmpty || !_variantSubTag.IsEmpty)) ||
		(_languageSubTag.IsEmpty && _scriptSubTag.IsEmpty && _regionSubTag.IsEmpty && _variantSubTag.IsEmpty && _privateUseSubTag.IsEmpty))
	{
		_languageSubTag.AddToSubtag("qaa");
	}

	// Two more legacy problems. We don't allow -etic or -emic without fonipa, so insert if needed.
	// If it has some other standard variant we won't be able to fix it...not sure what the right answer would be.
	// At least we catch the more common case.
	foreach (string part in _privateUseSubTag.AllParts)
	{
		if (string.IsNullOrEmpty(Variant) &&
			(part.Equals("etic", StringComparison.OrdinalIgnoreCase) || part.Equals("emic", StringComparison.OrdinalIgnoreCase)))
		{
			Variant = "fonipa";
		}
	}
}
/// <summary>
/// Checks that "fonipa" is not accepted as an ISO 3166 region code.
/// </summary>
public void IsValidIso3166Region_fonipa_ReturnsFalse()
{
	bool accepted = StandardSubtags.IsValidIso3166RegionCode("fonipa");

	Assert.That(accepted, Is.False);
}
/// <summary>
/// Checks that the region code "QM" is accepted in both upper and lower case.
/// </summary>
public void IsValidIso3166Region_QM_ReturnsTrue()
{
	// Same two checks, in the same order: upper case first, then lower case.
	foreach (string code in new[] { "QM", "qm" })
	{
		Assert.That(StandardSubtags.IsValidIso3166RegionCode(code), Is.True);
	}
}
/// <summary>
/// Checks that "US" is accepted as an ISO 3166 region code.
/// </summary>
public void IsValidIso3166Region_US_ReturnsTrue()
{
	bool accepted = StandardSubtags.IsValidIso3166RegionCode("US");

	Assert.That(accepted, Is.True);
}