/// <summary>
        /// Splits a FLEx-style private-use RFC 5646 tag (one whose first subtag is "x") into language,
        /// script, region and variant components and hands them to the four-argument overload of
        /// this method.
        /// </summary>
        /// <param name="flexConformPrivateUseRfc5646Tag">Dash-separated tag; the first subtag must be "x" or "X".</param>
        /// <exception cref="ArgumentNullException">Thrown when the tag is null.</exception>
        /// <exception cref="ValidationException">
        /// Thrown when the tag does not start with "x", when a script subtag follows a region or variant
        /// subtag, or when a region subtag follows a variant subtag.
        /// </exception>
        public void ConvertToPalasoConformPrivateUseRfc5646Tag(string flexConformPrivateUseRfc5646Tag)
        {
            if (flexConformPrivateUseRfc5646Tag == null)
            {
                throw new ArgumentNullException("flexConformPrivateUseRfc5646Tag");
            }

            string language   = String.Empty;
            string script     = String.Empty;
            string region     = String.Empty;
            string variant    = String.Empty;
            string privateUse = String.Empty;

            var tokens = flexConformPrivateUseRfc5646Tag.Split(new[] { '-' });

            for (int position = 0; position < tokens.Length; ++position)
            {
                string currentToken = tokens[position];
                if (position == 0)
                {
                    // The leading "x" marker is kept as the start of the language component.
                    if (!currentToken.Equals("x", StringComparison.OrdinalIgnoreCase))
                    {
                        throw new ValidationException(String.Format("The rfctag {0} does not start with 'x-' or 'X-'.",
                                                                    flexConformPrivateUseRfc5646Tag));
                    }
                    language = currentToken;
                }
                else if (position == 1 && !StandardSubtags.IsValidIso15924ScriptCode(currentToken))
                {
                    // The second subtag is treated as the private-use language code ("x-<lang>")
                    // unless it is itself a script code, in which case the branch below handles it.
                    language = language + '-' + currentToken;
                }
                else if (StandardSubtags.IsValidIso15924ScriptCode(currentToken))
                {
                    if (!String.IsNullOrEmpty(region) || !String.IsNullOrEmpty(variant))
                    {
                        throw new ValidationException(
                                  String.Format(
                                      "The rfctag '{0}' contains a misplaced Script subtag (i.e. it was preceded by a region or variant subtag.",
                                      flexConformPrivateUseRfc5646Tag));
                    }
                    script = currentToken;
                }
                else if (StandardSubtags.IsValidIso3166RegionCode(currentToken))
                {
                    if (!String.IsNullOrEmpty(variant))
                    {
                        throw new ValidationException(
                                  String.Format(
                                      "The rfctag '{0}' contains a misplaced Region subtag (i.e. it was preceded by a variant subtag.",
                                      flexConformPrivateUseRfc5646Tag));
                    }
                    region = currentToken;
                }
                else if (StandardSubtags.IsValidRegisteredVariantCode(currentToken))
                {
                    // Separate successive variant subtags with '-', the same way private-use parts
                    // are joined below; plain concatenation would fuse them into one bogus subtag.
                    variant = String.IsNullOrEmpty(variant) ? currentToken : variant + '-' + currentToken;
                }
                else
                {
                    // Anything not recognised as script, region or registered variant is private use.
                    privateUse = String.IsNullOrEmpty(privateUse) ? currentToken : privateUse + '-' + currentToken;
                }
            }
            // Merge the registered variants and the private-use parts into the single variant argument
            // expected by the four-argument overload.
            variant = IetfLanguageTag.ConcatenateVariantAndPrivateUse(variant, privateUse);
            ConvertToPalasoConformPrivateUseRfc5646Tag(language, script, region, variant);
        }
Esempio n. 2
0
        ///<summary>Constructor method to parse a valid RFC5646 tag as a string.
        ///The first subtag is taken as the language; a following script code, region code and any
        ///registered variant codes are assigned to their respective parts; everything after an
        ///"x" singleton (matched case-insensitively) is collected as private use.
        ///</summary>
        ///<param name="inputString">valid RFC5646 string</param>
        ///<returns>RFC5646Tag object</returns>
        ///<exception cref="ArgumentNullException">Thrown when <paramref name="inputString"/> is null.</exception>
        ///<exception cref="ValidationException">Thrown when a subtag before the private-use marker is not a
        ///recognised script, region or registered variant code in an accepted position.</exception>
        public static Rfc5646Tag Parse(string inputString)
        {
            if (inputString == null)
            {
                throw new ArgumentNullException("inputString");
            }

            var tokens = inputString.Split(new[] { '-' });

            var rfc5646Tag = new Rfc5646Tag();

            bool haveX = false;

            for (int position = 0; position < tokens.Length; ++position)
            {
                var token = tokens[position];
                // Language tags are case-insensitive, so accept both "x" and "X" as the
                // private-use marker (consistent with the other tag-handling code here).
                if (token.Equals("x", StringComparison.OrdinalIgnoreCase))
                {
                    haveX = true;
                    continue;
                }
                if (haveX)
                {
                    //This is the case for RfcTags consisting only of a private use subtag
                    if (position == 1)
                    {
                        rfc5646Tag = new Rfc5646Tag(String.Empty, String.Empty, String.Empty, String.Empty, token);
                        continue;
                    }
                    rfc5646Tag.AddToPrivateUse(token);
                    continue;
                }
                if (position == 0)
                {
                    rfc5646Tag.Language = token;
                    continue;
                }
                // Position 0 is always consumed above, so a script can only be the second subtag.
                if (position <= 1 && StandardSubtags.IsValidIso15924ScriptCode(token))
                {
                    rfc5646Tag.Script = token;
                    continue;
                }
                // A region is accepted as the second or third subtag only.
                if (position <= 2 && StandardSubtags.IsValidIso3166RegionCode(token))
                {
                    rfc5646Tag.Region = token;
                    continue;
                }
                if (StandardSubtags.IsValidRegisteredVariantCode(token))
                {
                    rfc5646Tag.AddToVariant(token);
                    continue;
                }
                throw new ValidationException(String.Format("The RFC tag '{0}' could not be parsed.", inputString));
            }
            return(rfc5646Tag);
        }
Esempio n. 3
0
 /// <summary>
 /// Validates the region part held in <c>_region</c>. An empty or null region is accepted;
 /// otherwise it must be a single subtag that is a valid ISO 3166 region code.
 /// </summary>
 /// <exception cref="ValidationException">
 /// Thrown when the region contains a dash or is not a valid ISO 3166 region code.
 /// </exception>
 private void ValidateRegion()
 {
     if (String.IsNullOrEmpty(_region))
     {
         return;
     }
     if (_region.Contains("-"))
     {
         // Region codes come from ISO 3166 (ISO 639 is the language-code standard).
         throw new ValidationException("The region tag may not contain dashes or underscores. I.e. there may only be a single iso 3166 tag in this subtag");
     }
     if (!StandardSubtags.IsValidIso3166RegionCode(_region))
     {
         throw new ValidationException(String.Format("'{0}' is not a valid ISO-3166 region code.", _region));
     }
 }
        /// <summary>
        /// Cleans the tag in place: migrates legacy language codes, moves parts that sit in the wrong
        /// subtag to the subtag they belong to (or to private use), normalises the private-use parts,
        /// and fills in "qaa" / "fonipa" where the result would otherwise be incomplete.
        /// The steps below are order-dependent.
        /// </summary>
        public void Clean()
        {
            // Migrate legacy ISO3 language codes to IANA 2 letter language codes, if there's a match.
            // Do this before we look for valid codes, otherwise the 3-letter ones come up as invalid and
            // get moved to private use. However, only do this to languages not identified as private-use.
            if (!Language.StartsWith("x-", StringComparison.OrdinalIgnoreCase))
            {
                string migrateFrom = "";
                string migrateTo   = "";
                foreach (string part in _languageSubTag.AllParts)
                {
                    if (part.Equals("x", StringComparison.OrdinalIgnoreCase))
                    {
                        break;                         // don't migrate language code parts already explicitly marked private-use.
                    }
                    // Only the first part that has a differing ISO3 equivalent is recorded for migration.
                    if (string.IsNullOrEmpty(migrateFrom))
                    {
                        LanguageSubtag language;
                        if (StandardSubtags.TryGetLanguageFromIso3Code(part, out language) && language.Code != language.Iso3Code)
                        {
                            migrateFrom = part;
                            migrateTo   = language.Code;
                        }
                    }
                }
                if (!String.IsNullOrEmpty(migrateFrom))
                {
                    _languageSubTag.RemoveParts(migrateFrom);
                    _languageSubTag.AddToSubtag(migrateTo);
                }
            }
            // The very next thing, before anything else gets moved to private use, is to move the parts whose position we
            // care about to the appropriate position in the private use section.
            // In the process we may remove anything non-alphanumeric, since otherwise we may move a marker that later
            // disappears (pathologically).
            // NOTE(review): the string argument ("qaa"/"Qaaa"/"QM") looks like the placeholder code for each
            // subtag, and the bool differs only for the language subtag - confirm against the helper.
            MoveFirstPartToPrivateUseIfNecessary(_languageSubTag, StandardSubtags.IsValidIso639LanguageCode, "qaa", true);
            MoveFirstPartToPrivateUseIfNecessary(_scriptSubTag, StandardSubtags.IsValidIso15924ScriptCode, "Qaaa", false);
            MoveFirstPartToPrivateUseIfNecessary(_regionSubTag, StandardSubtags.IsValidIso3166RegionCode, "QM", false);
            //This fixes a bug where the LdmlAdaptorV1 was writing out Zxxx as part of the variant to mark an audio writing system
            if (_variantSubTag.Contains(WellKnownSubtags.AudioScript))
            {
                MoveTagsMatching(_variantSubTag, _scriptSubTag, tag => tag.Equals(WellKnownSubtags.AudioScript));
                _privateUseSubTag.AddToSubtag(WellKnownSubtags.AudioPrivateUse);
            }
            // Fixes various legacy problems: map the legacy codes cmn/pes/arb to zh/fa/ar.
            if (Language.Equals("cmn", StringComparison.OrdinalIgnoreCase))
            {
                Language = "zh";
            }
            if (Language.Equals("pes", StringComparison.OrdinalIgnoreCase))
            {
                Language = "fa";
            }
            if (Language.Equals("arb", StringComparison.OrdinalIgnoreCase))
            {
                Language = "ar";
            }
            // "zh" without a region gets the region "CN".
            if (Language.Equals("zh", StringComparison.OrdinalIgnoreCase) && String.IsNullOrEmpty(Region))
            {
                Region = "CN";
            }

            // If the language tag contains an x- , then move the string behind the x- to private use
            MovePartsToPrivateUseIfNecessary(_languageSubTag);

            // Move script, region, and variant present in the language tag to their proper subtag.
            // NOTE(review): the last argument is presumably a predicate for parts to leave in place
            // (valid language codes) - confirm against this MoveTagsMatching overload.
            MoveTagsMatching(_languageSubTag, _scriptSubTag, StandardSubtags.IsValidIso15924ScriptCode, StandardSubtags.IsValidIso639LanguageCode);
            MoveTagsMatching(_languageSubTag, _regionSubTag, StandardSubtags.IsValidIso3166RegionCode, StandardSubtags.IsValidIso639LanguageCode);
            MoveTagsMatching(_languageSubTag, _variantSubTag, StandardSubtags.IsValidRegisteredVariantCode, StandardSubtags.IsValidIso639LanguageCode);

            // Move all other tags that don't belong to the private use subtag.

            //keep track of everything that we moved
            var tempSubTag = new SubTag();

            MoveTagsMatching(_languageSubTag, tempSubTag, tag => !StandardSubtags.IsValidIso639LanguageCode(tag));
            //place all the moved parts in private use.
            foreach (var part in tempSubTag.AllParts)
            {
                _privateUseSubTag.AddToSubtag(part);
                //if it looks like we moved a custom script set the subtag to mark that we've moved it
                // (4-character parts that are the tail of the IPA phonemic/phonetic private-use markers are excluded)
                if (_scriptSubTag.IsEmpty &&
                    part.Length == 4 &&                     //potential custom script tag
                    !WellKnownSubtags.IpaPhonemicPrivateUse.EndsWith(part) &&
                    !WellKnownSubtags.IpaPhoneticPrivateUse.EndsWith(part))
                {
                    _scriptSubTag = new SubTag("Qaaa");
                }
            }

            // Anything left in script/region/variant that is not a valid code for that subtag goes to private use.
            MoveTagsMatching(_scriptSubTag, _privateUseSubTag, tag => !StandardSubtags.IsValidIso15924ScriptCode(tag));
            MoveTagsMatching(_regionSubTag, _privateUseSubTag, tag => !StandardSubtags.IsValidIso3166RegionCode(tag));
            MoveTagsMatching(_variantSubTag, _privateUseSubTag, tag => !StandardSubtags.IsValidRegisteredVariantCode(tag));

            // Language, script and region each keep only their first part; the remainder moves to private use.
            // (The variant subtag is not trimmed this way.)
            _languageSubTag.KeepFirstAndMoveRemainderTo(_privateUseSubTag);
            _scriptSubTag.KeepFirstAndMoveRemainderTo(_privateUseSubTag);
            _regionSubTag.KeepFirstAndMoveRemainderTo(_privateUseSubTag);

            // When the audio private-use marker is present, the script must end up being the audio script.
            if (_privateUseSubTag.Contains(WellKnownSubtags.AudioPrivateUse))
            {
                // Move every tag that's not a Zxxx to private use
                if (!_scriptSubTag.IsEmpty && !_scriptSubTag.Contains(WellKnownSubtags.AudioScript))
                {
                    MoveTagsMatching(_scriptSubTag, _privateUseSubTag, tag => !_privateUseSubTag.Contains(tag));
                }
                // If we don't have a Zxxx already, set it. This protects tags already present, but with unusual case
                if (!_scriptSubTag.Contains(WellKnownSubtags.AudioScript))
                {
                    _scriptSubTag = new SubTag(WellKnownSubtags.AudioScript);
                }
            }

            //These two methods may produce duplicates that will subsequently be removed. Do we care? - TA 29/3/2011
            _privateUseSubTag.RemoveNonAlphaNumericCharacters();
            _privateUseSubTag.TruncatePartsToNumCharacters(8);

            _variantSubTag.RemoveDuplicates();
            _privateUseSubTag.RemoveDuplicates();
            // Any 'x' in the other tags will have arrived in the privateUse tag, so remove them.
            _privateUseSubTag.RemoveParts("x");

            // if language is empty, we need to add qaa, unless only a privateUse is present (e.g. x-blah is a valid rfc5646 tag)
            // First clause: no language but some script/region/variant. Second clause: every subtag is empty.
            if ((_languageSubTag.IsEmpty && (!_scriptSubTag.IsEmpty || !_regionSubTag.IsEmpty || !_variantSubTag.IsEmpty)) ||
                (_languageSubTag.IsEmpty && _scriptSubTag.IsEmpty && _regionSubTag.IsEmpty && _variantSubTag.IsEmpty && _privateUseSubTag.IsEmpty))
            {
                _languageSubTag.AddToSubtag("qaa");
            }

            // Two more legacy problems. We don't allow -etic or -emic without fonipa, so insert if needed.
            // If it has some other standard variant we won't be able to fix it...not sure what the right answer would be.
            // At least we catch the more common case.
            foreach (string part in _privateUseSubTag.AllParts)
            {
                // Variant is only set while it is still empty, so at most one assignment happens in this loop.
                if (string.IsNullOrEmpty(Variant) &&
                    (part.Equals("etic", StringComparison.OrdinalIgnoreCase) || part.Equals("emic", StringComparison.OrdinalIgnoreCase)))
                {
                    Variant = "fonipa";
                }
            }
        }
Esempio n. 5
0
 /// <summary>
 /// Checks that "fonipa" is not reported as a valid ISO 3166 region code.
 /// </summary>
 public void IsValidIso3166Region_fonipa_ReturnsFalse()
 {
     bool acceptedAsRegion = StandardSubtags.IsValidIso3166RegionCode("fonipa");

     Assert.That(acceptedAsRegion, Is.False);
 }
Esempio n. 6
0
 /// <summary>
 /// Checks that the region code "QM" is reported as valid in both upper and lower case.
 /// </summary>
 public void IsValidIso3166Region_QM_ReturnsTrue()
 {
     // Same order as before: upper case first, then lower case.
     foreach (string regionCode in new[] { "QM", "qm" })
     {
         bool acceptedAsRegion = StandardSubtags.IsValidIso3166RegionCode(regionCode);
         Assert.That(acceptedAsRegion, Is.True);
     }
 }
Esempio n. 7
0
 /// <summary>
 /// Checks that "US" is reported as a valid ISO 3166 region code.
 /// </summary>
 public void IsValidIso3166Region_US_ReturnsTrue()
 {
     bool acceptedAsRegion = StandardSubtags.IsValidIso3166RegionCode("US");

     Assert.That(acceptedAsRegion, Is.True);
 }