Пример #1
0
        /// <summary>
        /// Moves every part of the 'from' subtag that satisfies the moveAllMatching predicate into the 'to' subtag.
        /// </summary>
        /// <remarks>
        /// The predicate is evaluated against all parts before anything is moved, so a predicate that inspects
        /// either subtag sees the state they were in before the first move.
        /// </remarks>
        /// <param name="from">SubTag to move parts from</param>
        /// <param name="to">SubTag to move matching parts to</param>
        /// <param name="moveAllMatching">predicate matching parts to move</param>
        private static void MoveTagsMatching(SubTag from, SubTag to, Predicate <string> moveAllMatching)
        {
            // Collect the matches first; 'from' must not be modified while its parts are being enumerated.
            var partsToMove = new List <string>();

            foreach (string candidate in from.AllParts)
            {
                if (moveAllMatching(candidate))
                {
                    partsToMove.Add(candidate);
                }
            }

            foreach (string matched in partsToMove)
            {
                to.AddToSubtag(matched);
                from.RemoveParts(matched);
            }
        }
Пример #2
0
 /// <summary>
 /// Searches the parts of the subtag, in order, for the first one accepted by <paramref name="test"/> and,
 /// if one is found, moves it to the start of the subtag.
 /// </summary>
 /// <param name="from">Subtag whose parts are searched and, on success, reordered.</param>
 /// <param name="test">Returns true for a part that is a standard code for this subtag.</param>
 /// <param name="keepStandardPartInPrivateUse">When true, the search stops at the first "x" part (compared
 /// ignoring case), so only a part before that "x" may be moved.</param>
 /// <returns>True if an acceptable part was found and moved to the start; otherwise false.</returns>
 private bool MoveStandardPartToStart(SubTag from, Func <string, bool> test, bool keepStandardPartInPrivateUse)
 {
     bool   foundStandardPart = false;
     string standardPart      = null;

     foreach (string candidate in from.AllParts)
     {
         bool reachedPrivateUseMarker = keepStandardPartInPrivateUse &&
                                        candidate.Equals("x", StringComparison.OrdinalIgnoreCase);
         if (reachedPrivateUseMarker)
         {
             break;
         }
         if (test(candidate))
         {
             foundStandardPart = true;
             standardPart      = candidate;
             break;
         }
     }

     if (!foundStandardPart)
     {
         return false;
     }

     // Reposition only after the enumeration has finished: remove the part, then re-insert it at the front.
     from.RemoveParts(standardPart);
     from.InsertAtStartOfSubtag(standardPart);
     return true;
 }
Пример #3
0
        /// <summary>
        /// Moves the parts of the 'from' subtag that satisfy moveAllMatching into the 'to' subtag, while leaving
        /// in place the first part that satisfies keepFirstMatching.
        /// </summary>
        /// <remarks>
        /// A part can look valid for more than one area of a language tag, so moving purely on moveAllMatching
        /// could empty 'from' into 'to'. For example, with the language subtag as 'from', the region subtag as 'to',
        /// keepFirstMatching accepting language codes and moveAllMatching accepting region codes, every
        /// region-looking part is moved except, possibly, the first language-looking part.
        /// </remarks>
        /// <param name="from">SubTag to move parts from</param>
        /// <param name="to">SubTag to move matching parts to</param>
        /// <param name="moveAllMatching">predicate matching parts to move</param>
        /// <param name="keepFirstMatching">predicate matching part to keep</param>
        private static void MoveTagsMatching(SubTag from, SubTag to, Predicate <string> moveAllMatching, Predicate <string> keepFirstMatching)
        {
            bool keptOne = false;

            // Walk a copy of the parts, because 'from' is modified inside the loop.
            foreach (string candidate in new List <string>(from.AllParts))
            {
                if (!keptOne && keepFirstMatching(candidate))
                {
                    // The first part accepted by keepFirstMatching stays where it is, whatever moveAllMatching says.
                    keptOne = true;
                }
                else if (moveAllMatching(candidate))
                {
                    to.AddToSubtag(candidate);
                    from.RemoveParts(candidate);
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Ensures the 'from' subtag starts with a part that is valid for its slot, moving one part to private use
        /// when no valid part can be found.
        /// </summary>
        /// <remarks>
        /// If the subtag is empty or its first part already passes <paramref name="test"/>, nothing happens.
        /// Otherwise any part that passes the test is moved to the first position (unless it follows an x- and
        /// keepStandardPartInPrivateUse is true). If no such part exists, one part is moved to the private-use
        /// subtag and <paramref name="standardPrivatePart"/> is inserted at the START of 'from'.
        /// As a side effect, this method may remove non-alphanumeric characters from the 'from' subtag.
        /// </remarks>
        /// <param name="from">Subtag to examine and, if needed, modify.</param>
        /// <param name="test">Returns true for a part that is a valid code for this subtag.</param>
        /// <param name="standardPrivatePart">Placeholder inserted at the start of 'from' when a part is moved to private use.</param>
        /// <param name="keepStandardPartInPrivateUse">Passed through to MoveStandardPartToStart; when true, valid
        /// parts after an "x" are not promoted.</param>
        private void MoveFirstPartToPrivateUseIfNecessary(SubTag from, Func <string, bool> test, string standardPrivatePart,
                                                          bool keepStandardPartInPrivateUse)
        {
            string firstPart = from.AllParts.FirstOrDefault();

            // Done if there is nothing to move, if the first part is already a valid code for its slot,
            // or if a valid part elsewhere in the subtag could be brought to the front.
            if (firstPart == null || test(firstPart) || MoveStandardPartToStart(from, test, keepStandardPartInPrivateUse))
            {
                return;
            }

            // No valid part as things stand, so a part will have to go to private use.
            // Strip illegal characters first, since that may leave nothing (or less) to move.
            // This is deliberately NOT done before the search above: for a region like U!S-gb we want to
            // detect 'gb' as a good region code and keep that, rather than fixing U!S to US and keeping that.
            from.RemoveNonAlphaNumericCharacters();

            // Search again: if the clean-up produced a good code, keep it in the main part of the tag
            // rather than sending it to private use.
            if (MoveStandardPartToStart(from, test, keepStandardPartInPrivateUse))
            {
                return;
            }

            // Still no good part. Move the first part that is not an X and leave the placeholder in its stead.
            string partToDemote = FirstNonXPart(from.AllParts);
            if (partToDemote != null)
            {
                _privateUseSubTag.AddToSubtag(partToDemote);
                from.RemoveParts(partToDemote);
                from.InsertAtStartOfSubtag(standardPrivatePart);
            }
        }
Пример #5
0
        /// <summary>
        /// If the given subtag has an "x" part (compared ignoring case), copy all subsequent parts into private use
        /// and remove the x and all subsequent parts from the 'from' SubTag.
        /// </summary>
        /// <remarks>
        /// The removal is requested with a single call to RemoveParts, passing "x" and the following parts
        /// joined by '-'. Parts before the first "x" are left alone; if there is no "x" nothing changes.
        /// </remarks>
        /// <param name="from">Subtag to scan for an "x" marker.</param>
        private void MovePartsToPrivateUseIfNecessary(SubTag from)
        {
            // Stays null until the first "x" is seen; afterwards holds "x" plus every part that follows it.
            List <string> partsToRemove = null;

            foreach (var part in from.AllParts)
            {
                if (partsToRemove == null)
                {
                    // Ordinal, case-insensitive comparison: no lowered copy of every part is allocated,
                    // and it matches how the other methods of this class recognise the "x" marker.
                    if (part.Equals("x", StringComparison.OrdinalIgnoreCase))
                    {
                        partsToRemove = new List <string> { "x" };
                    }
                    continue;
                }

                // Everything after the marker (including any further "x") goes to private use.
                partsToRemove.Add(part);
                _privateUseSubTag.AddToSubtag(part);
            }

            if (partsToRemove != null)
            {
                from.RemoveParts(string.Join("-", partsToRemove.ToArray()));
            }
        }
Пример #6
0
        /// <summary>
        /// Cleans the tag by redistributing, in place, the parts held in the language, script, region, variant
        /// and private-use subtags.
        /// </summary>
        /// <remarks>
        /// The steps run in a fixed order and each one works on what the previous ones left behind:
        /// legacy ISO3 language codes are migrated; a valid part is brought to the front of the language, script
        /// and region subtags (or one part is sent to private use and a placeholder inserted); a few legacy
        /// language codes are rewritten; parts found in the language subtag are moved to the subtag whose validity
        /// test they pass; everything that is not valid where it sits goes to private use; and finally the
        /// private-use subtag is tidied up and "qaa" / "fonipa" are supplied where required.
        /// </remarks>
        public void Clean()
        {
            // Migrate legacy ISO3 language codes to IANA 2 letter language codes, if there's a match.
            // Do this before we look for valid codes, otherwise the 3-letter ones come up as invalid and
            // get moved to private use. However, only do this to languages not identified as private-use.
            if (!Language.StartsWith("x-", StringComparison.OrdinalIgnoreCase))
            {
                // Only the first migratable part is recorded; later parts are ignored once migrateFrom is set.
                string migrateFrom = "";
                string migrateTo   = "";
                foreach (string part in _languageSubTag.AllParts)
                {
                    if (part.Equals("x", StringComparison.OrdinalIgnoreCase))
                    {
                        break;                         // don't migrate language code parts already explicitly marked private-use.
                    }
                    if (string.IsNullOrEmpty(migrateFrom))
                    {
                        LanguageSubtag language;
                        // Migrate only when the ISO3 lookup succeeds and the resulting code differs from the ISO3 code.
                        if (StandardSubtags.TryGetLanguageFromIso3Code(part, out language) && language.Code != language.Iso3Code)
                        {
                            migrateFrom = part;
                            migrateTo   = language.Code;
                        }
                    }
                }
                // The swap is done after the loop so the subtag is not modified while it is being enumerated.
                if (!String.IsNullOrEmpty(migrateFrom))
                {
                    _languageSubTag.RemoveParts(migrateFrom);
                    _languageSubTag.AddToSubtag(migrateTo);
                }
            }
            // The very next thing, before anything else gets moved to private use, is to move the parts whose position we
            // care about to the appropriate position in the private use section.
            // In the process we may remove anything non-alphanumeric, since otherwise we may move a marker that later
            // disappears (pathologically).
            // The string argument is the placeholder inserted at the start of the subtag when a part is sent to
            // private use. Only the language call passes true, so valid language codes after an "x" are not promoted.
            MoveFirstPartToPrivateUseIfNecessary(_languageSubTag, StandardSubtags.IsValidIso639LanguageCode, "qaa", true);
            MoveFirstPartToPrivateUseIfNecessary(_scriptSubTag, StandardSubtags.IsValidIso15924ScriptCode, "Qaaa", false);
            MoveFirstPartToPrivateUseIfNecessary(_regionSubTag, StandardSubtags.IsValidIso3166RegionCode, "QM", false);
            //This fixes a bug where the LdmlAdaptorV1 was writing out Zxxx as part of the variant to mark an audio writing system
            if (_variantSubTag.Contains(WellKnownSubtags.AudioScript))
            {
                // Move the audio script marker from the variant to the script subtag and record the
                // audio private-use marker as well.
                MoveTagsMatching(_variantSubTag, _scriptSubTag, tag => tag.Equals(WellKnownSubtags.AudioScript));
                _privateUseSubTag.AddToSubtag(WellKnownSubtags.AudioPrivateUse);
            }
            // Fixes various legacy problems.
            if (Language.Equals("cmn", StringComparison.OrdinalIgnoreCase))
            {
                Language = "zh";
            }
            if (Language.Equals("pes", StringComparison.OrdinalIgnoreCase))
            {
                Language = "fa";
            }
            if (Language.Equals("arb", StringComparison.OrdinalIgnoreCase))
            {
                Language = "ar";
            }
            // A "zh" language with no region gets the region "CN".
            if (Language.Equals("zh", StringComparison.OrdinalIgnoreCase) && String.IsNullOrEmpty(Region))
            {
                Region = "CN";
            }

            // If the language tag contains an x- , then move the string behind the x- to private use
            MovePartsToPrivateUseIfNecessary(_languageSubTag);

            // Move script, region, and variant present in the language tag to their proper subtag.
            // In each call the first part that is a valid language code is kept in the language subtag,
            // even if it would also pass the script/region/variant test.
            MoveTagsMatching(_languageSubTag, _scriptSubTag, StandardSubtags.IsValidIso15924ScriptCode, StandardSubtags.IsValidIso639LanguageCode);
            MoveTagsMatching(_languageSubTag, _regionSubTag, StandardSubtags.IsValidIso3166RegionCode, StandardSubtags.IsValidIso639LanguageCode);
            MoveTagsMatching(_languageSubTag, _variantSubTag, StandardSubtags.IsValidRegisteredVariantCode, StandardSubtags.IsValidIso639LanguageCode);

            // Move all other tags that don't belong to the private use subtag.

            //keep track of everything that we moved
            var tempSubTag = new SubTag();

            MoveTagsMatching(_languageSubTag, tempSubTag, tag => !StandardSubtags.IsValidIso639LanguageCode(tag));
            //place all the moved parts in private use.
            foreach (var part in tempSubTag.AllParts)
            {
                _privateUseSubTag.AddToSubtag(part);
                //if it looks like we moved a custom script set the subtag to mark that we've moved it
                // A 4-character part counts as a custom script unless one of the IPA private-use constants
                // ends with it; this is only done while the script subtag is still empty.
                if (_scriptSubTag.IsEmpty &&
                    part.Length == 4 &&                     //potential custom script tag
                    !WellKnownSubtags.IpaPhonemicPrivateUse.EndsWith(part) &&
                    !WellKnownSubtags.IpaPhoneticPrivateUse.EndsWith(part))
                {
                    _scriptSubTag = new SubTag("Qaaa");
                }
            }

            // Anything in script, region or variant that fails that subtag's own validity test goes to private use.
            MoveTagsMatching(_scriptSubTag, _privateUseSubTag, tag => !StandardSubtags.IsValidIso15924ScriptCode(tag));
            MoveTagsMatching(_regionSubTag, _privateUseSubTag, tag => !StandardSubtags.IsValidIso3166RegionCode(tag));
            MoveTagsMatching(_variantSubTag, _privateUseSubTag, tag => !StandardSubtags.IsValidRegisteredVariantCode(tag));

            // NOTE(review): KeepFirstAndMoveRemainderTo is defined elsewhere; presumably it leaves only the first
            // part in language/script/region and moves the rest to private use. The variant subtag is not trimmed.
            _languageSubTag.KeepFirstAndMoveRemainderTo(_privateUseSubTag);
            _scriptSubTag.KeepFirstAndMoveRemainderTo(_privateUseSubTag);
            _regionSubTag.KeepFirstAndMoveRemainderTo(_privateUseSubTag);

            // When private use carries the audio marker, the script subtag must end up containing the audio script.
            if (_privateUseSubTag.Contains(WellKnownSubtags.AudioPrivateUse))
            {
                // Move every tag that's not a Zxxx to private use
                if (!_scriptSubTag.IsEmpty && !_scriptSubTag.Contains(WellKnownSubtags.AudioScript))
                {
                    // Only script parts that private use does not already contain are moved.
                    MoveTagsMatching(_scriptSubTag, _privateUseSubTag, tag => !_privateUseSubTag.Contains(tag));
                }
                // If we don't have a Zxxx already, set it. This protects tags already present, but with unusual case
                if (!_scriptSubTag.Contains(WellKnownSubtags.AudioScript))
                {
                    _scriptSubTag = new SubTag(WellKnownSubtags.AudioScript);
                }
            }

            //These two methods may produce duplicates that will subsequently be removed. Do we care? - TA 29/3/2011
            _privateUseSubTag.RemoveNonAlphaNumericCharacters();
            _privateUseSubTag.TruncatePartsToNumCharacters(8);

            _variantSubTag.RemoveDuplicates();
            _privateUseSubTag.RemoveDuplicates();
            // Any 'x' in the other tags will have arrived in the privateUse tag, so remove them.
            _privateUseSubTag.RemoveParts("x");

            // if language is empty, we need to add qaa, unless only a privateUse is present (e.g. x-blah is a valid rfc5646 tag)
            // That is: add "qaa" when the language is empty and either some of script/region/variant is present,
            // or nothing at all is present (private use included).
            if ((_languageSubTag.IsEmpty && (!_scriptSubTag.IsEmpty || !_regionSubTag.IsEmpty || !_variantSubTag.IsEmpty)) ||
                (_languageSubTag.IsEmpty && _scriptSubTag.IsEmpty && _regionSubTag.IsEmpty && _variantSubTag.IsEmpty && _privateUseSubTag.IsEmpty))
            {
                _languageSubTag.AddToSubtag("qaa");
            }

            // Two more legacy problems. We don't allow -etic or -emic without fonipa, so insert if needed.
            // If it has some other standard variant we won't be able to fix it...not sure what the right answer would be.
            // At least we catch the more common case.
            foreach (string part in _privateUseSubTag.AllParts)
            {
                // Variant is only set while it is still empty, so an existing variant is never overwritten.
                if (string.IsNullOrEmpty(Variant) &&
                    (part.Equals("etic", StringComparison.OrdinalIgnoreCase) || part.Equals("emic", StringComparison.OrdinalIgnoreCase)))
                {
                    Variant = "fonipa";
                }
            }
        }
Пример #7
0
		/// <summary>
		/// Moves the parts of the 'from' subtag that satisfy moveAllMatching into the 'to' subtag, while leaving
		/// in place the first part that satisfies keepFirstMatching.
		/// </summary>
		/// <remarks>
		/// A part can look valid for more than one area of a language tag, so moving purely on moveAllMatching
		/// could empty 'from' into 'to'. For example, with the language subtag as 'from', the region subtag as 'to',
		/// keepFirstMatching accepting language codes and moveAllMatching accepting region codes, every
		/// region-looking part is moved except, possibly, the first language-looking part.
		/// </remarks>
		/// <param name="from">SubTag to move parts from</param>
		/// <param name="to">SubTag to move matching parts to</param>
		/// <param name="moveAllMatching">predicate matching parts to move</param>
		/// <param name="keepFirstMatching">predicate matching part to keep</param>
		private static void MoveTagsMatching(SubTag from, SubTag to, Predicate<string> moveAllMatching, Predicate<string> keepFirstMatching)
		{
			bool keptOne = false;

			// Walk a copy of the parts, because 'from' is modified inside the loop.
			foreach (string candidate in new List<string>(from.AllParts))
			{
				if (!keptOne && keepFirstMatching(candidate))
				{
					// The first part accepted by keepFirstMatching stays where it is, whatever moveAllMatching says.
					keptOne = true;
				}
				else if (moveAllMatching(candidate))
				{
					to.AddToSubtag(candidate);
					from.RemoveParts(candidate);
				}
			}
		}
Пример #8
0
		/// <summary>
		/// Moves every part of the 'from' subtag that satisfies the moveAllMatching predicate into the 'to' subtag.
		/// </summary>
		/// <remarks>
		/// The predicate is evaluated against all parts before anything is moved, so a predicate that inspects
		/// either subtag sees the state they were in before the first move.
		/// </remarks>
		/// <param name="from">SubTag to move parts from</param>
		/// <param name="to">SubTag to move matching parts to</param>
		/// <param name="moveAllMatching">predicate matching parts to move</param>
		private static void MoveTagsMatching(SubTag from, SubTag to, Predicate<string> moveAllMatching)
		{
			// Copy the parts and filter the copy, so 'from' is never modified while it is being enumerated.
			List<string> partsToMove = new List<string>(from.AllParts).FindAll(moveAllMatching);

			foreach (string matched in partsToMove)
			{
				to.AddToSubtag(matched);
				from.RemoveParts(matched);
			}
		}
Пример #9
0
		/// <summary>
		/// If the given subtag has an "x" part (compared ignoring case), copy all subsequent parts into private use
		/// and remove the x and all subsequent parts from the 'from' SubTag.
		/// </summary>
		/// <remarks>
		/// The removal is requested with a single call to RemoveParts, passing "x" and the following parts
		/// joined by '-'. Parts before the first "x" are left alone; if there is no "x" nothing changes.
		/// </remarks>
		/// <param name="from">Subtag to scan for an "x" marker.</param>
		private void MovePartsToPrivateUseIfNecessary(SubTag from)
		{
			// Stays null until the first "x" is seen; afterwards holds "x" plus every part that follows it.
			List<string> partsToRemove = null;

			foreach (var part in from.AllParts)
			{
				if (partsToRemove == null)
				{
					// Ordinal, case-insensitive comparison: no lowered copy of every part is allocated,
					// and it matches how the other methods of this class recognise the "x" marker.
					if (part.Equals("x", StringComparison.OrdinalIgnoreCase))
					{
						partsToRemove = new List<string> { "x" };
					}
					continue;
				}

				// Everything after the marker (including any further "x") goes to private use.
				partsToRemove.Add(part);
				_privateUseSubTag.AddToSubtag(part);
			}

			if (partsToRemove != null)
			{
				from.RemoveParts(string.Join("-", partsToRemove.ToArray()));
			}
		}
Пример #10
0
		/// <summary>
		/// Searches the parts of the subtag, in order, for the first one accepted by <paramref name="test"/> and,
		/// if one is found, moves it to the start of the subtag.
		/// </summary>
		/// <param name="from">Subtag whose parts are searched and, on success, reordered.</param>
		/// <param name="test">Returns true for a part that is a standard code for this subtag.</param>
		/// <param name="keepStandardPartInPrivateUse">When true, the search stops at the first "x" part (compared
		/// ignoring case), so only a part before that "x" may be moved.</param>
		/// <returns>True if an acceptable part was found and moved to the start; otherwise false.</returns>
		private bool MoveStandardPartToStart(SubTag from, Func<string, bool> test, bool keepStandardPartInPrivateUse)
		{
			bool foundStandardPart = false;
			string standardPart = null;

			foreach (string candidate in from.AllParts)
			{
				bool reachedPrivateUseMarker = keepStandardPartInPrivateUse &&
					candidate.Equals("x", StringComparison.OrdinalIgnoreCase);
				if (reachedPrivateUseMarker)
				{
					break;
				}
				if (test(candidate))
				{
					foundStandardPart = true;
					standardPart = candidate;
					break;
				}
			}

			if (!foundStandardPart)
			{
				return false;
			}

			// Reposition only after the enumeration has finished: remove the part, then re-insert it at the front.
			from.RemoveParts(standardPart);
			from.InsertAtStartOfSubtag(standardPart);
			return true;
		}
Пример #11
0
		/// <summary>
		/// Ensures the 'from' subtag starts with a part that is valid for its slot, moving one part to private use
		/// when no valid part can be found.
		/// </summary>
		/// <remarks>
		/// If the subtag is empty or its first part already passes <paramref name="test"/>, nothing happens.
		/// Otherwise any part that passes the test is moved to the first position (unless it follows an x- and
		/// keepStandardPartInPrivateUse is true). If no such part exists, one part is moved to the private-use
		/// subtag and <paramref name="standardPrivatePart"/> is inserted at the START of 'from'.
		/// As a side effect, this method may remove non-alphanumeric characters from the 'from' subtag.
		/// </remarks>
		/// <param name="from">Subtag to examine and, if needed, modify.</param>
		/// <param name="test">Returns true for a part that is a valid code for this subtag.</param>
		/// <param name="standardPrivatePart">Placeholder inserted at the start of 'from' when a part is moved to private use.</param>
		/// <param name="keepStandardPartInPrivateUse">Passed through to MoveStandardPartToStart; when true, valid
		/// parts after an "x" are not promoted.</param>
		private void MoveFirstPartToPrivateUseIfNecessary(SubTag from, Func<string, bool> test, string standardPrivatePart,
			bool keepStandardPartInPrivateUse)
		{
			string firstPart = from.AllParts.FirstOrDefault();

			// Done if there is nothing to move, if the first part is already a valid code for its slot,
			// or if a valid part elsewhere in the subtag could be brought to the front.
			if (firstPart == null || test(firstPart) || MoveStandardPartToStart(from, test, keepStandardPartInPrivateUse))
			{
				return;
			}

			// No valid part as things stand, so a part will have to go to private use.
			// Strip illegal characters first, since that may leave nothing (or less) to move.
			// This is deliberately NOT done before the search above: for a region like U!S-gb we want to
			// detect 'gb' as a good region code and keep that, rather than fixing U!S to US and keeping that.
			from.RemoveNonAlphaNumericCharacters();

			// Search again: if the clean-up produced a good code, keep it in the main part of the tag
			// rather than sending it to private use.
			if (MoveStandardPartToStart(from, test, keepStandardPartInPrivateUse))
			{
				return;
			}

			// Still no good part. Move the first part that is not an X and leave the placeholder in its stead.
			string partToDemote = FirstNonXPart(from.AllParts);
			if (partToDemote != null)
			{
				_privateUseSubTag.AddToSubtag(partToDemote);
				from.RemoveParts(partToDemote);
				from.InsertAtStartOfSubtag(standardPrivatePart);
			}
		}