예제 #1
0
			/// <summary>
			/// Splits a tokenized name that contains one or more commas into family name,
			/// given name and dropping particle.
			/// </summary>
			/// <param name="tokens">
			/// Tokens of the name, comma tokens included. The list is modified: the commas are
			/// removed here, and the Extract* helpers are assumed to remove the tokens they return.
			/// </param>
			/// <param name="commaIndexes">
			/// Positions of the comma tokens inside <paramref name="tokens"/>. Only the first and
			/// the last entry are read, so the array must hold at least one index.
			/// </param>
			/// <returns>
			/// The name components; Family, Given and DroppingParticle are assigned only when
			/// at least one token was collected for them.
			/// </returns>
			static CSLNameComponents SplitToWordsCommas(List<WordToken> tokens, int[] commaIndexes)
			{
				var result = new CSLNameComponents();
				var family = new List<WordToken>();
				var given = new List<WordToken>();
				var droppingParticle = new List<WordToken>();

				int firstComma = commaIndexes[0];
				int lastComma = commaIndexes[commaIndexes.Length - 1];

				// Drop the last comma; whatever followed it is the given name
				tokens.RemoveAt(lastComma);
				given.AddRange(ExtractTokenRange(tokens, lastComma));

				// More than one comma: the part between the first and the last comma
				// (minus any commas inside it) becomes the dropping particle
				if (lastComma != firstComma)
				{
					tokens.RemoveAt(firstComma);

					var middle = ExtractTokenRange(tokens, firstComma);
					middle.RemoveAll(t => t.IsComma);
					droppingParticle.AddRange(middle);
				}

				// What remains is the part in front of the first comma
				if (tokens.Count > 0)
				{
					// Its last word is the core of the family name
					family.Add(ExtractToken(tokens, tokens.Count - 1));

					int lowerStart;
					int lowerCount;
					FindLowerCaseTokenRange(tokens, out lowerStart, out lowerCount);

					if (lowerCount > 0)
					{
						// Take everything from index 0 (not from the first lower-case token)
						// up to the end of the lower-case run and put it in front of the
						// dropping particle collected so far
						var leading = ExtractTokenRange(tokens, 0, lowerStart + lowerCount);
						droppingParticle.InsertRange(0, leading);
					}

					// Leftover words precede the family name's last word
					if (tokens.Count > 0)
					{
						family.InsertRange(0, tokens);
					}
				}

				if (family.Count > 0)
				{
					result.Family = JoinTokens(family);
				}

				if (given.Count > 0)
				{
					result.Given = JoinTokens(given);
				}

				if (droppingParticle.Count > 0)
				{
					result.DroppingParticle = JoinTokens(droppingParticle);
				}

				return result;
			}
예제 #2
0
            /// <summary>
            /// Splits a tokenized name that contains no commas into family name, given name
            /// and dropping particle.
            /// </summary>
            /// <remarks>
            /// The last token is always taken as the family name. If the remaining tokens contain
            /// a run reported by FindLowerCaseTokenRange, that run becomes the dropping particle
            /// and every token after it is put in front of the family name. Whatever is left over
            /// is the given name. Given is assigned only when at least one token is left, the same
            /// rule that is applied to Family and DroppingParticle.
            /// </remarks>
            /// <param name="tokens">
            /// Word tokens of the name. The list is expected to hold at least one token (an empty
            /// list would pass index -1 to ExtractToken). The Extract* helpers are assumed to
            /// remove the tokens they return from this list.
            /// </param>
            /// <returns>The name components; a component is assigned only when it received tokens.</returns>
            static CSLNameComponents SplitToWordsNoCommas(List<WordToken> tokens)
            {
                var result = new CSLNameComponents();
                var familyNameTokens = new List<WordToken>();
                var givenNameTokens = new List<WordToken>();
                var droppingParticleNameTokens = new List<WordToken>();

                // Always at least a family name
                familyNameTokens.Add(ExtractToken(tokens, tokens.Count - 1));

                // Do we have any remaining tokens?
                if (tokens.Count != 0)
                {
                    // Yes, so find the range of the tokens which are lower-cased
                    int firstLowerCaseIndex;
                    int lowerCaseSequenceCount;
                    FindLowerCaseTokenRange(tokens, out firstLowerCaseIndex, out lowerCaseSequenceCount);

                    // Did we find any?
                    if (lowerCaseSequenceCount > 0)
                    {
                        // Yes, so extract them as dropping particles
                        droppingParticleNameTokens.AddRange(ExtractTokenRange(tokens, firstLowerCaseIndex, lowerCaseSequenceCount));

                        // Tokens that followed the lower-case run now start at firstLowerCaseIndex
                        int trailingCount = tokens.Count - firstLowerCaseIndex;
                        if (trailingCount > 0)
                        {
                            // They are inserted before the family name
                            familyNameTokens.InsertRange(0, ExtractTokenRange(tokens, firstLowerCaseIndex, trailingCount));
                        }
                    }

                    // Given is anything left. Collect it instead of assigning result.Given
                    // directly, so that an empty remainder (e.g. particle + family only)
                    // does not produce a Given value joined from zero tokens.
                    givenNameTokens.AddRange(tokens);
                }

                if (familyNameTokens.Count > 0)
                {
                    result.Family = JoinTokens(familyNameTokens);
                }
                if (givenNameTokens.Count > 0)
                {
                    result.Given = JoinTokens(givenNameTokens);
                }
                if (droppingParticleNameTokens.Count > 0)
                {
                    result.DroppingParticle = JoinTokens(droppingParticleNameTokens);
                }

                return result;
            }
예제 #3
0
		/// <summary>
		/// Asserts selected properties of a parsed name. Every expectation is optional:
		/// an argument left at <c>null</c> means the corresponding property is not checked.
		/// </summary>
		/// <param name="nameComponents">The name components under test.</param>
		/// <param name="expectedFamily">Expected value of Family, or <c>null</c> to skip.</param>
		/// <param name="expectedGiven">Expected value of Given, or <c>null</c> to skip.</param>
		/// <param name="expectedDroppingParticle">Expected value of DroppingParticle, or <c>null</c> to skip.</param>
		/// <param name="expectedNonDroppingParticle">Expected value of NonDroppingParticle, or <c>null</c> to skip.</param>
		/// <param name="expectedSuffix">Expected value of Suffix, or <c>null</c> to skip.</param>
		/// <param name="expectedCommaSuffix">Expected value of CommaSuffix, or <c>null</c> to skip.</param>
		/// <param name="expectedStaticOrdering">Expected value of StaticOrdering, or <c>null</c> to skip.</param>
		static void Check(
			CSLNameComponents nameComponents,
			string expectedFamily = null,
			string expectedGiven = null,
			string expectedDroppingParticle = null,
			string expectedNonDroppingParticle = null,
			string expectedSuffix = null,
			bool? expectedCommaSuffix = null,
			bool? expectedStaticOrdering = null)
		{
			// Text components: compared only when the caller supplied an expectation
			if (expectedFamily != null) { Assert.AreEqual(expectedFamily, nameComponents.Family, "Family"); }
			if (expectedGiven != null) { Assert.AreEqual(expectedGiven, nameComponents.Given, "Given"); }
			if (expectedDroppingParticle != null) { Assert.AreEqual(expectedDroppingParticle, nameComponents.DroppingParticle, "Dropping Particle"); }
			if (expectedNonDroppingParticle != null) { Assert.AreEqual(expectedNonDroppingParticle, nameComponents.NonDroppingParticle, "Non-Dropping Particle"); }
			if (expectedSuffix != null) { Assert.AreEqual(expectedSuffix, nameComponents.Suffix, "Suffix"); }

			// Flags: a nullable without a value means "do not check"
			if (expectedCommaSuffix.HasValue) { Assert.AreEqual(expectedCommaSuffix, nameComponents.CommaSuffix, "Comma-Suffix"); }
			if (expectedStaticOrdering.HasValue) { Assert.AreEqual(expectedStaticOrdering, nameComponents.StaticOrdering, "Static-Ordering"); }
		}
예제 #4
0
			/// <summary>
			/// Splits a tokenized name that contains no commas into family name, given name
			/// and dropping particle.
			/// </summary>
			/// <remarks>
			/// The last token is always taken as the family name. If the remaining tokens contain
			/// a run reported by FindLowerCaseTokenRange, that run becomes the dropping particle
			/// and every token after it is put in front of the family name. Whatever is left over
			/// is the given name. Given is assigned only when at least one token is left, the same
			/// rule that is applied to Family and DroppingParticle.
			/// </remarks>
			/// <param name="tokens">
			/// Word tokens of the name. The list is expected to hold at least one token (an empty
			/// list would pass index -1 to ExtractToken). The Extract* helpers are assumed to
			/// remove the tokens they return from this list.
			/// </param>
			/// <returns>The name components; a component is assigned only when it received tokens.</returns>
			static CSLNameComponents SplitToWordsNoCommas(List<WordToken> tokens)
			{
				var result = new CSLNameComponents();
				var familyNameTokens = new List<WordToken>();
				var givenNameTokens = new List<WordToken>();
				var droppingParticleNameTokens = new List<WordToken>();

				// Always at least a family name
				familyNameTokens.Add(ExtractToken(tokens, tokens.Count - 1));

				// Do we have any remaining tokens?
				if (tokens.Count != 0)
				{
					// Yes, so find the range of the tokens which are lower-cased
					int firstLowerCaseIndex;
					int lowerCaseSequenceCount;
					FindLowerCaseTokenRange(tokens, out firstLowerCaseIndex, out lowerCaseSequenceCount);

					// Did we find any?
					if (lowerCaseSequenceCount > 0)
					{
						// Yes, so extract them as dropping particles
						droppingParticleNameTokens.AddRange(ExtractTokenRange(tokens, firstLowerCaseIndex, lowerCaseSequenceCount));

						// Tokens that followed the lower-case run now start at firstLowerCaseIndex
						int trailingCount = tokens.Count - firstLowerCaseIndex;
						if (trailingCount > 0)
						{
							// They are inserted before the family name
							familyNameTokens.InsertRange(0, ExtractTokenRange(tokens, firstLowerCaseIndex, trailingCount));
						}
					}

					// Given is anything left. Collect it instead of assigning result.Given
					// directly, so that an empty remainder (e.g. particle + family only)
					// does not produce a Given value joined from zero tokens.
					givenNameTokens.AddRange(tokens);
				}

				if (familyNameTokens.Count > 0) result.Family = JoinTokens(familyNameTokens);
				if (givenNameTokens.Count > 0) result.Given = JoinTokens(givenNameTokens);
				if (droppingParticleNameTokens.Count > 0) result.DroppingParticle = JoinTokens(droppingParticleNameTokens);

				return result;
			}
예제 #5
0
            /// <summary>
            /// Splits a tokenized name that contains one or more commas into family name,
            /// given name and dropping particle.
            /// </summary>
            /// <param name="tokens">
            /// Tokens of the name, comma tokens included. The list is modified: the commas are
            /// removed here, and the Extract* helpers are assumed to remove the tokens they return.
            /// </param>
            /// <param name="commaIndexes">
            /// Positions of the comma tokens inside <paramref name="tokens"/>. Only the first and
            /// the last entry are read, so the array must hold at least one index.
            /// </param>
            /// <returns>
            /// The name components; Family, Given and DroppingParticle are assigned only when
            /// at least one token was collected for them.
            /// </returns>
            static CSLNameComponents SplitToWordsCommas(List<WordToken> tokens, int[] commaIndexes)
            {
                var result = new CSLNameComponents();
                var family = new List<WordToken>();
                var given = new List<WordToken>();
                var droppingParticle = new List<WordToken>();

                int firstComma = commaIndexes[0];
                int lastComma = commaIndexes[commaIndexes.Length - 1];

                // Drop the last comma; whatever followed it is the given name
                tokens.RemoveAt(lastComma);
                given.AddRange(ExtractTokenRange(tokens, lastComma));

                // More than one comma: the part between the first and the last comma
                // (minus any commas inside it) becomes the dropping particle
                if (lastComma != firstComma)
                {
                    tokens.RemoveAt(firstComma);

                    var middle = ExtractTokenRange(tokens, firstComma);
                    middle.RemoveAll(t => t.IsComma);
                    droppingParticle.AddRange(middle);
                }

                // What remains is the part in front of the first comma
                if (tokens.Count > 0)
                {
                    // Its last word is the core of the family name
                    family.Add(ExtractToken(tokens, tokens.Count - 1));

                    int lowerStart;
                    int lowerCount;
                    FindLowerCaseTokenRange(tokens, out lowerStart, out lowerCount);

                    if (lowerCount > 0)
                    {
                        // Take everything from index 0 (not from the first lower-case token)
                        // up to the end of the lower-case run and put it in front of the
                        // dropping particle collected so far
                        var leading = ExtractTokenRange(tokens, 0, lowerStart + lowerCount);
                        droppingParticle.InsertRange(0, leading);
                    }

                    // Leftover words precede the family name's last word
                    if (tokens.Count > 0)
                    {
                        family.InsertRange(0, tokens);
                    }
                }

                if (family.Count > 0)
                {
                    result.Family = JoinTokens(family);
                }

                if (given.Count > 0)
                {
                    result.Given = JoinTokens(given);
                }

                if (droppingParticle.Count > 0)
                {
                    result.DroppingParticle = JoinTokens(droppingParticle);
                }

                return result;
            }