/// <summary>
/// Splits a tokenised name that contains at least one comma into its
/// family, given and dropping-particle components.
/// </summary>
/// <param name="tokens">
/// Tokens of the name, including the comma tokens. The list is modified
/// (tokens are removed from it) while the name is split.
/// </param>
/// <param name="commaIndexes">
/// Positions of the comma tokens inside <paramref name="tokens"/>; the first
/// and last entries are used. NOTE(review): assumed non-empty and ascending -
/// confirm against the caller.
/// </param>
/// <returns>
/// A <see cref="CSLNameComponents"/> whose Family, Given and DroppingParticle
/// properties are assigned only when the corresponding token list is non-empty.
/// </returns>
static CSLNameComponents SplitToWordsCommas(List<WordToken> tokens, int[] commaIndexes)
{
    var result = new CSLNameComponents();

    var family = new List<WordToken>();
    var given = new List<WordToken>();
    var droppingParticle = new List<WordToken>();

    var lastComma = commaIndexes[commaIndexes.Length - 1];
    var firstComma = commaIndexes[0];

    // Drop the last comma itself; whatever followed it is the given name.
    tokens.RemoveAt(lastComma);
    given.AddRange(ExtractTokenRange(tokens, lastComma));

    // More than one comma: the section between the first and the last comma
    // (minus any further commas inside it) becomes dropping-particle material.
    if (firstComma != lastComma)
    {
        tokens.RemoveAt(firstComma);

        var middleSection = ExtractTokenRange(tokens, firstComma);
        middleSection.RemoveAll(t => t.IsComma);
        droppingParticle.AddRange(middleSection);
    }

    if (tokens.Count != 0)
    {
        // The final word in front of the (first) comma is always family name.
        var lastIndex = tokens.Count - 1;
        family.Add(ExtractToken(tokens, lastIndex));

        int lowerStart;
        int lowerLength;
        FindLowerCaseTokenRange(tokens, out lowerStart, out lowerLength);

        if (lowerLength >= 1)
        {
            // Take everything from index 0 (not from the first lower-case
            // token) up to the end of the lower-case run, and put it in front
            // of the particles collected so far.
            var leadingParticles = ExtractTokenRange(tokens, 0, lowerStart + lowerLength);
            droppingParticle.InsertRange(0, leadingParticles);
        }

        // Whatever is still left is prepended to the family name.
        if (tokens.Count != 0)
        {
            family.InsertRange(0, tokens);
        }
    }

    if (family.Count != 0)
    {
        result.Family = JoinTokens(family);
    }

    if (given.Count != 0)
    {
        result.Given = JoinTokens(given);
    }

    if (droppingParticle.Count != 0)
    {
        result.DroppingParticle = JoinTokens(droppingParticle);
    }

    return result;
}
/// <summary>
/// Splits a tokenised name that contains no commas into its family, given
/// and dropping-particle components.
/// </summary>
/// <param name="tokens">
/// Tokens of the name. The list is modified while the name is split.
/// NOTE(review): assumed to contain at least one token, because the last
/// token is extracted unconditionally - confirm against the caller.
/// </param>
/// <returns>
/// A <see cref="CSLNameComponents"/> with Family always assigned, Given
/// assigned when tokens remain after the family token was taken, and
/// DroppingParticle assigned when a lower-case run was found.
/// </returns>
static CSLNameComponents SplitToWordsNoCommas(List<WordToken> tokens)
{
    var result = new CSLNameComponents();
    var familyNameTokens = new List<WordToken>();
    var droppingParticleNameTokens = new List<WordToken>();

    // Always at least a family name: the last token.
    familyNameTokens.Add(ExtractToken(tokens, tokens.Count - 1));

    // Do we have any remaining tokens?
    if (tokens.Count != 0)
    {
        // Find the range of the tokens which are lower-cased.
        int firstLowerCaseIndex;
        int lowerCaseSequenceCount;
        FindLowerCaseTokenRange(tokens, out firstLowerCaseIndex, out lowerCaseSequenceCount);

        if (lowerCaseSequenceCount > 0)
        {
            // The lower-case run becomes the dropping particle.
            droppingParticleNameTokens.AddRange(
                ExtractTokenRange(tokens, firstLowerCaseIndex, lowerCaseSequenceCount));

            // Any tokens that followed the lower-case run are inserted
            // before the family name token taken above.
            // (Presumably ExtractTokenRange removes the extracted tokens, so
            // these now start at firstLowerCaseIndex - verify in the helper.)
            var trailingCount = tokens.Count - firstLowerCaseIndex;
            if (trailingCount > 0)
            {
                familyNameTokens.InsertRange(
                    0, ExtractTokenRange(tokens, firstLowerCaseIndex, trailingCount));
            }
        }

        // Given is anything left. This is the only place Given is set; the
        // former separate given-name token list was never filled and has
        // been removed together with its unreachable assignment.
        result.Given = JoinTokens(tokens);
    }

    // familyNameTokens holds at least the token added above, so no
    // emptiness check is needed here.
    result.Family = JoinTokens(familyNameTokens);

    if (droppingParticleNameTokens.Count > 0)
    {
        result.DroppingParticle = JoinTokens(droppingParticleNameTokens);
    }

    return result;
}
/// <summary>
/// Asserts that the supplied name components match the given expectations.
/// Every expectation left at <c>null</c> is skipped, so callers only verify
/// the parts they care about. Checks run in parameter order.
/// </summary>
/// <param name="nameComponents">The parsed name to inspect.</param>
/// <param name="expectedFamily">Expected Family value, or null to skip.</param>
/// <param name="expectedGiven">Expected Given value, or null to skip.</param>
/// <param name="expectedDroppingParticle">Expected DroppingParticle value, or null to skip.</param>
/// <param name="expectedNonDroppingParticle">Expected NonDroppingParticle value, or null to skip.</param>
/// <param name="expectedSuffix">Expected Suffix value, or null to skip.</param>
/// <param name="expectedCommaSuffix">Expected CommaSuffix value, or null to skip.</param>
/// <param name="expectedStaticOrdering">Expected StaticOrdering value, or null to skip.</param>
static void Check(
    CSLNameComponents nameComponents,
    string expectedFamily = null,
    string expectedGiven = null,
    string expectedDroppingParticle = null,
    string expectedNonDroppingParticle = null,
    string expectedSuffix = null,
    bool? expectedCommaSuffix = null,
    bool? expectedStaticOrdering = null)
{
    // Textual parts of the name.
    if (expectedFamily != null)
    {
        Assert.AreEqual(expectedFamily, nameComponents.Family, "Family");
    }

    if (expectedGiven != null)
    {
        Assert.AreEqual(expectedGiven, nameComponents.Given, "Given");
    }

    if (expectedDroppingParticle != null)
    {
        Assert.AreEqual(expectedDroppingParticle, nameComponents.DroppingParticle, "Dropping Particle");
    }

    if (expectedNonDroppingParticle != null)
    {
        Assert.AreEqual(expectedNonDroppingParticle, nameComponents.NonDroppingParticle, "Non-Dropping Particle");
    }

    if (expectedSuffix != null)
    {
        Assert.AreEqual(expectedSuffix, nameComponents.Suffix, "Suffix");
    }

    // Boolean flags; the nullable expectation is passed through unchanged.
    if (expectedCommaSuffix.HasValue)
    {
        Assert.AreEqual(expectedCommaSuffix, nameComponents.CommaSuffix, "Comma-Suffix");
    }

    if (expectedStaticOrdering.HasValue)
    {
        Assert.AreEqual(expectedStaticOrdering, nameComponents.StaticOrdering, "Static-Ordering");
    }
}
/// <summary>
/// Splits a comma-free, tokenised name into family, given and
/// dropping-particle components.
/// </summary>
/// <param name="tokens">
/// Tokens of the name; this list is modified during the split.
/// NOTE(review): the last token is extracted unconditionally, so the list is
/// assumed to be non-empty - confirm that callers guarantee this.
/// </param>
/// <returns>
/// A <see cref="CSLNameComponents"/>: Family is always assigned; Given is
/// assigned when tokens remain once the family token has been taken;
/// DroppingParticle is assigned when a lower-case token run was found.
/// </returns>
static CSLNameComponents SplitToWordsNoCommas(List<WordToken> tokens)
{
    var result = new CSLNameComponents();
    var familyNameTokens = new List<WordToken>();
    var droppingParticleNameTokens = new List<WordToken>();

    // The last token always belongs to the family name.
    familyNameTokens.Add(ExtractToken(tokens, tokens.Count - 1));

    // Anything left to classify?
    if (tokens.Count != 0)
    {
        int firstLowerCaseIndex;
        int lowerCaseSequenceCount;
        FindLowerCaseTokenRange(tokens, out firstLowerCaseIndex, out lowerCaseSequenceCount);

        if (lowerCaseSequenceCount > 0)
        {
            // Lower-case run -> dropping particle.
            droppingParticleNameTokens.AddRange(
                ExtractTokenRange(tokens, firstLowerCaseIndex, lowerCaseSequenceCount));

            // Tokens after the lower-case run go in front of the family
            // token collected above.
            // (Presumably the helper removes what it extracts, which is why
            // the same start index is reused - verify in ExtractTokenRange.)
            var remainingAfterRun = tokens.Count - firstLowerCaseIndex;
            if (remainingAfterRun > 0)
            {
                familyNameTokens.InsertRange(
                    0, ExtractTokenRange(tokens, firstLowerCaseIndex, remainingAfterRun));
            }
        }

        // Whatever is still in the list is the given name. The unused
        // given-name token list (never populated) and its unreachable
        // assignment have been dropped; Given is set only here.
        result.Given = JoinTokens(tokens);
    }

    // At least one family token was added above, so this is unconditional.
    result.Family = JoinTokens(familyNameTokens);

    if (droppingParticleNameTokens.Count > 0)
    {
        result.DroppingParticle = JoinTokens(droppingParticleNameTokens);
    }

    return result;
}
/// <summary>
/// Breaks a comma-containing token list into family name, given name and
/// dropping particle.
/// </summary>
/// <param name="tokens">
/// Name tokens including the commas; tokens are removed from this list as
/// they are assigned to a component.
/// </param>
/// <param name="commaIndexes">
/// Indexes of the comma tokens within <paramref name="tokens"/>. Only the
/// first and last entries are read. NOTE(review): assumed non-empty and in
/// ascending order - confirm against the caller.
/// </param>
/// <returns>
/// The populated <see cref="CSLNameComponents"/>; a component property is
/// left untouched when no tokens were collected for it.
/// </returns>
static CSLNameComponents SplitToWordsCommas(List<WordToken> tokens, int[] commaIndexes)
{
    var result = new CSLNameComponents();

    var familyPart = new List<WordToken>();
    var givenPart = new List<WordToken>();
    var particlePart = new List<WordToken>();

    // Given name: everything behind the last comma (the comma is discarded).
    var splitAt = commaIndexes[commaIndexes.Length - 1];
    tokens.RemoveAt(splitAt);
    givenPart.AddRange(ExtractTokenRange(tokens, splitAt));

    // With several commas, the tokens between the first and last comma are
    // treated as dropping particle; commas inside that span are thrown away.
    var hasSeveralCommas = splitAt != commaIndexes[0];
    if (hasSeveralCommas)
    {
        splitAt = commaIndexes[0];
        tokens.RemoveAt(splitAt);

        var betweenCommas = ExtractTokenRange(tokens, splitAt);
        betweenCommas.RemoveAll(candidate => candidate.IsComma);
        particlePart.AddRange(betweenCommas);
    }

    if (tokens.Count >= 1)
    {
        // Last remaining word is family name.
        familyPart.Add(ExtractToken(tokens, tokens.Count - 1));

        int runStart;
        int runLength;
        FindLowerCaseTokenRange(tokens, out runStart, out runLength);

        if (runLength >= 1)
        {
            // From the very first token through the end of the lower-case
            // run: placed ahead of any particles gathered earlier.
            var runEnd = runStart + runLength;
            particlePart.InsertRange(0, ExtractTokenRange(tokens, 0, runEnd));
        }

        // Leftovers precede the family word picked above.
        if (tokens.Count >= 1)
        {
            familyPart.InsertRange(0, tokens);
        }
    }

    if (familyPart.Count >= 1)
    {
        result.Family = JoinTokens(familyPart);
    }

    if (givenPart.Count >= 1)
    {
        result.Given = JoinTokens(givenPart);
    }

    if (particlePart.Count >= 1)
    {
        result.DroppingParticle = JoinTokens(particlePart);
    }

    return result;
}