/// <summary>
/// Appends the notes of the reference's periodical, wrapped in " [" and "]", to the
/// unfiltered text units of the component part.
/// </summary>
/// <param name="componentPart">The component part this filter is attached to; must contain a periodical field element.</param>
/// <param name="template">The template that is passed through to <c>GetTextUnitsUnfiltered</c>.</param>
/// <param name="citation">The citation whose reference supplies the periodical.</param>
/// <param name="handled">Set to <c>true</c> only when the bracketed notes were appended; otherwise <c>false</c>.</param>
/// <returns>
/// The component part's text units followed by the three note text units, or <c>null</c>
/// (with <paramref name="handled"/> = <c>false</c>) when there is nothing to append.
/// </returns>
public IEnumerable<ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
{
    handled = false;

    if (componentPart == null)
    {
        return null;
    }
    if (citation == null || citation.Reference == null)
    {
        return null;
    }

    var periodical = citation.Reference.Periodical;
    if (periodical == null)
    {
        return null;
    }

    var notesString = periodical.Notes;
    if (string.IsNullOrWhiteSpace(notesString))
    {
        return null;
    }

    // The filter only applies to component parts that contain the periodical field.
    var periodicalFieldElement = componentPart.GetFieldElements().FirstOrDefault<FieldElement>(item => item.PropertyId == ReferencePropertyId.Periodical);
    if (periodicalFieldElement == null)
    {
        return null;
    }

    var output = componentPart.GetTextUnitsUnfiltered(citation, template);
    if (output == null)
    {
        // Nothing to append to; leave the component part unhandled instead of failing on output.Add.
        return null;
    }

    // All three pieces are emitted with neutral font style.
    output.Add(new LiteralTextUnit(" [") { FontStyle = FontStyle.Neutral });
    output.Add(new LiteralTextUnit(notesString) { FontStyle = FontStyle.Neutral });
    output.Add(new LiteralTextUnit("]") { FontStyle = FontStyle.Neutral });

    handled = true;
    return output;
}
/// <summary>
/// Re-creates the output of the component part by passing the text of every literal and field
/// text unit through <c>TaggedTextToTextUnits</c> of the element that produced it.
/// </summary>
/// <param name="componentPart">The component part this filter is attached to.</param>
/// <param name="template">The template that is passed through to <c>GetTextUnitsUnfiltered</c>.</param>
/// <param name="citation">The citation being rendered; must carry a reference.</param>
/// <param name="handled">Set to <c>true</c> when the rebuilt collection is returned; otherwise <c>false</c>.</param>
/// <returns>
/// A new collection holding the converted text units, or <c>null</c> when an argument is missing
/// or the component part yields no text unit collection.
/// </returns>
public IEnumerable<ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
{
    handled = false;

    if (template == null || citation == null || citation.Reference == null)
    {
        return null;
    }
    if (componentPart == null || componentPart.Elements == null)
    {
        return null;
    }

    // Let Citavi apply the conditions on the elements inside the component part.
    var sourceUnits = componentPart.GetTextUnitsUnfiltered(citation, template);
    if (sourceUnits == null)
    {
        return null;
    }

    var result = new TextUnitCollection();

    foreach (var unit in sourceUnits)
    {
        // Literal text units are converted via their literal element.
        var literalUnit = unit as LiteralTextUnit;
        if (literalUnit != null)
        {
            var literalText = literalUnit.Text;
            if (string.IsNullOrEmpty(literalText))
            {
                continue;
            }

            var literalStyle = literalUnit.FontStyle;
            if (literalUnit.HasTemporaryFontStyle())
            {
                literalStyle = literalStyle | literalUnit.TemporaryFontStyle;
            }

            var convertedLiteral = literalUnit.LiteralElement.TaggedTextToTextUnits(literalText, literalStyle);
            if (convertedLiteral != null && convertedLiteral.Count > 0)
            {
                result.AddRange(convertedLiteral);
            }
            continue;
        }

        // Field text units are converted via their field element; any other kind of unit is not copied.
        var fieldUnit = unit as FieldTextUnit;
        if (fieldUnit == null)
        {
            continue;
        }

        var fieldText = fieldUnit.Text;
        if (string.IsNullOrEmpty(fieldText))
        {
            continue;
        }

        var fieldStyle = fieldUnit.FontStyle;
        if (fieldUnit.HasTemporaryFontStyle())
        {
            fieldStyle = fieldStyle | fieldUnit.TemporaryFontStyle;
        }

        var convertedField = fieldUnit.FieldElement.TaggedTextToTextUnits(fieldText, fieldStyle);
        if (convertedField != null && convertedField.Count > 0)
        {
            result.AddRange(convertedField);
        }
    }

    handled = true;
    return result;
}
//Version 3.0: complete overhaul, script considers different output for placeholder citations and bibliography citations
//Version 2.0: script can be attached to both date/time field element as well as text field element

/// <summary>
/// Re-formats the date (or date range) stored in the date field of the component part according to
/// the output templates configured at the top of the method. Placeholder (in-text) citations and
/// bibliography citations use different templates.
/// </summary>
/// <param name="componentPart">The component part this filter is attached to.</param>
/// <param name="template">The template that is passed through to the elements of the component part.</param>
/// <param name="citation">The citation being rendered.</param>
/// <param name="handled">Set to <c>true</c> only when this method produced the output itself.</param>
/// <returns>
/// The text units of all elements of the component part, with the date field replaced by the
/// formatted date; or <c>null</c> (with <paramref name="handled"/> = <c>false</c>) when the date
/// cannot be parsed or the citation kind is not supported.
/// </returns>
/// <remarks>
/// Relies on the helpers GetReferenceInScope, GetDateFieldElement, ContainsND,
/// SeparateIdentifyingLetterFromND, TryParseSingleDate, TryParseDateRange, FormatDate and
/// FormatDateRange, which are defined elsewhere in this script.
/// </remarks>
public IEnumerable<ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
{
    //enter the culture the date info has been formatted and entered in,
    //e.g. 12/05/2017 is the 12th of May in en-GB, but December 5th in en-US
    CultureInfo targetCulture = CultureInfo.CreateSpecificCulture("en-US");

    //list all possible date formats for this script to check; the script tries to parse the date with these formats from left to right
    string[] formats = new string[] { "yyyy-MM-dd", "yyyy/MM/dd", "dd/MM/yyyy", "yyyy/dd/MM", "dd.MM.yyyy", "d.M.yyyy", "d.MM.yyyy", "dd.M.yyyy", "dd.MM.yy", "d.M.yy", "d.MM.yy", "dd.M.yy" };

    //if true, month names will be: Jan. Feb. Mar. Apr. May (!) Jun. Jul. Aug. Sept. Oct. Nov. Dec.
    bool usePeriodAfterAbbreviatedMonthName = true;

    //IMPORTANT: Use the following indexed placeholders {n} and format strings xxxx as in {n:xxxx} for the templates.
    //You can omit placeholders and/or place them freely inside the templates below. Yet, it is not recommended
    //to use the same placeholder more than once, because this script is not optimized for this.
    //
    //{0}: letter for ambiguity resolving
    //{1}: year of start or single date
    //{2}: month of start or single date
    //{3}: day of start or single date
    //{4}: year of end date
    //{5}: month of end date
    //{6}: day of end date
    //
    //use the following formatting for "6 June 2018"
    //YEAR:  yyyy = 2018, yy = 18
    //MONTH: MMMM = June, MMM = Jun, MM = 06, %M = 6
    //DAY:   dd = 06, %d = 6, %do = 6th

    //SINGLE DATE - output format templates
    string outputFormatSingleDatePlaceholder = "{1:yyyy}{0}";                           //e.g. 2013a
    string outputFormatSingleDateBibliography = "{1:yyyy}{0}, {2:MMMM} {3:%do}";        //e.g. 2013a, January 6th

    //DATE RANGE - output format templates
    //same year, same month
    string outputFormatDateRangeSameYearSameMonthPlaceholder = "{1:yyyy}{0}";                                   //e.g. 2013a
    string outputFormatDateRangeSameYearSameMonthBibliography = "{1:yyyy}{0}, {2:MMMM} {3:%do} - {6:%do}";      //e.g. 2013a, January 6th - 9th

    //same year, different month
    string outputFormatDateRangeSameYearDifferentMonthPlaceholder = "{1:yyyy}{0}";                                          //e.g. 2013a
    string outputFormatDateRangeSameYearDifferentMonthBibliography = "{1:yyyy}{0}, {2:MMMM} {3:%do} - {5:MMMM} {6:%do}";    //e.g. 2013a, September 28th - October 3rd

    //different years
    string outputFormatDateRangeDifferentYearsPlaceholder = "{1:yyyy}/{4:yyyy}{0}";     //e.g. 2013/2014a
    string outputFormatDateRangeDifferentYearsBibliography = "{1:yyyy}/{4:yyyy}{0}; {1:yyyy}, {2:MMMM} {3:%do} - {4:yyyy}, {5:MMMM} {6:%do}"; //e.g. 2013/2014a; 2013, December 29th - 2014, January 4th

    handled = false;

    if (citation == null)
    {
        return null;
    }

    Reference referenceInScope = GetReferenceInScope(componentPart, citation);
    if (referenceInScope == null)
    {
        return null;
    }

    FieldElement dateFieldElement = GetDateFieldElement(componentPart);
    if (dateFieldElement == null)
    {
        return null;
    }

    ReferencePropertyId referencePropertyId = dateFieldElement.PropertyId;
    string dateString = referenceInScope.GetValue(referencePropertyId) as string;
    if (string.IsNullOrEmpty(dateString))
    {
        return null;
    }

    //determine the kind of citation we are dealing with
    PlaceholderCitation placeholderCitation = citation as PlaceholderCitation;
    bool isPlaceholderCitation = placeholderCitation != null;

    PreviewCitation previewCitation = citation as PreviewCitation;
    bool isPreviewBibliographyCitation = previewCitation != null && citation.CitationType == CitationType.Bibliography;

    BibliographyCitation bibliographyCitation = citation as BibliographyCitation;
    bool isBibliographyCitation = bibliographyCitation != null;

    //for a placeholder citation, the identifying letter is taken from its corresponding bibliography citation
    if (bibliographyCitation == null && placeholderCitation != null)
    {
        bibliographyCitation = placeholderCitation.CorrespondingBibliographyCitation;
    }
    if (bibliographyCitation == null && !isPreviewBibliographyCitation)
    {
        return null;
    }

    string identifyingLetter = bibliographyCitation != null ? bibliographyCitation.IdentifyingLetter : string.Empty;
    bool hasIdentifyingLetter = !string.IsNullOrEmpty(identifyingLetter);

    //Treat n.d. + letter for disambiguation ("IdentifyingLetter")
    if (hasIdentifyingLetter && ContainsND(dateString))
    {
        //we make sure the IdentifyingLetter is separated from n.d. by a space char or hyphen: Smith n.d.-a, Smith n.d.-b
        //go to method SeparateIdentifyingLetterFromND to customize
        TextUnitCollection unfilteredOutput = componentPart.GetTextUnitsUnfiltered(citation, template);
        if (unfilteredOutput == null || !unfilteredOutput.Any())
        {
            return null;
        }

        handled = true;
        return SeparateIdentifyingLetterFromND(unfilteredOutput, identifyingLetter);
    }

    //bibliography templates are used for real and preview bibliography citations, placeholder templates for in-text citations
    bool useBibliographyTemplates = isBibliographyCitation || isPreviewBibliographyCitation;
    if (!useBibliographyTemplates && !isPlaceholderCitation)
    {
        return null;
    }

    //the script can be attached to a date/time field element or a text field element; anything else is left unhandled
    FontStyle fontStyle;
    DateTimeFieldElement dateTimeFieldElement = dateFieldElement as DateTimeFieldElement;
    if (dateTimeFieldElement != null)
    {
        fontStyle = dateTimeFieldElement.FontStyle;
    }
    else
    {
        TextFieldElement textFieldElement = dateFieldElement as TextFieldElement;
        if (textFieldElement == null)
        {
            return null;
        }
        fontStyle = textFieldElement.FontStyle;
    }

    string singleDateTemplate = useBibliographyTemplates ? outputFormatSingleDateBibliography : outputFormatSingleDatePlaceholder;

    DateTime dateSingle;
    DateTime dateStart;
    DateTime dateEnd;
    string outputText = string.Empty;

    if (TryParseSingleDate(dateString, formats, targetCulture, out dateSingle))
    {
        //single date
        outputText = FormatDate(dateSingle, singleDateTemplate, targetCulture, identifyingLetter, usePeriodAfterAbbreviatedMonthName);
    }
    else if (TryParseDateRange(dateString, formats, targetCulture, out dateStart, out dateEnd))
    {
        //date range
        bool sameYear = dateStart.Year == dateEnd.Year;
        bool sameMonth = sameYear && dateStart.Month == dateEnd.Month;
        bool sameDay = sameMonth && dateStart.Day == dateEnd.Day;

        if (sameDay)
        {
            //start and end fall on the same day: output as a single date
            //(otherwise the "different years" template would print the same year twice, e.g. 2013/2013a)
            outputText = FormatDate(dateStart, singleDateTemplate, targetCulture, identifyingLetter, usePeriodAfterAbbreviatedMonthName);
        }
        else
        {
            string rangeTemplate;
            if (sameMonth)
            {
                rangeTemplate = useBibliographyTemplates
                    ? outputFormatDateRangeSameYearSameMonthBibliography
                    : outputFormatDateRangeSameYearSameMonthPlaceholder;
            }
            else if (sameYear)
            {
                rangeTemplate = useBibliographyTemplates
                    ? outputFormatDateRangeSameYearDifferentMonthBibliography
                    : outputFormatDateRangeSameYearDifferentMonthPlaceholder;
            }
            else
            {
                rangeTemplate = useBibliographyTemplates
                    ? outputFormatDateRangeDifferentYearsBibliography
                    : outputFormatDateRangeDifferentYearsPlaceholder;
            }

            outputText = FormatDateRange(dateStart, dateEnd, rangeTemplate, targetCulture, identifyingLetter, usePeriodAfterAbbreviatedMonthName);
        }
    }

    //neither a single date nor a date range could be parsed, or formatting yielded nothing
    if (string.IsNullOrEmpty(outputText))
    {
        return null;
    }

    //do the output
    TextUnitCollection outputTextUnits = TextUnitCollectionUtility.TaggedTextToTextUnits(dateFieldElement, outputText, fontStyle);
    if (outputTextUnits == null || !outputTextUnits.Any())
    {
        return null;
    }

    //rebuild the component part: every element renders itself, only the date field is replaced by our output
    List<ITextUnit> componentPartOutput = new List<ITextUnit>();
    foreach (IElement element in componentPart.Elements)
    {
        if (element == dateFieldElement)
        {
            componentPartOutput.AddRange(outputTextUnits);
            continue;
        }

        var elementTextUnits = element.GetTextUnits(citation, template);
        if (elementTextUnits != null)
        {
            componentPartOutput.AddRange(elementTextUnits);
        }
    }

    handled = true;
    return componentPartOutput;
}
/// <summary>
/// Converts the output of the component part to sentence case: the first word and (optionally) every
/// word following sentence punctuation start with an upper-case letter, all other words are lower-cased.
/// Expressions in the "print as stated" list are always written exactly as listed.
/// </summary>
/// <param name="componentPart">The component part this filter is attached to.</param>
/// <param name="template">The template that is passed through to <c>GetTextUnitsUnfiltered</c>.</param>
/// <param name="citation">The citation whose reference supplies the language.</param>
/// <param name="handled">Set to <c>true</c> when the text units were rewritten; otherwise <c>false</c>.</param>
/// <returns>
/// The rewritten text units, or <c>null</c> when an argument is missing, the language check fails,
/// or the component part produces no output.
/// </returns>
/// <remarks>Uses the helper ToUpperFirstLetter(string, CultureInfo), which is defined elsewhere in this script.</remarks>
public IEnumerable<ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
{
    var ensureEnglishIsReferenceLanguage = true;    //if set to false, the component part filter will ALWAYS capitalize, regardless of the reference's language
    var upperCaseAfterPunctuation = true;           //if set to false, everything but the very first word will be lower case

    CultureInfo culture = CultureInfo.CurrentCulture;

    handled = false;

    if (citation == null)
    {
        return null;
    }
    if (citation.Reference == null)
    {
        return null;
    }
    if (componentPart == null)
    {
        return null;
    }
    if (template == null)
    {
        return null;
    }

    if (ensureEnglishIsReferenceLanguage)
    {
        string languageResolved = citation.Reference.Language;
        if (componentPart.Scope == ComponentPartScope.Reference)
        {
            //if ComponentPartScope is Reference, language can come from Reference or ParentReference
            if (string.IsNullOrEmpty(languageResolved) && citation.Reference.ParentReference != null)
            {
                languageResolved = citation.Reference.ParentReference.Language;
            }
        }
        else
        {
            //if ComponentPartScope is ParentReference, language MUST come from ParentReference
            if (citation.Reference.ParentReference == null)
            {
                return null;
            }
            languageResolved = citation.Reference.ParentReference.Language;
        }

        if (string.IsNullOrEmpty(languageResolved))
        {
            return null;
        }

        var termsList = new string[] { "en", "eng", "engl", "english", "Englisch" };
        var regEx = new Regex(@"\b(" + string.Join("|", termsList) + @")\b", RegexOptions.IgnoreCase);
        if (!regEx.IsMatch(languageResolved))
        {
            return null;
        }
    }

    var textUnits = componentPart.GetTextUnitsUnfiltered(citation, template);
    if (textUnits == null || !textUnits.Any())
    {
        return null;
    }

    //Expressions that must not be changed with regards to capitalization
    List<string> printAsStatedExpressions = new List<string>()
    {
        "US", "USA", "UK", "UN", "ZDF", "ARD", "GmbH", "WDR", "Microsoft", "Google",
        "Cologne", "London", "Paris", "Moscow", "Germany", "France", "Italy", "Russia", "Sweden",
        "United Nations", "United States of America", "European Union",
        "CEO", "CSR", "VC", "VCs", "American", "CEOs"
    };
    printAsStatedExpressions.Sort((x, y) => y.Length.CompareTo(x.Length)); //descending: longer ones first

    //Break the input text into a list of words at whitespaces, hyphens, parens, ASCII quotation marks
    //and sentence punctuation, as well as at the above printAsStatedExpressions
    string splitInterpunctuation = @"(\s)|(-)|(\()|(\))|(\"")|(\.)|(:)|(\?)|(!)";
    string splitPattern = printAsStatedExpressions.Count == 0
        ? splitInterpunctuation
        : string.Format(@"({0})", String.Join("|", printAsStatedExpressions.Select(x => string.Format(@"\b{0}\b", Regex.Escape(x))))) + "|" + splitInterpunctuation;
    string matchInterpunctuation = @"(\.)|(:)|(\?)|(!)";

    //build the regular expressions once instead of once per word
    var splitRegex = new Regex(splitPattern, RegexOptions.IgnoreCase);
    var separatorRegex = new Regex("^(?:" + splitInterpunctuation + @")\z");    //token consists of a single split character
    var sentencePunctuationRegex = new Regex(matchInterpunctuation);

    //stays false until the first real word (not a separator) has been written; this way a leading
    //quotation mark, parenthesis or empty text unit does not "use up" the capitalization of the first word
    bool firstWordDone = false;

    for (int i = 0; i < textUnits.Count; i++)
    {
        var text = textUnits[i].Text;
        if (string.IsNullOrEmpty(text))
        {
            continue; //nothing to convert (also avoids passing null to Regex.Split)
        }

        List<string> words = splitRegex.Split(text).Where(x => !string.IsNullOrEmpty(x)).ToList();
        var pieces = new List<string>(words.Count);

        for (int j = 0; j < words.Count; j++)
        {
            var word = words[j];

            if (separatorRegex.IsMatch(word))
            {
                //whitespace or punctuation is copied as is
                pieces.Add(word);
                continue;
            }

            string printAsStatedExpression = printAsStatedExpressions.FirstOrDefault(ex => ex.Equals(word, StringComparison.OrdinalIgnoreCase));
            if (!string.IsNullOrEmpty(printAsStatedExpression))
            {
                pieces.Add(printAsStatedExpression);
                firstWordDone = true;
                continue;
            }

            //a word follows sentence punctuation if the punctuation is the previous token
            //or the token before a single separating token (e.g. ". " or ": ")
            bool followsSentencePunctuation = upperCaseAfterPunctuation
                && ((j > 0 && sentencePunctuationRegex.IsMatch(words[j - 1]))
                    || (j > 1 && sentencePunctuationRegex.IsMatch(words[j - 2])));

            if (!firstWordDone || followsSentencePunctuation)
            {
                pieces.Add(ToUpperFirstLetter(word, culture));
            }
            else
            {
                pieces.Add(word.ToLower(culture));
            }
            firstWordDone = true;
        }

        textUnits[i].Text = string.Join(string.Empty, pieces.ToArray());
    }

    handled = true;
    return textUnits;
}
/// <summary>
/// Splits the person abbreviation ", et al." into two text units, so that the leading ", " is output
/// with neutral font style while "et al." keeps the font style of the original text unit.
/// </summary>
/// <param name="componentPart">The component part this filter is attached to; must contain exactly one person field element.</param>
/// <param name="template">The template that is passed through to <c>GetTextUnitsUnfiltered</c>.</param>
/// <param name="citation">The citation being rendered; must carry a reference.</param>
/// <param name="handled">Set to <c>true</c> when the (possibly modified) text units are returned; otherwise <c>false</c>.</param>
/// <returns>
/// The text units of the component part, or <c>null</c> when the filter does not apply
/// (no single abbreviating person field with the abbreviation text ", et al.", or no output).
/// </returns>
public IEnumerable<ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
{
    const string abbreviationText = ", et al.";

    handled = false;

    if (componentPart == null)
    {
        return null;
    }
    if (citation == null || citation.Reference == null)
    {
        return null;
    }
    if (componentPart.Elements == null)
    {
        return null;
    }

    //the filter only supports component parts with exactly one person field
    var personFieldElementCount = componentPart.Elements.Count(element => element is PersonFieldElement);
    if (personFieldElementCount != 1)
    {
        return null;
    }

    var personFieldElement = componentPart.GetFieldElements().FirstOrDefault(fieldElement => fieldElement is PersonFieldElement) as PersonFieldElement;
    if (personFieldElement == null)
    {
        return null;
    }
    if (!personFieldElement.Abbreviate)
    {
        return null;
    }

    var abbreviation = personFieldElement.Abbreviation;
    if (abbreviation == null || abbreviation.Text != abbreviationText)
    {
        return null;
    }

    var output = componentPart.GetTextUnitsUnfiltered(citation, template);
    if (output == null)
    {
        return null;
    }

    //only the first text unit with the abbreviation text is split;
    //an index loop is used because the collection is modified when the unit is found
    for (int index = 0; index < output.Count; index++)
    {
        ITextUnit textUnit = output[index];
        if (textUnit == null || textUnit.Text != abbreviationText)
        {
            continue;
        }

        var newTextUnit = new LiteralTextUnit("et al.");
        newTextUnit.FontStyle = textUnit.FontStyle;

        textUnit.Text = ", ";
        textUnit.FontStyle = FontStyle.Neutral;

        output.Insert(index + 1, newTextUnit);
        break;
    }

    handled = true;
    return output;
}
/// <summary>
/// Converts the output of the component part to sentence case: the first word and (optionally) every
/// word following sentence punctuation start with an upper-case letter, all other words are lower-cased.
/// Expressions in the "print as stated" list are always written exactly as listed.
/// </summary>
/// <param name="componentPart">The component part this filter is attached to.</param>
/// <param name="template">The template that is passed through to <c>GetTextUnitsUnfiltered</c>.</param>
/// <param name="citation">The citation whose reference supplies the language.</param>
/// <param name="handled">Set to <c>true</c> when the text units were rewritten; otherwise <c>false</c>.</param>
/// <returns>
/// The rewritten text units, or <c>null</c> when an argument is missing, the language check fails,
/// or the component part produces no output.
/// </returns>
/// <remarks>Uses the helper ToUpperFirstLetter(string, CultureInfo), which is defined elsewhere in this script.</remarks>
public IEnumerable<ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
{
    var ensureEnglishIsReferenceLanguage = true;    //if set to false, the component part filter will ALWAYS capitalize, regardless of the reference's language
    var upperCaseAfterPunctuation = true;           //if set to false, everything but the very first word will be lower case
    var modeStrict = false;                         //only applicable if ensureEnglishIsReferenceLanguage = true:
                                                    //if modeStrict = true, it will only capitalize references that have "en" or "eng" etc. in the language field
                                                    //if modeStrict = false, it will also capitalize references that have an empty language field

    CultureInfo culture = CultureInfo.CurrentCulture;

    handled = false;

    if (citation == null)
    {
        return null;
    }
    if (citation.Reference == null)
    {
        return null;
    }
    if (componentPart == null)
    {
        return null;
    }
    if (template == null)
    {
        return null;
    }

    if (ensureEnglishIsReferenceLanguage)
    {
        string languageResolved = citation.Reference.Language;
        if (componentPart.Scope == ComponentPartScope.Reference)
        {
            //if ComponentPartScope is Reference, language can come from Reference or ParentReference
            if (string.IsNullOrEmpty(languageResolved) && citation.Reference.ParentReference != null)
            {
                languageResolved = citation.Reference.ParentReference.Language;
            }
        }
        else
        {
            //if ComponentPartScope is ParentReference, language MUST come from ParentReference
            if (citation.Reference.ParentReference == null)
            {
                return null;
            }
            languageResolved = citation.Reference.ParentReference.Language;
        }

        if (string.IsNullOrEmpty(languageResolved))
        {
            //an empty language field is only accepted in non-strict mode
            if (modeStrict)
            {
                return null;
            }
        }
        else
        {
            var termsList = new string[] { "en", "eng", "engl", "English", "Englisch" };
            var regEx = new Regex(@"\b(" + string.Join("|", termsList) + @")\b", RegexOptions.IgnoreCase);
            if (!regEx.IsMatch(languageResolved))
            {
                return null;
            }
        }
    }

    var textUnits = componentPart.GetTextUnitsUnfiltered(citation, template);
    if (textUnits == null || !textUnits.Any())
    {
        return null;
    }

    //Expressions that must not be changed with regards to capitalization
    List<string> printAsStatedExpressions = new List<string>()
    {
        "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
        "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December",
        "Austria", "Belgium", "Croatia", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
        "Great Britain", "Hungary", "Iceland", "Ireland", "Italy", "Latvia", "Liechtenstein", "Lithuania", "Luxembourg",
        "Malta", "Netherlands", "Norway", "Poland", "Portugal", "Russia", "Slovakia", "Slovenia", "Spain", "Sweden",
        "Switzerland", "Turkey", "United Kingdom", "UK", "European Union", "European", "United Nations", "UN",
        "Canada", "Canadian", "Japan", "Japanese", "US‐Japanese", "U", "US", "USA", "United States of America", "American",
        "Amsterdam", "Brussels", "Cologne", "Columbia", "Den Haag", "London", "Moscow", "Munich", "Paris", "Vienna", "Zurich",
        "AG", "ARD", "ZDF", "GmbH", "WDR", "Amazon", "Google", "Microsoft",
        "CEO", "CEOs", "CFO", "CSR", "DAX", "F&E", "I", "Inc", "Ltd", "MBA", "M&A", "M&As", "NASDAQ", "R&D", "VC", "VCs"
    };
    printAsStatedExpressions.Sort((x, y) => y.Length.CompareTo(x.Length)); //descending: longer ones first

    //Break the input text into a list of words at whitespaces, hyphens, parens, quotation marks
    //and sentence punctuation, as well as at the above printAsStatedExpressions
    string allInterpunctuation = @"(\s)|(-)|(\()|(\))|("")|(„)|(“)|(“)|(”)|(‘)|(’)|(«)|(»)|(\.)|(:)|(\?)|(!)";
    string splitPattern = printAsStatedExpressions.Count == 0
        ? allInterpunctuation
        : string.Format(@"({0})", String.Join("|", printAsStatedExpressions.Select(x => string.Format(@"\b{0}\b", Regex.Escape(x))))) + "|" + allInterpunctuation;
    string interpunctuactionFollowedByCapitalization = @"(\.)|(:)|(\?)|(!)"; //next word will be capitalized if possible

    //build the regular expressions once instead of once per word
    var splitRegex = new Regex(splitPattern, RegexOptions.IgnoreCase);
    //anchored on purpose: only a token that IS a split character counts as punctuation; an unanchored test
    //would also match multi-word expressions such as "United Kingdom" because they contain whitespace
    var separatorRegex = new Regex("^(?:" + allInterpunctuation + @")\z");
    var capitalizeAfterRegex = new Regex(interpunctuactionFollowedByCapitalization);

    //stays false until the first real word (not punctuation) has been written
    bool firstWordDone = false;

    for (int i = 0; i < textUnits.Count; i++)
    {
        var text = textUnits[i].Text;
        if (string.IsNullOrEmpty(text))
        {
            continue; //nothing to convert (also avoids passing null to Regex.Split)
        }

        List<string> words = splitRegex.Split(text).Where(x => !string.IsNullOrEmpty(x)).ToList();
        var pieces = new List<string>(words.Count);

        for (int j = 0; j < words.Count; j++)
        {
            var word = words[j];

            if (separatorRegex.IsMatch(word))
            {
                //space or punctuation is copied as is
                pieces.Add(word);
                continue;
            }

            string printAsStatedExpression = printAsStatedExpressions.FirstOrDefault(ex => ex.Equals(word, StringComparison.OrdinalIgnoreCase));
            if (!string.IsNullOrEmpty(printAsStatedExpression))
            {
                pieces.Add(printAsStatedExpression);
                firstWordDone = true;
                continue;
            }

            //a word follows sentence punctuation if the punctuation is the previous token
            //or the token before a single separating token (e.g. ". " or ": ")
            bool followsSentencePunctuation = upperCaseAfterPunctuation
                && ((j > 0 && capitalizeAfterRegex.IsMatch(words[j - 1]))
                    || (j > 1 && capitalizeAfterRegex.IsMatch(words[j - 2])));

            if (!firstWordDone || followsSentencePunctuation)
            {
                pieces.Add(ToUpperFirstLetter(word, culture));
            }
            else
            {
                pieces.Add(word.ToLower(culture));
            }
            firstWordDone = true;
        }

        textUnits[i].Text = string.Join(string.Empty, pieces.ToArray());
    }

    handled = true;
    return textUnits;
}
/// <summary>
/// Converts the output of the component part to title case: every word starts with an upper-case letter,
/// except for the stop words in the exception list, which are lower-cased unless they are the first word
/// or directly follow punctuation or a quotation mark.
/// </summary>
/// <param name="componentPart">The component part this filter is attached to.</param>
/// <param name="template">The template that is passed through to <c>GetTextUnitsUnfiltered</c>.</param>
/// <param name="citation">The citation whose reference supplies the language.</param>
/// <param name="handled">Set to <c>true</c> when the text units were rewritten; otherwise <c>false</c>.</param>
/// <returns>
/// The rewritten text units, or <c>null</c> when an argument is missing, the language check fails,
/// or the component part produces no output.
/// </returns>
/// <remarks>
/// Uses the helpers ToUpperFirstLetter(string, bool, CultureInfo) and HasLowerCase(string) as well as
/// the enum ConvertFullUpperCaseWords, which are defined elsewhere in this script.
/// </remarks>
public IEnumerable<ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
{
    var ensureEnglishIsReferenceLanguage = true;    //if set to false, the component part filter will ALWAYS capitalize, regardless of the reference's language
    var modeStrict = false;                         //only applicable if ensureEnglishIsReferenceLanguage = true:
                                                    //if modeStrict = true, it will only capitalize references that have "en" or "eng" etc. in the language field
                                                    //if modeStrict = false, it will also capitalize references that have an empty language field
    var convertFullUpperCaseWords = ConvertFullUpperCaseWords.Never;

    #region Info on ConvertFullUpperCaseWords parameter
    /*
     * Example 1: UN and US government made agreement on payments of contribution
     * Example 2: UN AND US GOVERNMENT MADE AGREEMENT ON PAYMENTS OF CONTRIBUTION
     *
     * ConvertFullUpperCaseWords.Never (default)
     * Result 1: UN and US Government Made Agreement on Payments of Contribution
     * Result 2: UN and US GOVERNMENT MADE AGREEMENT on PAYMENTS of CONTRIBUTION
     *
     * ConvertFullUpperCaseWords.Always:
     * Result 1: Un and Us Government Made Agreement on Payments of Contribution
     * Result 2: Un and Us Government Made Agreement on Payments of Contribution
     *
     * ConvertFullUpperCaseWords.Auto:
     * Result 1: UN and US Government Made Agreement on Payments of Contribution
     * Result 2: Un and Us Government Made Agreement on Payments of Contribution
     */
    #endregion

    CultureInfo culture = CultureInfo.CurrentCulture;

    handled = false;

    if (citation == null)
    {
        return null;
    }
    if (citation.Reference == null)
    {
        return null;
    }
    if (componentPart == null)
    {
        return null;
    }
    if (template == null)
    {
        return null;
    }

    if (ensureEnglishIsReferenceLanguage)
    {
        string languageResolved = citation.Reference.Language;
        if (componentPart.Scope == ComponentPartScope.Reference)
        {
            //if ComponentPartScope is Reference, language can come from Reference or ParentReference
            if (string.IsNullOrEmpty(languageResolved) && citation.Reference.ParentReference != null)
            {
                languageResolved = citation.Reference.ParentReference.Language;
            }
        }
        else
        {
            //if ComponentPartScope is ParentReference, language MUST come from ParentReference
            if (citation.Reference.ParentReference == null)
            {
                return null;
            }
            languageResolved = citation.Reference.ParentReference.Language;
        }

        if (string.IsNullOrEmpty(languageResolved))
        {
            //an empty language field is only accepted in non-strict mode
            if (modeStrict)
            {
                return null;
            }
        }
        else
        {
            var termsList = new string[] { "en", "eng", "engl", "English", "Englisch" };
            var regEx = new Regex(@"\b(" + string.Join("|", termsList) + @")\b", RegexOptions.IgnoreCase);
            if (!regEx.IsMatch(languageResolved))
            {
                return null;
            }
        }
    }

    //Words that will not be capitalized; add words to this list as required
    string[] exceptionsArray = { "a", "an", "and", "as", "at", "but", "by", "down", "for", "from", "in", "into", "nor", "of", "on", "onto", "or", "over", "so", "the", "till", "to", "up", "via", "with", "yet" };
    HashSet<string> exceptions = new HashSet<string>(exceptionsArray);

    var textUnits = componentPart.GetTextUnitsUnfiltered(citation, template);
    if (textUnits == null || !textUnits.Any())
    {
        return null;
    }

    bool fullUpperCaseTreatment;
    switch (convertFullUpperCaseWords)
    {
        case ConvertFullUpperCaseWords.Always:
            fullUpperCaseTreatment = true;
            break;

        case ConvertFullUpperCaseWords.Never:
            fullUpperCaseTreatment = false;
            break;

        case ConvertFullUpperCaseWords.Auto:
        default:
            //Auto: upper-case words are only converted if the output contains no lower-case letter at all
            fullUpperCaseTreatment = !HasLowerCase(textUnits.ToString());
            break;
    }

    //Break the input text into a list of words at whitespaces, hyphens, parens, brackets,
    //quotation marks, apostrophes and sentence punctuation
    string splitPattern = @"(\s)|(-)|(\()|(\))|(\[)|(\])|(\"")|(\')|(\u2018)|(\u2019)|(\u201A)|(\u201C)|(\u201D)|(\u201E)|(\u201F)|(\u2039)|(\u203A)|(\u00AB)|(\u00BB)|(\.)|(:)|(\?)|(!)|(\u2014)";
    string matchInterpunctuation = @"\.|:|\?|!|\u2014";
    string matchQuotationMarks = @"\""|\u2018|\u2019|\u201A|\u201C|\u201D|\u201E|\u201F|\u2039|\u203A|\u00AB|\u00BB";
    string matchApostrophe = @"'|\u2019"; //further 'FALSE' apostrophe characters: \u02bc, \u02c8, \u00b4, \u0060, \u2018, \u2032, \u02bb

    #region Infos about unicode characters used
    /*
     * \u0027 Apostrophe
     * \u005B Left Square Bracket
     * \u005D Right Square Bracket
     * \u2014 Em Dash
     * \u2018 Left Single Quotation Mark
     * \u2019 Right Single Quotation Mark
     * \u201A Single Low-9 Quotation Mark
     * \u201C Left Double Quotation Mark
     * \u201D Right Double Quotation Mark
     * \u201E Double Low-9 Quotation Mark
     * \u201F Double High-Reversed-9 Quotation Mark
     * \u2039 Single Left-Pointing Angle Quotation Mark
     * \u203A Single Right-Pointing Angle Quotation Mark
     * \u00AB Double Left-Pointing Angle Quotation Mark
     * \u00BB Double Right-Pointing Angle Quotation Mark
     */
    #endregion

    //build the regular expressions once instead of once per word
    var splitRegex = new Regex(splitPattern);
    var delimiterRegex = new Regex("^(?:" + splitPattern + @")\z");     //token consists of a single split character
    var interpunctuationRegex = new Regex(matchInterpunctuation);
    var quotationMarkRegex = new Regex(matchQuotationMarks);
    var apostropheRegex = new Regex(matchApostrophe);

    //previous tokens are remembered across text units
    string prevWord = string.Empty;
    string secondPrevWord = string.Empty;

    //stays false until the first real word (not a delimiter) has been written; this way a leading
    //bracket or quotation mark does not "use up" the capitalization of the first word
    bool firstWordDone = false;

    List<string> nextWords = null;

    for (int i = 0; i < textUnits.Count; i++)
    {
        //the words of the current text unit were already split as "next words" during the previous iteration
        List<string> words;
        if (i > 0)
        {
            words = nextWords;
        }
        else
        {
            var firstText = textUnits[i].Text;
            words = string.IsNullOrEmpty(firstText)
                ? new List<string>()
                : splitRegex.Split(firstText).Where(s => s != string.Empty).ToList();
        }

        //look ahead into the next text unit, so that the last word of this unit knows its successor
        var nextText = i < textUnits.Count - 1 ? textUnits[i + 1].Text : null;
        nextWords = string.IsNullOrEmpty(nextText)
            ? new List<string>()
            : splitRegex.Split(nextText).Where(s => s != string.Empty).ToList();

        var pieces = new List<string>(words.Count);

        for (int index = 0; index < words.Count; index++)
        {
            string word = words[index];
            string nextWord = index + 1 < words.Count
                ? words[index + 1]
                : (nextWords.Count > 0 ? nextWords[0] : null);

            if (interpunctuationRegex.IsMatch(word) || string.IsNullOrWhiteSpace(word) || delimiterRegex.IsMatch(word))
            {
                //punctuation, whitespace, brackets, quotation marks etc. are copied as is
                pieces.Add(word);
            }
            else
            {
                bool afterInterpunctuation =
                    interpunctuationRegex.IsMatch(prevWord)
                    || (!string.IsNullOrWhiteSpace(secondPrevWord) && interpunctuationRegex.IsMatch(secondPrevWord) && string.IsNullOrWhiteSpace(prevWord));

                if (!firstWordDone)
                {
                    //overall first word
                    pieces.Add(ToUpperFirstLetter(word, fullUpperCaseTreatment, culture));
                }
                else if (word.Length == 1 && nextWord == ".")
                {
                    //one letter word followed by period is considered a first name initial
                    pieces.Add(ToUpperFirstLetter(word, fullUpperCaseTreatment, culture));
                }
                else if (afterInterpunctuation)
                {
                    //capitalize also stopwords directly after interpunctuation
                    pieces.Add(ToUpperFirstLetter(word, fullUpperCaseTreatment, culture));
                }
                else if (apostropheRegex.IsMatch(prevWord) && !string.IsNullOrWhiteSpace(secondPrevWord))
                {
                    //part of a word after an apostrophe, e.g. the "t" in "don't"
                    pieces.Add(word.ToLower(culture));
                }
                else if (quotationMarkRegex.IsMatch(prevWord))
                {
                    //capitalize also stopwords directly after quotation marks
                    pieces.Add(ToUpperFirstLetter(word, fullUpperCaseTreatment, culture));
                }
                else if (exceptions.Contains(word.ToLower(culture)))
                {
                    //stop word from the list of exceptions
                    pieces.Add(word.ToLower(culture));
                }
                else
                {
                    //in all other cases: capitalize
                    pieces.Add(ToUpperFirstLetter(word, fullUpperCaseTreatment, culture));
                }

                firstWordDone = true;
            }

            //save current word as previous word for next iteration
            secondPrevWord = prevWord;
            prevWord = word;
        }

        textUnits[i].Text = string.Join(string.Empty, pieces.ToArray());
    }

    handled = true;
    return textUnits;
}