Example #1
0
        /// <summary>
        /// Component part filter that appends the notes of the reference's periodical, wrapped in
        /// square brackets (<c> [notes]</c>), to the regular output of the component part.
        /// </summary>
        /// <param name="componentPart">The component part this filter is attached to.</param>
        /// <param name="template">The template that is passed on to the unfiltered output generation.</param>
        /// <param name="citation">The citation whose reference supplies the periodical.</param>
        /// <param name="handled">
        /// Set to <c>true</c> only when the returned text units should be used; <c>false</c> in every
        /// case where the method returns <c>null</c>.
        /// </param>
        /// <returns>
        /// The unfiltered text units followed by the bracketed notes, or <c>null</c> when there is nothing
        /// to add (missing input, no periodical, empty notes, no periodical field element, no regular output).
        /// </returns>
        public IEnumerable <ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
        {
            handled = false;

            if (componentPart == null || citation == null || citation.Reference == null)
            {
                return null;
            }

            var periodical = citation.Reference.Periodical;
            if (periodical == null)
            {
                return null;
            }

            var notesString = periodical.Notes;
            if (string.IsNullOrWhiteSpace(notesString))
            {
                return null;
            }

            //the filter only applies to component parts that contain a periodical field element
            var fieldElements = componentPart.GetFieldElements();
            if (fieldElements == null || !fieldElements.Any(item => item.PropertyId == ReferencePropertyId.Periodical))
            {
                return null;
            }

            //without regular output there is nothing the notes could be appended to
            var output = componentPart.GetTextUnitsUnfiltered(citation, template);
            if (output == null)
            {
                return null;
            }

            //prefix, notes and suffix are added as separate units, all in neutral font style
            output.Add(new LiteralTextUnit(" [") { FontStyle = FontStyle.Neutral });
            output.Add(new LiteralTextUnit(notesString) { FontStyle = FontStyle.Neutral });
            output.Add(new LiteralTextUnit("]") { FontStyle = FontStyle.Neutral });

            handled = true;
            return output;
        }
Example #2
0
        /// <summary>
        /// Component part filter that rebuilds the component part's output by passing the text of every
        /// literal and field text unit through <c>TaggedTextToTextUnits</c> of the element it belongs to.
        /// </summary>
        /// <remarks>
        /// Only <c>LiteralTextUnit</c> and <c>FieldTextUnit</c> instances with non-empty text contribute to
        /// the result; text units of any other type are not copied to the output.
        /// NOTE(review): <c>TaggedTextToTextUnits</c> is defined outside this snippet - presumably it turns
        /// formatting tags inside the text into separately styled text units; confirm.
        /// </remarks>
        /// <param name="componentPart">The component part this filter is attached to.</param>
        /// <param name="template">The template used to generate the unfiltered output.</param>
        /// <param name="citation">The citation currently being rendered.</param>
        /// <param name="handled"><c>true</c> when the returned collection should be used, otherwise <c>false</c>.</param>
        /// <returns>The rebuilt text units, or <c>null</c> when required input or the unfiltered output is missing.</returns>
        public IEnumerable <ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
        {
            handled = false;

            bool inputIncomplete = template == null
                                   || citation == null
                                   || citation.Reference == null
                                   || componentPart == null
                                   || componentPart.Elements == null;
            if (inputIncomplete)
            {
                return null;
            }

            //Citavi evaluates the conditions of the elements inside the component part
            var unfilteredUnits = componentPart.GetTextUnitsUnfiltered(citation, template);
            if (unfilteredUnits == null)
            {
                return null;
            }

            var result = new TextUnitCollection();

            foreach (var unit in unfilteredUnits)
            {
                var literalUnit = unit as LiteralTextUnit;
                if (literalUnit != null)
                {
                    var literalText = literalUnit.Text;
                    if (string.IsNullOrEmpty(literalText))
                    {
                        continue;
                    }

                    //a temporary font style is merged into the regular one
                    var literalStyle = literalUnit.FontStyle;
                    if (literalUnit.HasTemporaryFontStyle())
                    {
                        literalStyle = literalStyle | literalUnit.TemporaryFontStyle;
                    }

                    var convertedLiteral = literalUnit.LiteralElement.TaggedTextToTextUnits(literalText, literalStyle);
                    if (convertedLiteral == null || convertedLiteral.Count <= 0)
                    {
                        continue;
                    }

                    result.AddRange(convertedLiteral);
                    continue;
                }

                var fieldUnit = unit as FieldTextUnit;
                if (fieldUnit == null)
                {
                    //neither literal nor field text unit: not part of the output
                    continue;
                }

                var fieldText = fieldUnit.Text;
                if (string.IsNullOrEmpty(fieldText))
                {
                    continue;
                }

                var fieldStyle = fieldUnit.FontStyle;
                if (fieldUnit.HasTemporaryFontStyle())
                {
                    fieldStyle = fieldStyle | fieldUnit.TemporaryFontStyle;
                }

                var convertedField = fieldUnit.FieldElement.TaggedTextToTextUnits(fieldText, fieldStyle);
                if (convertedField == null || convertedField.Count <= 0)
                {
                    continue;
                }

                result.AddRange(convertedField);
            }

            handled = true;
            return result;
        }
Example #3
0
        //Version 3.0: complete overhaul, script considers different output for placeholder citations and bibliography citations
        //Version 2.0: script can be attached to both date/time field element as well as text field element

        /// <summary>
        /// Component part filter that re-formats the date (single date or date range) of the reference in
        /// scope, using different output templates for placeholder citations and bibliography citations.
        /// </summary>
        /// <remarks>
        /// The helpers <c>GetReferenceInScope</c>, <c>GetDateFieldElement</c>, <c>ContainsND</c>,
        /// <c>SeparateIdentifyingLetterFromND</c>, <c>TryParseSingleDate</c>, <c>TryParseDateRange</c>,
        /// <c>FormatDate</c> and <c>FormatDateRange</c> are defined outside this method.
        /// </remarks>
        /// <param name="componentPart">The component part this filter is attached to.</param>
        /// <param name="template">The template passed on to the element output generation.</param>
        /// <param name="citation">The citation currently being rendered.</param>
        /// <param name="handled"><c>true</c> when the returned text units should be used, otherwise <c>false</c>.</param>
        /// <returns>The text units of the component part with the re-formatted date, or <c>null</c> when the filter does not apply.</returns>
        public IEnumerable <ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
        {
            //enter the culture the date info has been formatted and entered in, e.g. 12/05/2017 is 12 May in en-GB but December 5 in en-US
            CultureInfo targetCulture = CultureInfo.CreateSpecificCulture("en-US");

            //list all possible date formats for this script to check; the script tries the formats from left to right
            string[] formats = new string[] { "yyyy-MM-dd", "yyyy/MM/dd", "dd/MM/yyyy", "yyyy/dd/MM", "dd.MM.yyyy", "d.M.yyyy", "d.MM.yyyy", "dd.M.yyyy", "dd.MM.yy", "d.M.yy", "d.MM.yy", "dd.M.yy" };

            bool usePeriodAfterAbbreviatedMonthName = true;                     //if true, month names will be: Jan. Feb. Mar. Apr. May (!) Jun. Jul. Aug. Sept. Oct. Nov. Dec.

            //IMPORTANT: Use the following indexed placeholders {n} and format strings xxxx as in {n:xxxx} for the templates.
            //You can omit placeholders and/or place them freely inside the templates below. Yet, it is not recommended
            //to use the same placeholder more than once, because this script is not optimized for this.
            //
            //{0}: letter for ambiguity resolving
            //{1}: year    of start or single date
            //{2}: month   of start or single date
            //{3}: day     of start or single date
            //{4}: year    of end date
            //{5}: month   of end date
            //{6}: day     of end date
            //use the following formatting for "6 June 2018"
            //YEAR:    yyyy = 2018, yy = 18
            //MONTH:   MMMM = June, MMM = Jun, MM = 06, %M = 6
            //DAY:     dd = 06, %d = 6, %do = 6th

            //SINGLE DATE - output format templates
            string outputFormatSingleDatePlaceholder  = "{1:yyyy}{0}";                          //e.g. 2013a
            string outputFormatSingleDateBibliography = "{1:yyyy}{0}, {2:MMMM} {3:%do}";        //e.g. 2013a, January 6th

            //DATE RANGE - output format templates
            //same year, same month
            string outputFormatDateRangeSameYearSameMonthPlaceholder  = "{1:yyyy}{0}";                                  //e.g. 2013a
            string outputFormatDateRangeSameYearSameMonthBibliography = "{1:yyyy}{0}, {2:MMMM} {3:%do} - {6:%do}";      //e.g. 2013a, January 6th - 9th

            //same year, different month
            string outputFormatDateRangeSameYearDifferentMonthPlaceholder  = "{1:yyyy}{0}";                                         //e.g. 2013a
            string outputFormatDateRangeSameYearDifferentMonthBibliography = "{1:yyyy}{0}, {2:MMMM} {3:%do} - {5:MMMM} {6:%do}";    //e.g. 2013a, September 28th - October 3rd

            //different years
            string outputFormatDateRangeDifferentYearsPlaceholder  = "{1:yyyy}/{4:yyyy}{0}";                                                                //e.g. 2013/2014a
            string outputFormatDateRangeDifferentYearsBibliography = "{1:yyyy}/{4:yyyy}{0}; {1:yyyy}, {2:MMMM} {3:%do} - {4:yyyy}, {5:MMMM} {6:%do}";       //e.g. 2013/2014a; 2013, December 29th - 2014, January 4th

            handled = false;

            if (componentPart == null || citation == null)
            {
                return null;
            }

            Reference referenceInScope = GetReferenceInScope(componentPart, citation);
            if (referenceInScope == null)
            {
                return null;
            }

            FieldElement dateFieldElement = GetDateFieldElement(componentPart);
            if (dateFieldElement == null)
            {
                return null;
            }

            string dateString = referenceInScope.GetValue(dateFieldElement.PropertyId) as string;
            if (string.IsNullOrEmpty(dateString))
            {
                return null;
            }

            //determine the kind of citation we are dealing with
            PlaceholderCitation placeholderCitation = citation as PlaceholderCitation;
            bool isPlaceholderCitation = placeholderCitation != null;

            bool isPreviewBibliographyCitation = citation is PreviewCitation && citation.CitationType == CitationType.Bibliography;

            BibliographyCitation bibliographyCitation = citation as BibliographyCitation;
            bool isBibliographyCitation = bibliographyCitation != null;

            //a placeholder citation takes the identifying letter from its corresponding bibliography citation
            if (bibliographyCitation == null && placeholderCitation != null)
            {
                bibliographyCitation = placeholderCitation.CorrespondingBibliographyCitation;
            }
            if (bibliographyCitation == null && !isPreviewBibliographyCitation)
            {
                return null;
            }

            string identifyingLetter = bibliographyCitation != null ? bibliographyCitation.IdentifyingLetter : string.Empty;

            //Treat n.d. + letter for disambiguation ("IdentifyingLetter"):
            //we make sure the IdentifyingLetter is separated from n.d. by a space char or hyphen: Smith n.d.-a, Smith n.d.-b
            //go to method SeparateIdentifyingLetterFromND to customize
            if (!string.IsNullOrEmpty(identifyingLetter) && ContainsND(dateString))
            {
                TextUnitCollection output = componentPart.GetTextUnitsUnfiltered(citation, template);
                if (output == null || !output.Any())
                {
                    return null;
                }

                handled = true;
                return SeparateIdentifyingLetterFromND(output, identifyingLetter);
            }

            //the script can be attached to a date/time field element as well as to a text field element;
            //any other kind of element is left to Citavi's standard output instead of failing on a cast
            FontStyle fontStyle;
            DateTimeFieldElement dateTimeFieldElement = dateFieldElement as DateTimeFieldElement;
            if (dateTimeFieldElement != null)
            {
                fontStyle = dateTimeFieldElement.FontStyle;
            }
            else
            {
                TextFieldElement textFieldElement = dateFieldElement as TextFieldElement;
                if (textFieldElement == null)
                {
                    return null;
                }
                fontStyle = textFieldElement.FontStyle;
            }

            //bibliography templates take precedence; anything that is neither bibliography nor placeholder is not handled
            bool useBibliographyFormat = isBibliographyCitation || isPreviewBibliographyCitation;
            if (!useBibliographyFormat && !isPlaceholderCitation)
            {
                return null;
            }

            string singleDateFormat = useBibliographyFormat ? outputFormatSingleDateBibliography : outputFormatSingleDatePlaceholder;

            DateTime dateSingle;
            DateTime dateStart;
            DateTime dateEnd;

            string outputText = string.Empty;

            if (TryParseSingleDate(dateString, formats, targetCulture, out dateSingle))
            {
                outputText = FormatDate(dateSingle, singleDateFormat, targetCulture, identifyingLetter, usePeriodAfterAbbreviatedMonthName);
            }
            else if (TryParseDateRange(dateString, formats, targetCulture, out dateStart, out dateEnd))
            {
                if (dateStart.Date == dateEnd.Date)
                {
                    //a range that starts and ends on the same day is a single date;
                    //it must not end up in the "different years" template (e.g. "2013/2013a; ...")
                    outputText = FormatDate(dateStart, singleDateFormat, targetCulture, identifyingLetter, usePeriodAfterAbbreviatedMonthName);
                }
                else
                {
                    string dateRangeFormat;
                    if (dateStart.Year != dateEnd.Year)
                    {
                        //different years
                        dateRangeFormat = useBibliographyFormat ? outputFormatDateRangeDifferentYearsBibliography : outputFormatDateRangeDifferentYearsPlaceholder;
                    }
                    else if (dateStart.Month != dateEnd.Month)
                    {
                        //same year, different months
                        dateRangeFormat = useBibliographyFormat ? outputFormatDateRangeSameYearDifferentMonthBibliography : outputFormatDateRangeSameYearDifferentMonthPlaceholder;
                    }
                    else
                    {
                        //same year, same month, different days
                        dateRangeFormat = useBibliographyFormat ? outputFormatDateRangeSameYearSameMonthBibliography : outputFormatDateRangeSameYearSameMonthPlaceholder;
                    }

                    outputText = FormatDateRange(dateStart, dateEnd, dateRangeFormat, targetCulture, identifyingLetter, usePeriodAfterAbbreviatedMonthName);
                }
            }

            //neither a single date nor a date range could be parsed, or formatting produced no text
            if (string.IsNullOrEmpty(outputText))
            {
                return null;
            }

            TextUnitCollection outputTextUnits = TextUnitCollectionUtility.TaggedTextToTextUnits(dateFieldElement, outputText, fontStyle);
            if (outputTextUnits == null || !outputTextUnits.Any())
            {
                return null;
            }

            //rebuild the component part output: the date element is replaced by the formatted date,
            //all other elements contribute their regular output
            List <ITextUnit> componentPartOutput = new List <ITextUnit>();
            foreach (IElement element in componentPart.Elements)
            {
                if (element == dateFieldElement)
                {
                    componentPartOutput.AddRange(outputTextUnits);
                    continue;
                }

                var elementTextUnits = element.GetTextUnits(citation, template);
                if (elementTextUnits != null)
                {
                    componentPartOutput.AddRange(elementTextUnits);
                }
            }

            handled = true;
            return componentPartOutput;
        }
Example #4
0
        /// <summary>
        /// Component part filter that converts the output of the component part to sentence case:
        /// the first word (and optionally every word following <c>. : ? !</c>) starts with an upper case
        /// letter, all other words are lower-cased, and a list of protected expressions is printed
        /// exactly as stated in that list.
        /// </summary>
        /// <remarks>
        /// <c>ToUpperFirstLetter</c> is defined outside this method.
        /// </remarks>
        /// <param name="componentPart">The component part this filter is attached to.</param>
        /// <param name="template">The template used to generate the unfiltered output.</param>
        /// <param name="citation">The citation whose reference (or parent reference) supplies the language.</param>
        /// <param name="handled"><c>true</c> when the returned text units should be used, otherwise <c>false</c>.</param>
        /// <returns>The re-cased text units, or <c>null</c> when the filter does not apply.</returns>
        public IEnumerable <ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
        {
            var ensureEnglishIsReferenceLanguage = true;                //if set to false, the component part filter will ALWAYS capitalize, regardless of the reference's language
            var upperCaseAfterPunctuation        = true;                //if set to false, everything but the very first word will be lower case

            CultureInfo culture = CultureInfo.CurrentCulture;

            handled = false;

            if (citation == null || citation.Reference == null || componentPart == null || template == null)
            {
                return null;
            }

            if (ensureEnglishIsReferenceLanguage)
            {
                string languageResolved = citation.Reference.Language;
                if (componentPart.Scope == ComponentPartScope.Reference)
                {
                    //if ComponentPartScope is Reference, language can come from Reference or ParentReference
                    if (string.IsNullOrEmpty(languageResolved) && citation.Reference.ParentReference != null)
                    {
                        languageResolved = citation.Reference.ParentReference.Language;
                    }
                }
                else
                {
                    //if ComponentPartScope is ParentReference, language MUST come from ParentReference
                    if (citation.Reference.ParentReference == null)
                    {
                        return null;
                    }
                    languageResolved = citation.Reference.ParentReference.Language;
                }
                if (string.IsNullOrEmpty(languageResolved))
                {
                    return null;
                }

                var termsList = new string[] {
                    "en",
                    "eng",
                    "engl",
                    "english",
                    "Englisch"
                };

                var regEx = new Regex(@"\b(" + string.Join("|", termsList) + @")\b", RegexOptions.IgnoreCase);
                if (!regEx.IsMatch(languageResolved))
                {
                    return null;
                }
            }

            var textUnits = componentPart.GetTextUnitsUnfiltered(citation, template);
            if (textUnits == null || !textUnits.Any())
            {
                return null;
            }

            //Expressions that must not be changed with regards to capitalization
            List <string> printAsStatedExpressions = new List <string>()
            {
                "US", "USA", "UK", "UN", "ZDF", "ARD", "GmbH", "WDR",
                "Microsoft", "Google",
                "Cologne", "London", "Paris", "Moscow",
                "Germany", "France", "Italy", "Russia", "Sweden",
                "United Nations", "United States of America", "European Union", "CEO", "CSR", "VC", "VCs", "American", "CEOs"
            };

            printAsStatedExpressions.Sort((x, y) => y.Length.CompareTo(x.Length));             //descending: longer ones first

            //Break the input text into a list of tokens at whitespace, hyphens, parentheses,
            //ASCII quotation marks and sentence punctuation as well as at the above printAsStatedExpressions
            string splitInterpunctuation = @"(\s)|(-)|(\()|(\))|(\"")|(\.)|(:)|(\?)|(!)";
            string splitPattern          = printAsStatedExpressions.Count == 0 ?
                                           splitInterpunctuation :
                                           string.Format(@"({0})", String.Join("|", printAsStatedExpressions.Select(x => string.Format(@"\b{0}\b", Regex.Escape(x))))) + "|" + splitInterpunctuation;

            //punctuation after which the next word is capitalized
            string matchInterpunctuation = @"(\.)|(:)|(\?)|(!)";

            //tracks whether the first real word of the whole output has been written already; a fixed
            //"first token of first text unit" test would capitalize a leading quote or parenthesis instead
            bool firstWordDone = false;

            for (int i = 0; i < textUnits.Count; i++)
            {
                var text = textUnits[i].Text;
                if (string.IsNullOrEmpty(text))
                {
                    //nothing to re-case (and Regex.Split does not accept null)
                    continue;
                }

                List <string> words = Regex.Split(text, splitPattern, RegexOptions.IgnoreCase).Where(x => !string.IsNullOrEmpty(x)).ToList();

                var recased = new System.Text.StringBuilder(text.Length);

                for (int j = 0; j < words.Count; j++)
                {
                    var word = words[j];

                    //protected expressions are checked first, because some of them contain whitespace
                    string printAsStatedExpression = printAsStatedExpressions.FirstOrDefault(expression => expression.Equals(word, StringComparison.OrdinalIgnoreCase));
                    if (!string.IsNullOrEmpty(printAsStatedExpression))
                    {
                        recased.Append(printAsStatedExpression);
                        firstWordDone = true;
                        continue;
                    }

                    if (Regex.IsMatch(word, splitInterpunctuation))
                    {
                        //whitespace or punctuation token: copied unchanged, does not count as a word
                        recased.Append(word);
                        continue;
                    }

                    //a word directly after sentence punctuation, or separated from it by one token (usually a space)
                    bool followsSentencePunctuation = upperCaseAfterPunctuation &&
                                                      ((j > 0 && Regex.IsMatch(words[j - 1], matchInterpunctuation)) ||
                                                       (j > 1 && Regex.IsMatch(words[j - 2], matchInterpunctuation)));

                    if (!firstWordDone || followsSentencePunctuation)
                    {
                        recased.Append(ToUpperFirstLetter(word, culture));
                    }
                    else
                    {
                        recased.Append(word.ToLower(culture));
                    }
                    firstWordDone = true;
                }

                textUnits[i].Text = recased.ToString();
            }

            handled = true;
            return textUnits;
        }
Example #5
0
        /// <summary>
        /// Component part filter that splits the person abbreviation <c>", et al."</c> into a neutrally
        /// formatted <c>", "</c> and a separate <c>"et al."</c> text unit that keeps the original font style.
        /// </summary>
        /// <remarks>
        /// The filter only applies when the component part contains exactly one person field element
        /// that is set to abbreviate with the abbreviation text <c>", et al."</c>.
        /// </remarks>
        /// <param name="componentPart">The component part this filter is attached to.</param>
        /// <param name="template">The template used to generate the unfiltered output.</param>
        /// <param name="citation">The citation currently being rendered.</param>
        /// <param name="handled"><c>true</c> when the returned text units should be used, otherwise <c>false</c>.</param>
        /// <returns>The (possibly modified) unfiltered text units, or <c>null</c> when the filter does not apply.</returns>
        public IEnumerable <ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
        {
            handled = false;

            if (componentPart == null || componentPart.Elements == null)
            {
                return null;
            }
            if (citation == null || citation.Reference == null)
            {
                return null;
            }

            var personFieldElementCount = componentPart.Elements.Count(element => element is PersonFieldElement);
            if (personFieldElementCount != 1)
            {
                return null;
            }

            var personFieldElement = componentPart.GetFieldElements().FirstOrDefault(fieldElement => fieldElement is PersonFieldElement) as PersonFieldElement;
            if (personFieldElement == null || !personFieldElement.Abbreviate)
            {
                return null;
            }

            //a null, empty or different abbreviation text all fail this comparison
            var abbreviation = personFieldElement.Abbreviation;
            if (abbreviation == null || abbreviation.Text != ", et al.")
            {
                return null;
            }

            var output = componentPart.GetTextUnitsUnfiltered(citation, template);
            if (output == null)
            {
                return null;
            }

            //index-based loop, because the collection is modified when the abbreviation is found
            for (int index = 0; index < output.Count; index++)
            {
                var textUnit = output[index];
                if (textUnit == null || textUnit.Text != ", et al.")
                {
                    continue;
                }

                //"et al." keeps the font style of the original unit ...
                var newTextUnit = new LiteralTextUnit("et al.");
                newTextUnit.FontStyle = textUnit.FontStyle;

                //... while the separating ", " is printed in neutral font style
                textUnit.Text      = ", ";
                textUnit.FontStyle = FontStyle.Neutral;
                output.Insert(index + 1, newTextUnit);

                //only the first occurrence is split
                break;
            }

            handled = true;
            return output;
        }
Example #6
0
        /// <summary>
        /// Component part filter that converts the output of the component part to sentence case:
        /// the first word (and optionally every word following <c>. : ? !</c>) starts with an upper case
        /// letter, all other words are lower-cased, and a list of protected expressions is printed
        /// exactly as stated in that list.
        /// </summary>
        /// <remarks>
        /// <c>ToUpperFirstLetter</c> is defined outside this method.
        /// </remarks>
        /// <param name="componentPart">The component part this filter is attached to.</param>
        /// <param name="template">The template used to generate the unfiltered output.</param>
        /// <param name="citation">The citation whose reference (or parent reference) supplies the language.</param>
        /// <param name="handled"><c>true</c> when the returned text units should be used, otherwise <c>false</c>.</param>
        /// <returns>The re-cased text units, or <c>null</c> when the filter does not apply.</returns>
        public IEnumerable <ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
        {
            var ensureEnglishIsReferenceLanguage = true;                //if set to false, the component part filter will ALWAYS capitalize, regardless of the reference's language
            var upperCaseAfterPunctuation        = true;                //if set to false, everything but the very first word will be lower case
            var modeStrict = false;                                     //only applicable if ensureEnglishIsReferenceLanguage = true:
            //if modeStrict = true, it will only capitalize references that have "en" or "eng" etc. in the language field
            //if modeStrict = false, it will also capitalize references that have an empty language field

            CultureInfo culture = CultureInfo.CurrentCulture;

            handled = false;

            if (citation == null || citation.Reference == null || componentPart == null || template == null)
            {
                return null;
            }

            if (ensureEnglishIsReferenceLanguage)
            {
                string languageResolved = citation.Reference.Language;
                if (componentPart.Scope == ComponentPartScope.Reference)
                {
                    //if ComponentPartScope is Reference, language can come from Reference or ParentReference
                    if (string.IsNullOrEmpty(languageResolved) && citation.Reference.ParentReference != null)
                    {
                        languageResolved = citation.Reference.ParentReference.Language;
                    }
                }
                else
                {
                    //if ComponentPartScope is ParentReference, language MUST come from ParentReference
                    if (citation.Reference.ParentReference == null)
                    {
                        return null;
                    }
                    languageResolved = citation.Reference.ParentReference.Language;
                }

                if (string.IsNullOrEmpty(languageResolved))
                {
                    //an empty language field is only accepted in non-strict mode
                    if (modeStrict)
                    {
                        return null;
                    }
                }
                else
                {
                    var termsList = new string[] {
                        "en",
                        "eng",
                        "engl",
                        "English",
                        "Englisch"
                    };

                    var regEx = new Regex(@"\b(" + string.Join("|", termsList) + @")\b", RegexOptions.IgnoreCase);
                    if (!regEx.IsMatch(languageResolved))
                    {
                        return null;
                    }
                }
            }

            var textUnits = componentPart.GetTextUnitsUnfiltered(citation, template);
            if (textUnits == null || !textUnits.Any())
            {
                return null;
            }

            //Expressions that must not be changed with regards to capitalization
            List <string> printAsStatedExpressions = new List <string>()
            {
                "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
                "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December",
                "Austria", "Belgium", "Croatia", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
                "Great Britain", "Hungary", "Iceland", "Ireland", "Italy", "Latvia", "Liechtenstein", "Lithuania", "Luxembourg", "Malta",
                "Netherlands", "Norway", "Poland", "Portugal", "Russia", "Slovakia", "Slovenia", "Spain", "Sweden", "Switzerland", "Turkey",
                "United Kingdom", "UK", "European Union", "European", "United Nations", "UN",
                "Canada", "Canadian", "Japan", "Japanese", "US‐Japanese", "U", "US", "USA", "United States of America", "American",
                "Amsterdam", "Brussels", "Cologne", "Columbia", "Den Haag", "London", "Moscow", "Munich", "Paris", "Vienna", "Zurich",
                "AG", "ARD", "ZDF", "GmbH", "WDR", "Amazon", "Google", "Microsoft",
                "CEO", "CEOs", "CFO", "CSR", "DAX", "F&E", "I", "Inc", "Ltd", "MBA", "M&A", "M&As", "NASDAQ", "R&D", "VC", "VCs"
            };

            printAsStatedExpressions.Sort((x, y) => y.Length.CompareTo(x.Length));             //descending: longer ones first

            //Break the input text into a list of tokens at whitespace, hyphens, parentheses,
            //quotation marks and sentence punctuation as well as at the above printAsStatedExpressions
            string allInterpunctuation = @"(\s)|(-)|(\()|(\))|("")|(„)|(“)|(“)|(”)|(‘)|(’)|(«)|(»)|(\.)|(:)|(\?)|(!)";
            string splitPattern        = printAsStatedExpressions.Count == 0 ?
                                         allInterpunctuation :
                                         string.Format(@"({0})", String.Join("|", printAsStatedExpressions.Select(x => string.Format(@"\b{0}\b", Regex.Escape(x))))) + "|" + allInterpunctuation;

            string interpunctuactionFollowedByCapitalization = @"(\.)|(:)|(\?)|(!)";             //next word will be capitalized if possible

            //tracks whether the first real word of the whole output has been written already
            bool firstWordDone = false;

            for (int i = 0; i < textUnits.Count; i++)
            {
                var text = textUnits[i].Text;
                if (string.IsNullOrEmpty(text))
                {
                    //nothing to re-case (and Regex.Split does not accept null)
                    continue;
                }

                List <string> words = Regex.Split(text, splitPattern, RegexOptions.IgnoreCase).Where(x => !string.IsNullOrEmpty(x)).ToList();

                var recased = new System.Text.StringBuilder(text.Length);

                for (int j = 0; j < words.Count; j++)
                {
                    var word = words[j];

                    //protected expressions MUST be checked before the punctuation test: multi-word entries
                    //such as "United Nations" contain whitespace and would otherwise be mistaken for
                    //punctuation and copied with whatever casing the source text has
                    string printAsStatedExpression = printAsStatedExpressions.FirstOrDefault(expression => expression.Equals(word, StringComparison.OrdinalIgnoreCase));
                    if (!string.IsNullOrEmpty(printAsStatedExpression))
                    {
                        recased.Append(printAsStatedExpression);
                        firstWordDone = true;
                        continue;
                    }

                    if (Regex.IsMatch(word, allInterpunctuation))
                    {
                        //whitespace or punctuation token: copied unchanged, does not count as a word
                        recased.Append(word);
                        continue;
                    }

                    //a word directly after sentence punctuation, or separated from it by one token (usually a space)
                    bool followsSentencePunctuation = upperCaseAfterPunctuation &&
                                                      ((j > 0 && Regex.IsMatch(words[j - 1], interpunctuactionFollowedByCapitalization)) ||
                                                       (j > 1 && Regex.IsMatch(words[j - 2], interpunctuactionFollowedByCapitalization)));

                    if (!firstWordDone || followsSentencePunctuation)
                    {
                        recased.Append(ToUpperFirstLetter(word, culture));
                    }
                    else
                    {
                        recased.Append(word.ToLower(culture));
                    }
                    firstWordDone = true;
                }

                textUnits[i].Text = recased.ToString();
            }

            handled = true;
            return textUnits;
        }
Example #7
0
        /// <summary>
        /// Component part filter that re-cases the text of a component part in headline style:
        /// words are passed through <c>ToUpperFirstLetter</c> unless they appear in a list of
        /// stop words ("a", "and", "of", ...), which are lower-cased when they are not the first
        /// word and do not directly follow punctuation or a quotation mark.
        /// </summary>
        /// <param name="componentPart">The component part whose unfiltered text units are re-cased.</param>
        /// <param name="template">The template passed on to <c>GetTextUnitsUnfiltered</c>.</param>
        /// <param name="citation">The citation whose reference (and its language) is evaluated.</param>
        /// <param name="handled">
        /// Set to <c>true</c> when the re-cased text units are returned; <c>false</c> whenever
        /// the method returns <c>null</c>.
        /// </param>
        /// <returns>
        /// The text units with their <c>Text</c> replaced, or <c>null</c> if an argument is missing,
        /// the language check fails, or the component part yields no text units.
        /// </returns>
        public IEnumerable <ITextUnit> GetTextUnits(ComponentPart componentPart, Template template, Citation citation, out bool handled)
        {
            var ensureEnglishIsReferenceLanguage = true;                //if set to false, the component part filter will ALWAYS capitalize, regardless of the reference's language
            var modeStrict = false;                                     //only applicable if ensureEnglishIsReferenceLanguage = true:
            //if modeStrict = true, it will only capitalize references that have "en" or "eng" etc. in the language field
            //if modeStrict = false, it will also capitalize references that have an empty language field

            var convertFullUpperCaseWords = ConvertFullUpperCaseWords.Never;

            #region Info on ConvertFullUpperCaseWords parameter

            /*
             * Example 1: UN and US government made agreement on payments of contribution
             * Example 2: UN AND US GOVERNMENT MADE AGREEMENT ON PAYMENTS OF CONTRIBUTION
             *
             * ConvertFullUpperCaseWords.Never (default)
             *   Result 1: UN and US Government Made Agreement on Payments of Contribution
             *   Result 2: UN and US GOVERNMENT MADE AGREEMENT on PAYMENTS of CONTRIBUTION
             * ConvertFullUpperCaseWords.Always:
             *   Result 1: Un and Us Government Made Agreement on Payments of Contribution
             *   Result 2: Un and Us Government Made Agreement on Payments of Contribution
             * ConvertFullUpperCaseWords.Auto:
             *   Result 1: UN and US Government Made Agreement on Payments of Contribution
             *   Result 2: Un and Us Government Made Agreement on Payments of Contribution
             */
            #endregion

            //NOTE(review): casing follows the current culture. Under cultures with special casing
            //rules (e.g. tr-TR, where "I" lower-cases to a dotless i) a word such as "IN" would not
            //match the stop word "in". Consider a fixed culture if that matters - left unchanged here.
            CultureInfo culture = CultureInfo.CurrentCulture;

            handled = false;

            if (citation == null || citation.Reference == null)
            {
                return null;
            }
            if (componentPart == null || template == null)
            {
                return null;
            }

            if (ensureEnglishIsReferenceLanguage)
            {
                string languageResolved = citation.Reference.Language;
                if (componentPart.Scope == ComponentPartScope.Reference)
                {
                    //if ComponentPartScope is Reference, language can come from Reference or ParentReference
                    if (string.IsNullOrEmpty(languageResolved) && citation.Reference.ParentReference != null)
                    {
                        languageResolved = citation.Reference.ParentReference.Language;
                    }
                }
                else
                {
                    //if ComponentPartScope is ParentReference, language MUST come from ParentReference
                    if (citation.Reference.ParentReference == null)
                    {
                        return null;
                    }
                    languageResolved = citation.Reference.ParentReference.Language;
                }

                if (string.IsNullOrEmpty(languageResolved))
                {
                    //an empty language field is only accepted in non-strict mode
                    if (modeStrict)
                    {
                        return null;
                    }
                }
                else
                {
                    var termsList = new string[] {
                        "en",
                        "eng",
                        "engl",
                        "English",
                        "Englisch"
                    };

                    //the language field must contain one of the terms as a whole word (case-insensitive)
                    var languageRegex = new Regex(@"\b(" + string.Join("|", termsList) + @")\b", RegexOptions.IgnoreCase);
                    if (!languageRegex.IsMatch(languageResolved))
                    {
                        return null;
                    }
                }
            }

            //Words that will not be capitalized; add words to this list as required
            //(kept in a set: the lookup happens once per word)
            var exceptions = new HashSet<string>
            {
                "a",   "an",   "and",  "as",  "at",
                "but", "by",   "down", "for", "from",
                "in",  "into", "nor",
                "of",  "on",   "onto", "or",  "over",
                "so",  "the",  "till", "to",
                "up",  "via",  "with", "yet"
            };

            var textUnits = componentPart.GetTextUnitsUnfiltered(citation, template);
            if (textUnits == null || !textUnits.Any())
            {
                return null;
            }

            bool fullUpperCaseTreatment;
            switch (convertFullUpperCaseWords)
            {
                case ConvertFullUpperCaseWords.Always:
                    fullUpperCaseTreatment = true;
                    break;

                case ConvertFullUpperCaseWords.Never:
                    fullUpperCaseTreatment = false;
                    break;

                default: //ConvertFullUpperCaseWords.Auto (and any other value)
                    //only texts without any lower case letter get the full-upper-case treatment
                    //NOTE(review): relies on textUnits.ToString() yielding the text content - confirm
                    fullUpperCaseTreatment = !HasLowerCase(textUnits.ToString());
                    break;
            }

            //Break the input text into tokens at whitespace, hyphens, parentheses, square brackets,
            //ASCII and typographic quotation marks, apostrophes, sentence punctuation (. : ? !) and
            //the em dash. Every separator sits in a capture group, so Split returns the separators
            //as tokens of their own and the text can be re-assembled without loss.
            string splitPattern = @"(\s)|(-)|(\()|(\))|(\[)|(\])|(\"")|(\')|(\u2018)|(\u2019)|(\u201A)|(\u201C)|(\u201D)|(\u201E)|(\u201F)|(\u2039)|(\u203A)|(\u00AB)|(\u00BB)|(\.)|(:)|(\?)|(!)|(\u2014)";

            string matchInterpunctuation = @"\.|:|\?|!|\u2014";
            string matchQuotationMarks   = @"\""|\u2018|\u2019|\u201A|\u201C|\u201D|\u201E|\u201F|\u2039|\u203A|\u00AB|\u00BB";
            string matchApostrophe       = @"'|\u2019"; //further 'FALSE' apostrophe characters: \u02bc, \u02c8, \u00b4, \u0060, \u2018, \u2032, \u02bb

            #region Infos about unicode characters used

            /*
             * \u0027  Apostrophe
             * \u005B  Left Square Bracket
             * \u005D  Right Square Bracket
             * \u2014  Em Dash
             * \u2018  Left Single Quotation Mark
             * \u2019  Right Single Quotation Mark
             * \u201A  Single Low-9 Quotation Mark
             * \u201C  Left Double Quotation Mark
             * \u201D  Right Double Quotation Mark
             * \u201E  Double Low-9 Quotation Mark
             * \u201F  Double High-Reversed-9 Quotation Mark
             * \u2039  Single Left-Pointing Angle Quotation Mark
             * \u203A  Single Right-Pointing Angle Quotation Mark
             * \u00AB  Double Left-Pointing Angle Quotation Mark
             * \u00BB  Double Right-Pointing Angle Quotation Mark
             */
            #endregion

            //build the regular expressions once instead of once per token
            var splitRegex           = new Regex(splitPattern);
            var interpunctuationRegex = new Regex(matchInterpunctuation);
            var quotationMarkRegex   = new Regex(matchQuotationMarks);
            var apostropheRegex      = new Regex(matchApostrophe);

            //the two previous tokens are tracked across text unit boundaries
            string prevWord       = string.Empty;
            string secondPrevWord = string.Empty;

            //tokens of the text unit that is processed next; a null or empty text yields no tokens
            //(Regex.Split would throw on a null input)
            string firstText = textUnits[0].Text;
            List<string> nextWords = string.IsNullOrEmpty(firstText) ?
                                     new List<string>() :
                                     splitRegex.Split(firstText).Where(s => s != string.Empty).ToList();

            for (int i = 0; i < textUnits.Count; i++)
            {
                List<string> words = nextWords;

                //tokenize the following text unit already now, so that the last word of this unit
                //can look ahead to the first token of the next one
                string nextText = i < textUnits.Count - 1 ? textUnits[i + 1].Text : null;
                nextWords = string.IsNullOrEmpty(nextText) ?
                            new List<string>() :
                            splitRegex.Split(nextText).Where(s => s != string.Empty).ToList();

                var builder = new System.Text.StringBuilder();

                //Check each word against the rules below and append it to the new text.
                for (int j = 0; j < words.Count; j++)
                {
                    string word     = words[j];
                    string nextWord = j + 1 < words.Count ? words[j + 1] : (nextWords.Count > 0 ? nextWords[0] : null);

                    string rendered;
                    if (interpunctuationRegex.IsMatch(word) || string.IsNullOrWhiteSpace(word))
                    {
                        //punctuation or whitespace: keep as is
                        rendered = word;
                    }
                    else if (i == 0 && j == 0)
                    {
                        //overall first word, i.e. first token in first text unit
                        rendered = ToUpperFirstLetter(word, fullUpperCaseTreatment, culture);
                    }
                    else if (word.Length == 1 && nextWord == ".")
                    {
                        //one letter word followed by period is considered a first name initial
                        rendered = ToUpperFirstLetter(word, fullUpperCaseTreatment, culture);
                    }
                    else if (
                        interpunctuationRegex.IsMatch(prevWord) ||
                        (!string.IsNullOrWhiteSpace(secondPrevWord) && interpunctuationRegex.IsMatch(secondPrevWord) && string.IsNullOrWhiteSpace(prevWord))
                        )
                    {
                        //capitalize also stopwords directly after interpunctuation (with or without a space in between)
                        rendered = ToUpperFirstLetter(word, fullUpperCaseTreatment, culture);
                    }
                    else if (apostropheRegex.IsMatch(prevWord) && !string.IsNullOrWhiteSpace(secondPrevWord))
                    {
                        //text after an apostrophe that itself follows a non-blank token stays lower case
                        rendered = word.ToLower(culture);
                    }
                    else if (quotationMarkRegex.IsMatch(prevWord))
                    {
                        //capitalize also stopwords directly after quotation marks
                        rendered = ToUpperFirstLetter(word, fullUpperCaseTreatment, culture);
                    }
                    else if (exceptions.Contains(word.ToLower(culture)))
                    {
                        //stop word from the list of exceptions
                        rendered = word.ToLower(culture);
                    }
                    else
                    {
                        //in all other cases: capitalize
                        rendered = ToUpperFirstLetter(word, fullUpperCaseTreatment, culture);
                    }

                    builder.Append(rendered);

                    secondPrevWord = prevWord;
                    prevWord       = word; //save current word as previous word for next iteration
                }

                textUnits[i].Text = builder.ToString();
            }

            handled = true;
            return textUnits;
        }