FixDatesAInternal() private method

private FixDatesAInternal ( string textPortion ) : string
textPortion string
return string
Example #1
0
        /// <summary>
        /// Expands four-character persondata DATE OF BIRTH / DATE OF DEATH values into full dates, using a bracketed
        /// birth/death date found in the zeroth section of the article. A date is only used when it is the single
        /// date in its part of the bracketed text and it contains the value already held by the persondata field.
        /// </summary>
        /// <param name="personData">Persondata template call</param>
        /// <param name="articletext">The article text</param>
        /// <returns>The persondata template call, updated where a matching full date was found</returns>
        private static string CompletePersonDataDate(string personData, string articletext)
        {
            string existingBirthYear = Tools.GetTemplateParameterValue(personData, "DATE OF BIRTH", true);
            string existingDeathYear = Tools.GetTemplateParameterValue(personData, "DATE OF DEATH", true);

            bool birthIsFourChars = existingBirthYear.Length == 4;
            bool deathIsFourChars = existingDeathYear.Length == 4;

            // nothing to complete unless at least one field currently holds a four-character value
            if (!birthIsFourChars && !deathIsFourChars)
                return personData;

            Parsers parsers = new Parsers();
            string fullBirthDate = "";
            string fullDeathDate = "";
            string lead = Tools.GetZerothSection(articletext);

            // blank out references and simple wikilinks
            lead = WikiRegexes.Refs.Replace(lead, " ");
            lead = WikiRegexes.SimpleWikiLink.Replace(lead, " ");

            // discard everything from the first circa template onwards
            Match circa = WikiRegexes.CircaTemplate.Match(lead);
            if (circa.Success)
                lead = lead.Substring(0, circa.Index);

            // ndash templates become the entity, every other template is blanked out
            lead = Tools.NestedTemplateRegex("ndash").Replace(lead, " &ndash;");
            lead = WikiRegexes.NestedTemplates.Replace(lead, " ");

            // tidy date formatting before trying to recognise dates
            lead = parsers.FixDatesAInternal(lead);

            // examine each bracketed birth/death candidate, stopping at the first one that yields a date
            foreach (Match candidate in BracketedBirthDeathDate.Matches(lead))
            {
                string bracketed = candidate.Value;

                // skip candidates flagged by the uncertain-wording, reigned/ruled or floruit patterns
                if (UncertainWordings.IsMatch(bracketed) || ReignedRuledUnsure.IsMatch(bracketed) || FloruitTemplate.IsMatch(bracketed))
                    continue;

                // text before the died marker is the birth part, the rest is the death part
                string bornPart = bracketed;
                string diedPart = "";
                Match diedMarker = FreeFormatDied.Match(bracketed);
                if (diedMarker.Success)
                {
                    bornPart = bracketed.Substring(0, diedMarker.Index);
                    diedPart = bracketed.Substring(diedMarker.Index);
                }

                if (birthIsFourChars)
                {
                    MatchCollection americanBorn = WikiRegexes.AmericanDates.Matches(bornPart);
                    if (americanBorn.Count == 1 && americanBorn[0].Value.Contains(existingBirthYear))
                        fullBirthDate = americanBorn[0].Value;
                    else
                    {
                        MatchCollection internationalBorn = WikiRegexes.InternationalDates.Matches(bornPart);
                        if (internationalBorn.Count == 1 && internationalBorn[0].Value.Contains(existingBirthYear))
                            fullBirthDate = internationalBorn[0].Value;
                    }
                }

                if (deathIsFourChars)
                {
                    MatchCollection americanDied = WikiRegexes.AmericanDates.Matches(diedPart);
                    if (americanDied.Count == 1 && americanDied[0].Value.Contains(existingDeathYear))
                        fullDeathDate = americanDied[0].Value;
                    else
                    {
                        MatchCollection internationalDied = WikiRegexes.InternationalDates.Matches(diedPart);
                        if (internationalDied.Count == 1 && internationalDied[0].Value.Contains(existingDeathYear))
                            fullDeathDate = internationalDied[0].Value;
                    }
                }

                if (fullBirthDate.Length > 0 || fullDeathDate.Length > 0)
                    break;
            }

            // write back any date found, converted to the locale determined for the article
            if (fullBirthDate.Length > 4)
            {
                personData = Tools.SetTemplateParameterValue(personData, "DATE OF BIRTH",
                    Tools.ConvertDate(fullBirthDate, DeterminePredominantDateLocale(articletext, true)), false);
            }

            if (fullDeathDate.Length > 4)
            {
                personData = Tools.SetTemplateParameterValue(personData, "DATE OF DEATH",
                    Tools.ConvertDate(fullDeathDate, DeterminePredominantDateLocale(articletext, true)), false);
            }

            return personData;
        }
Example #2
0
        /// <summary>
        /// Completes the persondata DATE OF BIRTH and DATE OF DEATH fields from a bracketed date in the article's zeroth
        /// section. Only fields whose current value is four characters long are considered, and a found date must
        /// contain that existing value to be used.
        /// </summary>
        /// <param name="personData">Persondata template call</param>
        /// <param name="articletext">The article text</param>
        /// <returns>The updated persondata template call, or the input unchanged when no suitable date is found</returns>
        private static string CompletePersonDataDate(string personData, string articletext)
        {
            string birthValue = Tools.GetTemplateParameterValue(personData, "DATE OF BIRTH", true);
            string deathValue = Tools.GetTemplateParameterValue(personData, "DATE OF DEATH", true);

            // neither field holds a four-character value: leave the template alone
            if (birthValue.Length != 4 && deathValue.Length != 4)
            {
                return personData;
            }

            Parsers dateFixer = new Parsers();
            string foundBirth = "";
            string foundDeath = "";
            string intro = Tools.GetZerothSection(articletext);

            // references and simple wikilinks are replaced by a space
            intro = WikiRegexes.Refs.Replace(intro, " ");
            intro = WikiRegexes.SimpleWikiLink.Replace(intro, " ");

            // cut the text at the first circa template, if there is one
            Match circaTemplate = WikiRegexes.CircaTemplate.Match(intro);
            if (circaTemplate.Success)
            {
                intro = intro.Substring(0, circaTemplate.Index);
            }

            // preserve ndash templates as an entity before all remaining templates are replaced by a space
            intro = Tools.NestedTemplateRegex("ndash").Replace(intro, " &ndash;");
            intro = WikiRegexes.NestedTemplates.Replace(intro, " ");

            // correct date format errors so the date regexes below can match
            intro = dateFixer.FixDatesAInternal(intro);

            foreach (Match bracket in BracketedBirthDeathDate.Matches(intro))
            {
                string text = bracket.Value;

                bool unreliable = UncertainWordings.IsMatch(text) || ReignedRuledUnsure.IsMatch(text) || FloruitTemplate.IsMatch(text);
                if (unreliable)
                {
                    continue;
                }

                // divide the bracketed text at the died marker; without one, all of it is treated as birth text
                Match split = FreeFormatDied.Match(text);
                string birthText = split.Success ? text.Substring(0, split.Index) : text;
                string deathText = split.Success ? text.Substring(split.Index) : "";

                // born: exactly one date in the birth text, containing the existing value
                if (birthValue.Length == 4)
                {
                    MatchCollection american = WikiRegexes.AmericanDates.Matches(birthText);
                    if (american.Count == 1 && american[0].Value.Contains(birthValue))
                    {
                        foundBirth = american[0].Value;
                    }
                    else
                    {
                        MatchCollection international = WikiRegexes.InternationalDates.Matches(birthText);
                        if (international.Count == 1 && international[0].Value.Contains(birthValue))
                        {
                            foundBirth = international[0].Value;
                        }
                    }
                }

                // died: exactly one date in the death text, containing the existing value
                if (deathValue.Length == 4)
                {
                    MatchCollection american = WikiRegexes.AmericanDates.Matches(deathText);
                    if (american.Count == 1 && american[0].Value.Contains(deathValue))
                    {
                        foundDeath = american[0].Value;
                    }
                    else
                    {
                        MatchCollection international = WikiRegexes.InternationalDates.Matches(deathText);
                        if (international.Count == 1 && international[0].Value.Contains(deathValue))
                        {
                            foundDeath = international[0].Value;
                        }
                    }
                }

                // the first bracketed text that yields either date ends the search
                if (foundBirth.Length > 0 || foundDeath.Length > 0)
                {
                    break;
                }
            }

            if (foundBirth.Length > 4)
            {
                string birthFormatted = Tools.ConvertDate(foundBirth, DeterminePredominantDateLocale(articletext, true));
                personData = Tools.SetTemplateParameterValue(personData, "DATE OF BIRTH", birthFormatted, false);
            }

            if (foundDeath.Length > 4)
            {
                string deathFormatted = Tools.ConvertDate(foundDeath, DeterminePredominantDateLocale(articletext, true));
                personData = Tools.SetTemplateParameterValue(personData, "DATE OF DEATH", deathFormatted, false);
            }

            return personData;
        }
Example #3
0
        /// <summary>
        /// Performs fixes to a given citation template call: removes duplicate/redundant parameters (format, language,
        /// year, month, accessyear), tidies title quotes, page and date values, corrects URLs lacking a protocol,
        /// moves a dead link template out of the format field and normalises id/ISBN values.
        /// </summary>
        /// <param name="m">Match of the citation template call; the whole match value is the template call and group 2 is read as the template name</param>
        /// <returns>The updated citation template call</returns>
        private static string FixCitationTemplatesME(Match m)
        {
            string newValue = Tools.RemoveExcessTemplatePipes(m.Value);
            string templatename = m.Groups[2].Value;

            Dictionary<string, string> paramsFound = new Dictionary<string, string>();
            // remove duplicated fields, ensure the URL is not touched (may have pipes in)
            newValue = Tools.RemoveDuplicateTemplateParameters(newValue, paramsFound);

            // read the parameters of interest, defaulting each to the empty string when absent
            string theURL,
                id,
                format,
                theTitle,
                TheYear,
                lang,
                TheDate,
                TheMonth,
                TheWork,
                nopp,
                TheIssue,
                accessyear,
                accessdate,
                pages,
                page,
                ISBN,
                origyear,
                archiveurl,
                contributionurl;
            if(!paramsFound.TryGetValue("url", out theURL))
                theURL = "";
            if(!paramsFound.TryGetValue("id", out id))
                id = "";
            if(!paramsFound.TryGetValue("format", out format))
                format = "";
            if(!paramsFound.TryGetValue("title", out theTitle))
                theTitle = "";
            if(!paramsFound.TryGetValue("year", out TheYear))
                TheYear = "";
            if(!paramsFound.TryGetValue("date", out TheDate))
                TheDate = "";
            if(!paramsFound.TryGetValue("language", out lang))
                lang = "";
            if(!paramsFound.TryGetValue("month", out TheMonth))
                TheMonth = "";
            if(!paramsFound.TryGetValue("work", out TheWork))
                TheWork = "";
            if(!paramsFound.TryGetValue("nopp", out nopp))
                nopp = "";
            if(!paramsFound.TryGetValue("issue", out TheIssue))
                TheIssue = "";
            if(!paramsFound.TryGetValue("accessyear", out accessyear))
                accessyear = "";
            if(!paramsFound.TryGetValue("accessdate", out accessdate))
                accessdate = "";
            if(!paramsFound.TryGetValue("pages", out pages))
                pages = "";
            if(!paramsFound.TryGetValue("page", out page))
                page = "";
            if(!paramsFound.TryGetValue("origyear", out origyear))
                origyear = "";
            if(!paramsFound.TryGetValue("archiveurl", out archiveurl))
                archiveurl = "";
            if(!paramsFound.TryGetValue("contribution-url", out contributionurl))
                contributionurl = "";
            if(!paramsFound.TryGetValue("isbn", out ISBN) && !paramsFound.TryGetValue("ISBN", out ISBN))
                ISBN = "";

            // kept so the url parameter is only rewritten if the value actually changes below
            string theURLoriginal = theURL;

            // remove the unneeded 'format=HTML' field
            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests#Remove_.22format.3DHTML.22_in_citation_templates
            // remove format= field with null value when URL is HTML page
            if(paramsFound.ContainsKey("format"))
            {
                if (format.TrimStart("[]".ToCharArray()).ToUpper().StartsWith("HTM")
                    ||
                    (format.Length == 0 &&
                     theURL.ToUpper().TrimEnd('L').EndsWith("HTM")))
                    newValue = Tools.RemoveTemplateParameter(newValue, "format");
            }

            // newlines to spaces in title field if URL used, otherwise display broken
            if (theURL.Length > 0 && theTitle.Contains("\r\n"))
            {
                theTitle = theTitle.Replace("\r\n", " ");
                paramsFound["title"] = theTitle;
                newValue = Tools.UpdateTemplateParameterValue(newValue, "title", theTitle);
            }

            // remove language=English on en-wiki
            if (lang.Equals("english", StringComparison.OrdinalIgnoreCase) || lang.Equals("en", StringComparison.OrdinalIgnoreCase))
                newValue = Tools.RemoveTemplateParameter(newValue, "language");

            // remove italics for work field for book/periodical, but not website -- auto italicised by template
            if (TheWork.Length > 0 && !TheWork.Contains("."))
                newValue = WorkInItalics.Replace(newValue, "$1$2");

            // remove quotes around title field: are automatically added by template markup
            foreach (string dequoteParam in ParametersToDequote)
            {
                string quotetitle;
                if(paramsFound.TryGetValue(dequoteParam, out quotetitle))
                {
                    string before = quotetitle;
                    // convert curly quotes to straight quotes per [[MOS:PUNCT]], but » or › may be section delimiter
                    // so only change those when balanced. Note regular <> characters are not changed.
                    quotetitle = WikiRegexes.CurlyDoubleQuotes.Replace(quotetitle, @"""");
                    quotetitle = BalancedArrows.Replace(quotetitle, @"""$1$2""");

                    // only strip quotes when they occur solely at the start/end of the value
                    if (quotetitle.Contains(@"""") && !quotetitle.Trim('"').Contains(@""""))
                        quotetitle = quotetitle.Trim('"');

                    if(!before.Equals(quotetitle))
                        newValue = Tools.SetTemplateParameterValue(newValue, dequoteParam, quotetitle);
                }
            }

            // page= and pages= fields don't need p. or pp. in them when nopp not set
            if ((pages.Contains("p") || page.Contains("p")) && !templatename.Equals("cite journal", StringComparison.OrdinalIgnoreCase) && nopp.Length == 0)
            {
                newValue = CiteTemplatePagesPP.Replace(newValue, "");
                pages = Tools.GetTemplateParameterValue(newValue, "pages");
            }

            // date = YYYY and year = same YYYY --> remove the date parameter
            // with Lua no need to rename date to year when date = YYYY
            if (TheDate.Length == 4)
            {
                if(TheYear.Equals(TheDate))
                    newValue = Tools.RemoveTemplateParameter(newValue, "date");
            }

            // year = full date --> date = full date
            if (TheYear.Length > 5)
            {
                // first correct comma usage within the year value
                string TheYearCorrected = IncorrectCommaInternationalDates.Replace(TheYear, @"$1 $2");
                TheYearCorrected = IncorrectCommaAmericanDates.Replace(TheYearCorrected, @"$1 $2, $3");

                if(!TheYearCorrected.Equals(TheYear))
                {
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "year", TheYearCorrected);
                    TheYear = TheYearCorrected;
                }
            }

            if (TheYear.Length > 5 && (WikiRegexes.ISODates.IsMatch(TheYear) || WikiRegexes.InternationalDates.IsMatch(TheYear)
                                       || WikiRegexes.AmericanDates.IsMatch(TheYear)))
            {
                TheDate = TheYear;
                TheYear = "";
                newValue = Tools.RenameTemplateParameter(newValue, "year", "date");
            }

            // year=YYYY and date=...YYYY -> remove year; not for year=YYYYa
            if (TheYear.Length == 4 && TheDate.Contains(TheYear) && YearOnly.IsMatch(TheYear))
            {
                Parsers p = new Parsers();
                TheDate = p.FixDatesAInternal(TheDate);

                if(WikiRegexes.InternationalDates.IsMatch(TheDate) || WikiRegexes.AmericanDates.IsMatch(TheDate)
                   || WikiRegexes.ISODates.IsMatch(TheDate))
                {
                    TheYear = "";
                    newValue = Tools.RemoveTemplateParameter(newValue, "year");
                }
            }

            // month=Month and date=...Month... OR month=Month and date=same month (by conversion from ISO format) OR month=nn and date=same month (by conversion to ISO format)
            // TheMonth is escaped before being embedded in the pattern: int.TryParse also accepts values with a
            // leading sign such as "+5", which unescaped would be parsed as a regex quantifier rather than literal text
            int num;
            if ((TheMonth.Length > 2 && TheDate.Contains(TheMonth)) // named month within date
                || (TheMonth.Length > 2 && Tools.ConvertDate(TheDate, DateLocale.International).Contains(TheMonth))
                || (int.TryParse(TheMonth, out num) && Regex.IsMatch(Tools.ConvertDate(TheDate, DateLocale.ISO), @"\-0?" + Regex.Escape(TheMonth) + @"\-")))
                newValue = Tools.RemoveTemplateParameter(newValue, "month");

            // date = Month DD and year = YYYY --> date = Month DD, YYYY
            if (!YearOnly.IsMatch(TheDate) && YearOnly.IsMatch(TheYear))
            {
                if (!WikiRegexes.AmericanDates.IsMatch(TheDate) && WikiRegexes.AmericanDates.IsMatch(TheDate + ", " + TheYear))
                {
                    if(!TheDate.Contains(TheYear))
                        newValue = Tools.SetTemplateParameterValue(newValue, "date", TheDate + ", " + TheYear);
                    newValue = Tools.RemoveTemplateParameter(newValue, "year");
                }
                else if (!WikiRegexes.InternationalDates.IsMatch(TheDate) && WikiRegexes.InternationalDates.IsMatch(TheDate + " " + TheYear))
                {
                    if(!TheDate.Contains(TheYear))
                        newValue = Tools.SetTemplateParameterValue(newValue, "date", TheDate + " " + TheYear);
                    newValue = Tools.RemoveTemplateParameter(newValue, "year");
                }
            }

            // correct volume=vol 7... and issue=no. 8 for {{cite journal}} only
            if (templatename.Equals("cite journal", StringComparison.OrdinalIgnoreCase))
            {
                newValue = CiteTemplatesJournalVolume.Replace(newValue, "");
                newValue = CiteTemplatesJournalIssue.Replace(newValue, "");

                if (TheIssue.Length == 0)
                    newValue = CiteTemplatesJournalVolumeAndIssue.Replace(newValue, @"| issue = ");
            }

            // {{cite web}} for Google books -> {{Cite book}}
            if (templatename.Contains("web") && newValue.Contains("http://books.google.")
                && TheWork.Length == 0)
                newValue = Tools.RenameTemplate(newValue, templatename, "Cite book");

            // remove leading zero in day of month
            if(Regex.IsMatch(newValue, @"\b0[1-9]") && DateLeadingZero.IsMatch(newValue))
            {
                // replacement is applied twice, as in the original code
                newValue = DateLeadingZero.Replace(newValue, @"$1$2$3$4$5");
                newValue = DateLeadingZero.Replace(newValue, @"$1$2$3$4$5");
                // refresh the local copies so later fixes work on the updated values
                TheDate = Tools.GetTemplateParameterValue(newValue, "date");
                accessdate = Tools.GetTemplateParameterValue(newValue, "accessdate");
            }

            if (Regex.IsMatch(templatename, @"[Cc]ite(?: ?web| book| news)"))
            {
                // remove any empty accessdaymonth, accessmonthday, accessmonth and accessyear
                newValue = AccessDayMonthDay.Replace(newValue, "");

                // merge accessdate of 'D Month' or 'Month D' and accessyear of 'YYYY' in cite web
                if(accessyear.Length == 4)
                    newValue = AccessDateYear.Replace(newValue, @" $2$1$3");
            }

            // remove accessyear where accessdate is present and contains said year
            if (accessyear.Length > 0 && accessdate.Contains(accessyear))
                newValue = Tools.RemoveTemplateParameter(newValue, "accessyear");

            // fix unspaced comma ranges, avoid pages=12,345 as could be valid page number
            if (Regex.Matches(pages, @"\b\d{1,2},\d{3}\b").Count == 0)
            {
                while (UnspacedCommaPageRange.IsMatch(pages))
                {
                    pages = UnspacedCommaPageRange.Replace(pages, "$1, $2");
                }
                newValue = Tools.UpdateTemplateParameterValue(newValue, "pages", pages);
                // keep the dictionary in step, as it is passed to FixPageRanges below
                paramsFound["pages"] = pages;
            }

            // page range should have unspaced en-dash; validate that page is range not section link
            newValue = FixPageRanges(newValue, paramsFound);

            // page range or list should use 'pages' parameter not 'page'
            if (CiteTemplatesPageRangeName.IsMatch(newValue))
            {
                newValue = CiteTemplatesPageRangeName.Replace(newValue, @"$1pages$2");
                newValue = Tools.RemoveDuplicateTemplateParameters(newValue);
            }

            // remove ordinals from dates
            if(Ordinal.IsMatch(TheDate) || Ordinal.IsMatch(accessdate))
            {
                if (OrdinalsInDatesInt.IsMatch(TheDate))
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "date", OrdinalsInDatesInt.Replace(TheDate, "$1$2$3 $4"));
                else if (OrdinalsInDatesAm.IsMatch(TheDate))
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "date", OrdinalsInDatesAm.Replace(TheDate, "$1 $2$3"));

                if (OrdinalsInDatesInt.IsMatch(accessdate))
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "accessdate", OrdinalsInDatesInt.Replace(accessdate, "$1$2$3 $4"));
                else if(OrdinalsInDatesAm.IsMatch(accessdate))
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "accessdate", OrdinalsInDatesAm.Replace(accessdate, "$1 $2$3"));
            }
            // catch after any other fixes
            newValue = IncorrectCommaAmericanDates.Replace(newValue, @"$1 $2, $3");

            // URL starting www needs http://
            if (theURL.StartsWith("www", StringComparison.OrdinalIgnoreCase))
                theURL = "http://" + theURL;

            if(archiveurl.StartsWith("www", StringComparison.OrdinalIgnoreCase))
                newValue = Tools.UpdateTemplateParameterValue(newValue, "archiveurl", "http://" + archiveurl);
            if(contributionurl.StartsWith("www", StringComparison.OrdinalIgnoreCase))
                newValue = Tools.UpdateTemplateParameterValue(newValue, "contribution-url", "http://" + contributionurl);

            // (part) wikilinked/external linked URL in cite template, don't change when named external link format
            if(!theURL.Contains(" "))
                theURL = theURL.Trim('[').Trim(']');

            if(!theURLoriginal.Equals(theURL))
                newValue = Tools.UpdateTemplateParameterValue(newValue, "url", theURL);

            // {{dead link}} should be placed outside citation, not in format field per [[Template:Dead link]]
            if (WikiRegexes.DeadLink.IsMatch(format))
            {
                string deadLink = WikiRegexes.DeadLink.Match(format).Value;

                // format held only the dead link template and the URL is an HTML page: the field is not needed at all
                if (theURL.ToUpper().TrimEnd('L').EndsWith("HTM") && format.Equals(deadLink))
                    newValue = Tools.RemoveTemplateParameter(newValue, "format");
                else
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "format", format.Replace(deadLink, ""));

                // dead link template is re-added after the citation template call
                newValue += (" " + deadLink);
            }

            //id=ISBN fix
            if (IdISBN.IsMatch(id) && ISBN.Length == 0)
            {
                newValue = Tools.RenameTemplateParameter(newValue, "id", "isbn");
                newValue = Tools.SetTemplateParameterValue(newValue, "isbn", IdISBN.Match(id).Groups[1].Value.Trim());
            }

            //id=ASIN fix
            if (IdASIN.IsMatch(id) && Tools.GetTemplateParameterValue(newValue, "asin").Length == 0 && Tools.GetTemplateParameterValue(newValue, "ASIN").Length == 0)
            {
                newValue = Tools.RenameTemplateParameter(newValue, "id", "asin");
                newValue = Tools.SetTemplateParameterValue(newValue, "asin", IdASIN.Match(id).Groups[1].Value.Trim());
            }

            if(ISBN.Length > 0)
            {
                string ISBNbefore = ISBN;
                // remove ISBN at start, but not if multiple ISBN
                if(ISBN.IndexOf("isbn", StringComparison.OrdinalIgnoreCase) > -1
                   && ISBN.Substring(4).IndexOf("isbn", StringComparison.OrdinalIgnoreCase) == -1)
                    ISBN = Regex.Replace(ISBN, @"^(?i)ISBN\s*", "");

                // trim unneeded characters
                ISBN = ISBN.Trim(".;,:".ToCharArray()).Trim();

                // fix dashes: only hyphens allowed
                while(ISBNDash.IsMatch(ISBN))
                    ISBN = ISBNDash.Replace(ISBN, @"$1-$2");
                ISBN = ISBN.Replace('\x2010', '-');
                ISBN = ISBN.Replace('\x2012', '-');

                if(!ISBN.Equals(ISBNbefore))
                {
                    // write back under whichever parameter name casing the template used
                    if(paramsFound.ContainsKey("ISBN"))
                        newValue = Tools.UpdateTemplateParameterValue(newValue, "ISBN", ISBN);
                    else
                        newValue = Tools.UpdateTemplateParameterValue(newValue, "isbn", ISBN);
                }
            }

            // origyear --> year when no year/date
            if (TheYear.Length == 0 && TheDate.Length == 0 && origyear.Length == 4)
            {
                newValue = Tools.RenameTemplateParameter(newValue, "origyear", "year");
                newValue = Tools.RemoveDuplicateTemplateParameters(newValue);
            }

            return newValue;
        }
Example #4
0
        /// <summary>
        /// Performs fixes to a given citation template call
        /// </summary>
        /// <param name="m"></param>
        /// <returns></returns>
        private static string FixCitationTemplatesME(Match m)
        {
            string newValue = Tools.RemoveExcessTemplatePipes(m.Value);
            string templatename = m.Groups[2].Value;

            Dictionary<string, string> paramsFound = new Dictionary<string, string>();
            // remove duplicated fields, ensure the URL is not touched (may have pipes in)
            newValue = Tools.RemoveDuplicateTemplateParameters(newValue, paramsFound);

            // fix cite params not in lower case, allowing for ISBN, DOI identifiers being uppercase, avoiding changing text within malformatted URL
            foreach (
                string notlowercaseCiteParam in
                    paramsFound.Keys
                        .Where(
                            p =>
                                (p.ToLower() != p) &&
                                !Regex.IsMatch(p,
                                    @"(?:IS[BS]N|DOI|PMID|OCLC|PMC|LCCN|ASIN|ARXIV|ASIN\-TLD|BIBCODE|ID|ISBN13|JFM|JSTOR|MR|OL|OSTI|RFC|SSRN|URL|ZBL)")
                                && !CiteUrl.Match(newValue).Value.Contains(p)))
            {
                newValue = Tools.RenameTemplateParameter(newValue, notlowercaseCiteParam,
                    notlowercaseCiteParam.ToLower());
            }

            string theURL,
                id,
                format,
                theTitle,
                TheYear,
                lang,
                TheDate,
                TheMonth,
                TheWork,
                nopp,
                TheIssue,
                TheVolume,
                accessdate,
                pages,
                page,
                ISBN,
                ISSN,
                origyear,
                origdate,
                archiveurl,
                contributionurl;
            if (!paramsFound.TryGetValue("url", out theURL))
                theURL = "";
            if (!paramsFound.TryGetValue("id", out id) && !paramsFound.TryGetValue("ID", out id))
                id = "";
            if (!paramsFound.TryGetValue("format", out format))
                format = "";
            if (!paramsFound.TryGetValue("title", out theTitle))
                theTitle = "";
            if (!paramsFound.TryGetValue("year", out TheYear))
                TheYear = "";
            if (!paramsFound.TryGetValue("date", out TheDate))
                TheDate = "";
            if (!paramsFound.TryGetValue("language", out lang))
                lang = "";
            if (!paramsFound.TryGetValue("month", out TheMonth))
                TheMonth = "";
            if (!paramsFound.TryGetValue("work", out TheWork))
                TheWork = "";
            if (!paramsFound.TryGetValue("nopp", out nopp))
                nopp = "";
            if (!paramsFound.TryGetValue("issue", out TheIssue))
                TheIssue = "";
            if (!paramsFound.TryGetValue("volume", out TheVolume))
                TheVolume = "";
            if (!paramsFound.TryGetValue("accessdate", out accessdate) &&
                !paramsFound.TryGetValue("access-date", out accessdate))
                accessdate = "";
            if (!paramsFound.TryGetValue("pages", out pages))
                pages = "";
            if (!paramsFound.TryGetValue("page", out page))
                page = "";
            if (!paramsFound.TryGetValue("origyear", out origyear))
                origyear = "";
            if (!paramsFound.TryGetValue("origdate", out origdate))
                origdate = "";
            if (!paramsFound.TryGetValue("archiveurl", out archiveurl))
                archiveurl = "";
            if (!paramsFound.TryGetValue("contribution-url", out contributionurl))
                contributionurl = "";
            if (!paramsFound.TryGetValue("isbn", out ISBN) && !paramsFound.TryGetValue("ISBN", out ISBN))
                ISBN = "";
            if (!paramsFound.TryGetValue("issn", out ISSN) && !paramsFound.TryGetValue("ISSN", out ISSN))
                ISSN = "";

            string theURLoriginal = theURL;

            // remove the unneeded 'format=HTML' field
            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests#Remove_.22format.3DHTML.22_in_citation_templates
            // remove format= field with null value when URL is HTML page
            if (paramsFound.ContainsKey("format"))
            {
                if (format.TrimStart("[]".ToCharArray()).ToUpper().StartsWith("HTM")
                    ||
                    (format.Length == 0 &&
                     theURL.ToUpper().TrimEnd('L').EndsWith("HTM")))
                    newValue = Tools.RemoveTemplateParameter(newValue, "format");
            }

            if (paramsFound.ContainsKey("origdate") && origdate.Length == 0)
            {
                newValue = Tools.RemoveTemplateParameter(newValue, "origdate");
            }

            // newlines to spaces in all parameters
            foreach(KeyValuePair<string, string> newlines in paramsFound.Where(p => p.Value.Contains("\r\n")))
            {
                newValue = Tools.UpdateTemplateParameterValue(newValue, newlines.Key, newlines.Value.Replace("\r\n", " "));
            }

            // {{sv icon}} -> sv in language=
            if (lang.Contains("{{"))
            {
                newValue = LangTemplate.Replace(newValue, "$1$3");
                lang = Tools.GetTemplateParameterValue(newValue, "language");
            }

            // remove italics for work field for book/periodical, but not website -- auto italicised by template
            if (TheWork.Contains("''") && !TheWork.Contains("."))
                newValue = WorkInItalics.Replace(newValue, "$1$2");

            // format quotes in title fields, remove stray quotes
            foreach (string dequoteParam in ParametersToDequote)
            {
                string quotetitle;
                if (paramsFound.TryGetValue(dequoteParam, out quotetitle))
                {
                    string before = quotetitle;
                    // convert curly quotes to straight quotes per [[MOS:PUNCT]], but » or › may be a section delimiter,
                    // so only change those when balanced. Note regular <> characters are not changed.
                    quotetitle = WikiRegexes.CurlyDoubleQuotes.Replace(quotetitle, @"""");
                    quotetitle = BalancedArrows.Replace(quotetitle, @"""$1$2""");

                    // trim stray quotes (but don't change title in quotes as this may be a title that is itself a quote)
                    if(!quotetitle.Trim('"').Contains(@""""))
                    {
                        if (quotetitle.StartsWith(@"""") && !quotetitle.EndsWith(@""""))
                            quotetitle = quotetitle.TrimStart('"');
                        else if (quotetitle.EndsWith(@"""") && !quotetitle.StartsWith(@""""))
                            quotetitle = quotetitle.TrimEnd('"');
                    }

                    if (!before.Equals(quotetitle))
                        newValue = Tools.SetTemplateParameterValue(newValue, dequoteParam, quotetitle);
                }
            }

            // page= and pages= fields don't need p. or pp. in them when nopp not set
            if ((pages.Contains("p") || page.Contains("p")) &&
                !templatename.Equals("cite journal", StringComparison.OrdinalIgnoreCase) && nopp.Length == 0)
            {
                newValue = CiteTemplatePagesPP.Replace(newValue, "");
                pages = Tools.GetTemplateParameterValue(newValue, "pages");
                paramsFound.Remove("pages");
                paramsFound.Add("pages", pages);
            }

            // with Lua there is no need to rename date to year when date = YYYY; just remove date when it merely duplicates year
            if (TheDate.Length == 4 && TheYear.Equals(TheDate))
                newValue = Tools.RemoveTemplateParameter(newValue, "date");

            // year = full date --> date = full date
            if (TheYear.Length > 5)
            {
                string TheYearCorected = IncorrectCommaInternationalDates.Replace(TheYear, @"$1 $2");
                TheYearCorected = IncorrectCommaAmericanDates.Replace(TheYearCorected, @"$1 $2, $3");

                if (!TheYearCorected.Equals(TheYear))
                {
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "year", TheYearCorected);
                    TheYear = TheYearCorected;
                }

                if (WikiRegexes.ISODates.IsMatch(TheYear) || WikiRegexes.InternationalDates.IsMatch(TheYear)
                     || WikiRegexes.AmericanDates.IsMatch(TheYear))
                {
                    TheDate = TheYear;
                    TheYear = "";
                    newValue = Tools.RenameTemplateParameter(newValue, "year", "date");
                }
            }

            // year=YYYY and date=...YYYY -> remove year; not for year=YYYYa
            else if (TheYear.Length == 4 && TheDate.Contains(TheYear) && YearOnly.IsMatch(TheYear))
            {
                Parsers p = new Parsers();
                TheDate = p.FixDatesAInternal(TheDate);

                if (WikiRegexes.InternationalDates.IsMatch(TheDate) || WikiRegexes.AmericanDates.IsMatch(TheDate)
                    || WikiRegexes.ISODates.IsMatch(TheDate))
                {
                    TheYear = "";
                    newValue = Tools.RemoveTemplateParameter(newValue, "year");
                }
            }

            // remove month when it duplicates the date: month=Month and date=...Month..., or month=Month and date=same month (by conversion from ISO format), or month=nn and date=same month (by conversion to ISO format)
            int num;
            if ((TheMonth.Length > 2 && TheDate.Contains(TheMonth)) // named month within date
                || (TheMonth.Length > 2 && Tools.ConvertDate(TheDate, DateLocale.International).Contains(TheMonth))
                ||
                (int.TryParse(TheMonth, out num) &&
                 Regex.IsMatch(Tools.ConvertDate(TheDate, DateLocale.ISO), @"\-0?" + TheMonth + @"\-")))
            {
                newValue = Tools.RemoveTemplateParameter(newValue, "month");
            }

            // date = Month DD and year = YYYY --> date = Month DD, YYYY
            if (!YearOnly.IsMatch(TheDate) && YearOnly.IsMatch(TheYear))
            {
                if (!WikiRegexes.AmericanDates.IsMatch(TheDate) &&
                    WikiRegexes.AmericanDates.IsMatch(TheDate + ", " + TheYear))
                {
                    if (!TheDate.Contains(TheYear))
                    {
                        newValue = Tools.SetTemplateParameterValue(newValue, "date", TheDate + ", " + TheYear);
                    }
                    newValue = Tools.RemoveTemplateParameter(newValue, "year");
                }
                else if (!WikiRegexes.InternationalDates.IsMatch(TheDate) &&
                         WikiRegexes.InternationalDates.IsMatch(TheDate + " " + TheYear))
                {
                    if (!TheDate.Contains(TheYear))
                    {
                        newValue = Tools.SetTemplateParameterValue(newValue, "date", TheDate + " " + TheYear);
                    }
                    newValue = Tools.RemoveTemplateParameter(newValue, "year");
                }
            }

            // correct volume=vol 7... and issue=no. 8 for {{cite journal}} only
            if (templatename.Equals("cite journal", StringComparison.OrdinalIgnoreCase))
            {
                if(TheVolume.Length > 0)
                    newValue = CiteTemplatesJournalVolume.Replace(newValue, "");
                if (TheIssue.Length > 0)
                    newValue = CiteTemplatesJournalIssue.Replace(newValue, "");
                else
                    newValue = CiteTemplatesJournalVolumeAndIssue.Replace(newValue, @"| issue = ");
            }
            // {{cite web}} for Google books -> {{Cite book}}
            else if (templatename.Contains("web") && newValue.Contains("http://books.google.") && TheWork.Length == 0)
                newValue = Tools.RenameTemplate(newValue, templatename, "Cite book");

            // remove leading zero in day of month
            if(paramsFound.Any(p => p.Key.Contains("date") && Regex.IsMatch(p.Value, @"\b0[1-9]")))
            {
                newValue = DateLeadingZero.Replace(newValue, @"$1$2$3$4$5");
                newValue = DateLeadingZero.Replace(newValue, @"$1$2$3$4$5");
                TheDate = Tools.GetTemplateParameterValue(newValue, "date");
                accessdate = Tools.GetTemplateParameterValue(newValue, "accessdate");
            }

            if (paramsFound.Any(s => s.Key.Contains("access") && !s.Key.Contains("date")))
            {
                string accessyear;
                if (!paramsFound.TryGetValue("accessyear", out accessyear))
                    accessyear = "";

                if (Regex.IsMatch(templatename, @"[Cc]ite(?: ?web| book| news)"))
                {
                    // remove any empty accessdaymonth, accessmonthday, accessmonth and accessyear
                    newValue = AccessDayMonthDay.Replace(newValue, "");

                    // merge accessdate of 'D Month' or 'Month D' and accessyear of 'YYYY' in cite web
                    if (accessyear.Length == 4)
                        newValue = AccessDateYear.Replace(newValue, @" $2$1$3");
                }

                // remove accessyear where accessdate is present and contains said year
                if (accessyear.Length > 0 && accessdate.Contains(accessyear))
                    newValue = Tools.RemoveTemplateParameter(newValue, "accessyear");
            }

            // fix unspaced comma ranges, avoid pages=12,345 as could be valid page number
            if (pages.Contains(",") && !Regex.IsMatch(pages, @"\b[0-9]{1,2},[0-9]{3}\b"))
            {
                while (UnspacedCommaPageRange.IsMatch(pages))
                {
                    pages = UnspacedCommaPageRange.Replace(pages, "$1, $2");
                }
                newValue = Tools.UpdateTemplateParameterValue(newValue, "pages", pages);
                paramsFound.Remove("pages");
                paramsFound.Add("pages", pages);
            }

            // page range should have unspaced en-dash; validate that page is range not section link
            newValue = FixPageRanges(newValue, paramsFound);

            // page range or list should use 'pages' parameter not 'page'
            if (page.Length > 0 && CiteTemplatesPageRangeName.IsMatch(newValue))
            {
                newValue = CiteTemplatesPageRangeName.Replace(newValue, @"$1pages$2");
                newValue = Tools.RemoveDuplicateTemplateParameters(newValue);
            }

            // remove ordinals from dates
            if (Ordinal.IsMatch(TheDate) || Ordinal.IsMatch(accessdate))
            {
                if (OrdinalsInDatesInt.IsMatch(TheDate))
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "date",
                        OrdinalsInDatesInt.Replace(TheDate, "$1$2$3 $4"));
                else if (OrdinalsInDatesAm.IsMatch(TheDate))
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "date",
                        OrdinalsInDatesAm.Replace(TheDate, "$1 $2$3"));

                if (OrdinalsInDatesInt.IsMatch(accessdate))
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "accessdate",
                        OrdinalsInDatesInt.Replace(accessdate, "$1$2$3 $4"));
                else if (OrdinalsInDatesAm.IsMatch(accessdate))
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "accessdate",
                        OrdinalsInDatesAm.Replace(accessdate, "$1 $2$3"));
            }
            // catch after any other fixes
            if(!IncorrectCommaAmericanDates.IsMatch(theURLoriginal))
                newValue = IncorrectCommaAmericanDates.Replace(newValue, @"$1 $2, $3");

            // URL starting www needs http://
            if (theURL.StartsWith("www", StringComparison.OrdinalIgnoreCase))
                theURL = "http://" + theURL;

            if (archiveurl.StartsWith("www", StringComparison.OrdinalIgnoreCase))
                newValue = Tools.UpdateTemplateParameterValue(newValue, "archiveurl", "http://" + archiveurl);
            if (contributionurl.StartsWith("www", StringComparison.OrdinalIgnoreCase))
                newValue = Tools.UpdateTemplateParameterValue(newValue, "contribution-url", "http://" + contributionurl);

            // (part) wikilinked/external linked URL in cite template, don't change when named external link format
            if (!theURL.Contains(" "))
                theURL = theURL.Trim('[').Trim(']');

            if (!theURLoriginal.Equals(theURL))
                newValue = Tools.UpdateTemplateParameterValue(newValue, "url", theURL);

            // {{dead link}} should be placed outside citation, not in format field per [[Template:Dead link]]
            Match deadLinkMatch = WikiRegexes.DeadLink.Match(format);
            if (deadLinkMatch.Success)
            {
                string deadLink = deadLinkMatch.Value;

                if (theURL.ToUpper().TrimEnd('L').EndsWith("HTM") && format.Equals(deadLink))
                    newValue = Tools.RemoveTemplateParameter(newValue, "format");
                else
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "format", format.Replace(deadLink, ""));

                newValue += (" " + deadLink);
            }

            if(id.Length > 0)
            {
                // get id param name, id or ID
                string idParamName = paramsFound.FirstOrDefault(p => p.Key == "ID" || p.Key == "id").Key;

                //id=ISBN fix
                if (IdISBN.IsMatch(id) && ISBN.Length == 0)
                {
                    newValue = Tools.RenameTemplateParameter(newValue, idParamName, "isbn");
                    newValue = Tools.SetTemplateParameterValue(newValue, "isbn", IdISBN.Match(id).Groups[1].Value.Trim());
                }

                //id=ASIN fix
                if (IdASIN.IsMatch(id) && Tools.GetTemplateParameterValue(newValue, "asin", true).Length == 0)
                {
                    newValue = Tools.RenameTemplateParameter(newValue, idParamName, "asin");
                    newValue = Tools.SetTemplateParameterValue(newValue, "asin", IdASIN.Match(id).Groups[1].Value.Trim());
                }

                //id=ISSN fix
                Match IdISSNMatch = IdISSN.Match(id);
                if (IdISSNMatch.Success && ISSN.Length == 0)
                {
                    string newIssn = IdISSNMatch.Groups[1].Value + "-" + IdISSNMatch.Groups[2].Value; // 1234-5678 using standard hyphen
                    newValue = Tools.RenameTemplateParameter(newValue, idParamName, "issn");
                    newValue = Tools.SetTemplateParameterValue(newValue, "issn", newIssn);
                }
            }

            // format ISSN: 1234-5678 with hyphen
            if(ISSN.Length > 0)
            {
                string newISSN = Regex.Replace (ISSN, @"^([0-9]{4}) *[- –]* *([0-9]{3}[0-9X])$", "$1-$2");

                if (!newISSN.Equals (ISSN))
                    newValue = Tools.UpdateTemplateParameterValue (newValue, paramsFound.FirstOrDefault (p => p.Key == "ISSN" || p.Key == "issn").Key, newISSN);
            }

            if (ISBN.Length > 0)
            {
                string ISBNbefore = ISBN;
                // remove ISBN at start, but not if multiple ISBN
                if (ISBN.IndexOf("isbn", StringComparison.OrdinalIgnoreCase) > -1
                    && ISBN.Substring(4).IndexOf("isbn", StringComparison.OrdinalIgnoreCase) == -1)
                    ISBN = Regex.Replace(ISBN, @"^(?i)ISBN\s*", "");

                // trim unneeded characters
                ISBN = ISBN.Trim(".;,:".ToCharArray()).Trim();

                // fix dashes: only hyphens allowed
                while (ISBNDash.IsMatch(ISBN))
                    ISBN = ISBNDash.Replace(ISBN, @"$1-$2");
                ISBN = ISBN.Replace('\x2010', '-');
                ISBN = ISBN.Replace('\x2012', '-');

                if (!ISBN.Equals(ISBNbefore))
                {
                    if (paramsFound.ContainsKey("ISBN"))
                        newValue = Tools.UpdateTemplateParameterValue(newValue, "ISBN", ISBN);
                    else
                        newValue = Tools.UpdateTemplateParameterValue(newValue, "isbn", ISBN);
                }
            }

            // origyear --> year when no year/date
            if (origyear.Length == 4 && TheYear.Length == 0 && TheDate.Length == 0)
            {
                newValue = Tools.RenameTemplateParameter(newValue, "origyear", "year");
                newValue = Tools.RemoveDuplicateTemplateParameters(newValue);
            }

            return newValue;
        }