/// <summary>
/// Adds [[Category:XXXX births]], [[Category:XXXX deaths]] to articles about people where available, for en-wiki only
/// When page is not mainspace, adds [[:Category rather than [[Category
/// Removes Date of birth missing/Date of birth missing (living people) category if full DOB in {{birth date and age}}
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">Title of the article, used to determine whether the page is in mainspace</param>
/// <param name="parseTalkPage">Passed through unchanged to IsArticleAboutAPerson</param>
/// <returns>The article text as returned by YearOfBirthDeathMissingCategory, with any derived births/deaths categories appended;
/// the input text unchanged when the language code is not "en"</returns>
public static string FixPeopleCategories(string articleText, string articleTitle, bool parseTalkPage)
{
    if (!Variables.LangCode.Equals("en"))
    {
        return(articleText);
    }

    // Performance: apply births/deaths category check on categories string, not whole article
    string cats = GetCats(articleText);
    bool dolmatch = WikiRegexes.DeathsOrLivingCategory.IsMatch(cats), bimatch = WikiRegexes.BirthsCategory.IsMatch(cats);

    // no work to do if already has a birth and a death/living cat
    if (dolmatch && bimatch)
    {
        return(YearOfBirthDeathMissingCategory(articleText, cats));
    }

    // over 20 references or long and not DOB/DOD categorised at all yet: implausible
    if ((articleText.Length > 15000 && !bimatch && !dolmatch) || (!dolmatch && WikiRegexes.Refs.Matches(articleText).Count > 20))
    {
        return(YearOfBirthDeathMissingCategory(articleText, cats));
    }

    // kept so that all additions can be discarded if the article turns out not to be about a person
    string articleTextBefore = articleText;
    int catCount = WikiRegexes.Category.Matches(articleText).Count;

    // get the zeroth section (text upto first heading)
    string zerothSection = Tools.GetZerothSection(articleText);

    // remove references and long wikilinks (but allow an ISO date) that may contain false positives of birth/death date
    zerothSection = WikiRegexes.Refs.Replace(zerothSection, " ");

    // loop as removing one long wikilink may expose another match
    while (LongWikilink.IsMatch(zerothSection))
    {
        zerothSection = LongWikilink.Replace(zerothSection, " ");
    }

    // ignore dates containing years from dated maintenance tags etc.
    zerothSection = WikiRegexes.NestedTemplates.Replace(zerothSection, m2 => ThreeOrFourDigitNumber.IsMatch(Tools.GetTemplateParameterValue(m2.Value, "date")) ? "" : m2.Value);
    zerothSection = WikiRegexes.TemplateMultiline.Replace(zerothSection, m2 => ThreeOrFourDigitNumber.IsMatch(Tools.GetTemplateParameterValue(m2.Value, "date")) ? "" : m2.Value);

    // category prefix: [[Category: in mainspace, [[:Category: elsewhere
    string StartCategory = Tools.Newline(@"[[" + (Namespace.IsMainSpace(articleTitle) ? "" : ":") + @"Category:");
    string yearstring, yearFromInfoBox = "", sort = GetCategorySort(articleText);
    bool alreadyUncertain = false;

    // scrape any infobox for birth year, ignore {{Birth date based on age at death}}
    string fromInfoBox = GetInfoBoxFieldValue(BirthDateBasedOnAgeAtDeath.Replace(zerothSection, ""), WikiRegexes.InfoBoxDOBFields);

    // ignore as of dates
    if (AsOfText.IsMatch(fromInfoBox))
    {
        fromInfoBox = fromInfoBox.Substring(0, AsOfText.Match(fromInfoBox).Index);
    }

    if (fromInfoBox.Length > 0 && !UncertainWordings.IsMatch(fromInfoBox))
    {
        yearFromInfoBox = YearPossiblyWithBC.Match(fromInfoBox).Value;
    }

    // convert [[:Category to [[Category for non-mainspace Category checking
    string checkText = Namespace.IsMainSpace(articleTitle) ? articleText : articleText.Replace("[[:", "[[");

    // birth
    if (!WikiRegexes.BirthsCategory.IsMatch(checkText) && (PersonYearOfBirth.Matches(zerothSection).Count == 1
        || WikiRegexes.DateBirthAndAge.IsMatch(zerothSection)
        || WikiRegexes.DeathDateAndAge.IsMatch(zerothSection)
        || ThreeOrFourDigitNumber.IsMatch(yearFromInfoBox)))
    {
        // look for '{{birth date...' template first
        yearstring = WikiRegexes.DateBirthAndAge.Match(articleText).Groups[1].Value;

        // look for '{{death date and age' template second
        if (String.IsNullOrEmpty(yearstring))
        {
            yearstring = WikiRegexes.DeathDateAndAge.Match(articleText).Groups[2].Value;
        }

        // a usable year from the infobox takes precedence over any value taken from the templates above
        if (ThreeOrFourDigitNumber.IsMatch(yearFromInfoBox))
        {
            yearstring = yearFromInfoBox;
        }

        // look for '(born xxxx)'
        if (String.IsNullOrEmpty(yearstring))
        {
            Match m = PersonYearOfBirth.Match(zerothSection);

            // remove part beyond dash or died
            string birthpart = DiedOrBaptised.Replace(m.Value, "$1");

            if (WikiRegexes.CircaTemplate.IsMatch(birthpart))
            {
                alreadyUncertain = true;
            }

            birthpart = WikiRegexes.TemplateMultiline.Replace(birthpart, " ");

            // check born info before any untemplated died info
            if (!(m.Index > PersonYearOfDeath.Match(zerothSection).Index) || !PersonYearOfDeath.IsMatch(zerothSection))
            {
                // when there's only an approximate birth year, add the appropriate cat rather than the xxxx birth one
                if (UncertainWordings.IsMatch(birthpart) || alreadyUncertain || FloruitTemplate.IsMatch(birthpart))
                {
                    if (!CategoryMatch(articleText, YearOfBirthMissingLivingPeople) && !CategoryMatch(articleText, YearOfBirthUncertain))
                    {
                        articleText += StartCategory + YearOfBirthUncertain + CatEnd(sort);
                    }
                }
                // after removing dashes, birthpart must still contain year
                else if (!birthpart.Contains(@"?") && Regex.IsMatch(birthpart, @"\d{3,4}"))
                {
                    yearstring = m.Groups[1].Value;
                }
            }
        }

        // per [[:Category:Living people]], don't apply birth category if born > 121 years ago
        // validate a YYYY date is not in the future
        if (!string.IsNullOrEmpty(yearstring) && yearstring.Length > 2)
        {
            // yearstring is not guaranteed to be a bare number here (the YearOnly test below exists for that reason;
            // presumably values such as a year with a BC suffix can arrive from the infobox), so parse defensively
            // rather than letting Convert.ToInt32 throw FormatException
            int birthYearValue;
            bool isNumericYear = int.TryParse(yearstring, out birthYearValue);
            int currentYear = DateTime.Now.Year;

            bool notInFuture = !YearOnly.IsMatch(yearstring) || (isNumericYear && birthYearValue <= currentYear);

            // a year that cannot be parsed cannot be shown to fall within the last 121 years, so for an article in
            // Living people it is treated the same as a too-old year: no births category is added
            bool implausibleForLivingPerson = articleText.Contains(CategoryLivingPeople)
                && (!isNumericYear || birthYearValue < (currentYear - 121));

            if (notInFuture && !implausibleForLivingPerson)
            {
                articleText += StartCategory + yearstring + " births" + CatEnd(sort);
            }
        }
    }

    // scrape any infobox
    yearFromInfoBox = "";
    fromInfoBox = GetInfoBoxFieldValue(articleText, WikiRegexes.InfoBoxDODFields);

    if (fromInfoBox.Length > 0 && !UncertainWordings.IsMatch(fromInfoBox))
    {
        yearFromInfoBox = YearPossiblyWithBC.Match(fromInfoBox).Value;
    }

    // recompute as articleText may have gained a births category above
    checkText = Namespace.IsMainSpace(articleTitle) ? articleText : articleText.Replace("[[:", "[[");

    // death: the year of death missing category is disregarded when checking for an existing death/living category
    if (!WikiRegexes.DeathsOrLivingCategory.IsMatch(RemoveCategory(YearofDeathMissing, checkText)) && (PersonYearOfDeath.IsMatch(zerothSection) || WikiRegexes.DeathDate.IsMatch(zerothSection)
        || ThreeOrFourDigitNumber.IsMatch(yearFromInfoBox)))
    {
        // look for '{{death date...' template first
        yearstring = WikiRegexes.DeathDate.Match(articleText).Groups[1].Value;

        // a usable year from the infobox takes precedence over the template value
        if (ThreeOrFourDigitNumber.IsMatch(yearFromInfoBox))
        {
            yearstring = yearFromInfoBox;
        }

        // look for '(died xxxx)'
        if (string.IsNullOrEmpty(yearstring))
        {
            Match m = PersonYearOfDeath.Match(zerothSection);

            // check died info after any untemplated born info
            if (m.Index >= PersonYearOfBirth.Match(zerothSection).Index || !PersonYearOfBirth.IsMatch(zerothSection))
            {
                if (!UncertainWordings.IsMatch(m.Value) && !m.Value.Contains(@"?"))
                {
                    yearstring = m.Groups[1].Value;
                }
            }
        }

        // validate a YYYY date is not in the future
        if (!string.IsNullOrEmpty(yearstring) && yearstring.Length > 2
            && (!YearOnly.IsMatch(yearstring) || Convert.ToInt32(yearstring) <= DateTime.Now.Year))
        {
            articleText += StartCategory + yearstring + " deaths" + CatEnd(sort);
        }
    }

    zerothSection = NotCircaTemplate.Replace(zerothSection, " ");

    // birth and death combined
    // if not fully categorised, check it
    if (PersonYearOfBirthAndDeath.IsMatch(zerothSection) && (!WikiRegexes.BirthsCategory.IsMatch(articleText) || !WikiRegexes.DeathsOrLivingCategory.IsMatch(articleText)))
    {
        Match m = PersonYearOfBirthAndDeath.Match(zerothSection);

        string birthyear = m.Groups[1].Value;
        int birthyearint = int.Parse(birthyear);

        string deathyear = m.Groups[3].Value;
        int deathyearint = int.Parse(deathyear);

        // logical validation of dates: death not before birth, lifespan at most 125 years
        if (birthyearint <= deathyearint && (deathyearint - birthyearint) <= 125)
        {
            // split the matched text at the start of group 2 into its birth and death portions
            string birthpart = zerothSection.Substring(m.Index, m.Groups[2].Index - m.Index),
                deathpart = zerothSection.Substring(m.Groups[2].Index, (m.Value.Length + m.Index) - m.Groups[2].Index);

            if (!WikiRegexes.BirthsCategory.IsMatch(articleText))
            {
                if (!UncertainWordings.IsMatch(birthpart) && !ReignedRuledUnsure.IsMatch(m.Value) && !Regex.IsMatch(birthpart, @"(?:[Dd](?:ied|\.)|baptised)")
                    && !FloruitTemplate.IsMatch(birthpart))
                {
                    articleText += StartCategory + birthyear + @" births" + CatEnd(sort);
                }
                else if (UncertainWordings.IsMatch(birthpart) && !CategoryMatch(articleText, YearOfBirthMissingLivingPeople) && !CategoryMatch(articleText, YearOfBirthUncertain))
                {
                    articleText += StartCategory + YearOfBirthUncertain + CatEnd(sort);
                }
            }

            if (!UncertainWordings.IsMatch(deathpart) && !ReignedRuledUnsure.IsMatch(m.Value) && !Regex.IsMatch(deathpart, @"[Bb](?:orn|\.)") && !Regex.IsMatch(birthpart, @"[Dd](?:ied|\.)")
                && (!WikiRegexes.DeathsOrLivingCategory.IsMatch(articleText) || CategoryMatch(articleText, YearofDeathMissing)))
            {
                articleText += StartCategory + deathyear + @" deaths" + CatEnd(sort);
            }
        }
    }

    // do this check last as IsArticleAboutAPerson can be relatively slow
    // if the article is not about a person, all category additions made above are discarded
    if (!articleText.Equals(articleTextBefore) && !IsArticleAboutAPerson(articleTextBefore, articleTitle, parseTalkPage))
    {
        return(YearOfBirthDeathMissingCategory(articleTextBefore, cats));
    }

    // {{uncat}} --> {{Improve categories}} if we've added cats
    if (WikiRegexes.Category.Matches(articleText).Count > catCount && WikiRegexes.Uncat.IsMatch(articleText)
        && !WikiRegexes.CatImprove.IsMatch(articleText))
    {
        articleText = Tools.RenameTemplate(articleText, WikiRegexes.Uncat.Match(articleText).Groups[1].Value, "Improve categories");
    }

    return(YearOfBirthDeathMissingCategory(articleText, GetCats(articleText)));
}
/// <summary>
/// Completes the persondata DATE OF BIRTH / DATE OF DEATH fields from an unformatted full date found in bracketed text
/// in the zeroth section of the article. A field is only updated when it currently holds a four-character value
/// (a year, presumably derived from the birth/death categories) and the date found contains that same value
/// </summary>
/// <param name="personData">Persondata template call</param>
/// <param name="articletext">The article text</param>
/// <returns>The updated persondata template call</returns>
private static string CompletePersonDataDate(string personData, string articletext)
{
    // current field values
    string birthYearOnRecord = Tools.GetTemplateParameterValue(personData, "DATE OF BIRTH", true);
    string deathYearOnRecord = Tools.GetTemplateParameterValue(personData, "DATE OF DEATH", true);

    bool wantBirth = birthYearOnRecord.Length == 4;
    bool wantDeath = deathYearOnRecord.Length == 4;

    // nothing to complete unless at least one field holds just a four-character value
    if (!wantBirth && !wantDeath)
    {
        return personData;
    }

    Parsers parser = new Parsers();
    string bornDate = "", diedDate = "";

    // work on the zeroth section with references and wikilinks blanked out
    string lead = Tools.GetZerothSection(articletext);
    lead = WikiRegexes.Refs.Replace(lead, " ");
    lead = WikiRegexes.SimpleWikiLink.Replace(lead, " ");

    // discard everything from the first circa template onwards
    Match circa = WikiRegexes.CircaTemplate.Match(lead);
    if (circa.Success)
    {
        lead = lead.Substring(0, circa.Index);
    }

    // turn {{ndash}} into a literal dash before blanking the remaining templates
    lead = Tools.NestedTemplateRegex("ndash").Replace(lead, " –");
    lead = WikiRegexes.NestedTemplates.Replace(lead, " ");

    // clean up any format errors in birth/death dates we may want to use
    lead = parser.FixDatesAInternal(lead);

    // examine each bracketed date text in turn, stopping at the first one that yields a date
    foreach (Match bracketed in BracketedBirthDeathDate.Matches(lead))
    {
        string text = bracketed.Value;

        // skip text with uncertain wording, reign/rule information or a floruit template
        if (UncertainWordings.IsMatch(text) || ReignedRuledUnsure.IsMatch(text) || FloruitTemplate.IsMatch(text))
        {
            continue;
        }

        // split on died/spaced dash: text before the split is birth info, the remainder is death info
        string bornPart = text, diedPart = "";
        Match diedSplit = FreeFormatDied.Match(text);
        if (diedSplit.Success)
        {
            bornPart = text.Substring(0, diedSplit.Index);
            diedPart = text.Substring(diedSplit.Index);
        }

        // born: require exactly one date in the birth part, and that it contains the value on record
        if (wantBirth)
        {
            MatchCollection americanBorn = WikiRegexes.AmericanDates.Matches(bornPart);
            if (americanBorn.Count == 1 && americanBorn[0].Value.Contains(birthYearOnRecord))
            {
                bornDate = americanBorn[0].Value;
            }
            else
            {
                MatchCollection internationalBorn = WikiRegexes.InternationalDates.Matches(bornPart);
                if (internationalBorn.Count == 1 && internationalBorn[0].Value.Contains(birthYearOnRecord))
                {
                    bornDate = internationalBorn[0].Value;
                }
            }
        }

        // died: same rule applied to the death part
        if (wantDeath)
        {
            MatchCollection americanDied = WikiRegexes.AmericanDates.Matches(diedPart);
            if (americanDied.Count == 1 && americanDied[0].Value.Contains(deathYearOnRecord))
            {
                diedDate = americanDied[0].Value;
            }
            else
            {
                MatchCollection internationalDied = WikiRegexes.InternationalDates.Matches(diedPart);
                if (internationalDied.Count == 1 && internationalDied[0].Value.Contains(deathYearOnRecord))
                {
                    diedDate = internationalDied[0].Value;
                }
            }
        }

        if (bornDate.Length > 0 || diedDate.Length > 0)
        {
            break;
        }
    }

    // write back only values longer than a bare four-character year, converted to the article's predominant date locale
    if (bornDate.Length > 4)
    {
        personData = Tools.SetTemplateParameterValue(personData, "DATE OF BIRTH", Tools.ConvertDate(bornDate, DeterminePredominantDateLocale(articletext, true)), false);
    }

    if (diedDate.Length > 4)
    {
        personData = Tools.SetTemplateParameterValue(personData, "DATE OF DEATH", Tools.ConvertDate(diedDate, DeterminePredominantDateLocale(articletext, true)), false);
    }

    return personData;
}