/// <summary>
/// Applies the configured template substitution rules to the article text.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="ArticleTitle">The article title; only used when templates are expanded via Tools.ExpandTemplate.</param>
/// <returns>The article text after substitution, or the input unchanged when no rules are configured.</returns>
public string SubstituteTemplates(string ArticleText, string ArticleTitle)
{
    // nothing to substitute
    if (Regexes.Count == 0)
        return ArticleText;

    string result = ArticleText;

    // optionally hide unformatted text so the rules below do not touch it
    if (chkIgnoreUnformatted.Checked)
        result = RemoveUnformatted.HideUnformatted(result);

    if (chkUseExpandTemplates.Checked)
    {
        result = Tools.ExpandTemplate(result, ArticleTitle, Regexes, chkIncludeComment.Checked);
    }
    else
    {
        // plain regex replacement, one rule after another
        foreach (KeyValuePair<Regex, string> rule in Regexes)
            result = rule.Key.Replace(result, rule.Value);
    }

    // restore whatever was hidden above
    if (chkIgnoreUnformatted.Checked)
        result = RemoveUnformatted.AddBackUnformatted(result);

    return result;
}
//TODO:Needs re-write
/// <summary>
/// If necessary, adds/removes maintenance tags: stub, uncategorized, dead end, underlinked/wikify,
/// and renames unreferenced to refimprove when the page already has references.
/// Also runs orphan, ibid-reference and empty-section tagging via TagOrphans, TagRefsIbid and TagEmptySection.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The article title.</param>
/// <param name="restrictOrphanTagging">Passed through unchanged to TagOrphans.</param>
/// <param name="summary">Set to the result of PrepareTaggerEditSummary(); left untouched when tagging is not permitted.</param>
/// <returns>The tagged article.</returns>
public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
{
    if (!TaggerPermitted(articleText, articleTitle))
        return articleText;

    tagsRemoved.Clear();
    tagsAdded.Clear();
    int tagsrenamed = 0;

    string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
    Sorter.Interwikis(ref commentsStripped);

    // bulleted or indented text should weigh less than simple text.
    // for example, actor stubs may contain large filmographies
    string crapStripped = BulletedText.Replace(commentsCategoriesStripped, "");
    int words = (Tools.WordCount(commentsCategoriesStripped, 999) + Tools.WordCount(crapStripped, 999)) / 2;

    // remove stub tags from long articles, don't move section stubs
    if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
    {
        articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();

        if (Variables.LangCode.Equals("ar"))
        {
            tagsRemoved.Add("بذرة");
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            tagsRemoved.Add("تقاوى");
        }
        else if (Variables.LangCode.Equals("hy"))
        {
            tagsRemoved.Add("Անավարտ");
        }
        else
        {
            tagsRemoved.Add("stub");
        }
    }

    // refresh
    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

    // remove disambiguation if disambiguation cleanup exists (en-wiki only)
    if (Variables.LangCode.Equals("en") && WikiRegexes.DisambigsCleanup.IsMatch(commentsStripped))
    {
        articleText = WikiRegexes.DisambigsGeneral.Replace(articleText, "").Trim();
    }

    // do orphan tagging before template analysis for categorisation tags
    articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

    articleText = TagRefsIbid(articleText);

    articleText = TagEmptySection(articleText);

    int totalCategories;
    // ignore commented out wikilinks, and any in {{Proposed deletion/dated}}
    int wikiLinkCount = Tools.LinkCount(ProposedDeletionDatedEndorsed.Replace(commentsStripped, ""));

#if DEBUG || UNITTEST
    if (Globals.UnitTestMode)
    {
        totalCategories = Globals.UnitTestIntValue;
    }
    else
#endif
    {
        // stubs add non-hidden stub categories, don't count these in categories count
        // also don't count "Proposed deletion..." cats
        // limitation: in the unlikely event that the article has only redlinked cats then it is {{uncat}} but we won't tag it as such
        totalCategories = RegularCategories(articleText).Count;

        // templates may add categories to page that are not [[Category...]] links, so use API call for accurate Category count
        if (totalCategories == 0)
            totalCategories = RegularCategories(CategoryProv.MakeList(new[] { articleTitle })).Count;
    }

    // remove dead end if > 0 wikilinks on page
    if (wikiLinkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText))
    {
        if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
            articleText = WikiRegexes.DeadEnd.Replace(articleText, "");
        else
            articleText = WikiRegexes.DeadEnd.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

        // only report the removal if no dead end tag survived (section/reason templates are kept)
        if (!WikiRegexes.DeadEnd.IsMatch(articleText))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                tagsRemoved.Add("نهاية مسدودة");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                tagsRemoved.Add("نهاية مسدودة");
            }
            else
            {
                tagsRemoved.Add("deadend");
            }
        }
    }

    // discount persondata, comments, infoboxes and categories from wikify/underlinked and stub evaluation
    string lengthtext = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "");
    lengthtext = WikiRegexes.InfoBox.Replace(lengthtext, "");
    lengthtext = Drugbox.Replace(lengthtext, "");

    int length = lengthtext.Length + 1;
    bool underlinked = (wikiLinkCount < 0.0025 * length);

    if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped)
        && !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped)
        && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped)
        && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(commentsCategoriesStripped))
    {
        // add stub tag. Exclude pages whose title starts with "List of..."
        if (!ListOf.IsMatch(articleTitle) && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{بذرة}}", 3);
                tagsAdded.Add("بذرة");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{تقاوى}}", 3);
                tagsAdded.Add("تقاوى");
            }
            else if (Variables.LangCode.Equals("hy"))
            {
                articleText += Tools.Newline("{{Անավարտ}}", 3);
                tagsAdded.Add("Անավարտ");
            }
            else
            {
                articleText += Tools.Newline("{{stub}}", 3);
                tagsAdded.Add("stub");
            }
            // refresh so the stub checks below see the tag just added
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
        }
    }

    // rename existing {{improve categories}} else add uncategorized tag
    if (totalCategories == 0 && ImproveCategories.IsMatch(articleText))
        articleText = Tools.RenameTemplate(articleText, "improve categories", "Uncategorized");

    // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
    // nl wiki doesn't use {{Uncategorized}} template
    // prevent Wiktionary redirects from being tagged as uncategorised
    if (words > 6 && totalCategories == 0
        && !WikiRegexes.Uncat.IsMatch(articleText)
        && Variables.LangCode != "nl")
    {
        if (WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            // add uncategorized stub tag
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{بذرة غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[تصنيف:مقالات غير مصنفة|غير مصنفة]]");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{تقاوى مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[قالب:تقاوى مش متصنفه|تقاوى مش متصنفه]]");
            }
            else if (Variables.LangCode.Equals("hy")) // same template for uncat and uncat stub
            {
                articleText += Tools.Newline("{{Կատեգորիա չկա|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("Կատեգորիա չկա");
            }
            else if (Variables.LangCode.Equals("sv")) // same template for uncat and uncat stub
            {
                articleText += Tools.Newline("{{Okategoriserad|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[Mall:Okategoriserad|okategoriserad]]");
            }
            else
            {
                articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
            }
        }
        else
        {
            // add uncategorized tag
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|مقالات غير مصنفة]]");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|مش متصنفه]]");
            }
            else if (Variables.LangCode.Equals("el"))
            {
                articleText += Tools.Newline("{{Ακατηγοριοποίητο|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[Πρότυπο:Ακατηγοριοποίητο|ακατηγοριοποίητο]]");
            }
            else if (Variables.LangCode.Equals("hy"))
            {
                articleText += Tools.Newline("{{Կատեգորիա չկա|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("Կատեգորիա չկա");
            }
            else if (Variables.LangCode.Equals("sv"))
            {
                articleText += Tools.Newline("{{Okategoriserad|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[Mall:Okategoriserad|okategoriserad]]");
            }
            else
            {
                articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
            }
        }
    }

    // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
    // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
    if (WikiRegexes.Uncat.IsMatch(articleText))
    {
        if (totalCategories > 0)
        {
            articleText = WikiRegexes.Uncat.Replace(articleText, "").TrimStart();

            if (Variables.LangCode.Equals("ar"))
                tagsRemoved.Add("غير مصنفة");
            else if (Variables.LangCode.Equals("arz"))
                tagsRemoved.Add("مش متصنفه");
            else
                tagsRemoved.Add("uncategorised");
        }
        else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            // rename uncat to uncat stub if no uncat stub. If uncat and uncat stub, remove uncat.
            bool uncatstub = false;
            foreach (Match u in WikiRegexes.Uncat.Matches(articleText))
            {
                if (WikiRegexes.Stub.IsMatch(u.Value))
                {
                    uncatstub = true;
                    break;
                }
            }

            articleText = WikiRegexes.Uncat.Replace(articleText, u2 =>
            {
                if (!uncatstub) // rename
                {
                    // Count a rename only when one is actually performed: languages without a
                    // dedicated uncat-stub template fall through and keep the tag unchanged,
                    // so they must not trigger the post-tagging cleanup pass.
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsrenamed++;
                        return Tools.RenameTemplate(u2.Value, "بذرة غير مصنفة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        tagsrenamed++;
                        return Tools.RenameTemplate(u2.Value, "تقاوى مش متصنفه");
                    }
                    else if (Variables.LangCode.Equals("en") || Variables.LangCode.Equals("simple"))
                    {
                        tagsrenamed++;
                        return Tools.RenameTemplate(u2.Value, "Uncategorized stub");
                    }
                }
                else // already uncat stub so remove plain uncat
                {
                    if (!WikiRegexes.Stub.IsMatch(u2.Value))
                    {
                        if (Variables.LangCode.Equals("ar"))
                            tagsRemoved.Add("غير مصنفة");
                        else if (Variables.LangCode.Equals("arz"))
                            tagsRemoved.Add("مش متصنفه");
                        else
                            tagsRemoved.Add("uncategorised");

                        return "";
                    }
                }

                return u2.Value;
            });
        }
    }

    if (wikiLinkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && !WikiRegexes.SIAs.IsMatch(articleText)
        && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(articleText)
        && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
    {
        // add dead-end tag
        // no blank line between dead end and orphan tags for ar/arz
        if (Variables.LangCode.Equals("ar"))
        {
            articleText = "{{نهاية مسدودة|" + WikiRegexes.DateYearMonthParameter + "}}\r\n" + (WikiRegexes.Orphan.IsMatch(articleText) ? "" : "\r\n") + articleText;
            tagsAdded.Add("[[:تصنيف:مقالات نهاية مسدودة|نهاية مسدودة]]");

            // if dead end then remove underlinked/wikify
            if (WikiRegexes.Wikify.IsMatch(articleText))
            {
                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                tagsRemoved.Add("ويكي");
            }
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            articleText = "{{نهايه مسدوده|" + WikiRegexes.DateYearMonthParameter + "}}\r\n" + articleText;
            tagsAdded.Add("[[:قالب:نهايه مسدوده|نهايه مسدوده]]");

            // if dead end then remove underlinked
            if (WikiRegexes.Wikify.IsMatch(articleText))
            {
                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                tagsRemoved.Add("ويكى");
            }
        }
        // case-insensitive, culture-invariant check for an existing dead end entry in {{multiple issues}}
        else if (Variables.LangCode != "sv" && !WikiRegexes.Centuryinbox.IsMatch(articleText)
            && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value, @"\bdead ?end\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)
            && !MinorPlanetListFooter.IsMatch(articleText))
        {
            // Don't add excess newlines between new tags
            articleText = "{{Dead end|" + WikiRegexes.DateYearMonthParameter + "}}" + (tagsAdded.Count > 0 ? "\r\n" : "\r\n\r\n") + articleText;
            tagsAdded.Add("[[CAT:DE|deadend]]");

            // if dead end then remove underlinked
            if (articleText.IndexOf("underlinked", StringComparison.OrdinalIgnoreCase) > -1)
            {
                articleText = Tools.NestedTemplateRegex("underlinked").Replace(articleText, "").TrimStart();
                tagsRemoved.Add("underlinked");
            }
        }
    }
    // add underlinked/wikify tag, don't add underlinked/wikify if {{dead end}} already present
    // Don't tag SIA pages, may create wikilinks from templates
    else if (wikiLinkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
        // ordinal, case-insensitive: avoids culture-dependent lower-casing (e.g. Turkish dotless i)
        && WikiRegexes.MultipleIssues.Match(articleText).Value.IndexOf("wikify", StringComparison.OrdinalIgnoreCase) < 0
        && !WikiRegexes.DeadEnd.IsMatch(articleText)
        && !WikiRegexes.SIAs.IsMatch(articleText)
        && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(articleText)
        && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
    {
        // Avoid excess newlines between templates
        string templateEnd = "}}\r\n" + (articleText.TrimStart().StartsWith("{{", StringComparison.Ordinal) ? "" : "\r\n");

        if (Variables.LangCode.Equals("ar"))
        {
            articleText = "{{ويكي|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText.TrimStart();
            tagsAdded.Add("[[وب:ويكي|ويكي]]");
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            articleText = "{{ويكى|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[قالب:ويكى|ويكى]]");
        }
        else if (Variables.LangCode.Equals("sv"))
        {
            articleText = "{{Ickewiki|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[WP:PW|ickewiki]]");
        }
        else
        {
            articleText = "{{Underlinked|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[CAT:UL|underlinked]]");
        }
    }
    else if (wikiLinkCount > 3 && !underlinked && WikiRegexes.Wikify.IsMatch(articleText))
    {
        if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
            articleText = WikiRegexes.Wikify.Replace(articleText, "");
        else
            // remove wikify, except section templates or wikify tags with reason parameter specified
            articleText = WikiRegexes.Wikify.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

        if (!WikiRegexes.Wikify.IsMatch(articleText))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                tagsRemoved.Add("ويكي");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                tagsRemoved.Add("ويكى");
            }
            else
            {
                tagsRemoved.Add("underlinked");
            }
        }
    }

    // rename unreferenced --> refimprove if has existing refs, update date
    if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
        && (TotalRefsNotGrouped(commentsCategoriesStripped) + Tools.NestedTemplateRegex("sfn").Matches(articleText).Count) > 0)
    {
        articleText = Unreferenced.Replace(articleText, m2 => Tools.UpdateTemplateParameterValue(Tools.RenameTemplate(m2.Value, "refimprove"), "date", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}"));

        // update tag in old-style multiple issues
        Match m = WikiRegexes.MultipleIssues.Match(articleText);
        if (m.Success && Tools.GetTemplateParameterValue(m.Value, "unreferenced").Length > 0)
        {
            string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
            newValue = Tools.UpdateTemplateParameterValue(newValue, "refimprove", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}");
            if (!newValue.Equals(m.Value))
                articleText = articleText.Replace(m.Value, newValue);
        }
    }

    if (tagsAdded.Count > 0 || tagsRemoved.Count > 0 || tagsrenamed > 0)
    {
        Parsers p = new Parsers();
        HideText hider = new HideText();

        // run the multiple-issues and conversion passes with unformatted text hidden
        articleText = hider.HideUnformatted(articleText);

        articleText = p.MultipleIssues(articleText);
        articleText = Conversions(articleText);
        articleText = hider.AddBackUnformatted(articleText);

        // sort again in case tag removal requires whitespace cleanup
        // Don't sort interwikis, we can't specify the correct InterWikiSortOrder
        p.SortInterwikis = false;
        articleText = p.Sorter.Sort(articleText, articleTitle);
    }

    summary = PrepareTaggerEditSummary();

    return articleText;
}
/// <summary>
/// Applies every RegexTagger rule to the article text so that undated cleanup tags
/// which take a date get one (month and year).
/// Unformatted areas (wiki comments etc.) are hidden first and restored afterwards, so they are not changed.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <returns>The updated article text</returns>
public static string TagUpdater(string articleText)
{
    HideText hider = new HideText();
    string working = hider.HideUnformatted(articleText);

    // each rule is a (pattern, replacement) pair
    foreach (KeyValuePair<Regex, string> rule in RegexTagger)
        working = rule.Key.Replace(working, rule.Value);

    working = hider.AddBackUnformatted(working);
    return working;
}
//TODO:Needs re-write
/// <summary>
/// If necessary, adds/removes maintenance tags: stub, expand (en only), uncategorized, dead end, wikify,
/// and renames unreferenced to refimprove when the page already has references.
/// Also runs orphan, ibid-reference and empty-section tagging via TagOrphans, TagRefsIbid and TagEmptySection.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The article title.</param>
/// <param name="restrictOrphanTagging">Passed through unchanged to TagOrphans.</param>
/// <param name="summary">Set to the result of PrepareTaggerEditSummary(); left untouched on the early return.</param>
/// <returns>The tagged article.</returns>
public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
{
    // don't tag redirects/outside article namespace/no tagging changes
    if (!Namespace.IsMainSpace(articleTitle) || Tools.IsRedirect(articleText) || WikiRegexes.Wi.IsMatch(articleText))
        return articleText;

    tagsRemoved.Clear();
    tagsAdded.Clear();

    string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
    Sorter.Interwikis(ref commentsStripped);

    // bulleted or indented text should weigh less than simple text.
    // for example, actor stubs may contain large filmographies
    string crapStripped = WikiRegexes.BulletedText.Replace(commentsCategoriesStripped, "");
    int words = (Tools.WordCount(commentsCategoriesStripped) + Tools.WordCount(crapStripped)) / 2;

    // remove stub tags from long articles
    if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
    {
        articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();
        tagsRemoved.Add("stub");
    }

    // refresh
    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

    // on en wiki, remove expand template when a stub template exists
    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests/Archive_5#Remove_.7B.7Bexpand.7D.7D_when_a_stub_template_exists
    if (Variables.LangCode == "en" && WikiRegexes.Stub.IsMatch(commentsCategoriesStripped)
        && WikiRegexes.Expand.IsMatch(commentsCategoriesStripped))
    {
        articleText = WikiRegexes.Expand.Replace(articleText, "");
        tagsRemoved.Add("expand");
    }

    // refresh
    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

    // do orphan tagging before template analysis for categorisation tags
    articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

    articleText = TagRefsIbid(articleText);

    articleText = TagEmptySection(articleText);

    int totalCategories;
    int linkCount = Tools.LinkCount(commentsStripped);

#if DEBUG || UNITTEST
    if (Globals.UnitTestMode)
    {
        totalCategories = Globals.UnitTestIntValue;
    }
    else
#endif
    {
        // stubs add non-hidden stub categories, don't count these in categories count
        List<Article> Cats = CategoryProv.MakeList(new[] { articleTitle });
        List<Article> CatsNotStubs = new List<Article>();

        foreach (Article a in Cats)
        {
            // ordinal suffix match: category names are identifiers, not linguistic text
            if (!a.Name.EndsWith(" stubs", System.StringComparison.Ordinal)
                && !a.Name.EndsWith(":Stubs", System.StringComparison.Ordinal))
                CatsNotStubs.Add(a);
        }
        totalCategories = CatsNotStubs.Count;
    }

    // remove dead end if the page has wikilinks
    if (linkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText))
    {
        articleText = WikiRegexes.DeadEnd.Replace(articleText, new MatchEvaluator(SectionTagME));

        // only report the removal if no dead end tag survived the replacement
        if (!WikiRegexes.DeadEnd.IsMatch(articleText))
            tagsRemoved.Add("deadend");
    }

    // discount persondata along with comments and categories from wikify and stub evaluation
    int length = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "").Length + 1;
    bool underlinked = (linkCount < 0.0025 * length);

    if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped)
        && !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped)
        && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped))
    {
        // add stub tag
        articleText += Tools.Newline("{{stub}}", 3);
        tagsAdded.Add("stub");
        // refresh so the stub checks below see the tag just added
        commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    }

    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
    // nl wiki doesn't use {{Uncategorized}} template
    // prevent Wiktionary redirects from being tagged as uncategorised
    if (words > 6 && totalCategories == 0
        && !WikiRegexes.Uncat.IsMatch(articleText)
        && Variables.LangCode != "nl"
        && !Tools.NestedTemplateRegex("cat improve").IsMatch(articleText)
        // category count is from API; don't add uncat tag if genfixes added person categories
        && !WikiRegexes.DeathsOrLivingCategory.IsMatch(articleText)
        && !WikiRegexes.BirthsCategory.IsMatch(articleText))
    {
        if (WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            // add uncategorized stub tag
            articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
            tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
        }
        else
        {
            // add uncategorized tag
            articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
            tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
        }
    }

    // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
    // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
    if (WikiRegexes.Uncat.IsMatch(articleText))
    {
        if (totalCategories > 0)
        {
            articleText = WikiRegexes.Uncat.Replace(articleText, "");
            tagsRemoved.Add("uncategorised");
        }
        else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            string uncatname = WikiRegexes.Uncat.Match(articleText).Groups[1].Value;
            if (!uncatname.Contains("stub"))
                articleText = Tools.RenameTemplate(articleText, uncatname, "Uncategorized stub");
        }
    }

    // case-insensitive, culture-invariant check for an existing dead end entry in {{multiple issues}}
    if (linkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && Variables.LangCode != "sv"
        && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value, @"\bdead ?end\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
    {
        // add dead-end tag
        articleText = "{{dead end|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
        tagsAdded.Add("[[:Category:Dead-end pages|deadend]]");
    }

    if (linkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
        // ordinal, case-insensitive: avoids culture-dependent lower-casing (e.g. Turkish dotless i)
        && WikiRegexes.MultipleIssues.Match(articleText).Value.IndexOf("wikify", System.StringComparison.OrdinalIgnoreCase) < 0)
    {
        // add wikify tag
        articleText = "{{Wikify|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
        tagsAdded.Add("[[WP:WFY|wikify]]");
    }
    else if (linkCount > 3 && !underlinked && WikiRegexes.Wikify.IsMatch(articleText))
    {
        articleText = WikiRegexes.Wikify.Replace(articleText, new MatchEvaluator(SectionTagME));

        if (!WikiRegexes.Wikify.IsMatch(articleText))
            tagsRemoved.Add("wikify");
    }

    // rename unreferenced --> refimprove if has existing refs
    if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
        && WikiRegexes.Refs.Matches(commentsCategoriesStripped).Count > 0)
    {
        articleText = Tools.RenameTemplate(articleText, "unreferenced", "refimprove", true);

        // also rename the parameter inside {{multiple issues}}, if present
        Match m = WikiRegexes.MultipleIssues.Match(articleText);
        if (m.Success)
        {
            string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
            if (!newValue.Equals(m.Value))
                articleText = articleText.Replace(m.Value, newValue);
        }
    }

    if (tagsAdded.Count > 0 || tagsRemoved.Count > 0)
    {
        Parsers p = new Parsers();
        HideText ht = new HideText();

        // run the multiple-issues and conversion passes with unformatted text hidden
        articleText = ht.HideUnformatted(articleText);

        articleText = p.MultipleIssues(articleText);
        articleText = Conversions(articleText);
        articleText = ht.AddBackUnformatted(articleText);

        // sort again in case tag removal requires whitespace cleanup
        articleText = p.Sorter.Sort(articleText, articleTitle);
    }

    summary = PrepareTaggerEditSummary();

    return articleText;
}
//TODO:Needs re-write
/// <summary>
/// If necessary, adds/removes maintenance tags: stub, uncategorized, dead end, underlinked/wikify,
/// and renames unreferenced to refimprove when the page already has references.
/// Also runs orphan, ibid-reference and empty-section tagging via TagOrphans, TagRefsIbid and TagEmptySection.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The article title.</param>
/// <param name="restrictOrphanTagging">Passed through unchanged to TagOrphans.</param>
/// <param name="summary">Set to the result of PrepareTaggerEditSummary(); left untouched on the early return.</param>
/// <returns>The tagged article.</returns>
public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
{
    // don't tag redirects/outside article namespace/no tagging changes
    if (!Namespace.IsMainSpace(articleTitle) || Tools.IsRedirect(articleText) || WikiRegexes.Wi.IsMatch(articleText))
        return articleText;

    tagsRemoved.Clear();
    tagsAdded.Clear();

    string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
    Sorter.Interwikis(ref commentsStripped);

    // bulleted or indented text should weigh less than simple text.
    // for example, actor stubs may contain large filmographies
    string crapStripped = WikiRegexes.BulletedText.Replace(commentsCategoriesStripped, "");
    int words = (Tools.WordCount(commentsCategoriesStripped) + Tools.WordCount(crapStripped)) / 2;

    // remove stub tags from long articles, don't move section stubs
    if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
    {
        articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();
        tagsRemoved.Add("stub");
    }

    // refresh
    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

    // do orphan tagging before template analysis for categorisation tags
    articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

    articleText = TagRefsIbid(articleText);

    articleText = TagEmptySection(articleText);

    int totalCategories;
    // ignore commented out wikilinks, and any in {{Proposed deletion/dated}}
    int wikiLinkCount = Tools.LinkCount(ProposedDeletionDatedEndorsed.Replace(commentsStripped, ""));

#if DEBUG || UNITTEST
    if (Globals.UnitTestMode)
    {
        totalCategories = Globals.UnitTestIntValue;
    }
    else
#endif
    {
        // stubs add non-hidden stub categories, don't count these in categories count
        // also don't count "Proposed deletion..." cats
        List<Article> Cats = CategoryProv.MakeList(new[] { articleTitle });
        totalCategories = RegularCategories(Cats).Count;

        // cats may have been added to page by genfixes, F&R or user (when reparsing) so check cats on page if API says zero
        // so we correctly count for uncat tagging
        if (totalCategories == 0)
            totalCategories = RegularCategories(articleText).Count;
    }

    // remove dead end when wikilinks on page, but not for en-wiki where dead end can mean "not enough" links
    if (wikiLinkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText) && !Variables.LangCode.Equals("en"))
    {
        if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
            articleText = WikiRegexes.DeadEnd.Replace(articleText, "");
        else
            articleText = WikiRegexes.DeadEnd.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

        // only report the removal if no dead end tag survived (section/reason templates are kept)
        if (!WikiRegexes.DeadEnd.IsMatch(articleText))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                tagsRemoved.Add("نهاية مسدودة");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                tagsRemoved.Add("نهاية مسدودة");
            }
            else
            {
                tagsRemoved.Add("deadend");
            }
        }
    }

    // discount persondata, comments, infoboxes and categories from wikify and stub evaluation
    string lengthtext = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "");
    lengthtext = WikiRegexes.InfoBox.Replace(lengthtext, "");
    lengthtext = Drugbox.Replace(lengthtext, "");

    int length = lengthtext.Length + 1;
    bool underlinked = (wikiLinkCount < 0.0025 * length);

    if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped)
        && !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped)
        && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped))
    {
        // add stub tag. Exclude pages whose title starts with "List of..."
        if (!ListOf.IsMatch(articleTitle))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{بذرة}}", 3);
                tagsAdded.Add("بذرة");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{تقاوى}}", 3);
                tagsAdded.Add("تقاوى");
            }
            else
            {
                articleText += Tools.Newline("{{stub}}", 3);
                tagsAdded.Add("stub");
            }
            // refresh so the stub checks below see the tag just added
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
        }
    }

    // rename existing {{improve categories}} else add uncategorized tag
    if (totalCategories == 0 && ImproveCategories.IsMatch(articleText))
        articleText = Tools.RenameTemplate(articleText, "improve categories", "Uncategorized");

    // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
    // nl wiki doesn't use {{Uncategorized}} template
    // prevent Wiktionary redirects from being tagged as uncategorised
    if (words > 6 && totalCategories == 0
        && !WikiRegexes.Uncat.IsMatch(articleText)
        && Variables.LangCode != "nl")
    {
        if (WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            // add uncategorized stub tag
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{بذرة غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[تصنيف:مقالات غير مصنفة|غير مصنفة]]");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{تقاوى مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[قالب:تقاوى مش متصنفه|تقاوى مش متصنفه]]");
            }
            else
            {
                articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
            }
        }
        else
        {
            // add uncategorized tag
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|مقالات غير مصنفة]]");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|مش متصنفه]]");
            }
            else
            {
                articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
            }
        }
    }

    // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
    // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
    if (WikiRegexes.Uncat.IsMatch(articleText))
    {
        if (totalCategories > 0)
        {
            articleText = WikiRegexes.Uncat.Replace(articleText, "").TrimStart();

            if (Variables.LangCode.Equals("ar"))
                tagsRemoved.Add("غير مصنفة");
            else if (Variables.LangCode.Equals("arz"))
                tagsRemoved.Add("مش متصنفه");
            else
                tagsRemoved.Add("uncategorised");
        }
        else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            string uncatname = WikiRegexes.Uncat.Match(articleText).Groups[1].Value;
            if (!uncatname.Contains("stub"))
            {
                // Use the same localized uncategorized-stub template names this method appends above,
                // so an arz tag is not rewritten to the English template name.
                if (Variables.LangCode.Equals("ar"))
                    articleText = Tools.RenameTemplate(articleText, uncatname, "بذرة غير مصنفة");
                else if (Variables.LangCode.Equals("arz"))
                    articleText = Tools.RenameTemplate(articleText, uncatname, "تقاوى مش متصنفه");
                else
                    articleText = Tools.RenameTemplate(articleText, uncatname, "Uncategorized stub");
            }
        }
    }

    if (wikiLinkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && !WikiRegexes.SIAs.IsMatch(articleText))
    {
        // add dead-end tag
        if (Variables.LangCode.Equals("ar"))
        {
            articleText = "{{نهاية مسدودة|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
            tagsAdded.Add("[[:تصنيف:مقالات نهاية مسدودة|نهاية مسدودة]]");

            // if dead end then remove underlinked
            if (WikiRegexes.Wikify.IsMatch(articleText))
            {
                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                tagsRemoved.Add("ويكي");
            }
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            articleText = "{{نهايه مسدوده|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
            tagsAdded.Add("[[:قالب:نهايه مسدوده|نهايه مسدوده]]");

            // if dead end then remove underlinked
            if (WikiRegexes.Wikify.IsMatch(articleText))
            {
                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                tagsRemoved.Add("ويكى");
            }
        }
        // case-insensitive, culture-invariant check for an existing dead end entry in {{multiple issues}}
        else if (Variables.LangCode != "sv" && !WikiRegexes.Centuryinbox.IsMatch(articleText)
            && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value, @"\bdead ?end\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
        {
            // Don't add excess newlines between new tags
            articleText = "{{dead end|" + WikiRegexes.DateYearMonthParameter + "}}" + (tagsAdded.Count > 0 ? "\r\n" : "\r\n\r\n") + articleText;
            tagsAdded.Add("[[CAT:DE|deadend]]");

            // if dead end then remove underlinked
            if (articleText.IndexOf("underlinked", StringComparison.OrdinalIgnoreCase) > -1)
            {
                articleText = Tools.NestedTemplateRegex("underlinked").Replace(articleText, "").TrimStart();
                tagsRemoved.Add("underlinked");
            }
        }
    }
    // add wikify tag, don't add underlinked/wikify if {{dead end}} already present
    // Don't tag SIA pages, may create wikilinks from templates
    else if (wikiLinkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
        // ordinal, case-insensitive: avoids culture-dependent lower-casing (e.g. Turkish dotless i)
        && WikiRegexes.MultipleIssues.Match(articleText).Value.IndexOf("wikify", StringComparison.OrdinalIgnoreCase) < 0
        && !WikiRegexes.DeadEnd.IsMatch(articleText)
        && !WikiRegexes.SIAs.IsMatch(articleText))
    {
        // Avoid excess newlines between templates
        string templateEnd = "}}\r\n" + (articleText.StartsWith("{{", StringComparison.Ordinal) ? "" : "\r\n");

        if (Variables.LangCode.Equals("ar"))
        {
            articleText = "{{ويكي|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[وب:ويكي|ويكي]]");
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            articleText = "{{ويكى|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[قالب:ويكى|ويكى]]");
        }
        else if (Variables.LangCode.Equals("sv"))
        {
            articleText = "{{Wikify|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[WP:WFY|wikify]]");
        }
        else
        {
            articleText = "{{Underlinked|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[CAT:UL|underlinked]]");
        }
    }
    else if (wikiLinkCount > 3 && !underlinked && WikiRegexes.Wikify.IsMatch(articleText))
    {
        if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
            articleText = WikiRegexes.Wikify.Replace(articleText, "");
        else
            // remove wikify, except section templates or wikify tags with reason parameter specified
            articleText = WikiRegexes.Wikify.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

        if (!WikiRegexes.Wikify.IsMatch(articleText))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                tagsRemoved.Add("ويكي");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                tagsRemoved.Add("ويكى");
            }
            else
            {
                tagsRemoved.Add("underlinked");
            }
        }
    }

    // rename unreferenced --> refimprove if has existing refs, update date
    if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
        && (TotalRefsNotGrouped(commentsCategoriesStripped) + Tools.NestedTemplateRegex("sfn").Matches(articleText).Count) > 0)
    {
        articleText = Unreferenced.Replace(articleText, m2 => Tools.UpdateTemplateParameterValue(Tools.RenameTemplate(m2.Value, "refimprove"), "date", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}"));

        // update tag in old-style multiple issues
        Match m = WikiRegexes.MultipleIssues.Match(articleText);
        if (m.Success && Tools.GetTemplateParameterValue(m.Value, "unreferenced").Length > 0)
        {
            string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
            newValue = Tools.UpdateTemplateParameterValue(newValue, "refimprove", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}");
            if (!newValue.Equals(m.Value))
                articleText = articleText.Replace(m.Value, newValue);
        }
    }

    if (tagsAdded.Count > 0 || tagsRemoved.Count > 0)
    {
        Parsers p = new Parsers();
        HideText ht = new HideText();

        // run the multiple-issues and conversion passes with unformatted text hidden
        articleText = ht.HideUnformatted(articleText);

        articleText = p.MultipleIssues(articleText);
        articleText = Conversions(articleText);
        articleText = ht.AddBackUnformatted(articleText);

        // sort again in case tag removal requires whitespace cleanup
        // Don't sort interwikis, we can't specify the correct InterWikiSortOrder
        p.SortInterwikis = false;
        articleText = p.Sorter.Sort(articleText, articleTitle);
    }

    summary = PrepareTaggerEditSummary();

    return articleText;
}
/// <summary>
/// Applies every RegexTagger rule to the article text so that undated cleanup tags
/// which take a date get one (month and year).
/// A tag that already has a non-empty "Date" parameter only has that parameter renamed to lower-case "date".
/// Unformatted areas (wiki comments etc.) are hidden first and restored afterwards, so they are not changed.
/// Note: per bugzilla 2700, substitution within ref tags doesn't work; AWB doesn't do anything about it.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <returns>The updated article text</returns>
public static string TagUpdater(string articleText)
{
    HideText hider = new HideText();
    string working = hider.HideUnformatted(articleText);

    foreach (KeyValuePair<Regex, string> rule in RegexTagger)
    {
        string replacement = rule.Value;

        working = rule.Key.Replace(working, tag =>
        {
            // tag already carries a value under "Date": just normalise the parameter name
            if (Tools.GetTemplateParameterValue(tag.Value, "Date").Length > 0)
                return Tools.RenameTemplateParameter(tag.Value, "Date", "date");

            // otherwise use the rule's replacement, filling in capture group 1
            return replacement.Replace("$1", tag.Groups[1].Value);
        });
    }

    return hider.AddBackUnformatted(working);
}