/// <summary>
/// Pulls every interwiki link and interwiki featured article link out of the article text.
/// Interwikis inside comments/nowiki tags are left where they are.
/// </summary>
/// <param name="articleText">Article text; on return the interwiki and interwiki featured article links have been removed from it</param>
/// <returns>string of interwiki featured article links, then the text matched by InterLangRegex (if any), then the interwiki links</returns>
public string Interwikis(ref string articleText)
{
    // lift out whatever InterLangRegex matches so it can be re-emitted between the two link lists
    string interWikiComment = "";
    Match interLangMatch = InterLangRegex.Match(articleText);
    if (interLangMatch.Success)
    {
        interWikiComment = interLangMatch.Value;
        articleText = articleText.Replace(interWikiComment, "");
    }

    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Interwiki_links_moved_out_of_comment
    HideText hider = new HideText(false, true, false);
    articleText = hider.Hide(articleText);

    // featured article links are extracted first, then the plain interwikis
    string featuredLinks = ListToString(RemoveLinkFGAs(ref articleText));
    string commentLine = interWikiComment.Length > 0 ? interWikiComment + "\r\n" : "";
    string languageLinks = ListToString(RemoveInterWikis(ref articleText));

    articleText = hider.AddBack(articleText);

    return featuredLinks + commentLine + languageLinks;
}
/// <summary>
/// Test helper: hides <paramref name="text"/> with a fresh default HideText (stored in Hider),
/// asserts that AddBackMore restores the input exactly, and returns the hidden form.
/// </summary>
/// <param name="text">Text to hide</param>
/// <param name="hideOnlyTargetOfWikilink">Passed straight through to HideText.HideMore</param>
/// <returns>The hidden text</returns>
private string HideMore(string text, bool hideOnlyTargetOfWikilink)
{
    Hider = new HideText();

    string hidden = Hider.HideMore(text, hideOnlyTargetOfWikilink);
    string restored = Hider.AddBackMore(hidden);

    // round trip must be lossless
    Assert.AreEqual(text, restored);

    return hidden;
}
/// <summary>
/// Test helper: hides <paramref name="text"/> with a HideText built from the given options (stored in Hider),
/// asserts that AddBack restores the input exactly, and returns the hidden form.
/// </summary>
/// <param name="text">Text to hide</param>
/// <param name="hideExternalLinks">First HideText constructor argument</param>
/// <param name="leaveMetaHeadings">Second HideText constructor argument</param>
/// <param name="hideImages">Third HideText constructor argument</param>
/// <returns>The hidden text</returns>
private string Hide(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
{
    Hider = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);

    string hidden = Hider.Hide(text);
    string restored = Hider.AddBack(hidden);

    // round trip must be lossless
    Assert.AreEqual(text, restored);

    return hidden;
}
/// <summary>
/// Checks that the bold just added to the article is the first bold in the article,
/// and that it's within the first 5% of the HideMore article (templates removed)
/// </summary>
/// <param name="articleText">The article text after the bold has been added</param>
/// <param name="escapedTitle">The regex-escaped title that was emboldened</param>
/// <returns>true if some bold is found in the leading window of the hidden text and the added bold is not after the first bold</returns>
private bool AddedBoldIsValid(string articleText, string escapedTitle)
{
    HideText Hider2 = new HideText(true, true, true);

    Regex RegexBoldAdded = new Regex(@"^(.*?)'''(" + escapedTitle + @")", RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // both positions are measured on the unhidden text
    int boldAddedPos = RegexBoldAdded.Match(articleText).Groups[2].Index;
    int firstBoldPos = RegexFirstBold.Match(articleText).Length;

    articleText = WikiRegexes.NestedTemplates.Replace(articleText, "");
    articleText = Hider2.HideMore(articleText);

    // was bold added in first 5% of article?
    bool inFirst5Percent = false;
    int articlelength = articleText.Length;

    if (articlelength > 5)
    {
        // The window size is derived from the untrimmed length, so it can exceed the trimmed text;
        // clamp it to avoid ArgumentOutOfRangeException from Substring on whitespace-heavy text.
        string trimmed = articleText.Trim();
        int window = Math.Min(trimmed.Length, Math.Max(articlelength / 20, 5));
        inFirst5Percent = trimmed.Substring(0, window).Contains("'''");
    }

    // The hidden text is not needed past this point, so it is not restored with AddBackMore.

    // check that the bold added is the first bit in bold in the main body of the article, and in first 5% of HideMore article
    return inFirst5Percent && boldAddedPos <= firstBoldPos;
}
/// <summary>
/// Pulls every interwiki link and interwiki featured article link out of the article text.
/// Interwikis inside comments/nowiki tags are left where they are.
/// </summary>
/// <param name="articleText">Article text; on return the interwiki and interwiki featured article links have been removed from it</param>
/// <returns>string of interwiki featured article links followed by the interwiki links</returns>
public string Interwikis(ref string articleText)
{
    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Interwiki_links_moved_out_of_comment
    HideText hider = new HideText(false, true, false);
    articleText = hider.Hide(articleText);

    // featured article links are extracted first, then the plain interwikis
    string featuredLinks = ListToString(RemoveLinkFGAs(ref articleText));
    string languageLinks = ListToString(RemoveInterWikis(ref articleText));

    articleText = hider.AddBack(articleText);

    return featuredLinks + languageLinks;
}
/// <summary>
/// Runs every typo group against the article text.
/// Nothing is done when TyposCount is zero or when IgnoreRegex matches the text.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the typo groups left the (hidden) text unchanged, or when fixing was skipped</param>
/// <param name="Summary">Empty unless a fix summary was produced, in which case it is Variables.TypoSummaryTag followed by the trimmed summary</param>
/// <returns>The article text after typo fixing</returns>
public string PerformTypoFixes(string ArticleText, out bool NoChange, out string Summary)
{
    Summary = "";

    if (TyposCount == 0 || IgnoreRegex.IsMatch(ArticleText))
    {
        NoChange = true;
        return ArticleText;
    }

    HideText hider = new HideText(true, false, true);
    ArticleText = hider.HideMore(ArticleText);

    //remove newlines, whitespace and hide tokens from bottom
    //to avoid running 2K regexps on them
    Match tailMatch = RemoveTail.Match(ArticleText);
    string tail = tailMatch.Value;
    if (tail.Length > 0)
    {
        ArticleText = ArticleText.Remove(tailMatch.Index);
    }

    string textBeforeFixes = ArticleText;
    string fixSummary = "";

    foreach (TypoGroup grp in Groups)
    {
        grp.FixTypos(ref ArticleText, ref fixSummary);
    }

    NoChange = (textBeforeFixes == ArticleText);

    // re-attach the tail before unhiding
    ArticleText = hider.AddBackMore(ArticleText + tail);

    if (!string.IsNullOrEmpty(fixSummary))
    {
        Summary = Variables.TypoSummaryTag + fixSummary.Trim();
    }

    return ArticleText;
}
/// <summary>
/// Runs every typo group against the article text.
/// Nothing is done when TypoCount is zero or when IgnoreRegex matches the text.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="noChange">Set to true when the typo groups left the (hidden) text unchanged, or when fixing was skipped</param>
/// <param name="summary">Empty when fixing was skipped; otherwise Variables.TypoSummaryTag followed by the trimmed fix summary</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>The article text after typo fixing</returns>
public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
{
    summary = "";

    if (TypoCount == 0)
    {
        noChange = true;
        return articleText;
    }

    if (IgnoreRegex.IsMatch(articleText))
    {
        noChange = true;
        return articleText;
    }

    // the unhidden text is handed to every typo group alongside the working copy
    string unhiddenText = articleText;

    HideText hider = new HideText(true, false, true);
    articleText = hider.HideMore(articleText, true);

    //remove newlines, whitespace and hide tokens from bottom
    //to avoid running 2K regexps on them
    Match tailMatch = RemoveTail.Match(articleText);
    string tail = tailMatch.Value;
    if (tail.Length > 0)
    {
        articleText = articleText.Remove(tailMatch.Index);
    }

    string textBeforeFixes = articleText;
    string fixSummary = "";

    foreach (TypoGroup grp in Groups)
    {
        grp.FixTypos(ref articleText, ref fixSummary, articleTitle, unhiddenText);
    }

    noChange = textBeforeFixes.Equals(articleText);
    summary = Variables.TypoSummaryTag + fixSummary.Trim();

    // re-attach the tail before unhiding
    return hider.AddBackMore(articleText + tail);
}
/// <summary>
/// Reports whether any typo group produces a fix summary for the page.
/// Always false when TypoCount is zero or when IgnoreRegex matches the text.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>whether there are typos on the page</returns>
public bool DetectTypo(string articleText, string articleTitle)
{
    if (TypoCount == 0)
    {
        return false;
    }

    if (IgnoreRegex.IsMatch(articleText))
    {
        return false;
    }

    // the unhidden text is handed to every typo group alongside the working copy
    string unhiddenText = articleText;

    HideText hider = new HideText(true, false, true);
    string workingText = hider.HideMore(articleText, true);

    //remove newlines, whitespace and hide tokens from bottom
    //to avoid running 2K regexps on them
    Match tailMatch = RemoveTail.Match(workingText);
    if (tailMatch.Success)
    {
        workingText = workingText.Remove(tailMatch.Index);
    }

    string fixSummary = "";

    foreach (TypoGroup grp in Groups)
    {
        grp.FixTypos(ref workingText, ref fixSummary, articleTitle, unhiddenText);

        // stop at the first group that reports anything
        if (fixSummary.Length > 0)
        {
            return true;
        }
    }

    return false;
}
// Covered by: BoldTitleTests
/// <summary>
/// '''Emboldens''' the first occurrence of the article title, if not already bold
/// 1) Cleans up bolded self wikilinks
/// 2) Cleans up self wikilinks
/// 3) '''Emboldens''' the first occurrence of the article title
/// Returns the input untouched when the NoBoldTitle template is present.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The title of the article.</param>
/// <param name="noChange">Value that indicated whether no change was made. Only set to false when a self link was converted (step 2) or a bold was added and validated (step 3).</param>
/// <returns>The modified article text.</returns>
public string BoldTitle(string articleText, string articleTitle, out bool noChange)
{
    noChange = true;
    List<string> alltemplates = GetAllTemplates(articleText);

    if(TemplateExists(alltemplates, NoBoldTitle))
        return articleText;

    // Hider2 is used for the self-link pass (step 2), Hider3 for the emboldening pass (step 3)
    HideText Hider2 = new HideText(), Hider3 = new HideText(true, true, true);

    // 1) clean up bolded self links first, provided no noinclude use in article
    string afterSelfLinks = BoldedSelfLinks(articleTitle, articleText);

    if(!afterSelfLinks.Equals(articleText) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
        articleText = afterSelfLinks;

    // 2) Clean up self wikilinks
    // NOTE(review): articleTextAtStart is captured AFTER step 1, so the "no change" returns below
    // can hand back text with step 1 applied while noChange is still true — confirm callers expect this.
    string articleTextAtStart = articleText, zerothSection = Tools.GetZerothSection(articleText);
    string restOfArticle = articleText.Substring(zerothSection.Length);
    string zerothSectionHidden, zerothSectionHiddenOriginal;

    // first check for any self links and no bold title, if found just convert first link to bold and return
    // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
    // don't apply if bold in lead section already or some noinclude transclusion business
    if(!SelfLinks(zerothSection, articleTitle).Equals(zerothSection) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
    {
        // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
        zerothSectionHidden = Hider2.HideMore(zerothSection, false, false, false);
        zerothSectionHiddenOriginal = zerothSectionHidden;
        zerothSectionHidden = SelfLinks(zerothSectionHidden, articleTitle);
        zerothSection = Hider2.AddBackMore(zerothSectionHidden);

        // only report a change if SelfLinks altered the hidden text; otherwise fall through to step 3
        if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden))
        {
            noChange = false;
            return (zerothSection + restOfArticle);
        }
    }

    // Performance check: if article title not in zeroth section have nothing further to do
    // (the title is compared with BracketedAtEndOfLine matches removed, case-insensitively)
    if(zerothSection.IndexOf(BracketedAtEndOfLine.Replace(articleTitle, ""), StringComparison.OrdinalIgnoreCase) < 0)
        return articleTextAtStart;

    // 3) '''Emboldens''' the first occurrence of the article title

    // ignore date articles (date in American or international format), nihongo title
    if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle) || TemplateExists(alltemplates, NihongoTitle))
        return articleTextAtStart;

    string escTitle = Regex.Escape(articleTitle), escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));

    // title (full, and with brackets removed) already in bold, with either case of the first letter
    Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
    Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

    // if title in bold already exists in article, or paragraph starts with something in bold, don't change anything
    // ignore any bold in infoboxes
    if(BoldTitleAlready4.IsMatch(Tools.ReplaceWithSpaces(zerothSection, WikiRegexes.InfoBox.Matches(zerothSection))) || DfnTag.IsMatch(zerothSection))
        return articleTextAtStart;

    string articleTextNoInfobox = Tools.ReplaceWithSpaces(articleText, WikiRegexes.InfoBox.Matches(articleText));
    if (boldTitleAlready1.IsMatch(articleTextNoInfobox) || boldTitleAlready2.IsMatch(articleTextNoInfobox) || BoldTitleAlready3.IsMatch(articleTextNoInfobox))
        return articleTextAtStart;

    // so no self links to remove, check for the need to add bold
    string articleTextNoTemplates = WikiRegexes.NestedTemplates.Replace(articleText, "");

    // first quick check: ignore articles with some bold in first 5% of article, ignoring infoboxes, dablinks etc.
    int fivepc = articleTextNoTemplates.Length / 20;

    if (articleTextNoTemplates.Substring(0, fivepc).Contains("'''"))
        return articleTextAtStart;

    // title must not directly follow '[' and must be followed by a space or one of , . : ;
    Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

    zerothSectionHidden = Hider3.HideMore(zerothSection);
    zerothSectionHiddenOriginal = zerothSectionHidden;

    // first try title with brackets removed
    // (replace count of 1: only the first occurrence is emboldened)
    zerothSectionHidden = regexBoldNoBrackets.Replace(zerothSectionHidden, "$1'''$2'''$3", 1);
    zerothSection = Hider3.AddBackMore(zerothSectionHidden);

    articleText = zerothSection + restOfArticle;

    // check that the bold added is the first bit in bold in the main body of the article
    if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden) && AddedBoldIsValid(articleText, escTitleNoBrackets))
    {
        noChange = false;
        return articleText;
    }

    return articleTextAtStart;
}
/// <summary>
/// Checks that HideMore fully hides a plain wikilink and that AddBackMore restores it.
/// </summary>
public void HideMore()
{
    const string wikilink = "[[foo]]";

    Hider = new HideText(true, false, true);

    string hidden = Hider.HideMore(wikilink, false, true);
    RegexAssert.IsMatch(AllHidden, hidden);

    string restored = Hider.AddBackMore(hidden);
    Assert.AreEqual("[[foo]]", restored);
}
/// <summary>
/// Runs Article.Unicodify over three sample texts and checks the resulting article text.
/// </summary>
public void Unicodify()
{
    Parsers parser = new Parsers();
    HideText hider = new HideText(false, true, false);

    Article article = new Article("a", @"'''test'''. z & a‡ †. {{DEFAULTSORT:Hello test}} [[Category:Test pages]] ");
    article.Unicodify(true, parser, hider);
    Assert.AreEqual(@"'''test'''. z & a‡ †. {{DEFAULTSORT:Hello test}} [[Category:Test pages]] ", article.ArticleText, "Text unicodified");

    article = new Article("a", @"'''test'''. z & {{t|a‡ †}}. {{DEFAULTSORT:Hello test}} [[Category:Test pages]] ");
    article.Unicodify(true, parser, hider);
    Assert.AreEqual(@"'''test'''. z & {{t|a‡ †}}. {{DEFAULTSORT:Hello test}} [[Category:Test pages]] ", article.ArticleText, "Text unicodified, hidemore used");

    article = new Article("a", @"ABC");
    article.Unicodify(true, parser, hider);
    Assert.AreEqual(@"ABC", article.ArticleText, "No change");
}
/// <summary>
/// Sets the date (month and year) for undated cleanup tags that take a date
/// Avoids changing tags in unformatted text areas (wiki comments etc.)
/// A tag that already has a non-empty "Date" parameter only gets that parameter renamed to "date".
/// Note: bugzilla 2700 means {{ssubst}} within ref tags doesn't work, AWB doesn't do anything about it
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <returns>The updated article text</returns>
public static string TagUpdater(string articleText)
{
    HideText ht = new HideText();
    articleText = ht.HideUnformatted(articleText);

    foreach (KeyValuePair<Regex, string> tagger in RegexTagger)
    {
        Regex tagPattern = tagger.Key;
        string datedForm = tagger.Value;

        articleText = tagPattern.Replace(articleText, match =>
        {
            // capitalised Date parameter already present: just normalise its name
            if (Tools.GetTemplateParameterValue(match.Value, "Date").Length > 0)
            {
                return Tools.RenameTemplateParameter(match.Value, "Date", "date");
            }

            // otherwise substitute the first capture into the configured replacement text
            return datedForm.Replace("$1", match.Groups[1].Value);
        });
    }

    return ht.AddBackUnformatted(articleText);
}
/// <summary>
/// Applies all loaded typo groups to the article text.
/// Skipped entirely (text returned as given) when TypoCount is zero or IgnoreRegex matches.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="noChange">True when fixing was skipped or the typo groups did not alter the hidden text</param>
/// <param name="summary">Empty when fixing was skipped; otherwise Variables.TypoSummaryTag followed by the trimmed fix summary</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>The article text after typo fixing</returns>
public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
{
    summary = "";

    bool skipFixing = (TypoCount == 0) || IgnoreRegex.IsMatch(articleText);
    if (skipFixing)
    {
        noChange = true;
        return articleText;
    }

    // kept so each typo group can also see the text as it was before hiding
    string originalArticleText = articleText;

    HideText removeText = new HideText(true, false, true);
    string hiddenText = removeText.HideMore(articleText, true);

    //remove newlines, whitespace and hide tokens from bottom
    //to avoid running 2K regexps on them
    Match tailMatch = RemoveTail.Match(hiddenText);
    string tail = tailMatch.Value;
    string body = string.IsNullOrEmpty(tail) ? hiddenText : hiddenText.Remove(tailMatch.Index);

    string bodyBeforeFixes = body;
    string groupSummaries = "";

    foreach (TypoGroup grp in Groups)
    {
        grp.FixTypos(ref body, ref groupSummaries, articleTitle, originalArticleText);
    }

    noChange = bodyBeforeFixes.Equals(body);
    summary = Variables.TypoSummaryTag + groupSummaries.Trim();

    // put the tail back, then unhide
    return removeText.AddBackMore(body + tail);
}
// Covered by: LinkTests.TestBulletExternalLinks()
/// <summary>
/// Adds bullet points to external links after the last "external links" / "links" header.
/// Text before that header is left untouched; the text is returned as given when no such header exists.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text.</returns>
public static string BulletExternalLinks(string ArticleText)
{
    // RightToLeft: the match found is the last such header in the text
    Match heading = Regex.Match(ArticleText, @"=\s*(?:external)?\s*links\s*=", RegexOptions.IgnoreCase | RegexOptions.RightToLeft);

    if (!heading.Success)
    {
        return ArticleText;
    }

    int splitAt = heading.Index;
    string beforeHeading = ArticleText.Substring(0, splitAt);
    string fromHeading = ArticleText.Substring(splitAt);

    HideText ht = new HideText(false, true, false);
    fromHeading = ht.HideMore(fromHeading);

    // a line starting with (optionally bracketed) http gets a "* " prefix; one preceding blank line is dropped
    fromHeading = Regex.Replace(fromHeading, "(\r\n|\n)?(\r\n|\n)(\\[?http)", "$2* $3");

    fromHeading = ht.AddBackMore(fromHeading);

    return beforeHeading + fromHeading;
}
/// <summary>
/// Removes the interwiki featured article links and the interwiki links from the article text and returns them.
/// Interwikis in comments/nowiki tags are ignored.
/// </summary>
/// <param name="articleText">Article text; updated in place with the interwiki and interwiki featured article links removed</param>
/// <returns>string of interwiki featured article links followed by the interwiki links</returns>
public string Interwikis(ref string articleText)
{
    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Interwiki_links_moved_out_of_comment
    HideText hider = new HideText(false, true, false);
    articleText = hider.Hide(articleText);

    string extracted = ListToString(RemoveLinkFGAs(ref articleText));
    extracted += ListToString(RemoveInterWikis(ref articleText));

    articleText = hider.AddBack(articleText);

    return extracted;
}
/// <summary>
/// Checks that the bold just added to the article is the first bold in the article, and that it's within the first 5% of the HideMore article OR immediately after the infobox
/// </summary>
/// <param name="articleText">The article text after the bold has been added</param>
/// <param name="escapedTitle">The regex-escaped title that was emboldened</param>
/// <returns>true when either check passes</returns>
private bool AddedBoldIsValid(string articleText, string escapedTitle)
{
    HideText hider = new HideText(true, true, true);

    // length of text up to and including the bolded title, minus the title itself
    Regex boldAddedPattern = new Regex(@"^(.*?)'''" + escapedTitle, RegexOptions.Singleline);
    int boldAddedPos = boldAddedPattern.Match(articleText).Length - Regex.Unescape(escapedTitle).Length;

    int firstBoldPos = RegexFirstBold.Match(articleText).Length;

    string hiddenText = hider.HideMore(articleText);

    // was bold added in first 5% of article?
    string leadingFivePercent = hiddenText.Substring(0, hiddenText.Length / 20);
    bool inFirst5Percent = leadingFivePercent.Contains("'''");

    // check that the bold added is the first bit in bold in the main body of the article, and in first 5% of HideMore article
    if (inFirst5Percent && boldAddedPos <= firstBoldPos)
    {
        return true;
    }

    // second check: bold just after infobox (tested against the unhidden text)
    // NOTE(review): concatenation uses the InfoBox regex's pattern text only; any RegexOptions it carries are not applied here — confirm that is intended
    Regex boldAfterInfobox = new Regex(WikiRegexes.InfoBox + @"\s*'''" + escapedTitle);

    return boldAfterInfobox.IsMatch(articleText);
}
/// <summary>
/// Sets the date (month and year) for undated cleanup tags that take a date
/// Avoids changing tags in unformatted text areas (wiki comments etc.)
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <returns>The updated article text</returns>
public static string TagUpdater(string articleText)
{
    HideText ht = new HideText();
    string workingText = ht.HideUnformatted(articleText);

    // apply each tagger pattern with its replacement, in RegexTagger enumeration order
    foreach (KeyValuePair<Regex, string> tagger in RegexTagger)
    {
        Regex tagPattern = tagger.Key;
        string replacement = tagger.Value;

        workingText = tagPattern.Replace(workingText, replacement);
    }

    return ht.AddBackUnformatted(workingText);
}
// Covered by: BoldTitleTests
/// <summary>
/// '''Emboldens''' the first occurrence of the article title, if not already bold.
/// Bolded self links are cleaned up first; a self link to the title in the zeroth section is converted to bold;
/// otherwise the first plain occurrence of the title (brackets removed) is emboldened if that passes AddedBoldIsValid.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The title of the article.</param>
/// <param name="noChange">Value that indicated whether no change was made.</param>
/// <returns>The modified article text.</returns>
public string BoldTitle(string articleText, string articleTitle, out bool noChange)
{
    HideText Hider2 = new HideText();
    HideText Hider3 = new HideText(true, true, true);

    // clean up bolded self links first
    articleText = BoldedSelfLinks(articleTitle, articleText);

    noChange = true;
    string escTitle = Regex.Escape(articleTitle);
    string escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));

    string articleTextAtStart = articleText;

    string zerothSection = WikiRegexes.ZerothSection.Match(articleText).Value;
    string restOfArticle = articleText.Remove(0, zerothSection.Length);

    // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
    string zerothSectionHidden = Hider2.HideMore(zerothSection, false, false, false);
    string zerothSectionHiddenOriginal = zerothSectionHidden;

    // first check for any self links and no bold title, if found just convert first link to bold and return
    Regex r1 = new Regex(@"\[\[\s*" + escTitle + @"\s*\]\]");
    Regex r2 = new Regex(@"\[\[\s*" + Tools.TurnFirstToLower(escTitle) + @"\s*\]\]");

    // '$' is special in a Regex replacement pattern ($0, $&, $$ ...); double it so the title is inserted literally
    string boldTitle = "'''" + articleTitle.Replace("$", "$$") + "'''";
    string boldTitleLower = "'''" + Tools.TurnFirstToLower(articleTitle).Replace("$", "$$") + "'''";

    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
    // don't apply if bold in lead section already or some noinclude transclusion business
    // (the transclusion guard covers both the as-given and the lower-case-first-letter self link)
    if (!WikiRegexes.Noinclude.IsMatch(articleText) && !WikiRegexes.Includeonly.IsMatch(articleText))
    {
        if (!Regex.IsMatch(zerothSection, "'''" + escTitle + "'''"))
            zerothSectionHidden = r1.Replace(zerothSectionHidden, boldTitle);

        if (zerothSectionHiddenOriginal == zerothSectionHidden && !Regex.IsMatch(zerothSection, @"'''" + Tools.TurnFirstToLower(escTitle) + @"'''"))
            zerothSectionHidden = r2.Replace(zerothSectionHidden, boldTitleLower);
    }

    zerothSection = Hider2.AddBackMore(zerothSectionHidden);

    if (zerothSectionHiddenOriginal != zerothSectionHidden)
    {
        noChange = false;
        return (zerothSection + restOfArticle);
    }

    // ignore date articles (date in American or international format)
    if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle))
        return articleTextAtStart;

    Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
    Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

    //if title in bold already exists in article, or page starts with something in bold, don't change anything
    if (boldTitleAlready1.IsMatch(articleText) || boldTitleAlready2.IsMatch(articleText) || BoldTitleAlready3.IsMatch(articleText))
        return articleTextAtStart;

    // so no self links to remove, check for the need to add bold
    string articleTextHidden = Hider3.HideMore(articleText);

    // first quick check: ignore articles with some bold in first 5% of hidemore article
    int fivepc = articleTextHidden.Length / 20;

    if (articleTextHidden.Substring(0, fivepc).Contains("'''"))
        return articleTextAtStart;

    // title must not directly follow '[' and must be followed by a space or one of , . : ;
    Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

    // Nothing to embolden: bail out here so that noChange is never reported as false
    // for text in which no bold was actually added.
    if (!regexBoldNoBrackets.IsMatch(articleTextHidden))
        return articleTextAtStart;

    // first try title with brackets removed (first occurrence only)
    articleTextHidden = regexBoldNoBrackets.Replace(articleTextHidden, "$1'''$2'''$3", 1);

    articleText = Hider3.AddBackMore(articleTextHidden);

    // check that the bold added is the first bit in bold in the main body of the article
    if (AddedBoldIsValid(articleText, escTitleNoBrackets))
    {
        noChange = false;
        return articleText;
    }

    return articleTextAtStart;
}
/// <summary>
/// Checks image/file link hiding: file names must be hidden, surrounding brackets and captions retained,
/// imagemap content fully hidden, and category sort keys hidden or kept depending on the wikilink options.
/// </summary>
public void HideImages()
{
    // { input, assertion message } — in every case the hidden text must no longer contain "foo"
    string[,] fileNameCases =
    {
        { @"[[File:foo.jpg]]", "Standard case" },
        { @"[[File:foo with space and 0004.jpg]]", "with space" },
        { @"[[File:foo.jpeg]]", "jpeg" },
        { @"[[File:foo.JPEG]]", "JPEG" },
        { @"[[Image:foo with space and 0004.jpeg]]", "space and jpeg" },
        { @"[[Image:foo.jpeg]]", "Image jpeg" },
        { @"[[Image:foo with space and 0004.jpg]]", "image jpeg space" },
        { @"[[File:foo.jpg|", "To pipe" },
        { @"[[File:foo with space and 0004.jpg|", "Space to pipe" },
        { @"[[File:foo.jpeg|", "Standard case" },
        { @"[[Image:foo with space and 0004.jpeg|", "Standard case" },
        { @"[[Image:foo.jpeg|", "Standard case" },
        { @"[[Image:foo with SPACE() and 0004.jpg|", "Standard case" },
        { @"[[File:foo.gif|", "Standard case" },
        { @"[[Image:foo with space and 0004.gif|", "Standard case" },
        { @"[[Image:foo.gif|", "Standard case" },
        { @"[[Image:foo with SPACE() and 0004.gif|", "Standard case" },
        { @"[[File:foo.png|", "Standard case" },
        { @"[[Image:foo with space and 0004.png|", "Standard case" },
        { @"[[Image:foo_here.png|", "Standard case" },
        { @"[[Image:foo with SPACE() and 0004.png|", "Standard case" }
    };

    for (int i = 0; i < fileNameCases.GetLength(0); i++)
    {
        Assert.IsFalse(Hide(fileNameCases[i, 0]).Contains("foo"), fileNameCases[i, 1]);
    }

    Assert.IsFalse(Hide(@"[[Image:westminster.tube.station.jubilee.arp.jpg|").Contains("westminster.tube.station.jubilee.arp"), "Dot name");

    // brackets and caption of a thumbnail survive hiding
    Assert.IsTrue(Hide(@"[[File:foo.jpg|thumb|140px|[[Jo]] Assistant [[Ge]]]]").StartsWith("[["), "Retain starting brackets");
    Assert.IsTrue(Hide(@"[[File:foo.jpg|thumb|140px|[[Jo]] Assistant [[Ge]]]]").Contains(@"thumb|140px|[[Jo]] Assistant [[Ge]]]]"), "Retain ending brackets");

    AssertAllHidden(@"<imagemap> File:Blogs001.jpeg|Description File:Blogs002.jpeg|Description </imagemap>");

    // category sort keys
    Assert.IsFalse(HideMore(@"[[Category:Foo|abc]]", false).Contains("abc"), "Category sort key always hidden if hiding wikilinks and not leaving target");
    Assert.IsFalse(HideMore(@"[[Category:Foo|abc]]", true).Contains("abc"), "Category sort key hidden even if keeping targets");

    HideText wikilinkKeepingHider = new HideText(true, false, false);
    Assert.IsTrue(wikilinkKeepingHider.HideMore(@"[[Category:Foo|abc]]", false, false).Contains("abc"), "Category sort key kept if keeping wikilinks");
}
//TODO:Needs re-write
/// <summary>
/// If necessary, adds/removes maintenance tags: stub, expand, uncategorized, dead end, wikify,
/// plus whatever TagOrphans, TagRefsIbid and TagEmptySection do; also renames unreferenced to refimprove when refs exist.
/// Returns the text untouched for non-mainspace titles, redirects, and pages matching WikiRegexes.Wi.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The article title.</param>
/// <param name="restrictOrphanTagging">Passed through to TagOrphans</param>
/// <param name="summary">Set to the result of PrepareTaggerEditSummary(); left untouched on the early return</param>
/// <returns>The tagged article.</returns>
public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
{
    // don't tag redirects/outside article namespace/no tagging changes
    if (!Namespace.IsMainSpace(articleTitle) || Tools.IsRedirect(articleText) || WikiRegexes.Wi.IsMatch(articleText))
        return articleText;

    tagsRemoved.Clear();
    tagsAdded.Clear();

    string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

    // NOTE(review): only commentsStripped is passed here; commentsCategoriesStripped was already derived above,
    // and commentsStripped is recomputed at the next "refresh", so this affects only the stub check below
    Sorter.Interwikis(ref commentsStripped);

    // bulleted or indented text should weigh less than simple text.
    // for example, actor stubs may contain large filmographies
    string crapStripped = WikiRegexes.BulletedText.Replace(commentsCategoriesStripped, "");
    // word count is the average of the counts with and without bulleted text
    int words = (Tools.WordCount(commentsCategoriesStripped) + Tools.WordCount(crapStripped))/2;

    // remove stub tags from long articles
    if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
    {
        articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();
        tagsRemoved.Add("stub");
    }

    // refresh
    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

    // on en wiki, remove expand template when a stub template exists
    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests/Archive_5#Remove_.7B.7Bexpand.7D.7D_when_a_stub_template_exists
    if (Variables.LangCode == "en" && WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) && WikiRegexes.Expand.IsMatch(commentsCategoriesStripped))
    {
        articleText = WikiRegexes.Expand.Replace(articleText, "");
        tagsRemoved.Add("expand");
    }

    // refresh
    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

    // do orphan tagging before template analysis for categorisation tags
    articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

    articleText = TagRefsIbid(articleText);

    articleText = TagEmptySection(articleText);

    int totalCategories;
    // link count is taken from the text as refreshed above, i.e. before the three Tag* calls
    int linkCount = Tools.LinkCount(commentsStripped);

#if DEBUG || UNITTEST
    // in debug/unit-test builds the category count can be injected instead of queried
    if (Globals.UnitTestMode)
    {
        totalCategories = Globals.UnitTestIntValue;
    }
    else
#endif
    {
        // stubs add non-hidden stub categories, don't count these in categories count
        List<Article> Cats = CategoryProv.MakeList(new[] {articleTitle});
        List<Article> CatsNotStubs = new List<Article>();

        foreach (Article a in Cats)
        {
            if (!a.Name.EndsWith(" stubs") && !a.Name.EndsWith(":Stubs"))
                CatsNotStubs.Add(a);
        }
        totalCategories = CatsNotStubs.Count;
    }

    // article has links: let SectionTagME process each dead end tag, and record removal only if none remain
    if (linkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText))
    {
        articleText = WikiRegexes.DeadEnd.Replace(articleText, new MatchEvaluator(SectionTagME));

        if(!WikiRegexes.DeadEnd.IsMatch(articleText))
            tagsRemoved.Add("deadend");
    }

    // discount persondata along with comments and categories from wikify and stub evaluation
    int length = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "").Length + 1;
    // underlinked: fewer than 0.0025 links per character of that length
    bool underlinked = (linkCount < 0.0025*length);

    if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
        !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped))
    {
        // add stub tag
        articleText += Tools.Newline("{{stub}}", 3);
        tagsAdded.Add("stub");
        // refresh so the stub checks below see the tag just added
        commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    }

    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
    // nl wiki doesn't use {{Uncategorized}} template
    // prevent wictionary redirects from being tagged as uncategorised
    if (words > 6 && totalCategories == 0
        && !WikiRegexes.Uncat.IsMatch(articleText)
        && Variables.LangCode != "nl"
        && !Tools.NestedTemplateRegex("cat improve").IsMatch(articleText)
        // category count is from API; don't add uncat tag if genfixes added person categories
        && !WikiRegexes.DeathsOrLivingCategory.IsMatch(articleText)
        && !WikiRegexes.BirthsCategory.IsMatch(articleText))
    {
        if (WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            // add uncategorized stub tag
            articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
            tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
        }
        else
        {
            // add uncategorized tag
            articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
            tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
        }
    }

    // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
    // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
    if (WikiRegexes.Uncat.IsMatch(articleText))
    {
        if (totalCategories > 0)
        {
            articleText = WikiRegexes.Uncat.Replace(articleText, "");
            tagsRemoved.Add("uncategorised");
        }
        else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            // group 1 of the Uncat match is used as the template name to rename
            string uncatname = WikiRegexes.Uncat.Match(articleText).Groups[1].Value;
            if (!uncatname.Contains("stub"))
                articleText = Tools.RenameTemplate(articleText, uncatname, "Uncategorized stub");
        }
    }

    // no links at all: tag as dead end unless already tagged directly or inside the multiple issues template (not on sv wiki)
    if (linkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && Variables.LangCode != "sv"
        && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower(), @"\bdead ?end\b"))
    {
        // add dead-end tag
        articleText = "{{dead end|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
        tagsAdded.Add("[[:Category:Dead-end pages|deadend]]");
    }

    // NOTE(review): add uses linkCount < 3 while removal uses linkCount > 3, so exactly 3 links triggers neither branch
    if (linkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
        && !WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower().Contains("wikify"))
    {
        // add wikify tag
        articleText = "{{Wikify|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
        tagsAdded.Add("[[WP:WFY|wikify]]");
    }
    else if (linkCount > 3 && !underlinked && WikiRegexes.Wikify.IsMatch(articleText))
    {
        articleText = WikiRegexes.Wikify.Replace(articleText, new MatchEvaluator(SectionTagME));

        if(!WikiRegexes.Wikify.IsMatch(articleText))
            tagsRemoved.Add("wikify");
    }

    // rename unreferenced --> refimprove if has existing refs
    // (not recorded in tagsAdded/tagsRemoved, so on its own it does not trigger the cleanup block below)
    if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
        && WikiRegexes.Refs.Matches(commentsCategoriesStripped).Count > 0)
    {
        articleText = Tools.RenameTemplate(articleText, "unreferenced", "refimprove", true);

        // also rename the parameter inside the multiple issues template, if there is one
        Match m = WikiRegexes.MultipleIssues.Match(articleText);
        if(m.Success)
        {
            string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
            if(!newValue.Equals(m.Value))
                articleText = articleText.Replace(m.Value, newValue);
        }
    }

    // tidy up only when a tag was actually added or removed
    if (tagsAdded.Count > 0 || tagsRemoved.Count > 0)
    {
        Parsers p = new Parsers();
        HideText ht = new HideText();

        articleText = ht.HideUnformatted(articleText);

        articleText = p.MultipleIssues(articleText);
        articleText = Conversions(articleText);
        articleText = ht.AddBackUnformatted(articleText);

        // sort again in case tag removal requires whitespace cleanup
        articleText = p.Sorter.Sort(articleText, articleTitle);
    }

    summary = PrepareTaggerEditSummary();

    return articleText;
}
/// <summary>
/// Pulls the interwiki links and interwiki featured article links out of the article text
/// and returns them as one string. Interwikis in comments/nowiki tags are ignored.
/// </summary>
/// <param name="articleText">Article text; updated in place with the extracted links (and any InterLangRegex match) removed</param>
/// <returns>string of interwiki and interwiki featured article links</returns>
public string Interwikis(ref string articleText)
{
    // A block matched by InterLangRegex is lifted out first and re-inserted between the two link lists below
    string interWikiComment = "";
    Match commentMatch = InterLangRegex.Match(articleText);
    if (commentMatch.Success)
    {
        interWikiComment = commentMatch.Value;
        articleText = articleText.Replace(interWikiComment, "");
    }

    // Hide text before extracting so that links inside hidden regions are left alone, see
    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Interwiki_links_moved_out_of_comment
    HideText hider = new HideText(false, true, false);
    articleText = hider.Hide(articleText);

    // Order matters: both removers edit articleText, featured-article links are taken first
    string featuredLinks = ListToString(RemoveLinkFGAs(ref articleText));
    string commentPart = interWikiComment.Length > 0 ? interWikiComment + "\r\n" : "";
    string plainLinks = ListToString(RemoveInterWikis(ref articleText));

    articleText = hider.AddBack(articleText);

    return featuredLinks + commentPart + plainLinks;
}
//TODO:Needs re-write
/// <summary>
/// If necessary, adds/removes/renames maintenance tags on the article:
/// stub, uncategorised, dead end, underlinked/wikify, and unreferenced --> refimprove.
/// Orphan, ibid and empty-section tagging are delegated to TagOrphans, TagRefsIbid and TagEmptySection.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The article title.</param>
/// <param name="restrictOrphanTagging">Passed through unchanged to TagOrphans</param>
/// <param name="summary">Set to the result of PrepareTaggerEditSummary(); left untouched when TaggerPermitted returns false</param>
/// <returns>The tagged article.</returns>
public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
{
    if (!TaggerPermitted(articleText, articleTitle))
        return articleText;

    tagsRemoved.Clear();
    tagsAdded.Clear();
    int tagsrenamed = 0;

    string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
    Sorter.Interwikis(ref commentsStripped);

    // bulleted or indented text should weigh less than simple text.
    // for example, actor stubs may contain large filmographies
    string crapStripped = BulletedText.Replace(commentsCategoriesStripped, "");
    int words = (Tools.WordCount(commentsCategoriesStripped, 999) + Tools.WordCount(crapStripped, 999)) / 2;

    // remove stub tags from long articles, don't move section stubs
    if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
    {
        articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();

        if (Variables.LangCode.Equals("ar"))
        {
            tagsRemoved.Add("بذرة");
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            tagsRemoved.Add("تقاوى");
        }
        else if (Variables.LangCode.Equals("hy"))
        {
            tagsRemoved.Add("Անավարտ");
        }
        else
        {
            tagsRemoved.Add("stub");
        }
    }

    // refresh
    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

    //remove disambiguation if disambiguation cleanup exists (en-wiki only)
    if (Variables.LangCode.Equals("en") && WikiRegexes.DisambigsCleanup.IsMatch(commentsStripped))
    {
        articleText = WikiRegexes.DisambigsGeneral.Replace(articleText, "").Trim();
    }

    // do orphan tagging before template analysis for categorisation tags
    articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

    articleText = TagRefsIbid(articleText);

    articleText = TagEmptySection(articleText);

    int totalCategories;
    // ignore commented out wikilinks, and any in {{Proposed deletion/dated}}
    int wikiLinkCount = Tools.LinkCount(ProposedDeletionDatedEndorsed.Replace(commentsStripped, ""));

#if DEBUG || UNITTEST
    if (Globals.UnitTestMode)
    {
        totalCategories = Globals.UnitTestIntValue;
    }
    else
#endif
    {
        // stubs add non-hidden stub categories, don't count these in categories count
        // also don't count "Proposed deletion..." cats
        // limitation: in the unlikely event that the article has only redlinked cats then it is {{uncat}} but we won't tag it as such
        totalCategories = RegularCategories(articleText).Count;

        // templates may add categories to page that are not [[Category...]] links, so use API call for accurate Category count
        if (totalCategories == 0)
            totalCategories = RegularCategories(CategoryProv.MakeList(new[] { articleTitle })).Count;
    }

    // remove dead end if > 0 wikilinks on page
    if (wikiLinkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText))
    {
        if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
            articleText = WikiRegexes.DeadEnd.Replace(articleText, "");
        else
            articleText = WikiRegexes.DeadEnd.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

        // only report the removal when no dead end tag is left
        if (!WikiRegexes.DeadEnd.IsMatch(articleText))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                tagsRemoved.Add("نهاية مسدودة");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                tagsRemoved.Add("نهاية مسدودة");
            }
            else
            {
                tagsRemoved.Add("deadend");
            }
        }
    }

    // discount persondata, comments, infoboxes and categories from wikify/underlinked and stub evaluation
    string lengthtext = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "");
    lengthtext = WikiRegexes.InfoBox.Replace(lengthtext, "");
    lengthtext = Drugbox.Replace(lengthtext, "");

    int length = lengthtext.Length + 1;
    bool underlinked = (wikiLinkCount < 0.0025 * length);

    if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
        !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(commentsCategoriesStripped))
    {
        // add stub tag. Exclude pages their title starts with "List of..."
        if (!ListOf.IsMatch(articleTitle) && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{بذرة}}", 3);
                tagsAdded.Add("بذرة");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{تقاوى}}", 3);
                tagsAdded.Add("تقاوى");
            }
            else if (Variables.LangCode.Equals("hy"))
            {
                articleText += Tools.Newline("{{Անավարտ}}", 3);
                tagsAdded.Add("Անավարտ");
            }
            else
            {
                articleText += Tools.Newline("{{stub}}", 3);
                tagsAdded.Add("stub");
            }
            // refresh so the uncategorised logic below sees the stub tag just added
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
        }
    }

    // rename existing {{improve categories}} else add uncategorized tag
    if (totalCategories == 0 && ImproveCategories.IsMatch(articleText))
        articleText = Tools.RenameTemplate(articleText, "improve categories", "Uncategorized");

    // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
    // nl wiki doesn't use {{Uncategorized}} template
    // prevent wictionary redirects from being tagged as uncategorised
    if (words > 6 && totalCategories == 0
        && !WikiRegexes.Uncat.IsMatch(articleText)
        && Variables.LangCode != "nl")
    {
        if (WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            // add uncategorized stub tag
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{بذرة غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[تصنيف:مقالات غير مصنفة|غير مصنفة]]");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{تقاوى مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[قالب:تقاوى مش متصنفه|تقاوى مش متصنفه]]");
            }
            else if (Variables.LangCode.Equals("hy")) // same template for uncat and uncat stub
            {
                articleText += Tools.Newline("{{Կատեգորիա չկա|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("Կատեգորիա չկա");
            }
            else if (Variables.LangCode.Equals("sv")) // same template for uncat and uncat stub
            {
                articleText += Tools.Newline("{{Okategoriserad|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[Mall:Okategoriserad|okategoriserad]]");
            }
            else
            {
                articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
            }
        }
        else
        {
            // add plain uncategorized tag
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|مقالات غير مصنفة]]");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|مش متصنفه]]");
            }
            else if (Variables.LangCode.Equals("el"))
            {
                articleText += Tools.Newline("{{Ακατηγοριοποίητο|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[Πρότυπο:Ακατηγοριοποίητο|ακατηγοριοποίητο]]");
            }
            else if (Variables.LangCode.Equals("hy"))
            {
                articleText += Tools.Newline("{{Կատեգորիա չկա|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("Կատեգորիա չկա");
            }
            else if (Variables.LangCode.Equals("sv"))
            {
                articleText += Tools.Newline("{{Okategoriserad|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[Mall:Okategoriserad|okategoriserad]]");
            }
            else
            {
                articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
            }
        }
    }

    // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
    // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
    if (WikiRegexes.Uncat.IsMatch(articleText))
    {
        if (totalCategories > 0)
        {
            articleText = WikiRegexes.Uncat.Replace(articleText, "").TrimStart();

            if (Variables.LangCode.Equals("ar"))
                tagsRemoved.Add("غير مصنفة");
            else if (Variables.LangCode.Equals("arz"))
                tagsRemoved.Add("مش متصنفه");
            else
                tagsRemoved.Add("uncategorised");
        }
        else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            // rename uncat to uncat stub if no uncat stub. If uncat and uncat stub, remove uncat.
            bool uncatstub = false;
            foreach (Match u in WikiRegexes.Uncat.Matches(articleText))
            {
                if (WikiRegexes.Stub.IsMatch(u.Value))
                {
                    uncatstub = true;
                    break;
                }
            }

            articleText = WikiRegexes.Uncat.Replace(articleText, u2 =>
            {
                if (!uncatstub) // rename
                {
                    string renamed = u2.Value;

                    if (Variables.LangCode.Equals("ar"))
                        renamed = Tools.RenameTemplate(u2.Value, "بذرة غير مصنفة");
                    else if (Variables.LangCode.Equals("arz"))
                        renamed = Tools.RenameTemplate(u2.Value, "تقاوى مش متصنفه");
                    else if (Variables.LangCode.Equals("en") || Variables.LangCode.Equals("simple"))
                        renamed = Tools.RenameTemplate(u2.Value, "Uncategorized stub");

                    // only count a rename that really happened: languages without an
                    // uncat stub template leave the tag as is and must not trigger the
                    // post-processing pass at the end of the method
                    if (!renamed.Equals(u2.Value))
                        tagsrenamed++;

                    return renamed;
                }

                // already uncat stub so remove plain uncat
                if (!WikiRegexes.Stub.IsMatch(u2.Value))
                {
                    if (Variables.LangCode.Equals("ar"))
                        tagsRemoved.Add("غير مصنفة");
                    else if (Variables.LangCode.Equals("arz"))
                        tagsRemoved.Add("مش متصنفه");
                    else
                        tagsRemoved.Add("uncategorised");

                    return "";
                }

                return u2.Value;
            });
        }
    }

    if (wikiLinkCount == 0 &&
        !WikiRegexes.DeadEnd.IsMatch(articleText) &&
        !WikiRegexes.SIAs.IsMatch(articleText) &&
        !WikiRegexes.NonDeadEndPageTemplates.IsMatch(articleText) &&
        !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
    {
        // add dead-end tag
        // no blank line between dead end and orphan tags for ar/arz
        if (Variables.LangCode.Equals("ar"))
        {
            articleText = "{{نهاية مسدودة|" + WikiRegexes.DateYearMonthParameter + "}}\r\n" + (WikiRegexes.Orphan.IsMatch(articleText) ? "" : "\r\n") + articleText;
            tagsAdded.Add("[[:تصنيف:مقالات نهاية مسدودة|نهاية مسدودة]]");

            // if dead end then remove underlinked/wikify
            if (WikiRegexes.Wikify.IsMatch(articleText))
            {
                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                tagsRemoved.Add("ويكي");
            }
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            articleText = "{{نهايه مسدوده|" + WikiRegexes.DateYearMonthParameter + "}}\r\n" + articleText;
            tagsAdded.Add("[[:قالب:نهايه مسدوده|نهايه مسدوده]]");

            // if dead end then remove underlinked
            if (WikiRegexes.Wikify.IsMatch(articleText))
            {
                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                tagsRemoved.Add("ويكى");
            }
        }
        // case-insensitive regex rather than ToLower(): lower-casing is culture-sensitive
        else if (Variables.LangCode != "sv" && !WikiRegexes.Centuryinbox.IsMatch(articleText)
                 && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value, @"\bdead ?end\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)
                 && !MinorPlanetListFooter.IsMatch(articleText))
        {
            // Don't add excess newlines between new tags
            articleText = "{{Dead end|" + WikiRegexes.DateYearMonthParameter + "}}" + (tagsAdded.Count > 0 ? "\r\n" : "\r\n\r\n") + articleText;
            tagsAdded.Add("[[CAT:DE|deadend]]");

            // if dead end then remove underlinked
            if (articleText.IndexOf("underlinked", StringComparison.OrdinalIgnoreCase) > -1)
            {
                articleText = Tools.NestedTemplateRegex("underlinked").Replace(articleText, "").TrimStart();
                tagsRemoved.Add("underlinked");
            }
        }
    }
    // add underlinked/wikify tag, don't add underlinked/wikify if {{dead end}} already present
    // Don't tag SIA pages, may create wikilinks from templates
    // "wikify" inside multiple issues is looked up with an ordinal case-insensitive search:
    // ToLower() is culture-sensitive (e.g. "WIKIFY" lower-cases to a dotless i under a Turkish locale)
    else if (wikiLinkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
             && WikiRegexes.MultipleIssues.Match(articleText).Value.IndexOf("wikify", StringComparison.OrdinalIgnoreCase) < 0
             && !WikiRegexes.DeadEnd.IsMatch(articleText)
             && !WikiRegexes.SIAs.IsMatch(articleText)
             && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(articleText)
             && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
    {
        // Avoid excess newlines between templates
        string templateEnd = "}}\r\n" + (articleText.TrimStart().StartsWith(@"{{") ? "" : "\r\n");

        if (Variables.LangCode.Equals("ar"))
        {
            articleText = "{{ويكي|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText.TrimStart();
            tagsAdded.Add("[[وب:ويكي|ويكي]]");
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            articleText = "{{ويكى|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[قالب:ويكى|ويكى]]");
        }
        else if (Variables.LangCode.Equals("sv"))
        {
            articleText = "{{Ickewiki|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[WP:PW|ickewiki]]");
        }
        else
        {
            articleText = "{{Underlinked|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[CAT:UL|underlinked]]");
        }
    }
    else if (wikiLinkCount > 3 && !underlinked && WikiRegexes.Wikify.IsMatch(articleText))
    {
        if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
            articleText = WikiRegexes.Wikify.Replace(articleText, "");
        else
            // remove wikify, except section templates or wikify tags with reason parameter specified
            articleText = WikiRegexes.Wikify.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

        // only report the removal when no wikify/underlinked tag is left
        if (!WikiRegexes.Wikify.IsMatch(articleText))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                tagsRemoved.Add("ويكي");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                tagsRemoved.Add("ويكى");
            }
            else
            {
                tagsRemoved.Add("underlinked");
            }
        }
    }

    // rename unreferenced --> refimprove if has existing refs, update date
    if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
        && (TotalRefsNotGrouped(commentsCategoriesStripped) + Tools.NestedTemplateRegex("sfn").Matches(articleText).Count) > 0)
    {
        articleText = Unreferenced.Replace(articleText, m2 => Tools.UpdateTemplateParameterValue(Tools.RenameTemplate(m2.Value, "refimprove"), "date", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}"));

        // update tag in old-style multiple issues
        Match m = WikiRegexes.MultipleIssues.Match(articleText);
        if (m.Success && Tools.GetTemplateParameterValue(m.Value, "unreferenced").Length > 0)
        {
            string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
            newValue = Tools.UpdateTemplateParameterValue(newValue, "refimprove", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}");
            if (!newValue.Equals(m.Value))
                articleText = articleText.Replace(m.Value, newValue);
        }
    }

    // post-process only when a tag was actually added, removed or renamed
    if (tagsAdded.Count > 0 || tagsRemoved.Count > 0 || tagsrenamed > 0)
    {
        Parsers p = new Parsers();
        HideText hider = new HideText();

        articleText = hider.HideUnformatted(articleText);

        articleText = p.MultipleIssues(articleText);
        articleText = Conversions(articleText);
        articleText = hider.AddBackUnformatted(articleText);

        // sort again in case tag removal requires whitespace cleanup
        // Don't sort interwikis, we can't specify the correct InterWikiSortOrder
        p.SortInterwikis = false;
        articleText = p.Sorter.Sort(articleText, articleTitle);
    }

    summary = PrepareTaggerEditSummary();

    return articleText;
}
/// <summary>
/// Performs typo fixes against the article text in multi-threaded mode: one thread per typo group,
/// then the per-group results are merged in group order.
/// Typo fixes not performed if no typos loaded or IgnoreRegex matches the page (any sic tags on page)
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="noChange">True if no typos fixed</param>
/// <param name="summary">Edit summary</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>Updated article text</returns>
public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
{
    string originalArticleText = articleText;
    summary = "";

    if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
    {
        noChange = true;
        return articleText;
    }

    HideText removeText = new HideText(true, false, true);
    articleText = removeText.HideMore(articleText, true);

    // remove newlines, whitespace and hide tokens from bottom
    // to avoid running 2K regexps on them
    Match m = RemoveTail.Match(articleText);
    string tail = m.Value;
    if (!string.IsNullOrEmpty(tail))
    {
        articleText = articleText.Remove(m.Index);
    }

    string originalText = articleText;
    string strSummary = "";

    /* Run typos threaded, one thread per group for better performance
     * http://stackoverflow.com/questions/13776846/pass-paramters-through-parameterizedthreadstart
     * http://www.dotnetperls.com/parameterizedthreadstart
     * http://stackoverflow.com/questions/831009/thread-with-multiple-parameters
     */
    resultSummary.Clear();
    resultArticleText.Clear();

    Thread[] array = new Thread[Groups.Count];
    int i = 0;
    foreach (TypoGroup tg in Groups)
    {
        // Copy the loop variable before capturing it: compilers older than C# 5 share one
        // foreach variable across all iterations, so a thread could otherwise run a later group
        TypoGroup group = tg;

        array[i] = new Thread(delegate(object unused)
        {
            group.FixTypos2(articleText, strSummary, articleTitle, originalArticleText);
        });
        array[i].Start(i);
        i++;
    }

    // Join all the threads: wait for all to complete
    for (int j = 0; j < array.Length; j++)
    {
        array[j].Join();
    }

    // Merge per-group results, looked up by GroupSize (presumably stored there by FixTypos2 - confirm)
    string groupSummary, groupArticleText;
    foreach (TypoGroup tg in Groups)
    {
        bool haveSummary = resultSummary.TryGetValue(tg.GroupSize, out groupSummary);
        resultArticleText.TryGetValue(tg.GroupSize, out groupArticleText);

        // no recorded result, or no typos fixed by this group: nothing to merge
        // (previously a missing entry caused a NullReferenceException here)
        if (!haveSummary || string.IsNullOrEmpty(groupSummary))
        {
            continue;
        }

        if (strSummary.Length > 0 || groupArticleText == null)
        {
            // earlier thread had changes (or this group's text is unavailable), so need to re-run this one
            tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
        }
        else
        {
            strSummary = groupSummary;
            articleText = groupArticleText;
        }
    }

    noChange = originalText.Equals(articleText);

    summary = Variables.TypoSummaryTag + strSummary.Trim();

    return removeText.AddBackMore(articleText + tail);
}
/// <summary>
/// Test helper: replaces Hider with a default-constructed HideText and runs HideMore on the text
/// </summary>
/// <param name="text">Text to hide</param>
/// <param name="HideOnlyTargetOfWikilink">Passed straight through to HideText.HideMore</param>
/// <returns>The text as returned by HideText.HideMore</returns>
private string HideMore(string text, bool HideOnlyTargetOfWikilink)
{
    // Hider is kept on the instance, not in a local
    Hider = new HideText();

    string hidden = Hider.HideMore(text, HideOnlyTargetOfWikilink);
    return hidden;
}
// Covered by: LinkTests.FixDates()
/// <summary>
/// Fixes date and decade formatting errors, and tidies runs of br tags / paragraphs,
/// working on the text with HideMore applied.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text.</returns>
public string FixDates(string ArticleText)
{
    HideText hider = new HideText();
    string working = hider.HideMore(ArticleText);

    working = FixDatesRaw(working);

    // Remove 2 or more <br />'s
    // This piece's existence here is counter-intuitive, but it requires HideMore()
    // and I don't want to call this slow function yet another time --MaxSem
    working = SyntaxRemoveBr.Replace(working, "\r\n");
    working = SyntaxRemoveParagraphs.Replace(working, "\r\n\r\n");

    return hider.AddBackMore(working);
}
/// <summary>
/// Test helper: replaces Hider with a HideText built from the three options and runs Hide on the text
/// </summary>
/// <param name="text">Text to hide</param>
/// <param name="HideExternalLinks">First HideText constructor argument</param>
/// <param name="LeaveMetaHeadings">Second HideText constructor argument</param>
/// <param name="HideImages">Third HideText constructor argument</param>
/// <returns>The text as returned by HideText.Hide</returns>
private string Hide(string text, bool HideExternalLinks, bool LeaveMetaHeadings, bool HideImages)
{
    // Hider is kept on the instance, not in a local
    Hider = new HideText(HideExternalLinks, LeaveMetaHeadings, HideImages);

    string hidden = Hider.Hide(text);
    return hidden;
}
/// <summary>
/// Runs the universal and then the general fixes on a small article whose text starts with a space,
/// and checks the resulting article text has no leading space.
/// </summary>
public void PerformUniversalGeneralFixes()
{
    HideText hider = new HideText();
    MockSkipOptions skipOptions = new MockSkipOptions();
    Article article = new Article("Hello", " '''Hello''' world text");

    article.PerformUniversalGeneralFixes();
    article.PerformGeneralFixes(parser, hider, skipOptions, false, false, false);

    Assert.AreEqual("'''Hello''' world text", article.ArticleText);
}
/// <summary>
/// Runs every typo group over the article text, one after another.
/// Nothing is done when no typos are loaded or IgnoreRegex matches the text.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">True if the (hidden, tail-stripped) text was not altered by any group</param>
/// <param name="Summary">Variables.TypoSummaryTag followed by the trimmed fix summary, or empty when no summary was produced</param>
/// <returns>Updated article text</returns>
public string PerformTypoFixes(string ArticleText, out bool NoChange, out string Summary)
{
    Summary = "";

    // TyposCount is tested first so the regex is not run when there is nothing loaded
    if (TyposCount == 0 || IgnoreRegex.IsMatch(ArticleText))
    {
        NoChange = true;
        return ArticleText;
    }

    HideText hider = new HideText(true, false, true);
    ArticleText = hider.HideMore(ArticleText);

    // remove newlines, whitespace and hide tokens from bottom
    // to avoid running 2K regexps on them
    Match tailMatch = RemoveTail.Match(ArticleText);
    string tail = tailMatch.Value;
    if (tail.Length > 0)
    {
        ArticleText = ArticleText.Remove(tailMatch.Index);
    }

    string beforeFixes = ArticleText;
    string fixSummary = "";

    foreach (TypoGroup grp in Groups)
    {
        grp.FixTypos(ref ArticleText, ref fixSummary);
    }

    NoChange = beforeFixes == ArticleText;

    // the tail is re-attached before unhiding
    string restored = hider.AddBackMore(ArticleText + tail);

    if (!string.IsNullOrEmpty(fixSummary))
    {
        Summary = Variables.TypoSummaryTag + fixSummary.Trim();
    }

    return restored;
}
/// <summary>
/// Test helper: replaces hider with a default-constructed HideText and runs HideMore on the text
/// </summary>
/// <param name="text">Text to hide</param>
/// <returns>The text as returned by HideText.HideMore</returns>
private string Hide(string text)
{
    // hider is kept on the instance, not in a local
    hider = new HideText();

    string hidden = hider.HideMore(text);
    return hidden;
}
/// <summary>
/// Test helper: replaces Hider with a HideText built from the three options and runs HideMore on the text
/// </summary>
/// <param name="text">Text to hide</param>
/// <param name="hideExternalLinks">First HideText constructor argument</param>
/// <param name="leaveMetaHeadings">Second HideText constructor argument</param>
/// <param name="hideImages">Third HideText constructor argument</param>
/// <returns>The text as returned by HideText.HideMore</returns>
private string HideMore(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
{
    // Hider is kept on the instance, not in a local
    Hider = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);

    string hidden = Hider.HideMore(text);
    return hidden;
}
/// <summary>
/// Test helper: replaces Hider with a HideText built from the three options, then runs
/// AssertAllHidden followed by AssertAllHiddenMore on the text
/// </summary>
/// <param name="text">Text passed to both assertions</param>
/// <param name="hideExternalLinks">First HideText constructor argument</param>
/// <param name="leaveMetaHeadings">Second HideText constructor argument</param>
/// <param name="hideImages">Third HideText constructor argument</param>
private void AssertBothHidden(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
{
    HideText configured = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);
    Hider = configured;

    AssertAllHidden(text);
    AssertAllHiddenMore(text);
}
// Covered by: BoldTitleTests
/// <summary>
/// '''Emboldens''' the first occurrence of the article title, if not already bold
/// 1) Cleans up bolded self wikilinks
/// 2) Cleans up self wikilinks
/// 3) '''Emboldens''' the first occurrence of the article title
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The title of the article.</param>
/// <param name="noChange">Set to false only when step 2 or step 3 changed the text; true otherwise.</param>
/// <returns>The modified article text.</returns>
public string BoldTitle(string articleText, string articleTitle, out bool noChange)
{
    noChange = true;

    List<string> alltemplates = GetAllTemplates(articleText);

    // nothing to do when the NoBoldTitle template is present
    if (TemplateExists(alltemplates, NoBoldTitle))
        return articleText;

    // 1) clean up bolded self links first, provided no noinclude use in article
    string withoutBoldedSelfLinks = BoldedSelfLinks(articleTitle, articleText);

    if (!withoutBoldedSelfLinks.Equals(articleText) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
        articleText = withoutBoldedSelfLinks;

    // 2) Clean up self wikilinks
    string articleTextAtStart = articleText;
    string zerothSection = Tools.GetZerothSection(articleText);
    string restOfArticle = articleText.Substring(zerothSection.Length);
    string hiddenLead, hiddenLeadBefore;

    // first check for any self links and no bold title, if found just convert first link to bold and return
    // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
    // don't apply if bold in lead section already or some noinclude transclusion business
    bool leadHasSelfLinks = !SelfLinks(zerothSection, articleTitle).Equals(zerothSection);

    if (leadHasSelfLinks && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
    {
        // There's a limitation here in that we can't hide image descriptions that may be above lead sentence
        // without hiding the self links we are looking to correct
        HideText selfLinkHider = new HideText();

        hiddenLead = selfLinkHider.HideMore(zerothSection, false, false, false);
        hiddenLeadBefore = hiddenLead;
        hiddenLead = SelfLinks(hiddenLead, articleTitle);
        zerothSection = selfLinkHider.AddBackMore(hiddenLead);

        if (!hiddenLeadBefore.Equals(hiddenLead))
        {
            noChange = false;
            return zerothSection + restOfArticle;
        }
    }

    // title with any bracketed text at end of line removed, as used by the checks below
    string titleNoBrackets = BracketedAtEndOfLine.Replace(articleTitle, "");

    // Performance check: if article title not in zeroth section have nothing further to do
    if (zerothSection.IndexOf(titleNoBrackets, StringComparison.OrdinalIgnoreCase) < 0)
        return articleTextAtStart;

    // 3) '''Emboldens''' the first occurrence of the article title

    // ignore date articles (date in American or international format), nihongo title
    bool titleIsDate = WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle);

    if (titleIsDate || TemplateExists(alltemplates, NihongoTitle))
        return articleTextAtStart;

    string escTitle = Regex.Escape(articleTitle);
    string escTitleNoBrackets = Regex.Escape(titleNoBrackets);

    Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
    Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

    // if title in bold already exists in article, or paragraph starts with something in bold, don't change anything
    // ignore any bold in infoboxes
    string leadNoInfobox = Tools.ReplaceWithSpaces(zerothSection, WikiRegexes.InfoBox.Matches(zerothSection));

    if (BoldTitleAlready4.IsMatch(leadNoInfobox) || DfnTag.IsMatch(zerothSection))
        return articleTextAtStart;

    string articleTextNoInfobox = Tools.ReplaceWithSpaces(articleText, WikiRegexes.InfoBox.Matches(articleText));

    if (boldTitleAlready1.IsMatch(articleTextNoInfobox)
        || boldTitleAlready2.IsMatch(articleTextNoInfobox)
        || BoldTitleAlready3.IsMatch(articleTextNoInfobox))
        return articleTextAtStart;

    // so no self links to remove, check for the need to add bold
    string articleTextNoTemplates = WikiRegexes.NestedTemplates.Replace(articleText, "");

    // first quick check: ignore articles with some bold in first 5% of article, ignoring infoboxes, dablinks etc.
    int fivepc = articleTextNoTemplates.Length / 20;

    if (articleTextNoTemplates.Substring(0, fivepc).Contains("'''"))
        return articleTextAtStart;

    Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

    HideText boldHider = new HideText(true, true, true);

    hiddenLead = boldHider.HideMore(zerothSection);
    hiddenLeadBefore = hiddenLead;

    // first try title with brackets removed
    hiddenLead = regexBoldNoBrackets.Replace(hiddenLead, "$1'''$2'''$3", 1);

    zerothSection = boldHider.AddBackMore(hiddenLead);

    articleText = zerothSection + restOfArticle;

    // check that the bold added is the first bit in bold in the main body of the article
    if (!hiddenLeadBefore.Equals(hiddenLead) && AddedBoldIsValid(articleText, escTitleNoBrackets))
    {
        noChange = false;
        return articleText;
    }

    return articleTextAtStart;
}
/// <summary>
/// Checks that image/file links (complete, or cut off at the first pipe) and an imagemap block are hidden,
/// and checks how category sort keys are treated by the HideMore variants.
/// </summary>
public void HideImages()
{
    // inputs for AssertAllHidden, in the order they are checked
    string[] allHiddenCases =
    {
        @"[[File:foo.jpg]]",
        @"[[File:foo with space and 0004.jpg]]",
        @"[[File:foo.jpeg]]",
        @"[[File:foo.JPEG]]",
        @"[[Image:foo with space and 0004.jpeg]]",
        @"[[Image:foo.jpeg]]",
        @"[[Image:foo with space and 0004.jpg]]",
        @"[[File:foo.jpg|",
        @"[[File:foo with space and 0004.jpg|",
        @"[[File:foo.jpeg|",
        @"[[Image:foo with space and 0004.jpeg|",
        @"[[Image:foo.jpeg|",
        @"[[Image:foo with SPACE() and 0004.jpg|",
        @"[[File:foo.gif|",
        @"[[Image:foo with space and 0004.gif|",
        @"[[Image:foo.gif|",
        @"[[Image:foo with SPACE() and 0004.gif|",
        @"[[File:foo.png|",
        @"[[Image:foo with space and 0004.png|",
        @"[[Image:foo_here.png|",
        @"[[Image:foo with SPACE() and 0004.png|",
        @"[[Image:westminster.tube.station.jubilee.arp.jpg|",
        @"<imagemap> File:Blogs001.jpeg|Description File:Blogs002.jpeg|Description </imagemap>"
    };

    foreach (string markup in allHiddenCases)
    {
        AssertAllHidden(markup);
    }

    // inputs for the single-argument AssertBothHidden
    string[] bothHiddenCases =
    {
        @"[[File:foo.jpg]]",
        @"[[Image:foo with space and 0004.png|",
        @"[[Image:foo_here.png|"
    };

    foreach (string markup in bothHiddenCases)
    {
        AssertBothHidden(markup);
    }

    Assert.IsFalse(HideMore(@"[[Category:Foo|abc]]", false).Contains("abc"), "Category sort key always hidden if hiding wikilinks and not leaving target");
    Assert.IsFalse(HideMore(@"[[Category:Foo|abc]]", true).Contains("abc"), "Category sort key hidden even if keeping targets");

    HideText keepWikilinks = new HideText(true, false, false);
    string categoryHidden = keepWikilinks.HideMore(@"[[Category:Foo|abc]]", false, false);
    Assert.IsTrue(categoryHidden.Contains("abc"), "Category sort key kept if keeping wikilinks");
}
/// <summary>
/// Performs typo fixes against the article text in multi-threaded mode: one thread per typo group,
/// then the per-group results are merged in group order.
/// Typo fixes not performed if no typos loaded or IgnoreRegex matches the page (any sic tags on page)
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="noChange">True if no typos fixed</param>
/// <param name="summary">Edit summary</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>Updated article text</returns>
public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
{
    string originalArticleText = articleText;
    summary = "";

    if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
    {
        noChange = true;
        return articleText;
    }

    HideText removeText = new HideText(true, false, true);
    articleText = removeText.HideMore(articleText, true);

    // remove newlines, whitespace and hide tokens from bottom
    // to avoid running 2K regexps on them
    Match m = RemoveTail.Match(articleText);
    string tail = m.Value;
    if (!string.IsNullOrEmpty(tail))
        articleText = articleText.Remove(m.Index);

    string originalText = articleText;
    string strSummary = "";

    /* Run typos threaded, one thread per group for better performance
     * http://stackoverflow.com/questions/13776846/pass-paramters-through-parameterizedthreadstart
     * http://www.dotnetperls.com/parameterizedthreadstart
     * http://stackoverflow.com/questions/831009/thread-with-multiple-parameters
     */
    resultSummary.Clear();
    resultArticleText.Clear();

    Thread[] array = new Thread[Groups.Count];

    int i = 0;
    foreach (TypoGroup tg in Groups)
    {
        // Capture a per-iteration copy: before C# 5 the foreach variable is shared by every
        // closure, so a thread starting late could otherwise process a different group
        TypoGroup threadGroup = tg;

        array[i] =
            new Thread(
                delegate()
                {
                    threadGroup.FixTypos2(articleText, strSummary, articleTitle, originalArticleText);
                });
        array[i].Start();
        i++;
    }

    // Join all the threads: wait for all to complete
    foreach (Thread t in array)
    {
        t.Join();
    }

    // Merge per-group results, looked up by GroupSize (presumably stored there by FixTypos2 - confirm)
    foreach (TypoGroup tg in Groups)
    {
        string groupSummary;
        bool haveSummary = resultSummary.TryGetValue(tg.GroupSize, out groupSummary);

        string groupArticleText;
        resultArticleText.TryGetValue(tg.GroupSize, out groupArticleText);

        // no recorded result, or no typos fixed by this group: nothing to merge
        // (previously a missing entry caused a NullReferenceException here)
        if (!haveSummary || string.IsNullOrEmpty(groupSummary))
            continue;

        if (strSummary.Length > 0 || groupArticleText == null)
        {
            // earlier thread had changes (or this group's text is unavailable), so need to re-run this one
            tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
        }
        else
        {
            strSummary = groupSummary;
            articleText = groupArticleText;
        }
    }

    noChange = originalText.Equals(articleText);

    summary = Variables.TypoSummaryTag + strSummary.Trim();

    return removeText.AddBackMore(articleText + tail);
}
/// <summary>
/// Checks that the bold just added to the article is the first bold in the article, and that there is bold
/// within the first 5% (minimum 5 characters) of the article once templates are removed and HideMore is applied
/// </summary>
/// <param name="articleText">Article text containing the newly added bold</param>
/// <param name="escapedTitle">Title text already escaped for use inside a regex pattern</param>
/// <returns>True when the added bold is at or before the first bold and bold occurs in the leading 5%</returns>
private bool AddedBoldIsValid(string articleText, string escapedTitle)
{
    HideText Hider2 = new HideText(true, true, true);
    Regex RegexBoldAdded = new Regex(@"^(.*?)'''(" + escapedTitle + @")", RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // positions are measured on the text as passed in, before templates are stripped
    int boldAddedPos = RegexBoldAdded.Match(articleText).Groups[2].Index;

    int firstBoldPos = RegexFirstBold.Match(articleText).Length;

    articleText = WikiRegexes.NestedTemplates.Replace(articleText, "");

    articleText = Hider2.HideMore(articleText);

    // was bold added in first 5% of article?
    bool inFirst5Percent = false;

    int articlelength = articleText.Length;

    if (articlelength > 5)
    {
        string trimmed = articleText.Trim();

        // The window is sized from the untrimmed length, so clamp it to the trimmed text:
        // otherwise Substring throws when trimming leaves fewer characters than the window
        int window = Math.Min(trimmed.Length, Math.Max(articlelength / 20, 5));

        inFirst5Percent = trimmed.Substring(0, window).Contains("'''");
    }

    // check that the bold added is the first bit in bold in the main body of the article, and in first 5% of HideMore article
    return inFirst5Percent && boldAddedPos <= firstBoldPos;
}
/// <summary>
/// Reports whether any loaded typo group would fix something on the page.
/// Always false when no typos are loaded or IgnoreRegex matches the text.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>whether there are typos on the page</returns>
public bool DetectTypo(string articleText, string articleTitle)
{
    if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
    {
        return false;
    }

    // keep the untouched text: every group is handed it alongside the hidden working copy
    string unhiddenText = articleText;

    HideText hider = new HideText(true, false, true);
    articleText = hider.HideMore(articleText, true);

    // remove newlines, whitespace and hide tokens from bottom
    // to avoid running 2K regexps on them
    Match tailMatch = RemoveTail.Match(articleText);
    if (tailMatch.Success)
    {
        articleText = articleText.Remove(tailMatch.Index);
    }

    string fixSummary = "";

    foreach (TypoGroup grp in Groups)
    {
        grp.FixTypos(ref articleText, ref fixSummary, articleTitle, unhiddenText);

        // a non-empty summary means this group fixed something: stop at the first hit
        if (fixSummary.Length > 0)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Fixes date and decade formatting errors, and collapses runs of line break tags.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text.</returns>
public string FixDates(string ArticleText)
{
    HideText hider = new HideText();
    string working = hider.HideMore(ArticleText);

    working = FixDatesRaw(working);

    // Replace two or more consecutive <br /> tags with a single newline.
    // Doing this here looks out of place, but it needs the HideMore() pass and
    // that slow function should not be run an extra time just for this (original note by MaxSem)
    working = Regex.Replace(working.Trim(), @"(<br[\s/]*> *){2,}", "\r\n", RegexOptions.IgnoreCase);

    return hider.AddBackMore(working);
}
//TODO:Needs re-write
/// <summary>
/// If necessary, adds/removes maintenance tags: stub, uncategorized, dead end and wikify/underlinked,
/// and renames unreferenced to refimprove when the page already has references.
/// Names of the tags added and removed are collected in tagsAdded / tagsRemoved.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The article title.</param>
/// <param name="restrictOrphanTagging">Passed through unchanged to TagOrphans</param>
/// <param name="summary">Set to the result of PrepareTaggerEditSummary() once tagging has run;
/// left untouched when the page is skipped (redirect, outside article namespace, or matching WikiRegexes.Wi)</param>
/// <returns>The tagged article.</returns>
public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
{
    // don't tag redirects/outside article namespace/no tagging changes
    if (!Namespace.IsMainSpace(articleTitle) || Tools.IsRedirect(articleText) || WikiRegexes.Wi.IsMatch(articleText))
    {
        return articleText;
    }

    tagsRemoved.Clear();
    tagsAdded.Clear();

    string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
    Sorter.Interwikis(ref commentsStripped);

    // bulleted or indented text should weigh less than simple text.
    // for example, actor stubs may contain large filmographies
    string crapStripped = WikiRegexes.BulletedText.Replace(commentsCategoriesStripped, "");
    int words = (Tools.WordCount(commentsCategoriesStripped) + Tools.WordCount(crapStripped)) / 2;

    // remove stub tags from long articles, don't move section stubs
    if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
    {
        articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();
        tagsRemoved.Add("stub");
    }

    // refresh
    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
    commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

    // do orphan tagging before template analysis for categorisation tags
    articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

    articleText = TagRefsIbid(articleText);

    articleText = TagEmptySection(articleText);

    int totalCategories;

    // ignore commented out wikilinks, and any in {{Proposed deletion/dated}}
    int wikiLinkCount = Tools.LinkCount(ProposedDeletionDatedEndorsed.Replace(commentsStripped, ""));

#if DEBUG || UNITTEST
    if (Globals.UnitTestMode)
    {
        totalCategories = Globals.UnitTestIntValue;
    }
    else
#endif
    {
        // stubs add non-hidden stub categories, don't count these in categories count
        // also don't count "Proposed deletion..." cats
        List<Article> Cats = CategoryProv.MakeList(new[] { articleTitle });
        totalCategories = RegularCategories(Cats).Count;

        // cats may have been added to page by genfixes, F&R or user (when reparsing) so check cats on page if API says zero
        // so we correctly count for uncat tagging
        if (totalCategories == 0)
        {
            totalCategories = RegularCategories(articleText).Count;
        }
    }

    // remove dead end when wikilinks on page, but not for en-wiki where dead end can mean "not enough" links
    if (wikiLinkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText) && !Variables.LangCode.Equals("en"))
    {
        if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
        {
            articleText = WikiRegexes.DeadEnd.Replace(articleText, "");
        }
        else
        {
            // keep section templates and tags with a reason parameter
            articleText = WikiRegexes.DeadEnd.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();
        }

        // only report the removal when no dead end tag is left
        if (!WikiRegexes.DeadEnd.IsMatch(articleText))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                tagsRemoved.Add("نهاية مسدودة");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                // arz template name, matching the spelling used when the tag is added below
                tagsRemoved.Add("نهايه مسدوده");
            }
            else
            {
                tagsRemoved.Add("deadend");
            }
        }
    }

    // discount persondata, comments, infoboxes and categories from wikify and stub evaluation
    string lengthtext = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "");
    lengthtext = WikiRegexes.InfoBox.Replace(lengthtext, "");
    lengthtext = Drugbox.Replace(lengthtext, "");

    int length = lengthtext.Length + 1;
    bool underlinked = (wikiLinkCount < 0.0025 * length);

    if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
        !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped))
    {
        // add stub tag. Exclude pages whose title starts with "List of..."
        if (!ListOf.IsMatch(articleTitle))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{بذرة}}", 3);
                tagsAdded.Add("بذرة");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{تقاوى}}", 3);
                tagsAdded.Add("تقاوى");
            }
            else
            {
                articleText += Tools.Newline("{{stub}}", 3);
                tagsAdded.Add("stub");
            }

            // refresh so the stub checks below see the tag just added
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
        }
    }

    // rename existing {{improve categories}} else add uncategorized tag
    if (totalCategories == 0 && ImproveCategories.IsMatch(articleText))
    {
        articleText = Tools.RenameTemplate(articleText, "improve categories", "Uncategorized");
    }

    // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
    // nl wiki doesn't use {{Uncategorized}} template
    // prevent Wiktionary redirects from being tagged as uncategorised
    if (words > 6 && totalCategories == 0
        && !WikiRegexes.Uncat.IsMatch(articleText)
        && Variables.LangCode != "nl")
    {
        if (WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            // add uncategorized stub tag
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{بذرة غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[تصنيف:مقالات غير مصنفة|غير مصنفة]]");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{تقاوى مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[قالب:تقاوى مش متصنفه|تقاوى مش متصنفه]]");
            }
            else
            {
                articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
            }
        }
        else
        {
            // add plain uncategorized tag
            if (Variables.LangCode.Equals("ar"))
            {
                articleText += Tools.Newline("{{غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|مقالات غير مصنفة]]");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                articleText += Tools.Newline("{{مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|مش متصنفه]]");
            }
            else
            {
                articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
            }
        }
    }

    // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
    // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
    if (WikiRegexes.Uncat.IsMatch(articleText))
    {
        if (totalCategories > 0)
        {
            articleText = WikiRegexes.Uncat.Replace(articleText, "").TrimStart();

            if (Variables.LangCode.Equals("ar"))
            {
                tagsRemoved.Add("غير مصنفة");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                tagsRemoved.Add("مش متصنفه");
            }
            else
            {
                tagsRemoved.Add("uncategorised");
            }
        }
        else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
        {
            string uncatname = WikiRegexes.Uncat.Match(articleText).Groups[1].Value;

            if (!uncatname.Contains("stub"))
            {
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = Tools.RenameTemplate(articleText, uncatname, "بذرة غير مصنفة");
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    // same template name the arz "add uncategorized stub" path uses above
                    articleText = Tools.RenameTemplate(articleText, uncatname, "تقاوى مش متصنفه");
                }
                else
                {
                    articleText = Tools.RenameTemplate(articleText, uncatname, "Uncategorized stub");
                }
            }
        }
    }

    if (wikiLinkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && !WikiRegexes.SIAs.IsMatch(articleText))
    {
        // add dead-end tag
        if (Variables.LangCode.Equals("ar"))
        {
            articleText = "{{نهاية مسدودة|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
            tagsAdded.Add("[[:تصنيف:مقالات نهاية مسدودة|نهاية مسدودة]]");

            // if dead end then remove underlinked
            if (WikiRegexes.Wikify.IsMatch(articleText))
            {
                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                tagsRemoved.Add("ويكي");
            }
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            articleText = "{{نهايه مسدوده|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
            tagsAdded.Add("[[:قالب:نهايه مسدوده|نهايه مسدوده]]");

            // if dead end then remove underlinked
            if (WikiRegexes.Wikify.IsMatch(articleText))
            {
                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                tagsRemoved.Add("ويكى");
            }
        }
        else if (Variables.LangCode != "sv" && !WikiRegexes.Centuryinbox.IsMatch(articleText)
                 // case-insensitive, culture-independent check for an existing dead end entry in multiple issues
                 && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value, @"\bdead ?end\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
        {
            // Don't add excess newlines between new tags
            articleText = "{{dead end|" + WikiRegexes.DateYearMonthParameter + "}}" + (tagsAdded.Count > 0 ? "\r\n" : "\r\n\r\n") + articleText;
            tagsAdded.Add("[[CAT:DE|deadend]]");

            // if dead end then remove underlinked
            if (articleText.IndexOf("underlinked", StringComparison.OrdinalIgnoreCase) > -1)
            {
                articleText = Tools.NestedTemplateRegex("underlinked").Replace(articleText, "").TrimStart();
                tagsRemoved.Add("underlinked");
            }
        }
    }
    // add wikify tag, don't add underlinked/wikify if {{dead end}} already present
    // Don't tag SIA pages, may create wikilinks from templates
    else if (wikiLinkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
             // ordinal, case-insensitive check so the result does not depend on the current culture
             && WikiRegexes.MultipleIssues.Match(articleText).Value.IndexOf("wikify", StringComparison.OrdinalIgnoreCase) < 0
             && !WikiRegexes.DeadEnd.IsMatch(articleText)
             && !WikiRegexes.SIAs.IsMatch(articleText))
    {
        // Avoid excess newlines between templates
        string templateEnd = "}}\r\n" + (articleText.StartsWith(@"{{") ? "" : "\r\n");

        if (Variables.LangCode.Equals("ar"))
        {
            articleText = "{{ويكي|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[وب:ويكي|ويكي]]");
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            articleText = "{{ويكى|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[قالب:ويكى|ويكى]]");
        }
        else if (Variables.LangCode.Equals("sv"))
        {
            articleText = "{{Wikify|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[WP:WFY|wikify]]");
        }
        else
        {
            articleText = "{{Underlinked|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
            tagsAdded.Add("[[CAT:UL|underlinked]]");
        }
    }
    else if (wikiLinkCount > 3 && !underlinked && WikiRegexes.Wikify.IsMatch(articleText))
    {
        if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
        {
            articleText = WikiRegexes.Wikify.Replace(articleText, "");
        }
        else
        {
            // remove wikify, except section templates or wikify tags with reason parameter specified
            articleText = WikiRegexes.Wikify.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();
        }

        // only report the removal when no wikify tag is left
        if (!WikiRegexes.Wikify.IsMatch(articleText))
        {
            if (Variables.LangCode.Equals("ar"))
            {
                tagsRemoved.Add("ويكي");
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                tagsRemoved.Add("ويكى");
            }
            else
            {
                tagsRemoved.Add("underlinked");
            }
        }
    }

    // rename unreferenced --> refimprove if has existing refs, update date
    if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
        && (TotalRefsNotGrouped(commentsCategoriesStripped) + Tools.NestedTemplateRegex("sfn").Matches(articleText).Count) > 0)
    {
        articleText = Unreferenced.Replace(articleText, m2 => Tools.UpdateTemplateParameterValue(Tools.RenameTemplate(m2.Value, "refimprove"), "date", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}"));

        // update tag in old-style multiple issues
        Match m = WikiRegexes.MultipleIssues.Match(articleText);
        if (m.Success && Tools.GetTemplateParameterValue(m.Value, "unreferenced").Length > 0)
        {
            string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
            newValue = Tools.UpdateTemplateParameterValue(newValue, "refimprove", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}");

            if (!newValue.Equals(m.Value))
            {
                articleText = articleText.Replace(m.Value, newValue);
            }
        }
    }

    if (tagsAdded.Count > 0 || tagsRemoved.Count > 0)
    {
        Parsers p = new Parsers();
        HideText ht = new HideText();

        articleText = ht.HideUnformatted(articleText);

        articleText = p.MultipleIssues(articleText);
        articleText = Conversions(articleText);
        articleText = ht.AddBackUnformatted(articleText);

        // sort again in case tag removal requires whitespace cleanup
        // Don't sort interwikis, we can't specify the correct InterWikiSortOrder
        p.SortInterwikis = false;
        articleText = p.Sorter.Sort(articleText, articleTitle);
    }

    summary = PrepareTaggerEditSummary();

    return articleText;
}