/// <summary>
/// Test helper: hides <paramref name="text"/> with a freshly configured <c>HideText</c>,
/// asserts that adding the hidden parts back restores the input exactly, and returns the hidden text.
/// </summary>
/// <param name="text">Wiki text to hide.</param>
/// <param name="hideExternalLinks">Passed as the first <c>HideText</c> constructor argument.</param>
/// <param name="leaveMetaHeadings">Passed as the second <c>HideText</c> constructor argument.</param>
/// <param name="hideImages">Passed as the third <c>HideText</c> constructor argument.</param>
/// <returns>The text as returned by <c>HideText.HideMore</c>.</returns>
private string HideMore(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
{
    Hider = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);

    string hidden = Hider.HideMore(text);
    string restored = Hider.AddBackMore(hidden);

    // hiding must round-trip back to the original text
    Assert.AreEqual(text, restored);

    return hidden;
}
/// <summary>
/// Test helper: hides <paramref name="text"/> with a default-constructed <c>HideText</c>,
/// asserts that adding the hidden parts back restores the input exactly, and returns the hidden text.
/// </summary>
/// <param name="text">Wiki text to hide.</param>
/// <param name="hideOnlyTargetOfWikilink">Forwarded unchanged to <c>HideText.HideMore</c>.</param>
/// <returns>The text as returned by <c>HideText.HideMore</c>.</returns>
private string HideMore(string text, bool hideOnlyTargetOfWikilink)
{
    Hider = new HideText();

    string hidden = Hider.HideMore(text, hideOnlyTargetOfWikilink);
    string restored = Hider.AddBackMore(hidden);

    // hiding must round-trip back to the original text
    Assert.AreEqual(text, restored);

    return hidden;
}
/// <summary>
/// Checks that the bold just added to the article is the first bold in the article, and that
/// there is bold markup within the first 5% (minimum 5 characters) of the article once
/// nested templates have been removed and the remainder passed through HideMore.
/// </summary>
/// <param name="articleText">The wiki text of the article, after the bold was added.</param>
/// <param name="escapedTitle">The regex-escaped title that was made bold.</param>
/// <returns>True if the added bold is at or before the first bold match and bold appears in the leading window.</returns>
private bool AddedBoldIsValid(string articleText, string escapedTitle)
{
    HideText Hider2 = new HideText(true, true, true);

    Regex RegexBoldAdded = new Regex(@"^(.*?)'''(" + escapedTitle + @")", RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // both positions are measured on the unmodified article text
    int boldAddedPos = RegexBoldAdded.Match(articleText).Groups[2].Index;
    int firstBoldPos = RegexFirstBold.Match(articleText).Length;

    articleText = WikiRegexes.NestedTemplates.Replace(articleText, "");
    articleText = Hider2.HideMore(articleText);

    // was bold added in first 5% of article?
    bool inFirst5Percent = false;
    int articlelength = articleText.Length;

    if (articlelength > 5)
    {
        string trimmed = articleText.Trim();

        // The window size is derived from the untrimmed length, so clamp it to the trimmed
        // length: otherwise text with a lot of surrounding whitespace makes Substring throw
        int windowLength = Math.Min(Math.Max(articlelength / 20, 5), trimmed.Length);

        inFirst5Percent = trimmed.Substring(0, windowLength).Contains("'''");
    }

    // The hidden text is not used beyond this point, so it is not added back

    // check that the bold added is the first bit in bold in the main body of the article, and in first 5% of HideMore article
    return inFirst5Percent && boldAddedPos <= firstBoldPos;
}
/// <summary>
/// Runs every enabled replacement rule over the supplied article text.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="strTitle">Article title, passed through to each find and replace.</param>
/// <param name="editSummary">Set to the "Replaced"/"Removed" summary when adding to the summary is ticked and something was recorded.</param>
/// <returns>The modified article text.</returns>
public string MultipleFindAndReplace(string articleText, string strTitle, ref string editSummary)
{
    if (!HasReplacements)
    {
        return articleText;
    }

    EditSummary = "";
    RemovedSummary = "";

    // hide the parts of the text the user asked to ignore
    if (chkIgnoreMore.Checked)
    {
        articleText = Remove.HideMore(articleText);
    }
    else if (chkIgnoreLinks.Checked)
    {
        articleText = Remove.Hide(articleText);
    }

    foreach (Replacement rule in ReplacementList)
    {
        if (rule.Enabled)
        {
            articleText = PerformFindAndReplace(rule.Find, rule.Replace, articleText, strTitle, rule.RegularExpressionOptions);
        }
    }

    // put the hidden parts back, mirroring the hide step above
    if (chkIgnoreMore.Checked)
    {
        articleText = Remove.AddBackMore(articleText);
    }
    else if (chkIgnoreLinks.Checked)
    {
        articleText = Remove.AddBack(articleText);
    }

    if (!chkAddToSummary.Checked)
    {
        return articleText;
    }

    if (!string.IsNullOrEmpty(EditSummary))
    {
        editSummary = ", Replaced: " + EditSummary.Trim();
    }

    if (!string.IsNullOrEmpty(RemovedSummary))
    {
        editSummary += ", Removed: " + RemovedSummary.Trim();
    }

    return articleText;
}
/// <summary>
/// Applies every typo group to the article text.
/// Nothing is done when no typos are loaded or the text matches the ignore regex.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">True when the typo groups left the text unchanged, or the fixes were skipped.</param>
/// <param name="Summary">Typo summary tag plus the fixes made, or empty when nothing was recorded.</param>
/// <returns>The modified article text.</returns>
public string PerformTypoFixes(string ArticleText, out bool NoChange, out string Summary)
{
    Summary = "";

    if (TyposCount == 0 || IgnoreRegex.IsMatch(ArticleText))
    {
        NoChange = true;
        return ArticleText;
    }

    HideText hider = new HideText(true, false, true);
    ArticleText = hider.HideMore(ArticleText);

    // cut newlines, whitespace and hide tokens off the bottom
    // so the typo regexes are not run over them
    Match tailMatch = RemoveTail.Match(ArticleText);
    string tail = tailMatch.Value;
    if (tail.Length > 0)
    {
        ArticleText = ArticleText.Remove(tailMatch.Index);
    }

    string textBeforeFixes = ArticleText;
    string fixesMade = "";

    foreach (TypoGroup typoGroup in Groups)
    {
        typoGroup.FixTypos(ref ArticleText, ref fixesMade);
    }

    NoChange = (textBeforeFixes == ArticleText);

    // re-attach the tail before restoring the hidden text
    ArticleText = hider.AddBackMore(ArticleText + tail);

    if (!string.IsNullOrEmpty(fixesMade))
    {
        Summary = Variables.TypoSummaryTag + fixesMade.Trim();
    }

    return ArticleText;
}
/// <summary>
/// Performs typo fixes against the article text.
/// Typo fixes are skipped if no typos are loaded or the text matches the ignore regex.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="noChange">True when the typo groups left the text unchanged, or the fixes were skipped.</param>
/// <param name="summary">Empty when skipped; otherwise the typo summary tag followed by the trimmed list of fixes.</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>The article text with typo fixes applied and hidden text restored.</returns>
public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
{
    summary = "";
    string originalArticleText = articleText;

    bool skip = (TypoCount == 0) || IgnoreRegex.IsMatch(articleText);
    if (skip)
    {
        noChange = true;
        return articleText;
    }

    HideText hider = new HideText(true, false, true);
    articleText = hider.HideMore(articleText, true);

    // cut newlines, whitespace and hide tokens off the bottom
    // so the typo regexes are not run over them
    Match tailMatch = RemoveTail.Match(articleText);
    string tail = tailMatch.Value;
    if (tail.Length > 0)
    {
        articleText = articleText.Remove(tailMatch.Index);
    }

    string textBeforeFixes = articleText;
    string fixesMade = "";

    foreach (TypoGroup typoGroup in Groups)
    {
        typoGroup.FixTypos(ref articleText, ref fixesMade, articleTitle, originalArticleText);
    }

    noChange = textBeforeFixes.Equals(articleText);
    summary = Variables.TypoSummaryTag + fixesMade.Trim();

    // re-attach the tail before restoring the hidden text
    string restored = hider.AddBackMore(articleText + tail);
    return restored;
}
/// <summary>
/// Applies a series of defined find and replacements to the supplied article text.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="strTitle">Article title, passed through to each find and replace.</param>
/// <param name="EditSummary">Set to ", Replaced: ..." when adding to the summary is ticked and a replacement summary was recorded.</param>
/// <returns>The modified article text.</returns>
public string MultipleFindAndReplace(string ArticleText, string strTitle, ref string EditSummary)
{
    // reset the replacement summary for this run
    streditsummary = "";

    // hide the parts of the text the user asked to ignore
    if (chkIgnoreMore.Checked)
    {
        ArticleText = Remove.HideMore(ArticleText);
    }
    else if (chkIgnoreLinks.Checked)
    {
        ArticleText = Remove.Hide(ArticleText);
    }

    foreach (Replacement rep in ReplacementList)
    {
        if (!rep.Enabled)
        {
            continue;
        }

        ArticleText = PerformFindAndReplace(rep.Find, rep.Replace, ArticleText, strTitle, rep.RegularExpressionOptions);
    }

    // put the hidden parts back, mirroring the hide step above
    if (chkIgnoreMore.Checked)
    {
        ArticleText = Remove.AddBackMore(ArticleText);
    }
    else if (chkIgnoreLinks.Checked)
    {
        ArticleText = Remove.AddBack(ArticleText);
    }

    // Report the same string that is reset above and tested here, so the guard
    // and the emitted summary cannot disagree
    if (chkAddToSummary.Checked && !string.IsNullOrEmpty(streditsummary))
    {
        EditSummary = ", Replaced: " + streditsummary.Trim();
    }

    return ArticleText;
}
/// <summary>
/// Reports whether any loaded typo group would change the page.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>True as soon as one typo group records a fix; false if none does, no typos are loaded, or the text matches the ignore regex.</returns>
public bool DetectTypo(string articleText, string articleTitle)
{
    string originalArticleText = articleText;

    if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
    {
        return false;
    }

    HideText hider = new HideText(true, false, true);
    articleText = hider.HideMore(articleText, true);

    // cut newlines, whitespace and hide tokens off the bottom
    // so the typo regexes are not run over them
    Match tailMatch = RemoveTail.Match(articleText);
    if (tailMatch.Success)
    {
        articleText = articleText.Remove(tailMatch.Index);
    }

    bool typoFound = false;
    string fixesMade = "";

    foreach (TypoGroup typoGroup in Groups)
    {
        typoGroup.FixTypos(ref articleText, ref fixesMade, articleTitle, originalArticleText);

        // stop at the first group that reports a fix
        if (fixesMade.Length > 0)
        {
            typoFound = true;
            break;
        }
    }

    return typoFound;
}
// Covered by: LinkTests.TestBulletExternalLinks()
/// <summary>
/// Adds bullet points to bare external links that follow the last "links" / "external links" header.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text, or the input unchanged when no such header is found.</returns>
public static string BulletExternalLinks(string ArticleText)
{
    // RightToLeft: the search starts from the end of the article
    Match header = Regex.Match(ArticleText, @"=\s*(?:external)?\s*links\s*=", RegexOptions.IgnoreCase | RegexOptions.RightToLeft);

    if (!header.Success)
    {
        return ArticleText;
    }

    int splitAt = header.Index;
    string beforeHeader = ArticleText.Substring(0, splitAt);
    string fromHeader = ArticleText.Substring(splitAt);

    // only the text from the header onwards is hidden and edited
    HideText ht = new HideText(false, true, false);
    string hidden = ht.HideMore(fromHeader);
    string bulleted = Regex.Replace(hidden, "(\r\n|\n)?(\r\n|\n)(\\[?http)", "$2* $3");

    return beforeHeader + ht.AddBackMore(bulleted);
}
// Covered by: LinkTests.FixDates()
/// <summary>
/// Fixes date and decade formatting errors, and collapses repeated line breaks / paragraph tags,
/// all while the text is hidden via HideMore.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text.</returns>
public string FixDates(string ArticleText)
{
    HideText hider = new HideText();

    string working = hider.HideMore(ArticleText);

    working = FixDatesRaw(working);

    // Remove 2 or more <br />'s
    // Having this here is counter-intuitive, but it needs HideMore()
    // and calling that slow function a second time is not wanted --MaxSem
    working = SyntaxRemoveBr.Replace(working, "\r\n");
    working = SyntaxRemoveParagraphs.Replace(working, "\r\n\r\n");

    return hider.AddBackMore(working);
}
/// <summary>
/// A plain wikilink is fully hidden by HideMore and restored exactly by AddBackMore.
/// </summary>
public void HideMore()
{
    Hider = new HideText(true, false, true);

    string hidden = Hider.HideMore("[[foo]]", false, true);
    RegexAssert.IsMatch(AllHidden, hidden);

    string restored = Hider.AddBackMore(hidden);
    Assert.AreEqual("[[foo]]", restored);
}
// Covered by: BoldTitleTests
/// <summary>
/// '''Emboldens''' the first occurrence of the article title, if not already bold.
/// Self links to the article in the zeroth section are converted to bold first; if that changes
/// anything the method returns at once. Otherwise the first plain occurrence of the title
/// (trailing bracketed part removed) is made bold, subject to AddedBoldIsValid.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The title of the article.</param>
/// <param name="noChange">Value that indicated whether no change was made.</param>
/// <returns>The modified article text.</returns>
public string BoldTitle(string articleText, string articleTitle, out bool noChange)
{
    // Hider2 is used on the zeroth section only, Hider3 on the whole article
    HideText Hider2 = new HideText();
    HideText Hider3 = new HideText(true, true, true);

    // clean up bolded self links first
    articleText = BoldedSelfLinks(articleTitle, articleText);

    noChange = true;
    string escTitle = Regex.Escape(articleTitle);
    string escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));

    // snapshot returned by every "no change" exit below; note it already includes the BoldedSelfLinks result
    string articleTextAtStart = articleText;

    string zerothSection = WikiRegexes.ZerothSection.Match(articleText).Value;
    string restOfArticle = articleText.Remove(0, zerothSection.Length);

    // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
    string zerothSectionHidden = Hider2.HideMore(zerothSection, false, false, false);
    string zerothSectionHiddenOriginal = zerothSectionHidden;

    // first check for any self links and no bold title, if found just convert first link to bold and return
    // r1 matches a self link with the title as given, r2 with the first letter lowered
    Regex r1 = new Regex(@"\[\[\s*" + escTitle + @"\s*\]\]");
    Regex r2 = new Regex(@"\[\[\s*" + Tools.TurnFirstToLower(escTitle) + @"\s*\]\]");

    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
    // don't apply if bold in lead section already or some noinclude transclusion business
    if (!Regex.IsMatch(zerothSection, "'''" + escTitle + "'''") && !WikiRegexes.Noinclude.IsMatch(articleText) && !WikiRegexes.Includeonly.IsMatch(articleText))
        zerothSectionHidden = r1.Replace(zerothSectionHidden, "'''" + articleTitle + @"'''");

    // only try the lower-case-first variant when the replacement above changed nothing
    if (zerothSectionHiddenOriginal == zerothSectionHidden && !Regex.IsMatch(zerothSection, @"'''" + Tools.TurnFirstToLower(escTitle) + @"'''"))
        zerothSectionHidden = r2.Replace(zerothSectionHidden, "'''" + Tools.TurnFirstToLower(articleTitle) + @"'''");

    zerothSection = Hider2.AddBackMore(zerothSectionHidden);

    // a self link was converted: report the change and stop here
    if (zerothSectionHiddenOriginal != zerothSectionHidden)
    {
        noChange = false;
        return (zerothSection + restOfArticle);
    }

    // ignore date articles (date in American or international format)
    if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle))
        return articleTextAtStart;

    Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
    Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

    //if title in bold already exists in article, or page starts with something in bold, don't change anything
    if (boldTitleAlready1.IsMatch(articleText) || boldTitleAlready2.IsMatch(articleText) || BoldTitleAlready3.IsMatch(articleText))
        return articleTextAtStart;

    // so no self links to remove, check for the need to add bold
    string articleTextHidden = Hider3.HideMore(articleText);

    // first quick check: ignore articles with some bold in first 5% of hidemore article
    int fivepc = articleTextHidden.Length / 20;

    if (articleTextHidden.Substring(0, fivepc).Contains("'''"))
    {
        //articleText = Hider3.AddBackMore(articleTextHidden);
        return articleTextAtStart;
    }

    // title (or lower-case-first title) not preceded by '[' and followed by a space or punctuation
    Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

    // first try title with brackets removed
    // the count argument of 1 limits this to the first match
    if (regexBoldNoBrackets.IsMatch(articleTextHidden))
        articleTextHidden = regexBoldNoBrackets.Replace(articleTextHidden, "$1'''$2'''$3", 1);

    articleText = Hider3.AddBackMore(articleTextHidden);

    // check that the bold added is the first bit in bold in the main body of the article
    if (AddedBoldIsValid(articleText, escTitleNoBrackets))
    {
        noChange = false;
        return articleText;
    }

    return articleTextAtStart;
}
// Covered by: BoldTitleTests
/// <summary>
/// '''Emboldens''' the first occurrence of the article title, if not already bold
/// 1) Cleans up bolded self wikilinks
/// 2) Cleans up self wikilinks
/// 3) '''Emboldens''' the first occurrence of the article title
/// Returns the input untouched when the article uses the NoBoldTitle template.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The title of the article.</param>
/// <param name="noChange">Value that indicated whether no change was made.</param>
/// <returns>The modified article text.</returns>
public string BoldTitle(string articleText, string articleTitle, out bool noChange)
{
    noChange = true;

    // template list is reused below for the NihongoTitle check
    List<string> alltemplates = GetAllTemplates(articleText);

    if(TemplateExists(alltemplates, NoBoldTitle))
        return articleText;

    // Hider2 is used for the self link step, Hider3 for the emboldening step
    HideText Hider2 = new HideText(), Hider3 = new HideText(true, true, true);

    // 1) clean up bolded self links first, provided no noinclude use in article
    string afterSelfLinks = BoldedSelfLinks(articleTitle, articleText);

    if(!afterSelfLinks.Equals(articleText) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
        articleText = afterSelfLinks;

    // 2) Clean up self wikilinks
    // articleTextAtStart is what every "no change" exit below returns; it includes any step 1 clean up
    string articleTextAtStart = articleText, zerothSection = Tools.GetZerothSection(articleText);
    string restOfArticle = articleText.Substring(zerothSection.Length);
    string zerothSectionHidden, zerothSectionHiddenOriginal;

    // first check for any self links and no bold title, if found just convert first link to bold and return
    // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
    // don't apply if bold in lead section already or some noinclude transclusion business
    if(!SelfLinks(zerothSection, articleTitle).Equals(zerothSection) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
    {
        // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
        zerothSectionHidden = Hider2.HideMore(zerothSection, false, false, false);
        zerothSectionHiddenOriginal = zerothSectionHidden;
        zerothSectionHidden = SelfLinks(zerothSectionHidden, articleTitle);
        zerothSection = Hider2.AddBackMore(zerothSectionHidden);

        // a self link was cleaned up in the hidden text: report the change and stop here
        if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden))
        {
            noChange = false;
            return (zerothSection + restOfArticle);
        }
    }

    // Performance check: if article title not in zeroth section have nothing further to do
    if(zerothSection.IndexOf(BracketedAtEndOfLine.Replace(articleTitle, ""), StringComparison.OrdinalIgnoreCase) < 0)
        return articleTextAtStart;

    // 3) '''Emboldens''' the first occurrence of the article title

    // ignore date articles (date in American or international format), nihongo title
    if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle) || TemplateExists(alltemplates, NihongoTitle))
        return articleTextAtStart;

    string escTitle = Regex.Escape(articleTitle), escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));

    Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
    Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

    // if title in bold already exists in article, or paragraph starts with something in bold, don't change anything
    // ignore any bold in infoboxes
    if(BoldTitleAlready4.IsMatch(Tools.ReplaceWithSpaces(zerothSection, WikiRegexes.InfoBox.Matches(zerothSection))) || DfnTag.IsMatch(zerothSection))
        return articleTextAtStart;

    string articleTextNoInfobox = Tools.ReplaceWithSpaces(articleText, WikiRegexes.InfoBox.Matches(articleText));
    if (boldTitleAlready1.IsMatch(articleTextNoInfobox) || boldTitleAlready2.IsMatch(articleTextNoInfobox) || BoldTitleAlready3.IsMatch(articleTextNoInfobox))
        return articleTextAtStart;

    // so no self links to remove, check for the need to add bold
    string articleTextNoTemplates = WikiRegexes.NestedTemplates.Replace(articleText, "");

    // first quick check: ignore articles with some bold in first 5% of article, ignoring infoboxes, dablinks etc.
    int fivepc = articleTextNoTemplates.Length / 20;

    if (articleTextNoTemplates.Substring(0, fivepc).Contains("'''"))
        return articleTextAtStart;

    // title (or lower-case-first title) not preceded by '[' and followed by a space or punctuation
    Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

    // only the zeroth section is hidden and edited; the rest of the article is re-appended as is
    zerothSectionHidden = Hider3.HideMore(zerothSection);
    zerothSectionHiddenOriginal = zerothSectionHidden;

    // first try title with brackets removed
    // the count argument of 1 limits this to the first match
    zerothSectionHidden = regexBoldNoBrackets.Replace(zerothSectionHidden, "$1'''$2'''$3", 1);

    zerothSection = Hider3.AddBackMore(zerothSectionHidden);

    articleText = zerothSection + restOfArticle;

    // check that the bold added is the first bit in bold in the main body of the article
    if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden) && AddedBoldIsValid(articleText, escTitleNoBrackets))
    {
        noChange = false;
        return articleText;
    }

    return articleTextAtStart;
}
/// <summary>
/// Reports whether any loaded typo group would change the page.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>True as soon as one typo group records a fix; false if none does, no typos are loaded, or the text matches the ignore regex.</returns>
public bool DetectTypo(string articleText, string articleTitle)
{
    string originalArticleText = articleText;

    bool skip = TypoCount == 0 || IgnoreRegex.IsMatch(articleText);
    if (skip)
    {
        return false;
    }

    HideText hider = new HideText(true, false, true);
    articleText = hider.HideMore(articleText, true);

    // cut newlines, whitespace and hide tokens off the bottom
    // so the typo regexes are not run over them
    Match tailMatch = RemoveTail.Match(articleText);
    if (tailMatch.Success)
    {
        articleText = articleText.Remove(tailMatch.Index);
    }

    string fixesMade = "";

    foreach (TypoGroup typoGroup in Groups)
    {
        typoGroup.FixTypos(ref articleText, ref fixesMade, articleTitle, originalArticleText);

        // the first recorded fix is enough to answer the question
        if (fixesMade.Length != 0)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Applies every typo group to the article text.
/// Nothing is done when no typos are loaded or the text matches the ignore regex.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">True when the typo groups left the text unchanged, or the fixes were skipped.</param>
/// <param name="Summary">Typo summary tag plus the fixes made, or empty when nothing was recorded.</param>
/// <returns>The modified article text.</returns>
public string PerformTypoFixes(string ArticleText, out bool NoChange, out string Summary)
{
    Summary = "";

    bool skip = TyposCount == 0 || IgnoreRegex.IsMatch(ArticleText);
    if (skip)
    {
        NoChange = true;
        return ArticleText;
    }

    HideText hider = new HideText(true, false, true);
    string working = hider.HideMore(ArticleText);

    // cut newlines, whitespace and hide tokens off the bottom
    // so the typo regexes are not run over them
    Match tailMatch = RemoveTail.Match(working);
    string tail = tailMatch.Value;
    if (tail.Length != 0)
    {
        working = working.Remove(tailMatch.Index);
    }

    string beforeFixes = working;
    string fixesMade = "";

    foreach (TypoGroup typoGroup in Groups)
    {
        typoGroup.FixTypos(ref working, ref fixesMade);
    }

    NoChange = (beforeFixes == working);

    // re-attach the tail before restoring the hidden text
    working = hider.AddBackMore(working + tail);

    if (!string.IsNullOrEmpty(fixesMade))
    {
        Summary = Variables.TypoSummaryTag + fixesMade.Trim();
    }

    return working;
}
/// <summary>
/// Applies a series of defined find and replacements to the supplied article text.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="strTitle">Article title, passed through to each find and replace.</param>
/// <param name="beforeOrAfter">False if "before", true if "after"</param>
/// <param name="editSummary">
/// When adding to the summary is ticked: set to the localised "replaced" summary if any replacements were
/// recorded, then the localised "removed" summary is appended (after a separator if the summary is non-empty).
/// </param>
/// <param name="majorChangesMade">True if at least one replacement not marked as minor changed the text.</param>
/// <returns>The modified article text.</returns>
public string MultipleFindAndReplace(string articleText, string strTitle, bool beforeOrAfter, ref string editSummary, out bool majorChangesMade)
{
    majorChangesMade = false;

    if (!HasReplacements)
    {
        return articleText;
    }

    ReplacedSummary = "";
    RemovedSummary = "";

    // hide the parts of the text the user asked to ignore
    if (chkIgnoreMore.Checked)
    {
        articleText = _remove.HideMore(articleText);
    }
    else if (chkIgnoreLinks.Checked)
    {
        articleText = _remove.Hide(articleText);
    }

    foreach (Replacement rep in _replacementList)
    {
        // only run enabled rules belonging to the requested pass
        if (!rep.Enabled || rep.BeforeOrAfter != beforeOrAfter)
        {
            continue;
        }

        bool changeMade;
        articleText = PerformFindAndReplace(rep, articleText, strTitle, out changeMade);

        if (changeMade && !rep.Minor)
        {
            majorChangesMade = true;
        }
    }

    if (chkIgnoreMore.Checked)
    {
        // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs#FormatException_in_HideText.AddBackMore
        // FIXME: Usages of IgnoreMore with number (or M) replacement done in the FindAndReplace can cause corruption
        // e.g. Replacing 2 with "" ⌊⌊⌊⌊M2⌋⌋⌋⌋ becomes ⌊⌊⌊⌊M⌋⌋⌋⌋
        // This cannot then be added back
        articleText = _remove.AddBackMore(articleText);
    }
    else if (chkIgnoreLinks.Checked)
    {
        articleText = _remove.AddBack(articleText);
    }

    if (!chkAddToSummary.Checked)
    {
        return articleText;
    }

    if (!string.IsNullOrEmpty(ReplacedSummary))
    {
        string replacedPrefix;

        if (Variables.LangCode.Equals("ar"))
        {
            replacedPrefix = "استبدل: ";
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            replacedPrefix = "غير: ";
        }
        else if (Variables.LangCode.Equals("el"))
        {
            replacedPrefix = "αντικατέστησε: ";
        }
        else if (Variables.LangCode.Equals("eo"))
        {
            replacedPrefix = "anstataŭigis: ";
        }
        else if (Variables.LangCode.Equals("fr"))
        {
            replacedPrefix = "remplacement: ";
        }
        else if (Variables.LangCode.Equals("hy"))
        {
            replacedPrefix = "փոխարինվեց: ";
        }
        else if (Variables.LangCode.Equals("tr"))
        {
            replacedPrefix = "değiştirildi: ";
        }
        else
        {
            replacedPrefix = "replaced: ";
        }

        // Assigned for every language: the default branch used to append instead, which glued
        // "replaced: ..." onto any incoming summary with no separator
        editSummary = replacedPrefix + ReplacedSummary.Trim();
    }

    if (!string.IsNullOrEmpty(RemovedSummary))
    {
        // separate from whatever is already in the summary
        if (!string.IsNullOrEmpty(editSummary))
        {
            if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz") || Variables.LangCode.Equals("fa"))
            {
                editSummary += "، ";
            }
            else
            {
                editSummary += ", ";
            }
        }

        string removedPrefix;

        if (Variables.LangCode.Equals("ar"))
        {
            removedPrefix = "أزال: ";
        }
        else if (Variables.LangCode.Equals("arz"))
        {
            removedPrefix = "شال: ";
        }
        else if (Variables.LangCode.Equals("el"))
        {
            removedPrefix = "αφαίρεσε: ";
        }
        else if (Variables.LangCode.Equals("eo"))
        {
            removedPrefix = "forigis: ";
        }
        else if (Variables.LangCode.Equals("fr"))
        {
            removedPrefix = "retrait: ";
        }
        else if (Variables.LangCode.Equals("hy"))
        {
            removedPrefix = "ջնջվեց: ";
        }
        else
        {
            removedPrefix = "removed: ";
        }

        editSummary += removedPrefix + RemovedSummary.Trim();
    }

    return articleText;
}
/// <summary>
/// Test helper: replaces <c>Hider</c> with a default-constructed <c>HideText</c> and hides the text with it.
/// </summary>
/// <param name="text">Wiki text to hide.</param>
/// <param name="HideOnlyTargetOfWikilink">Forwarded unchanged to <c>HideText.HideMore</c>.</param>
/// <returns>The text as returned by <c>HideText.HideMore</c>.</returns>
private string HideMore(string text, bool HideOnlyTargetOfWikilink)
{
    Hider = new HideText();

    string hidden = Hider.HideMore(text, HideOnlyTargetOfWikilink);
    return hidden;
}
/// <summary>
/// Test helper: replaces <c>Hider</c> with a <c>HideText</c> built from the three flags and hides the text with it.
/// </summary>
/// <param name="text">Wiki text to hide.</param>
/// <param name="HideExternalLinks">Passed as the first <c>HideText</c> constructor argument.</param>
/// <param name="LeaveMetaHeadings">Passed as the second <c>HideText</c> constructor argument.</param>
/// <param name="HideImages">Passed as the third <c>HideText</c> constructor argument.</param>
/// <returns>The text as returned by <c>HideText.HideMore</c>.</returns>
private string HideMore(string text, bool HideExternalLinks, bool LeaveMetaHeadings, bool HideImages)
{
    Hider = new HideText(HideExternalLinks, LeaveMetaHeadings, HideImages);

    string hidden = Hider.HideMore(text);
    return hidden;
}
/// <summary>
/// Decides whether the bold just added to the article is acceptable: either it is the first bold
/// in the article and bold occurs within the first 5% of the HideMore article, or the bold title
/// comes immediately after the infobox.
/// </summary>
/// <param name="articleText">The wiki text of the article, after the bold was added.</param>
/// <param name="escapedTitle">The regex-escaped title that was made bold.</param>
/// <returns>True if either check passes.</returns>
private bool AddedBoldIsValid(string articleText, string escapedTitle)
{
    HideText hider = new HideText(true, true, true);

    // everything up to and including the bolded title, minus the title itself
    Regex upToBoldTitle = new Regex(@"^(.*?)'''" + escapedTitle, RegexOptions.Singleline);
    int titleLength = Regex.Unescape(escapedTitle).Length;
    int boldAddedPos = upToBoldTitle.Match(articleText).Length - titleLength;

    int firstBoldPos = RegexFirstBold.Match(articleText).Length;

    string hiddenText = hider.HideMore(articleText);

    // was bold added in first 5% of article?
    string leadingPart = hiddenText.Substring(0, hiddenText.Length / 20);
    bool inFirst5Percent = leadingPart.Contains("'''");

    // first check: the bold added is the first bold in the main body, and in the first 5% of the HideMore article
    bool firstBoldNearStart = inFirst5Percent && boldAddedPos <= firstBoldPos;
    if (firstBoldNearStart)
    {
        return true;
    }

    // second check: bold just after infobox, tested against the unhidden text
    Regex boldAfterInfobox = new Regex(WikiRegexes.InfoBox + @"\s*'''" + escapedTitle);
    return boldAfterInfobox.IsMatch(articleText);
}
/// <summary>
/// Performs typo fixes against the article text.
/// Typo fixes are skipped if no typos are loaded or the text matches the ignore regex.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="noChange">True when the typo groups left the text unchanged, or the fixes were skipped.</param>
/// <param name="summary">Empty when skipped; otherwise the typo summary tag followed by the trimmed list of fixes.</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>The article text with typo fixes applied and hidden text restored.</returns>
public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
{
    string originalArticleText = articleText;
    summary = "";

    if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
    {
        noChange = true;
        return articleText;
    }

    HideText hider = new HideText(true, false, true);
    string working = hider.HideMore(articleText, true);

    // cut newlines, whitespace and hide tokens off the bottom
    // so the typo regexes are not run over them
    Match tailMatch = RemoveTail.Match(working);
    string tail = tailMatch.Value;
    if (tail.Length != 0)
    {
        working = working.Remove(tailMatch.Index);
    }

    string beforeFixes = working;
    string fixesMade = "";

    foreach (TypoGroup typoGroup in Groups)
    {
        typoGroup.FixTypos(ref working, ref fixesMade, articleTitle, originalArticleText);
    }

    noChange = beforeFixes.Equals(working);
    summary = Variables.TypoSummaryTag + fixesMade.Trim();

    // re-attach the tail before restoring the hidden text
    return hider.AddBackMore(working + tail);
}
/// <summary>
/// Test helper: replaces <c>hider</c> with a default-constructed <c>HideText</c> and runs HideMore over the text.
/// </summary>
/// <param name="text">Wiki text to hide.</param>
/// <returns>The text as returned by <c>HideText.HideMore</c>.</returns>
private string Hide(string text)
{
    hider = new HideText();

    string hidden = hider.HideMore(text);
    return hidden;
}
/// <summary>
/// Performs typo fixes against the article text in multi-threaded mode
/// Typo fixes not performed if no typos loaded or the text matches the ignore regex
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="noChange">True if no typos fixed</param>
/// <param name="summary">Edit summary</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>Updated article text</returns>
public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
{
    string originalArticleText = articleText;
    summary = "";

    if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
    {
        noChange = true;
        return articleText;
    }

    HideText removeText = new HideText(true, false, true);

    articleText = removeText.HideMore(articleText, true);

    // remove newlines, whitespace and hide tokens from bottom
    // to avoid running 2K regexps on them
    Match m = RemoveTail.Match(articleText);
    string tail = m.Value;
    if (!string.IsNullOrEmpty(tail))
        articleText = articleText.Remove(m.Index);

    string originalText = articleText;
    string strSummary = "";

    /* Run typos threaded, one thread per group for better performance
     * http://stackoverflow.com/questions/13776846/pass-paramters-through-parameterizedthreadstart
     * http://www.dotnetperls.com/parameterizedthreadstart
     * http://stackoverflow.com/questions/831009/thread-with-multiple-parameters
     */
    resultSummary.Clear();
    resultArticleText.Clear();
    Thread[] array = new Thread[Groups.Count];

    int i = 0;
    foreach (TypoGroup tg in Groups)
    {
        // Per-iteration copy: compilers before C# 5 share one foreach variable across
        // iterations, so capturing tg directly could run every thread on the same group
        TypoGroup group = tg;

        array[i] = new Thread(
            delegate()
            {
                group.FixTypos2(articleText, strSummary, articleTitle, originalArticleText);
            });
        array[i].Start();
        i++;
    }

    // Join all the threads: wait for all to complete
    foreach (Thread t in array)
    {
        t.Join();
    }

    // Merge the per-group results in group order; results are looked up by GroupSize
    foreach (TypoGroup tg in Groups)
    {
        string groupSummary;
        resultSummary.TryGetValue(tg.GroupSize, out groupSummary);

        string groupArticleText;
        resultArticleText.TryGetValue(tg.GroupSize, out groupArticleText);

        // TryGetValue leaves the out value null when there is no entry for the group:
        // treat that the same as "no fixes" instead of throwing on .Length
        if (string.IsNullOrEmpty(groupSummary))
            continue;

        if (strSummary.Length > 0)
        {
            // earlier thread had changes, so need to re-run this one
            tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
        }
        else
        {
            strSummary = groupSummary;
            articleText = groupArticleText;
        }
    }

    noChange = originalText.Equals(articleText);

    summary = Variables.TypoSummaryTag + strSummary.Trim();

    return removeText.AddBackMore(articleText + tail);
}
/// <summary>
/// Runs every enabled replacement rule of the requested pass over the supplied article text.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="strTitle">Article title, passed through to each find and replace.</param>
/// <param name="beforeOrAfter">False if "before", true if "after"</param>
/// <param name="editSummary">Receives the "replaced"/"removed" summary when adding to the summary is ticked and something was recorded.</param>
/// <param name="majorChangesMade">True if at least one replacement not marked as minor changed the text.</param>
/// <returns>The modified article text.</returns>
public string MultipleFindAndReplace(string articleText, string strTitle, bool beforeOrAfter, ref string editSummary, out bool majorChangesMade)
{
    majorChangesMade = false;

    if (!HasReplacements)
    {
        return articleText;
    }

    _replacedSummary = "";
    _removedSummary = "";

    // hide the parts of the text the user asked to ignore
    if (chkIgnoreMore.Checked)
    {
        articleText = _remove.HideMore(articleText);
    }
    else if (chkIgnoreLinks.Checked)
    {
        articleText = _remove.Hide(articleText);
    }

    foreach (Replacement rule in _replacementList)
    {
        // only enabled rules belonging to the requested pass are run
        if (rule.Enabled && rule.BeforeOrAfter == beforeOrAfter)
        {
            bool ruleChangedText;
            articleText = PerformFindAndReplace(rule, articleText, strTitle, out ruleChangedText);

            if (ruleChangedText && !rule.Minor)
            {
                majorChangesMade = true;
            }
        }
    }

    // put the hidden parts back, mirroring the hide step above
    if (chkIgnoreMore.Checked)
    {
        articleText = _remove.AddBackMore(articleText);
    }
    else if (chkIgnoreLinks.Checked)
    {
        articleText = _remove.AddBack(articleText);
    }

    if (!chkAddToSummary.Checked)
    {
        return articleText;
    }

    if (!string.IsNullOrEmpty(_replacedSummary))
    {
        editSummary = "replaced: " + _replacedSummary.Trim();
    }

    if (!string.IsNullOrEmpty(_removedSummary))
    {
        // separate from whatever is already in the summary
        if (!string.IsNullOrEmpty(editSummary))
        {
            editSummary += ", ";
        }

        editSummary += "removed: " + _removedSummary.Trim();
    }

    return articleText;
}
/// <summary>
/// Image and file links (complete, or cut off at the first pipe) and imagemap blocks are hidden;
/// also checks when a category sort key is hidden or kept.
/// </summary>
public void HideImages()
{
    string[] allHiddenCases =
    {
        // complete links
        @"[[File:foo.jpg]]",
        @"[[File:foo with space and 0004.jpg]]",
        @"[[File:foo.jpeg]]",
        @"[[File:foo.JPEG]]",
        @"[[Image:foo with space and 0004.jpeg]]",
        @"[[Image:foo.jpeg]]",
        @"[[Image:foo with space and 0004.jpg]]",
        // links up to the first pipe
        @"[[File:foo.jpg|",
        @"[[File:foo with space and 0004.jpg|",
        @"[[File:foo.jpeg|",
        @"[[Image:foo with space and 0004.jpeg|",
        @"[[Image:foo.jpeg|",
        @"[[Image:foo with SPACE() and 0004.jpg|",
        @"[[File:foo.gif|",
        @"[[Image:foo with space and 0004.gif|",
        @"[[Image:foo.gif|",
        @"[[Image:foo with SPACE() and 0004.gif|",
        @"[[File:foo.png|",
        @"[[Image:foo with space and 0004.png|",
        @"[[Image:foo_here.png|",
        @"[[Image:foo with SPACE() and 0004.png|",
        @"[[Image:westminster.tube.station.jubilee.arp.jpg|",
        // imagemap block
        @"<imagemap> File:Blogs001.jpeg|Description File:Blogs002.jpeg|Description </imagemap>"
    };

    foreach (string wikiText in allHiddenCases)
    {
        AssertAllHidden(wikiText);
    }

    string[] bothHiddenCases =
    {
        @"[[File:foo.jpg]]",
        @"[[Image:foo with space and 0004.png|",
        @"[[Image:foo_here.png|"
    };

    foreach (string wikiText in bothHiddenCases)
    {
        AssertBothHidden(wikiText);
    }

    // category sort keys
    const string category = @"[[Category:Foo|abc]]";

    Assert.IsFalse(HideMore(category, false).Contains("abc"), "Category sort key always hidden if hiding wikilinks and not leaving target");
    Assert.IsFalse(HideMore(category, true).Contains("abc"), "Category sort key hidden even if keeping targets");

    HideText keepWikilinks = new HideText(true, false, false);
    string keptText = keepWikilinks.HideMore(category, false, false);
    Assert.IsTrue(keptText.Contains("abc"), "Category sort key kept if keeping wikilinks");
}
/// <summary>
/// Performs typo fixes against the article text in multi-threaded mode
/// Typo fixes not performed if no typos loaded or if IgnoreRegex matches the page (any sic tags on page)
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="noChange">True if no typos fixed</param>
/// <param name="summary">Edit summary: empty when fixes are skipped, otherwise Variables.TypoSummaryTag followed by the collected fix summary</param>
/// <param name="articleTitle">Title of the article</param>
/// <returns>Updated article text</returns>
public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
{
    string originalArticleText = articleText;
    summary = "";

    if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
    {
        noChange = true;
        return articleText;
    }

    HideText removeText = new HideText(true, false, true);
    articleText = removeText.HideMore(articleText, true);

    //remove newlines, whitespace and hide tokens from bottom
    //to avoid running 2K regexps on them
    Match m = RemoveTail.Match(articleText);
    string tail = m.Value;
    if (!string.IsNullOrEmpty(tail))
    {
        articleText = articleText.Remove(m.Index);
    }

    string originalText = articleText;
    string strSummary = "";

    /* Run typos threaded, one thread per group for better performance
     * http://stackoverflow.com/questions/13776846/pass-paramters-through-parameterizedthreadstart
     * http://www.dotnetperls.com/parameterizedthreadstart
     * http://stackoverflow.com/questions/831009/thread-with-multiple-parameters
     */
    resultSummary.Clear();
    resultArticleText.Clear();

    Thread[] array = new Thread[Groups.Count];
    int i = 0;
    foreach (TypoGroup tg in Groups)
    {
        // Copy the iteration variable: compilers older than C# 5 share one foreach
        // variable across all iterations, so a delegate capturing tg directly could
        // end up running a later group than the one it was created for.
        TypoGroup threadGroup = tg;

        array[i] = new Thread(delegate(object unused)
        {
            threadGroup.FixTypos2(articleText, strSummary, articleTitle, originalArticleText);
        });
        array[i].Start(i);
        i++;
    }

    // Join all the threads: wait for all to complete
    for (int j = 0; j < array.Length; j++)
    {
        array[j].Join();
    }

    // Merge the per-group results, looked up by the group's GroupSize
    foreach (TypoGroup tg in Groups)
    {
        string groupSummary, groupArticleText;
        resultSummary.TryGetValue(tg.GroupSize, out groupSummary);
        resultArticleText.TryGetValue(tg.GroupSize, out groupArticleText);

        // TryGetValue leaves the out value null when no entry exists;
        // a missing or empty summary is treated as "this group changed nothing"
        if (string.IsNullOrEmpty(groupSummary))
        {
            continue;
        }

        if (strSummary.Length > 0 || groupArticleText == null)
        {
            // earlier thread had changes (or this group's text result is unavailable),
            // so need to re-run this one against the current text
            tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
        }
        else
        {
            strSummary = groupSummary;
            articleText = groupArticleText;
        }
    }

    noChange = originalText.Equals(articleText);
    summary = Variables.TypoSummaryTag + strSummary.Trim();

    return removeText.AddBackMore(articleText + tail);
}
/// <summary>
/// Checks that the bold just added to the article is the first bold in the article, and that some bold markup
/// lies within the first 5% (at least the first 5 characters) of the article text after templates have been
/// removed and HideMore has been applied
/// </summary>
/// <param name="articleText">The wiki text of the article, with the bold already added</param>
/// <param name="escapedTitle">The title that was made bold; inserted verbatim into a regex pattern, so it must already be regex-escaped</param>
/// <returns>True if the added bold passes both checks</returns>
private bool AddedBoldIsValid(string articleText, string escapedTitle)
{
    HideText Hider2 = new HideText(true, true, true);

    Regex RegexBoldAdded = new Regex(@"^(.*?)'''(" + escapedTitle + @")", RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // position of the title text that follows the first matching ''' markup
    int boldAddedPos = RegexBoldAdded.Match(articleText).Groups[2].Index;

    // NOTE(review): the match length is used as a position; presumably RegexFirstBold is anchored at the start of the text - confirm
    int firstBoldPos = RegexFirstBold.Match(articleText).Length;

    articleText = WikiRegexes.NestedTemplates.Replace(articleText, "");
    articleText = Hider2.HideMore(articleText);

    // was bold added in first 5% of article?
    bool inFirst5Percent = false;
    int articlelength = articleText.Length;

    if (articlelength > 5)
    {
        // The window size is derived from the untrimmed length, so clamp it to the trimmed
        // text: otherwise Substring throws when trimming leaves fewer characters than the window.
        string trimmedText = articleText.Trim();
        int windowLength = Math.Min(trimmedText.Length, Math.Max(articlelength / 20, 5));
        inFirst5Percent = trimmedText.Substring(0, windowLength).Contains("'''");
    }

    // The hidden text is not needed past this point, so it is not restored with AddBackMore.

    // check that the bold added is the first bit in bold in the main body of the article, and in first 5% of HideMore article
    return inFirst5Percent && boldAddedPos <= firstBoldPos;
}
// Checks that Hide removes the file name from a range of File:/Image: link forms while
// leaving the opening brackets and the caption in place, then checks an imagemap and
// how category sort keys are treated by HideMore.
public void HideImages()
{
    // { wikitext, assertion message }: "foo" must not survive hiding
    string[,] fooCases =
    {
        { @"[[File:foo.jpg]]", "Standard case" },
        { @"[[File:foo with space and 0004.jpg]]", "with space" },
        { @"[[File:foo.jpeg]]", "jpeg" },
        { @"[[File:foo.JPEG]]", "JPEG" },
        { @"[[Image:foo with space and 0004.jpeg]]", "space and jpeg" },
        { @"[[Image:foo.jpeg]]", "Image jpeg" },
        { @"[[Image:foo with space and 0004.jpg]]", "image jpeg space" },
        { @"[[File:foo.jpg|", "To pipe" },
        { @"[[File:foo with space and 0004.jpg|", "Space to pipe" },
        { @"[[File:foo.jpeg|", "Standard case" },
        { @"[[Image:foo with space and 0004.jpeg|", "Standard case" },
        { @"[[Image:foo.jpeg|", "Standard case" },
        { @"[[Image:foo with SPACE() and 0004.jpg|", "Standard case" },
        { @"[[File:foo.gif|", "Standard case" },
        { @"[[Image:foo with space and 0004.gif|", "Standard case" },
        { @"[[Image:foo.gif|", "Standard case" },
        { @"[[Image:foo with SPACE() and 0004.gif|", "Standard case" },
        { @"[[File:foo.png|", "Standard case" },
        { @"[[Image:foo with space and 0004.png|", "Standard case" },
        { @"[[Image:foo_here.png|", "Standard case" },
        { @"[[Image:foo with SPACE() and 0004.png|", "Standard case" }
    };

    for (int row = 0; row < fooCases.GetLength(0); row++)
    {
        string hidden = Hide(fooCases[row, 0]);
        Assert.IsFalse(hidden.Contains("foo"), fooCases[row, 1]);
    }

    string dotNameHidden = Hide(@"[[Image:westminster.tube.station.jubilee.arp.jpg|");
    Assert.IsFalse(dotNameHidden.Contains("westminster.tube.station.jubilee.arp"), "Dot name");

    // Image with a caption that itself contains wikilinks
    const string captionedImage = @"[[File:foo.jpg|thumb|140px|[[Jo]] Assistant [[Ge]]]]";
    Assert.IsTrue(Hide(captionedImage).StartsWith("[["), "Retain starting brackets");
    Assert.IsTrue(Hide(captionedImage).Contains(@"thumb|140px|[[Jo]] Assistant [[Ge]]]]"), "Retain ending brackets");

    AssertAllHidden(@"<imagemap> File:Blogs001.jpeg|Description File:Blogs002.jpeg|Description </imagemap>");

    // Category sort key handling
    const string categoryLink = @"[[Category:Foo|abc]]";

    string hiddenNotKeepingTarget = HideMore(categoryLink, false);
    Assert.IsFalse(hiddenNotKeepingTarget.Contains("abc"), "Category sort key always hidden if hiding wikilinks and not leaving target");

    string hiddenKeepingTarget = HideMore(categoryLink, true);
    Assert.IsFalse(hiddenKeepingTarget.Contains("abc"), "Category sort key hidden even if keeping targets");

    HideText h = new HideText(true, false, false);
    string wikilinksKept = h.HideMore(categoryLink, false, false);
    Assert.IsTrue(wikilinksKept.Contains("abc"), "Category sort key kept if keeping wikilinks");
}
// Covered by: BoldTitleTests
/// <summary>
/// '''Emboldens''' the first occurrence of the article title, if not already bold
/// 1) Cleans up bolded self wikilinks
/// 2) Cleans up self wikilinks
/// 3) '''Emboldens''' the first occurrence of the article title
/// Nothing is done when the article uses a template listed in NoBoldTitle.
/// </summary>
/// <param name="articleText">The wiki text of the article.</param>
/// <param name="articleTitle">The title of the article.</param>
/// <param name="noChange">Value that indicates whether no change was made: set to false only when step 2 or step 3 changed the text.</param>
/// <returns>The modified article text.</returns>
public string BoldTitle(string articleText, string articleTitle, out bool noChange)
{
    noChange = true;
    List <string> alltemplates = GetAllTemplates(articleText);

    // article opts out of bold title handling via one of the NoBoldTitle templates
    if (TemplateExists(alltemplates, NoBoldTitle))
    {
        return(articleText);
    }

    // Hider2 is used for the self link clean up (step 2), Hider3 for adding the bold (step 3)
    HideText Hider2 = new HideText(), Hider3 = new HideText(true, true, true);

    // 1) clean up bolded self links first, provided no noinclude use in article
    string afterSelfLinks = BoldedSelfLinks(articleTitle, articleText);

    if (!afterSelfLinks.Equals(articleText) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
    {
        articleText = afterSelfLinks;
    }

    // 2) Clean up self wikilinks
    // articleTextAtStart is taken after step 1, so every "nothing further to do" return below
    // hands back the step 1 result while noChange is still true.
    // NOTE(review): a caller that discards the text when noChange is true would lose the step 1 clean up - confirm this is intended
    string articleTextAtStart = articleText, zerothSection = Tools.GetZerothSection(articleText);
    string restOfArticle = articleText.Substring(zerothSection.Length);
    string zerothSectionHidden, zerothSectionHiddenOriginal;

    // first check for any self links and no bold title, if found just convert first link to bold and return
    // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
    // don't apply if bold in lead section already or some noinclude transclusion business
    if (!SelfLinks(zerothSection, articleTitle).Equals(zerothSection) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
    {
        // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
        zerothSectionHidden = Hider2.HideMore(zerothSection, false, false, false);
        zerothSectionHiddenOriginal = zerothSectionHidden;
        zerothSectionHidden = SelfLinks(zerothSectionHidden, articleTitle);
        zerothSection = Hider2.AddBackMore(zerothSectionHidden);

        // self link clean up changed the hidden zeroth section: report the change and stop here
        if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden))
        {
            noChange = false;
            return(zerothSection + restOfArticle);
        }
    }

    // Performance check: if article title not in zeroth section have nothing further to do
    // (the title is searched for case-insensitively, after BracketedAtEndOfLine matches are removed from it)
    if (zerothSection.IndexOf(BracketedAtEndOfLine.Replace(articleTitle, ""), StringComparison.OrdinalIgnoreCase) < 0)
    {
        return(articleTextAtStart);
    }

    // 3) '''Emboldens''' the first occurrence of the article title

    // ignore date articles (date in American or international format), nihongo title
    if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle) || TemplateExists(alltemplates, NihongoTitle))
    {
        return(articleTextAtStart);
    }

    // regex-escaped title, with and without the part removed by BracketedAtEndOfLine
    string escTitle = Regex.Escape(articleTitle), escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));

    // match the title already in bold, as written or with the first letter lowered by Tools.TurnFirstToLower
    Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
    Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

    // if title in bold already exists in article, or paragraph starts with something in bold, don't change anything
    // ignore any bold in infoboxes
    if (BoldTitleAlready4.IsMatch(Tools.ReplaceWithSpaces(zerothSection, WikiRegexes.InfoBox.Matches(zerothSection))) || DfnTag.IsMatch(zerothSection))
    {
        return(articleTextAtStart);
    }

    string articleTextNoInfobox = Tools.ReplaceWithSpaces(articleText, WikiRegexes.InfoBox.Matches(articleText));
    if (boldTitleAlready1.IsMatch(articleTextNoInfobox) || boldTitleAlready2.IsMatch(articleTextNoInfobox) || BoldTitleAlready3.IsMatch(articleTextNoInfobox))
    {
        return(articleTextAtStart);
    }

    // so no self links to remove, check for the need to add bold
    string articleTextNoTemplates = WikiRegexes.NestedTemplates.Replace(articleText, "");

    // first quick check: ignore articles with some bold in first 5% of article, ignoring infoboxes, dablinks etc.
    int fivepc = articleTextNoTemplates.Length / 20;

    if (articleTextNoTemplates.Substring(0, fivepc).Contains("'''"))
    {
        return(articleTextAtStart);
    }

    // title (brackets removed) not directly preceded by '[' and followed by a space or one of , . : ;
    Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

    zerothSectionHidden = Hider3.HideMore(zerothSection);
    zerothSectionHiddenOriginal = zerothSectionHidden;

    // first try title with brackets removed
    // (final argument 1: only the first match is wrapped in ''')
    zerothSectionHidden = regexBoldNoBrackets.Replace(zerothSectionHidden, "$1'''$2'''$3", 1);

    zerothSection = Hider3.AddBackMore(zerothSectionHidden);

    articleText = zerothSection + restOfArticle;

    // check that the bold added is the first bit in bold in the main body of the article
    if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden) && AddedBoldIsValid(articleText, escTitleNoBrackets))
    {
        noChange = false;
        return(articleText);
    }

    // bold was not added, or the added bold failed validation: discard it
    return(articleTextAtStart);
}
/// <summary>
/// Fixes date and decade formatting errors, and collapses runs of two or more &lt;br&gt; tags into one line break.
/// Both steps work on text that has been through HideMore; the hidden parts are restored before returning.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text.</returns>
public string FixDates(string ArticleText)
{
    HideText hider = new HideText();
    string hiddenText = hider.HideMore(ArticleText);

    string datesFixed = FixDatesRaw(hiddenText).Trim();

    // Remove 2 or more <br />'s
    // Doing this here is counter-intuitive, but it needs HideMore(), which is slow,
    // so it shares the call already made for the date fixes --MaxSem
    string breaksCollapsed = Regex.Replace(datesFixed, @"(<br[\s/]*> *){2,}", "\r\n", RegexOptions.IgnoreCase);

    return hider.AddBackMore(breaksCollapsed);
}