AddBackMore() публичный метод

Adds back the content that was previously hidden by HideMore
public AddBackMore ( string articleText ) : string
articleText string
Результат string
Пример #1
0
        /// <summary>
        /// Checks that the bold just added to the article is the first bold in the article, and that it's within the first 5% of the HideMore article OR immediately after the infobox
        /// </summary>
        /// <param name="articleText">The wiki text of the article, after the bold markup was added.</param>
        /// <param name="escapedTitle">The article title, already escaped for use inside a regular expression.</param>
        /// <returns>
        /// True when bold markup occurs in the leading part of the template-stripped, hidden text
        /// and the position of the added bold is not greater than the length of the RegexFirstBold match.
        /// </returns>
        private bool AddedBoldIsValid(string articleText, string escapedTitle)
        {
            HideText Hider2 = new HideText(true, true, true);
            Regex RegexBoldAdded = new Regex(@"^(.*?)'''(" + escapedTitle + @")", RegexOptions.Singleline | RegexOptions.IgnoreCase);

            // both positions are measured on the text as passed in, before templates are stripped
            int boldAddedPos = RegexBoldAdded.Match(articleText).Groups[2].Index;

            int firstBoldPos = RegexFirstBold.Match(articleText).Length;

            articleText = WikiRegexes.NestedTemplates.Replace(articleText, "");

            articleText = Hider2.HideMore(articleText);

            // was bold added in first 5% of article?
            bool inFirst5Percent = false;

            int articlelength = articleText.Length;

            if (articlelength > 5)
            {
                // The prefix size is derived from the untrimmed length, so it can exceed the
                // trimmed text when the hidden text is mostly whitespace. Clamp it so that
                // Substring cannot throw ArgumentOutOfRangeException.
                string trimmedText  = articleText.Trim();
                int    prefixLength = Math.Min(trimmedText.Length, Math.Max(articlelength / 20, 5));

                inFirst5Percent = trimmedText.Substring(0, prefixLength).Contains("'''");
            }

            // NOTE(review): the restored text is not read below; call kept as in the original
            articleText = Hider2.AddBackMore(articleText);

            // check that the bold added is the first bit in bold in the main body of the article, and in first 5% of HideMore article
            return(inFirst5Percent && boldAddedPos <= firstBoldPos);
        }
 /// <summary>
 /// Replaces Hider with a HideText built from the three flags, hides the text,
 /// and asserts that AddBackMore turns the hidden form back into the input.
 /// </summary>
 /// <returns>The hidden form of <paramref name="text"/>.</returns>
 private string HideMore(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
 {
     Hider = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);

     string hidden   = Hider.HideMore(text);
     string restored = Hider.AddBackMore(hidden);

     // round trip must give back exactly what went in
     Assert.AreEqual(text, restored);

     return hidden;
 }
 /// <summary>
 /// Replaces Hider with a default-constructed HideText, hides the text with the given
 /// hideOnlyTargetOfWikilink flag, and asserts that AddBackMore restores the input.
 /// </summary>
 /// <returns>The hidden form of <paramref name="text"/>.</returns>
 private string HideMore(string text, bool hideOnlyTargetOfWikilink)
 {
     Hider = new HideText();

     string hidden   = Hider.HideMore(text, hideOnlyTargetOfWikilink);
     string restored = Hider.AddBackMore(hidden);

     // round trip must give back exactly what went in
     Assert.AreEqual(text, restored);

     return hidden;
 }
        /// <summary>
        /// Runs every enabled entry of ReplacementList over the supplied article text.
        /// Depending on the "ignore" check boxes the text is hidden (HideMore or Hide) before
        /// the replacements and restored afterwards.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="strTitle">Passed through to each PerformFindAndReplace call.</param>
        /// <param name="editSummary">
        /// When the "add to summary" box is ticked: overwritten with ", Replaced: ..." if EditSummary
        /// is non-empty, then has ", Removed: ..." appended if RemovedSummary is non-empty.
        /// </param>
        /// <returns>The modified article text.</returns>
        public string MultipleFindAndReplace(string articleText, string strTitle, ref string editSummary)
        {
            if (!HasReplacements)
            {
                return articleText;
            }

            // reset the summaries before the replacements run
            EditSummary    = "";
            RemovedSummary = "";

            // hide: "ignore more" takes precedence over "ignore links"
            if (chkIgnoreMore.Checked)
            {
                articleText = Remove.HideMore(articleText);
            }
            else if (chkIgnoreLinks.Checked)
            {
                articleText = Remove.Hide(articleText);
            }

            foreach (Replacement rep in ReplacementList)
            {
                if (rep.Enabled)
                {
                    articleText = PerformFindAndReplace(rep.Find, rep.Replace, articleText, strTitle, rep.RegularExpressionOptions);
                }
            }

            // restore, mirroring the hide step above
            if (chkIgnoreMore.Checked)
            {
                articleText = Remove.AddBackMore(articleText);
            }
            else if (chkIgnoreLinks.Checked)
            {
                articleText = Remove.AddBack(articleText);
            }

            if (!chkAddToSummary.Checked)
            {
                return articleText;
            }

            if (!string.IsNullOrEmpty(EditSummary))
            {
                editSummary = ", Replaced: " + EditSummary.Trim();
            }

            if (!string.IsNullOrEmpty(RemovedSummary))
            {
                editSummary += ", Removed: " + RemovedSummary.Trim();
            }

            return articleText;
        }
        /// <summary>
        /// Runs every typo group over the article text.
        /// Returns the text untouched when TyposCount is zero or IgnoreRegex matches it.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <param name="NoChange">True when the hidden, tail-stripped text is the same before and after the fixes.</param>
        /// <param name="Summary">Variables.TypoSummaryTag plus the trimmed fix summary, or empty when no summary was produced.</param>
        /// <returns>The article text with hidden content and the stripped tail restored.</returns>
        public string PerformTypoFixes(string ArticleText, out bool NoChange, out string Summary)
        {
            Summary = "";

            if (TyposCount == 0 || IgnoreRegex.IsMatch(ArticleText))
            {
                NoChange = true;
                return ArticleText;
            }

            HideText hider = new HideText(true, false, true);
            ArticleText = hider.HideMore(ArticleText);

            //remove newlines, whitespace and hide tokens from bottom
            //to avoid running 2K regexps on them
            Match  tailMatch = RemoveTail.Match(ArticleText);
            string tail      = tailMatch.Value;

            if (!string.IsNullOrEmpty(tail))
            {
                ArticleText = ArticleText.Remove(tailMatch.Index);
            }

            string beforeFixes = ArticleText;
            string fixSummary  = "";

            foreach (TypoGroup grp in Groups)
            {
                grp.FixTypos(ref ArticleText, ref fixSummary);
            }

            NoChange = (beforeFixes == ArticleText);

            // put the tail back before un-hiding
            ArticleText = hider.AddBackMore(ArticleText + tail);

            if (!string.IsNullOrEmpty(fixSummary))
            {
                Summary = Variables.TypoSummaryTag + fixSummary.Trim();
            }

            return ArticleText;
        }
        /// <summary>
        /// Runs every typo group over the article text.
        /// Returns the text untouched when TypoCount is zero or IgnoreRegex matches it.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="noChange">True when the hidden, tail-stripped text is the same before and after the fixes.</param>
        /// <param name="summary">Variables.TypoSummaryTag plus the trimmed fix summary; empty on the early-return path.</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>The article text with hidden content and the stripped tail restored.</returns>
        public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
        {
            summary = "";

            bool nothingToDo = (TypoCount == 0) || IgnoreRegex.IsMatch(articleText);

            if (nothingToDo)
            {
                noChange = true;
                return articleText;
            }

            // each typo group receives the untouched text alongside the working copy
            string untouchedText = articleText;

            HideText hider   = new HideText(true, false, true);
            string   working = hider.HideMore(articleText, true);

            //remove newlines, whitespace and hide tokens from bottom
            //to avoid running 2K regexps on them
            Match  tailMatch = RemoveTail.Match(working);
            string tail      = tailMatch.Value;

            if (!string.IsNullOrEmpty(tail))
            {
                working = working.Remove(tailMatch.Index);
            }

            string beforeFixes = working;
            string fixSummary  = "";

            foreach (TypoGroup grp in Groups)
            {
                grp.FixTypos(ref working, ref fixSummary, articleTitle, untouchedText);
            }

            noChange = beforeFixes.Equals(working);
            summary  = Variables.TypoSummaryTag + fixSummary.Trim();

            return hider.AddBackMore(working + tail);
        }
Пример #7
0
        /// <summary>
        /// Runs every enabled entry of ReplacementList over the supplied article text.
        /// Depending on the "ignore" check boxes the text is hidden (HideMore or Hide) before
        /// the replacements and restored afterwards.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <param name="strTitle">Passed through to each PerformFindAndReplace call.</param>
        /// <param name="EditSummary">Overwritten with ", Replaced: ..." when the "add to summary" box is ticked and streditsummary is non-empty.</param>
        /// <returns>The modified article text.</returns>
        public string MultipleFindAndReplace(string ArticleText, string strTitle, ref string EditSummary)
        {
            streditsummary = "";

            // hide: "ignore more" takes precedence over "ignore links"
            if (chkIgnoreMore.Checked)
            {
                ArticleText = Remove.HideMore(ArticleText);
            }
            else if (chkIgnoreLinks.Checked)
            {
                ArticleText = Remove.Hide(ArticleText);
            }

            foreach (Replacement rep in ReplacementList)
            {
                if (rep.Enabled)
                {
                    ArticleText = PerformFindAndReplace(rep.Find, rep.Replace, ArticleText, strTitle, rep.RegularExpressionOptions);
                }
            }

            // restore, mirroring the hide step above
            if (chkIgnoreMore.Checked)
            {
                ArticleText = Remove.AddBackMore(ArticleText);
            }
            else if (chkIgnoreLinks.Checked)
            {
                ArticleText = Remove.AddBack(ArticleText);
            }

            if (chkAddToSummary.Checked)
            {
                if (!string.IsNullOrEmpty(streditsummary))
                {
                    // NOTE(review): the guard tests streditsummary but the text emitted comes from
                    // summary - confirm these are meant to be different members
                    EditSummary = ", Replaced: " + summary.Trim();
                }
            }

            return ArticleText;
        }
Пример #8
0
        // Covered by: BoldTitleTests
        /// <summary>
        /// '''Emboldens''' the first occurrence of the article title, if not already bold
        /// 1) Cleans up bolded self wikilinks
        /// 2) Cleans up self wikilinks
        /// 3) '''Emboldens''' the first occurrence of the article title
        /// Steps 2 and 3 operate on the zeroth section only; the rest of the article is re-attached unchanged.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The title of the article.</param>
        /// <param name="noChange">Value that indicates whether no change was made. Set to false only on the two paths that return modified text (self link cleanup in step 2, bold added in step 3).</param>
        /// <returns>The modified article text.</returns>
        public string BoldTitle(string articleText, string articleTitle, out bool noChange)
        {
            noChange = true;
            List <string> alltemplates = GetAllTemplates(articleText);

            // the NoBoldTitle template check short-circuits everything, including step 1
            if (TemplateExists(alltemplates, NoBoldTitle))
            {
                return(articleText);
            }

            // Hider2 is used for the self link pass (step 2), Hider3 for the bolding pass (step 3)
            HideText Hider2 = new HideText(), Hider3 = new HideText(true, true, true);

            // 1) clean up bolded self links first, provided no noinclude use in article
            string afterSelfLinks = BoldedSelfLinks(articleTitle, articleText);

            if (!afterSelfLinks.Equals(articleText) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
            {
                articleText = afterSelfLinks;
            }

            // 2) Clean up self wikilinks
            // articleTextAtStart is taken after step 1, so the "no further change" returns below still carry the step 1 cleanup
            string articleTextAtStart = articleText, zerothSection = Tools.GetZerothSection(articleText);
            string restOfArticle = articleText.Substring(zerothSection.Length);
            string zerothSectionHidden, zerothSectionHiddenOriginal;

            // first check for any self links and no bold title, if found just convert first link to bold and return
            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
            // don't apply if bold in lead section already or some noinclude transclusion business
            if (!SelfLinks(zerothSection, articleTitle).Equals(zerothSection) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
            {
                // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
                zerothSectionHidden         = Hider2.HideMore(zerothSection, false, false, false);
                zerothSectionHiddenOriginal = zerothSectionHidden;
                zerothSectionHidden         = SelfLinks(zerothSectionHidden, articleTitle);
                zerothSection = Hider2.AddBackMore(zerothSectionHidden);

                // only report a change if SelfLinks altered the hidden text
                if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden))
                {
                    noChange = false;
                    return(zerothSection + restOfArticle);
                }
            }

            // Performance check: if article title not in zeroth section have nothing further to do
            if (zerothSection.IndexOf(BracketedAtEndOfLine.Replace(articleTitle, ""), StringComparison.OrdinalIgnoreCase) < 0)
            {
                return(articleTextAtStart);
            }

            // 3) '''Emboldens''' the first occurrence of the article title

            // ignore date articles (date in American or international format), nihongo title
            if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle) ||
                TemplateExists(alltemplates, NihongoTitle))
            {
                return(articleTextAtStart);
            }

            // escaped forms of the title, with and without the part removed by BracketedAtEndOfLine;
            // each regex accepts the title as given or with its first character lower-cased
            string escTitle = Regex.Escape(articleTitle), escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));
            Regex  boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
            Regex  boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

            // if title in bold already exists in article, or paragraph starts with something in bold, don't change anything
            // ignore any bold in infoboxes
            if (BoldTitleAlready4.IsMatch(Tools.ReplaceWithSpaces(zerothSection, WikiRegexes.InfoBox.Matches(zerothSection))) || DfnTag.IsMatch(zerothSection))
            {
                return(articleTextAtStart);
            }

            string articleTextNoInfobox = Tools.ReplaceWithSpaces(articleText, WikiRegexes.InfoBox.Matches(articleText));

            if (boldTitleAlready1.IsMatch(articleTextNoInfobox) || boldTitleAlready2.IsMatch(articleTextNoInfobox) ||
                BoldTitleAlready3.IsMatch(articleTextNoInfobox))
            {
                return(articleTextAtStart);
            }

            // so no self links to remove, check for the need to add bold
            string articleTextNoTemplates = WikiRegexes.NestedTemplates.Replace(articleText, "");

            // first quick check: ignore articles with some bold in first 5% of article, ignoring infoboxes, dablinks etc.
            int fivepc = articleTextNoTemplates.Length / 20;

            if (articleTextNoTemplates.Substring(0, fivepc).Contains("'''"))
            {
                return(articleTextAtStart);
            }

            // title must not directly follow '[' and must be followed by a space or one of , . : ;
            Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

            zerothSectionHidden         = Hider3.HideMore(zerothSection);
            zerothSectionHiddenOriginal = zerothSectionHidden;

            // first try title with brackets removed
            // the count of 1 limits the replacement to the first match
            zerothSectionHidden = regexBoldNoBrackets.Replace(zerothSectionHidden, "$1'''$2'''$3", 1);

            zerothSection = Hider3.AddBackMore(zerothSectionHidden);

            articleText = zerothSection + restOfArticle;

            // check that the bold added is the first bit in bold in the main body of the article
            if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden) && AddedBoldIsValid(articleText, escTitleNoBrackets))
            {
                noChange = false;
                return(articleText);
            }

            return(articleTextAtStart);
        }
Пример #9
0
        /// <summary>
        /// Runs the enabled replacements whose BeforeOrAfter flag equals <paramref name="beforeOrAfter"/>
        /// over the supplied article text, hiding and restoring text according to the "ignore" check boxes,
        /// and builds a language-specific edit summary when the "add to summary" box is ticked.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="strTitle">Passed through to each PerformFindAndReplace call.</param>
        /// <param name="beforeOrAfter">False if "before", true if "after"</param>
        /// <param name="editSummary">Receives the "replaced"/"removed" summary text.</param>
        /// <param name="majorChangesMade">Set to true when a replacement that is not marked Minor changed the text.</param>
        /// <returns>The modified article text.</returns>
        public string MultipleFindAndReplace(string articleText, string strTitle, bool beforeOrAfter, ref string editSummary, out bool majorChangesMade)
        {
            majorChangesMade = false;

            if (!HasReplacements)
            {
                return articleText;
            }

            ReplacedSummary = "";
            RemovedSummary  = "";

            // hide: "ignore more" takes precedence over "ignore links"
            if (chkIgnoreMore.Checked)
            {
                articleText = _remove.HideMore(articleText);
            }
            else if (chkIgnoreLinks.Checked)
            {
                articleText = _remove.Hide(articleText);
            }

            foreach (Replacement rep in _replacementList)
            {
                if (rep.Enabled && rep.BeforeOrAfter == beforeOrAfter)
                {
                    bool changeMade;
                    articleText = PerformFindAndReplace(rep, articleText, strTitle, out changeMade);

                    if (changeMade && !rep.Minor)
                    {
                        majorChangesMade = true;
                    }
                }
            }

            if (chkIgnoreMore.Checked)
            {
                // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs#FormatException_in_HideText.AddBackMore
                // FIXME: Usages of IgnoreMore with number (or M) replacement done in the FindAndReplace can cause corruption
                // e.g. Replacing 2 with "" ⌊⌊⌊⌊M2⌋⌋⌋⌋ becomes ⌊⌊⌊⌊M⌋⌋⌋⌋
                // This cannot then be added back
                articleText = _remove.AddBackMore(articleText);
            }
            else if (chkIgnoreLinks.Checked)
            {
                articleText = _remove.AddBack(articleText);
            }

            if (!chkAddToSummary.Checked)
            {
                return articleText;
            }

            if (!string.IsNullOrEmpty(ReplacedSummary))
            {
                // NOTE(review): the language-specific branches overwrite editSummary while the
                // default branch appends to it - kept as found, confirm which is intended
                if (Variables.LangCode.Equals("ar"))
                {
                    editSummary = "استبدل: " + ReplacedSummary.Trim();
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    editSummary = "غير: " + ReplacedSummary.Trim();
                }
                else if (Variables.LangCode.Equals("el"))
                {
                    editSummary = "αντικατέστησε: " + ReplacedSummary.Trim();
                }
                else if (Variables.LangCode.Equals("eo"))
                {
                    editSummary = "anstataŭigis: " + ReplacedSummary.Trim();
                }
                else if (Variables.LangCode.Equals("fr"))
                {
                    editSummary = "remplacement: " + ReplacedSummary.Trim();
                }
                else if (Variables.LangCode.Equals("hy"))
                {
                    editSummary = "փոխարինվեց: " + ReplacedSummary.Trim();
                }
                else if (Variables.LangCode.Equals("tr"))
                {
                    editSummary = "değiştirildi: " + ReplacedSummary.Trim();
                }
                else
                {
                    editSummary += "replaced: " + ReplacedSummary.Trim();
                }
            }

            if (string.IsNullOrEmpty(RemovedSummary))
            {
                return articleText;
            }

            // separate from whatever the summary already holds, using the Arabic comma where appropriate
            if (!string.IsNullOrEmpty(editSummary))
            {
                bool useArabicComma = Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz") || Variables.LangCode.Equals("fa");

                editSummary += useArabicComma ? "، " : ", ";
            }

            if (Variables.LangCode.Equals("ar"))
            {
                editSummary += "أزال: " + RemovedSummary.Trim();
            }
            else if (Variables.LangCode.Equals("arz"))
            {
                editSummary += "شال: " + RemovedSummary.Trim();
            }
            else if (Variables.LangCode.Equals("el"))
            {
                editSummary += "αφαίρεσε: " + RemovedSummary.Trim();
            }
            else if (Variables.LangCode.Equals("eo"))
            {
                editSummary += "forigis: " + RemovedSummary.Trim();
            }
            else if (Variables.LangCode.Equals("fr"))
            {
                editSummary += "retrait: " + RemovedSummary.Trim();
            }
            else if (Variables.LangCode.Equals("hy"))
            {
                editSummary += "ջնջվեց: " + RemovedSummary.Trim();
            }
            else
            {
                editSummary += "removed: " + RemovedSummary.Trim();
            }

            return articleText;
        }
Пример #10
0
        /// <summary>
        /// Applies each typo group in Groups to the article text.
        /// Nothing is done when TypoCount is zero or IgnoreRegex matches the text.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="noChange">True when the fixes left the hidden, tail-stripped text unchanged.</param>
        /// <param name="summary">Variables.TypoSummaryTag followed by the trimmed fix summary; empty on the early-return path.</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>The article text after the fixes, with hidden content and the tail added back.</returns>
        public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
        {
            summary = "";

            if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
            {
                noChange = true;
                return articleText;
            }

            // the groups are handed the text exactly as it was passed in
            string textAsGiven = articleText;

            HideText hider = new HideText(true, false, true);
            articleText = hider.HideMore(articleText, true);

            //remove newlines, whitespace and hide tokens from bottom
            //to avoid running 2K regexps on them
            Match tailMatch = RemoveTail.Match(articleText);
            string tail = tailMatch.Value;

            if (!string.IsNullOrEmpty(tail))
            {
                articleText = articleText.Remove(tailMatch.Index);
            }

            string hiddenBeforeFixes = articleText;
            string collectedSummary = "";

            foreach (TypoGroup grp in Groups)
            {
                grp.FixTypos(ref articleText, ref collectedSummary, articleTitle, textAsGiven);
            }

            noChange = hiddenBeforeFixes.Equals(articleText);
            summary = Variables.TypoSummaryTag + collectedSummary.Trim();

            string withTail = articleText + tail;
            return hider.AddBackMore(withTail);
        }
Пример #11
0
        // Covered by: LinkTests.FixDates()
        /// <summary>
        /// Hides text with HideMore, applies FixDatesRaw plus the br / paragraph clean-up
        /// regexes to what remains, then restores the hidden text.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <returns>The modified article text.</returns>
        public string FixDates(string ArticleText)
        {
            HideText hider = new HideText();

            string working = hider.HideMore(ArticleText);

            working = FixDatesRaw(working);

            // Collapsing runs of <br /> lives here, counter-intuitively, because it needs
            // HideMore() and that slow call should not be made a second time --MaxSem
            working = SyntaxRemoveBr.Replace(working, "\r\n");
            working = SyntaxRemoveParagraphs.Replace(working, "\r\n\r\n");

            return hider.AddBackMore(working);
        }
Пример #12
0
        // Covered by: LinkTests.TestBulletExternalLinks()
        /// <summary>
        /// Adds bullet points to external links after the last "external links" / "links" header.
        /// Text before that header is left untouched; the text is returned unchanged when no such header exists.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <returns>The modified article text.</returns>
        public static string BulletExternalLinks(string ArticleText)
        {
            // RightToLeft: the search starts from the end of the text, so the last header wins
            Match header = Regex.Match(ArticleText, @"=\s*(?:external)?\s*links\s*=", RegexOptions.IgnoreCase | RegexOptions.RightToLeft);

            if (!header.Success)
            {
                return ArticleText;
            }

            int splitAt = header.Index;

            string beforeHeader = ArticleText.Substring(0, splitAt);
            string linksSection = ArticleText.Substring(splitAt);

            HideText hider = new HideText(false, true, false);

            linksSection = hider.HideMore(linksSection);

            // a link at the start of a line gets "* " in front; one of two preceding newlines is dropped
            linksSection = Regex.Replace(linksSection, "(\r\n|\n)?(\r\n|\n)(\\[?http)", "$2* $3");

            linksSection = hider.AddBackMore(linksSection);

            return beforeHeader + linksSection;
        }
Пример #13
0
        /// <summary>
        /// Performs typo fixes against the article text in multi-threaded mode
        /// Typo fixes not performed if no typos are loaded (TypoCount is zero) or IgnoreRegex matches the text
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="noChange">True if no typos fixed</param>
        /// <param name="summary">Edit summary: Variables.TypoSummaryTag followed by the trimmed fix summary; empty on the early-return path</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>Updated article text</returns>
        public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
        {
            string originalArticleText = articleText;
            summary = "";
            if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
            {
                noChange = true;
                return articleText;
            }

            HideText removeText = new HideText(true, false, true);

            articleText = removeText.HideMore(articleText, true);

            // remove newlines, whitespace and hide tokens from bottom
            // to avoid running 2K regexps on them
            Match m = RemoveTail.Match(articleText);
            string tail = m.Value;
            if (!string.IsNullOrEmpty(tail))
            {
                articleText = articleText.Remove(m.Index);
            }

            string originalText = articleText;
            string strSummary = "";
            /* Run typos threaded, one thread per group for better performance
             * http://stackoverflow.com/questions/13776846/pass-paramters-through-parameterizedthreadstart
             * http://www.dotnetperls.com/parameterizedthreadstart
             * http://stackoverflow.com/questions/831009/thread-with-multiple-parameters */
            // NOTE(review): the result collections are presumably filled by FixTypos2 on the worker
            // threads - confirm they are safe for concurrent writes
            resultSummary.Clear();
            resultArticleText.Clear();

            Thread[] workers = new Thread[Groups.Count];
            int i = 0;
            foreach (TypoGroup tg in Groups)
            {
                // Copy the loop variable before capturing it: with compilers older than C# 5 the
                // foreach variable is shared by all iterations, so a thread that starts late
                // would otherwise run a different group than the one it was created for.
                TypoGroup currentGroup = tg;

                workers[i] =
                    new Thread(
                        delegate()
                        {
                            currentGroup.FixTypos2(articleText, strSummary, articleTitle, originalArticleText);
                        });
                workers[i].Start();
                i++;
            }

            // Join all the threads: wait for all to complete
            foreach (Thread t in workers)
            {
                t.Join();
            }

            foreach (TypoGroup tg in Groups)
            {
                string groupSummary;
                resultSummary.TryGetValue(tg.GroupSize, out groupSummary);
                string groupArticleText;
                resultArticleText.TryGetValue(tg.GroupSize, out groupArticleText);

                // a failed lookup leaves the out value null: treat it like "no fixes from this group"
                // instead of dereferencing it
                if (string.IsNullOrEmpty(groupSummary))
                {
                    continue;
                }

                if (strSummary.Length > 0 || groupArticleText == null)
                {
                    // earlier thread had changes (or this group's threaded text is missing),
                    // so need to re-run this one against the current text
                    tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
                }
                else
                {
                    strSummary = groupSummary;
                    articleText = groupArticleText;
                }
            }

            noChange = originalText.Equals(articleText);

            summary = Variables.TypoSummaryTag + strSummary.Trim();

            return removeText.AddBackMore(articleText + tail);
        }
Пример #14
0
        // Covered by: BoldTitleTests
        /// <summary>
        /// '''Emboldens''' the first occurrence of the article title, if not already bold
        /// 1) Cleans up bolded self wikilinks
        /// 2) Cleans up self wikilinks
        /// 3) '''Emboldens''' the first occurrence of the article title
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The title of the article.</param>
        /// <param name="noChange">Value that indicated whether no change was made.</param>
        /// <returns>The modified article text.</returns>
        public string BoldTitle(string articleText, string articleTitle, out bool noChange)
        {
            noChange = true;
            List<string> alltemplates = GetAllTemplates(articleText);

            if(TemplateExists(alltemplates, NoBoldTitle))
                return articleText;

            HideText Hider2 = new HideText(), Hider3 = new HideText(true, true, true);

            // 1) clean up bolded self links first, provided no noinclude use in article
            string afterSelfLinks = BoldedSelfLinks(articleTitle, articleText);

            if(!afterSelfLinks.Equals(articleText) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
                articleText = afterSelfLinks;

            // 2) Clean up self wikilinks
            string articleTextAtStart = articleText, zerothSection = Tools.GetZerothSection(articleText);
            string restOfArticle = articleText.Substring(zerothSection.Length);
            string zerothSectionHidden, zerothSectionHiddenOriginal;

            // first check for any self links and no bold title, if found just convert first link to bold and return
            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
            // don't apply if bold in lead section already or some noinclude transclusion business
            if(!SelfLinks(zerothSection, articleTitle).Equals(zerothSection) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
            {
                // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
                zerothSectionHidden = Hider2.HideMore(zerothSection, false, false, false);
                zerothSectionHiddenOriginal = zerothSectionHidden;
                zerothSectionHidden = SelfLinks(zerothSectionHidden, articleTitle);
                zerothSection = Hider2.AddBackMore(zerothSectionHidden);

                if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden))
                {
                    noChange = false;
                    return (zerothSection + restOfArticle);
                }
            }

            // Performance check: if article title not in zeroth section have nothing further to do
            if(zerothSection.IndexOf(BracketedAtEndOfLine.Replace(articleTitle, ""), StringComparison.OrdinalIgnoreCase) < 0)
                return articleTextAtStart;

            // 3) '''Emboldens''' the first occurrence of the article title

            // ignore date articles (date in American or international format), nihongo title
            if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle)
                || TemplateExists(alltemplates, NihongoTitle))
                return articleTextAtStart;

            string escTitle = Regex.Escape(articleTitle), escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));
            Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
            Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

            // if title in bold already exists in article, or paragraph starts with something in bold, don't change anything
            // ignore any bold in infoboxes
            if(BoldTitleAlready4.IsMatch(Tools.ReplaceWithSpaces(zerothSection, WikiRegexes.InfoBox.Matches(zerothSection))) || DfnTag.IsMatch(zerothSection))
                return articleTextAtStart;

            string articleTextNoInfobox = Tools.ReplaceWithSpaces(articleText, WikiRegexes.InfoBox.Matches(articleText));
            if (boldTitleAlready1.IsMatch(articleTextNoInfobox) || boldTitleAlready2.IsMatch(articleTextNoInfobox)
                || BoldTitleAlready3.IsMatch(articleTextNoInfobox))
                return articleTextAtStart;

            // so no self links to remove, check for the need to add bold
            string articleTextNoTemplates = WikiRegexes.NestedTemplates.Replace(articleText, "");

            // first quick check: ignore articles with some bold in first 5% of article, ignoring infoboxes, dablinks etc.
            int fivepc = articleTextNoTemplates.Length / 20;

            if (articleTextNoTemplates.Substring(0, fivepc).Contains("'''"))
                return articleTextAtStart;

            Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

            zerothSectionHidden = Hider3.HideMore(zerothSection);
            zerothSectionHiddenOriginal = zerothSectionHidden;

            // first try title with brackets removed
            zerothSectionHidden = regexBoldNoBrackets.Replace(zerothSectionHidden, "$1'''$2'''$3", 1);

            zerothSection = Hider3.AddBackMore(zerothSectionHidden);

            articleText = zerothSection + restOfArticle;

            // check that the bold added is the first bit in bold in the main body of the article
            if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden) && AddedBoldIsValid(articleText, escTitleNoBrackets))
            {
                noChange = false;
                return articleText;
            }

            return articleTextAtStart;
        }
Пример #15
0
        // Round-trip check: a plain wikilink is hidden by HideMore and restored unchanged by AddBackMore.
        public void HideMore()
        {
            const string wikilink = "[[foo]]";

            Hider = new HideText(true, false, true);

            // after hiding, the text must match the AllHidden pattern
            string hidden = Hider.HideMore(wikilink, false, true);
            RegexAssert.IsMatch(AllHidden, hidden);

            // adding the hidden content back must reproduce the input exactly
            string restored = Hider.AddBackMore(hidden);
            Assert.AreEqual(wikilink, restored);
        }
Пример #16
0
        // Covered by: BoldTitleTests
        /// <summary>
        /// '''Emboldens''' the first occurrence of the article title, if not already bold.
        /// Bolded self links are cleaned up first; self links to the title in the zeroth section are then
        /// converted to bold text, and only when none were converted is bold added around the title itself.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The title of the article.</param>
        /// <param name="noChange">True unless a self link was converted to bold or bold was added around the title.</param>
        /// <returns>The modified article text; when nothing was emboldened, the text as it stood after the bolded self link clean up.</returns>
        public string BoldTitle(string articleText, string articleTitle, out bool noChange)
        {
            HideText Hider2 = new HideText();
            HideText Hider3 = new HideText(true, true, true);
            // clean up bolded self links first
            articleText = BoldedSelfLinks(articleTitle, articleText);

            noChange = true;
            string escTitle = Regex.Escape(articleTitle);
            string escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));

            // returned whenever no self link is converted and no bold is added
            string articleTextAtStart = articleText;

            string zerothSection = WikiRegexes.ZerothSection.Match(articleText).Value;
            string restOfArticle = articleText.Remove(0, zerothSection.Length);

            // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
            string zerothSectionHidden = Hider2.HideMore(zerothSection, false, false, false);
            string zerothSectionHiddenOriginal = zerothSectionHidden;

            // first check for any self links and no bold title, if found just convert the self links to bold and return
            Regex r1 = new Regex(@"\[\[\s*" + escTitle + @"\s*\]\]");
            Regex r2 = new Regex(@"\[\[\s*" + Tools.TurnFirstToLower(escTitle) + @"\s*\]\]");

            // The title is inserted through a regex replacement pattern, in which '$' introduces a substitution
            // ($$, $&, $0 ...). Doubling every '$' makes a title such as "Ca$$h" come out literally.

            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
            // don't apply if bold in lead section already or some noinclude transclusion business
            // NOTE(review): this guard only covers the exact-case link; the lower-case variant below is converted
            // even when noinclude/includeonly is present - confirm whether that is intended
            if (!Regex.IsMatch(zerothSection, "'''" + escTitle + "'''") && !WikiRegexes.Noinclude.IsMatch(articleText) && !WikiRegexes.Includeonly.IsMatch(articleText))
                zerothSectionHidden = r1.Replace(zerothSectionHidden, "'''" + articleTitle.Replace("$", "$$") + @"'''");
            if (zerothSectionHiddenOriginal == zerothSectionHidden && !Regex.IsMatch(zerothSection, @"'''" + Tools.TurnFirstToLower(escTitle) + @"'''"))
                zerothSectionHidden = r2.Replace(zerothSectionHidden, "'''" + Tools.TurnFirstToLower(articleTitle).Replace("$", "$$") + @"'''");

            zerothSection = Hider2.AddBackMore(zerothSectionHidden);

            if (zerothSectionHiddenOriginal != zerothSectionHidden)
            {
                noChange = false;
                return (zerothSection + restOfArticle);
            }

            // ignore date articles (date in American or international format)
            if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle))
                return articleTextAtStart;

            Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
            Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

            // if the title in bold already exists in the article, or BoldTitleAlready3 matches, don't change anything
            if (boldTitleAlready1.IsMatch(articleText) || boldTitleAlready2.IsMatch(articleText)
                || BoldTitleAlready3.IsMatch(articleText))
                return articleTextAtStart;

            // so no self links to remove, check for the need to add bold
            string articleTextHidden = Hider3.HideMore(articleText);

            // first quick check: ignore articles with some bold in first 5% of hidemore article
            int fivepc = articleTextHidden.Length / 20;

            if (articleTextHidden.Substring(0, fivepc).Contains("'''"))
                return articleTextAtStart;

            Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

            // title (with brackets removed) not found in plain text: nothing was emboldened, so report no change
            // instead of validating bold that this method did not add
            if (!regexBoldNoBrackets.IsMatch(articleTextHidden))
                return articleTextAtStart;

            // embolden the first occurrence only
            articleTextHidden = regexBoldNoBrackets.Replace(articleTextHidden, "$1'''$2'''$3", 1);

            articleText = Hider3.AddBackMore(articleTextHidden);

            // check that the bold added is the first bit in bold in the main body of the article
            if (AddedBoldIsValid(articleText, escTitleNoBrackets))
            {
                noChange = false;
                return articleText;
            }

            return articleTextAtStart;
        }
Пример #17
0
        /// <summary>
        /// Runs every loaded typo group against the article text, with HideMore content masked while the fixes run.
        /// Nothing is done when no typos are loaded or when IgnoreRegex matches the text.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <param name="NoChange">True if the typo groups left the text unchanged.</param>
        /// <param name="Summary">Typo summary tag plus the fixes made; empty when no fixes were recorded.</param>
        /// <returns>The updated article text.</returns>
        public string PerformTypoFixes(string ArticleText, out bool NoChange, out string Summary)
        {
            Summary = "";

            // bail out early: no typos loaded, or the page matches the ignore pattern
            if (TyposCount == 0 || IgnoreRegex.IsMatch(ArticleText))
            {
                NoChange = true;
                return ArticleText;
            }

            HideText hider = new HideText(true, false, true);
            ArticleText = hider.HideMore(ArticleText);

            // remove newlines, whitespace and hide tokens from bottom
            // to avoid running 2K regexps on them
            Match tailMatch = RemoveTail.Match(ArticleText);
            string tail = tailMatch.Value;
            if (tail.Length > 0)
            {
                ArticleText = ArticleText.Remove(tailMatch.Index);
            }

            string textBeforeFixes = ArticleText;
            string fixesMade = "";

            foreach (TypoGroup typoGroup in Groups)
            {
                typoGroup.FixTypos(ref ArticleText, ref fixesMade);
            }

            NoChange = (textBeforeFixes == ArticleText);

            // re-attach the tail before restoring the hidden content
            ArticleText = hider.AddBackMore(ArticleText + tail);

            if (!string.IsNullOrEmpty(fixesMade))
            {
                Summary = Variables.TypoSummaryTag + fixesMade.Trim();
            }

            return ArticleText;
        }
Пример #18
0
        /// <summary>
        /// Checks that the bold just added to the article is the first bold in the article, and that bold occurs
        /// within the first 5% (minimum 5 characters) of the article once nested templates are removed and HideMore is applied
        /// </summary>
        /// <param name="articleText">The wiki text of the article, including the bold that was added.</param>
        /// <param name="escapedTitle">The article title, already escaped for use in a regular expression.</param>
        /// <returns>True if bold is within the leading portion of the article and the bolded title is not after the first bold.</returns>
        private bool AddedBoldIsValid(string articleText, string escapedTitle)
        {
            HideText Hider2 = new HideText(true, true, true);
            Regex RegexBoldAdded = new Regex(@"^(.*?)'''(" + escapedTitle + @")", RegexOptions.Singleline | RegexOptions.IgnoreCase);

            // both positions are measured on the unmodified text, before templates are stripped
            int boldAddedPos = RegexBoldAdded.Match(articleText).Groups[2].Index;

            int firstBoldPos = RegexFirstBold.Match(articleText).Length;

            articleText = WikiRegexes.NestedTemplates.Replace(articleText, "");

            articleText = Hider2.HideMore(articleText);

            // was bold added in first 5% of article?
            bool inFirst5Percent = false;

            int articlelength = articleText.Length;

            if (articlelength > 5)
            {
                // The slice length is derived from the untrimmed length, so clamp it to the trimmed text:
                // otherwise Substring throws when trimming leaves fewer characters than the slice asks for
                string trimmed = articleText.Trim();
                int leadLength = Math.Min(trimmed.Length, Math.Max(articlelength / 20, 5));

                inFirst5Percent = trimmed.Substring(0, leadLength).Contains("'''");
            }

            // check that the bold added is the first bit in bold in the main body of the article, and in first 5% of HideMore article
            return inFirst5Percent && boldAddedPos <= firstBoldPos;
        }
Пример #19
0
        /// <summary>
        /// Runs each enabled find and replace rule of the requested phase against the article text,
        /// masking content first when the "ignore more" or "ignore links" option is ticked.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="strTitle">Passed through to each individual find and replace.</param>
        /// <param name="beforeOrAfter">False if "before", true if "after"</param>
        /// <param name="editSummary">Receives the "replaced: ..." / "removed: ..." summary when adding to the summary is ticked.</param>
        /// <param name="majorChangesMade">True if any non-minor rule changed the text.</param>
        /// <returns>The modified article text.</returns>
        public string MultipleFindAndReplace(string articleText, string strTitle, bool beforeOrAfter, ref string editSummary, out bool majorChangesMade)
        {
            majorChangesMade = false;

            if (!HasReplacements)
                return articleText;

            _replacedSummary = "";
            _removedSummary = "";

            // mask content the rules must not touch; "ignore more" takes precedence over "ignore links"
            if (chkIgnoreMore.Checked)
                articleText = _remove.HideMore(articleText);
            else if (chkIgnoreLinks.Checked)
                articleText = _remove.Hide(articleText);

            foreach (Replacement rule in _replacementList)
            {
                // only enabled rules belonging to the requested phase are applied
                if (rule.Enabled && rule.BeforeOrAfter == beforeOrAfter)
                {
                    bool ruleChangedText;
                    articleText = PerformFindAndReplace(rule, articleText, strTitle, out ruleChangedText);

                    majorChangesMade |= ruleChangedText && !rule.Minor;
                }
            }

            // restore whatever was masked above
            if (chkIgnoreMore.Checked)
                articleText = _remove.AddBackMore(articleText);
            else if (chkIgnoreLinks.Checked)
                articleText = _remove.AddBack(articleText);

            if (!chkAddToSummary.Checked)
                return articleText;

            if (!string.IsNullOrEmpty(_replacedSummary))
                editSummary = "replaced: " + _replacedSummary.Trim();

            if (!string.IsNullOrEmpty(_removedSummary))
            {
                // separate from any summary text already present
                string separator = string.IsNullOrEmpty(editSummary) ? "" : ", ";
                editSummary += separator + "removed: " + _removedSummary.Trim();
            }

            return articleText;
        }
Пример #20
0
        /// <summary>
        /// Performs typo fixes against the article text in multi-threaded mode
        /// Typo fixes not performed if no typos loaded or any sic tags on page
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="noChange">True if no typos fixed</param>
        /// <param name="summary">Edit summary: the typo summary tag followed by the fixes made, or empty when nothing was fixed</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>Updated article text</returns>
        public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
        {
            string originalArticleText = articleText;

            summary = "";
            if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
            {
                noChange = true;
                return(articleText);
            }

            HideText removeText = new HideText(true, false, true);

            articleText = removeText.HideMore(articleText, true);

            //remove newlines, whitespace and hide tokens from bottom
            //to avoid running 2K regexps on them
            Match  m    = RemoveTail.Match(articleText);
            string tail = m.Value;

            if (!string.IsNullOrEmpty(tail))
            {
                articleText = articleText.Remove(m.Index);
            }

            string originalText = articleText;
            string strSummary   = "";

            /* Run typos threaded, one thread per group for better performance
             * http://stackoverflow.com/questions/13776846/pass-paramters-through-parameterizedthreadstart
             * http://www.dotnetperls.com/parameterizedthreadstart
             * http://stackoverflow.com/questions/831009/thread-with-multiple-parameters */
            resultSummary.Clear();
            resultArticleText.Clear();

            Thread[] array = new Thread[Groups.Count];
            int      i     = 0;

            foreach (TypoGroup tg in Groups)
            {
                // Copy the loop variable: before C# 5 every delegate captured the same foreach variable,
                // so a thread starting late could run a different group than the one it was created for
                TypoGroup threadGroup = tg;

                array[i] = new Thread(delegate(object unused) { threadGroup.FixTypos2(articleText, strSummary, articleTitle, originalArticleText); });
                array[i].Start(i);
                i++;
            }

            // Join all the threads: wait for all to complete
            for (int j = 0; j < array.Length; j++)
            {
                array[j].Join();
            }

            foreach (TypoGroup tg in Groups)
            {
                string groupSummary;
                string groupArticleText = null;

                // presumably FixTypos2 records its results under the group's GroupSize - verify against FixTypos2
                bool haveSummary = resultSummary.TryGetValue(tg.GroupSize, out groupSummary) && groupSummary != null;

                if (!haveSummary)
                {
                    // no threaded result to consult: run this group synchronously on the current text
                    tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
                    continue;
                }

                if (groupSummary.Length == 0)
                {
                    // this group fixed nothing
                    continue;
                }

                if (strSummary.Length > 0
                    || !resultArticleText.TryGetValue(tg.GroupSize, out groupArticleText)
                    || groupArticleText == null)
                {
                    // earlier group had changes (or this group's threaded text is missing), so need to re-run this one
                    tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
                }
                else
                {
                    strSummary  = groupSummary;
                    articleText = groupArticleText;
                }
            }

            noChange = originalText.Equals(articleText);

            // only tag the summary when something was fixed, matching the single-threaded overload
            if (!string.IsNullOrEmpty(strSummary))
            {
                summary = Variables.TypoSummaryTag + strSummary.Trim();
            }

            return(removeText.AddBackMore(articleText + tail));
        }
Пример #21
0
        /// <summary>
        /// Corrects date and decade formatting errors, and collapses runs of two or more
        /// &lt;br /&gt; tags into a single newline, all with HideMore content masked.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <returns>The modified article text.</returns>
        public string FixDates(string ArticleText)
        {
            HideText hider = new HideText();

            string hidden = hider.HideMore(ArticleText);
            string datesFixed = FixDatesRaw(hidden);

            // Collapsing repeated <br /> tags lives here, counter-intuitively, because it needs HideMore()
            // and calling that slow function a second time just for this is not worth it
            string brCollapsed = Regex.Replace(datesFixed.Trim(), @"(<br[\s/]*> *){2,}", "\r\n", RegexOptions.IgnoreCase);

            return hider.AddBackMore(brCollapsed);
        }