This class provides functions for 'hiding' certain syntax by replacing it with unique tokens, and then adding it back after an operation has been performed on the text.
Exemplo n.º 1
0
        /// <summary>
        /// Pulls the interwiki links and the interwiki featured article links out of the article text.
        /// Interwikis in comments/nowiki tags are ignored.
        /// </summary>
        /// <param name="articleText">The wiki text of the article; on return the extracted links and any interwiki comment have been removed from it</param>
        /// <returns>The featured article links, then the interwiki comment (if one was found) followed by a line break, then the interwiki links</returns>
        public string Interwikis(ref string articleText)
        {
            // lift the interwiki comment (when present) out of the text so it can be emitted alongside the links
            Match commentMatch = InterLangRegex.Match(articleText);
            string comment = "";

            if (commentMatch.Success)
            {
                comment = commentMatch.Value;
                articleText = articleText.Replace(comment, "");
            }

            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Interwiki_links_moved_out_of_comment
            HideText hider = new HideText(false, true, false);
            articleText = hider.Hide(articleText);

            string result = ListToString(RemoveLinkFGAs(ref articleText));

            if (comment.Length > 0)
                result = result + comment + "\r\n";

            result = result + ListToString(RemoveInterWikis(ref articleText));

            // restore whatever the hider tokenised
            articleText = hider.AddBack(articleText);

            return result;
        }
 /// <summary>
 /// Test helper: runs HideMore on the text with a fresh default HideText and
 /// asserts that AddBackMore turns the result back into the input
 /// </summary>
 /// <returns>The text as returned by HideMore</returns>
 private string HideMore(string text, bool hideOnlyTargetOfWikilink)
 {
     Hider = new HideText();

     string hidden = Hider.HideMore(text, hideOnlyTargetOfWikilink);
     string restored = Hider.AddBackMore(hidden);

     Assert.AreEqual(text, restored);
     return hidden;
 }
 /// <summary>
 /// Test helper: runs Hide on the text with a HideText built from the given options and
 /// asserts that AddBack turns the result back into the input
 /// </summary>
 /// <returns>The text as returned by Hide</returns>
 private string Hide(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
 {
     Hider = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);

     string hidden = Hider.Hide(text);
     string restored = Hider.AddBack(hidden);

     Assert.AreEqual(text, restored);
     return hidden;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Checks that the bold just added to the article is the first bold in the article, and that
        /// there is bold within the first 5% (at least 5 characters) of the article once templates
        /// have been stripped and HideMore applied
        /// </summary>
        /// <param name="articleText">The wiki text of the article, with the bold already added</param>
        /// <param name="escapedTitle">The regex-escaped title that was emboldened</param>
        /// <returns>True when both conditions hold</returns>
        private bool AddedBoldIsValid(string articleText, string escapedTitle)
        {
            HideText hider = new HideText(true, true, true);
            Regex regexBoldAdded = new Regex(@"^(.*?)'''(" + escapedTitle + @")", RegexOptions.Singleline | RegexOptions.IgnoreCase);

            // position of the emboldened title (0 when the regex does not match)
            int boldAddedPos = regexBoldAdded.Match(articleText).Groups[2].Index;

            int firstBoldPos = RegexFirstBold.Match(articleText).Length;

            articleText = WikiRegexes.NestedTemplates.Replace(articleText, "");
            articleText = hider.HideMore(articleText);

            // was bold added in first 5% of article?
            bool inFirst5Percent = false;

            int articleLength = articleText.Length;

            if (articleLength > 5)
            {
                // The window size is derived from the untrimmed length, but the window is taken from the
                // trimmed text, so clamp it: otherwise Substring throws when trimming removes enough
                // whitespace to leave fewer characters than the window
                string trimmed = articleText.Trim();
                int window = Math.Min(trimmed.Length, Math.Max(articleLength / 20, 5));

                inFirst5Percent = trimmed.Substring(0, window).Contains("'''");
            }

            // the hidden text is a local working copy that is not used again, so nothing needs adding back

            // check that the bold added is the first bit in bold in the main body of the article, and in first 5% of HideMore article
            return (inFirst5Percent && boldAddedPos <= firstBoldPos);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Pulls the interwiki links and the interwiki featured article links out of the article text.
        /// Interwikis in comments/nowiki tags are ignored.
        /// </summary>
        /// <param name="articleText">The wiki text of the article; on return the extracted links have been removed from it</param>
        /// <returns>The featured article links followed by the interwiki links</returns>
        public string Interwikis(ref string articleText)
        {
            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Interwiki_links_moved_out_of_comment
            HideText hider = new HideText(false, true, false);
            articleText = hider.Hide(articleText);

            // featured article links are removed first, then the plain interwikis
            string featuredLinks = ListToString(RemoveLinkFGAs(ref articleText));
            string plainLinks = ListToString(RemoveInterWikis(ref articleText));

            articleText = hider.AddBack(articleText);

            return featuredLinks + plainLinks;
        }
        /// <summary>
        /// Runs every loaded typo group over the article text.
        /// Nothing is attempted when no typos are loaded or when IgnoreRegex matches the text.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <param name="NoChange">True when nothing was attempted, or when the typo groups left the working text unchanged</param>
        /// <param name="Summary">The typo summary tag followed by the trimmed fix summary, or "" when no fix summary was produced</param>
        /// <returns>The article text after typo fixing</returns>
        public string PerformTypoFixes(string ArticleText, out bool NoChange, out string Summary)
        {
            Summary = "";

            if (TyposCount == 0 || IgnoreRegex.IsMatch(ArticleText))
            {
                NoChange = true;
                return ArticleText;
            }

            HideText hider = new HideText(true, false, true);
            string working = hider.HideMore(ArticleText);

            // cut off whatever RemoveTail matches at the bottom so the typo regexes are not run over it
            Match tailMatch = RemoveTail.Match(working);
            string tail = tailMatch.Value;

            if (!string.IsNullOrEmpty(tail))
                working = working.Remove(tailMatch.Index);

            string beforeFixes = working;
            string fixSummary = "";

            foreach (TypoGroup typoGroup in Groups)
                typoGroup.FixTypos(ref working, ref fixSummary);

            NoChange = (beforeFixes == working);

            // re-attach the tail before restoring the hidden parts
            string result = hider.AddBackMore(working + tail);

            if (!string.IsNullOrEmpty(fixSummary))
                Summary = Variables.TypoSummaryTag + fixSummary.Trim();

            return result;
        }
        /// <summary>
        /// Runs every loaded typo group over the article text.
        /// Nothing is attempted when no typos are loaded or when IgnoreRegex matches the text.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="noChange">True when nothing was attempted, or when the typo groups left the working text unchanged</param>
        /// <param name="summary">"" when nothing was attempted, otherwise the typo summary tag followed by the trimmed fix summary</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>The article text after typo fixing</returns>
        public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
        {
            // the typo groups are also handed the text as it was before anything was hidden
            string unhiddenText = articleText;

            summary = "";

            bool nothingToDo = (TypoCount == 0) || IgnoreRegex.IsMatch(articleText);
            if (nothingToDo)
            {
                noChange = true;
                return articleText;
            }

            HideText hider = new HideText(true, false, true);
            string working = hider.HideMore(articleText, true);

            // cut off whatever RemoveTail matches at the bottom so the typo regexes are not run over it
            Match tailMatch = RemoveTail.Match(working);
            string tail = tailMatch.Value;

            if (!string.IsNullOrEmpty(tail))
                working = working.Remove(tailMatch.Index);

            string beforeFixes = working;
            string fixSummary = "";

            foreach (TypoGroup typoGroup in Groups)
                typoGroup.FixTypos(ref working, ref fixSummary, articleTitle, unhiddenText);

            noChange = beforeFixes.Equals(working);
            summary = Variables.TypoSummaryTag + fixSummary.Trim();

            // re-attach the tail, then restore the hidden parts
            return hider.AddBackMore(working + tail);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Reports whether any loaded typo group finds something to fix on the page.
        /// The fixes are applied to a local working copy only; the caller's text is not modified.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>True as soon as one typo group writes a fix summary; false when no typos are loaded, IgnoreRegex matches the text, or no group reports anything</returns>
        public bool DetectTypo(string articleText, string articleTitle)
        {
            if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
                return false;

            // the typo groups are also handed the text as it was before anything was hidden
            string unhiddenText = articleText;

            HideText hider = new HideText(true, false, true);
            string working = hider.HideMore(articleText, true);

            // cut off whatever RemoveTail matches at the bottom so the typo regexes are not run over it
            Match tailMatch = RemoveTail.Match(working);
            if (tailMatch.Success)
                working = working.Remove(tailMatch.Index);

            string fixSummary = "";

            foreach (TypoGroup typoGroup in Groups)
            {
                typoGroup.FixTypos(ref working, ref fixSummary, articleTitle, unhiddenText);

                // a non-empty summary means this group fixed something: no need to run the rest
                if (fixSummary.Length > 0)
                    return true;
            }

            return false;
        }
Exemplo n.º 9
0
        // Covered by: BoldTitleTests
        /// <summary>
        /// '''Emboldens''' the first occurrence of the article title, if not already bold
        /// 1) Cleans up bolded self wikilinks
        /// 2) Cleans up self wikilinks
        /// 3) '''Emboldens''' the first occurrence of the article title
        /// Nothing is done when the article uses the NoBoldTitle template. Steps 2 and 3 only work on the zeroth section.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The title of the article.</param>
        /// <param name="noChange">Value that indicated whether no change was made. Only set to false when step 2 or step 3 changed the text.</param>
        /// <returns>The modified article text.</returns>
        public string BoldTitle(string articleText, string articleTitle, out bool noChange)
        {
            noChange = true;
            List<string> alltemplates = GetAllTemplates(articleText);

            // articles carrying the NoBoldTitle template are left completely alone
            if(TemplateExists(alltemplates, NoBoldTitle))
                return articleText;

            // Hider2 is used for the self link clean up (step 2), Hider3 for adding the bold (step 3)
            HideText Hider2 = new HideText(), Hider3 = new HideText(true, true, true);

            // 1) clean up bolded self links first, provided no noinclude use in article
            string afterSelfLinks = BoldedSelfLinks(articleTitle, articleText);

            if(!afterSelfLinks.Equals(articleText) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
                articleText = afterSelfLinks;

            // 2) Clean up self wikilinks
            // articleTextAtStart is what every "nothing further to do" exit below returns; it already includes any step 1 change
            // NOTE(review): noChange is still true at those exits even when step 1 altered the text - confirm callers expect that
            string articleTextAtStart = articleText, zerothSection = Tools.GetZerothSection(articleText);
            string restOfArticle = articleText.Substring(zerothSection.Length);
            string zerothSectionHidden, zerothSectionHiddenOriginal;

            // first check for any self links and no bold title, if found just convert first link to bold and return
            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
            // don't apply if bold in lead section already or some noinclude transclusion business
            if(!SelfLinks(zerothSection, articleTitle).Equals(zerothSection) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
            {
                // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
                zerothSectionHidden = Hider2.HideMore(zerothSection, false, false, false);
                zerothSectionHiddenOriginal = zerothSectionHidden;
                zerothSectionHidden = SelfLinks(zerothSectionHidden, articleTitle);
                zerothSection = Hider2.AddBackMore(zerothSectionHidden);

                // only report a change if SelfLinks altered the hidden text
                if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden))
                {
                    noChange = false;
                    return (zerothSection + restOfArticle);
                }
            }

            // Performance check: if article title not in zeroth section have nothing further to do
            // (the title is searched for case-insensitively, with whatever BracketedAtEndOfLine matches removed)
            if(zerothSection.IndexOf(BracketedAtEndOfLine.Replace(articleTitle, ""), StringComparison.OrdinalIgnoreCase) < 0)
                return articleTextAtStart;

            // 3) '''Emboldens''' the first occurrence of the article title

            // ignore date articles (date in American or international format), nihongo title
            if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle)
                || TemplateExists(alltemplates, NihongoTitle))
                return articleTextAtStart;

            // the title in bold, with its first letter as given or in lower case; variant 2 uses the title without the bracketed part
            string escTitle = Regex.Escape(articleTitle), escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));
            Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
            Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

            // if title in bold already exists in article, or paragraph starts with something in bold, don't change anything
            // ignore any bold in infoboxes
            if(BoldTitleAlready4.IsMatch(Tools.ReplaceWithSpaces(zerothSection, WikiRegexes.InfoBox.Matches(zerothSection))) || DfnTag.IsMatch(zerothSection))
                return articleTextAtStart;

            string articleTextNoInfobox = Tools.ReplaceWithSpaces(articleText, WikiRegexes.InfoBox.Matches(articleText));
            if (boldTitleAlready1.IsMatch(articleTextNoInfobox) || boldTitleAlready2.IsMatch(articleTextNoInfobox)
                || BoldTitleAlready3.IsMatch(articleTextNoInfobox))
                return articleTextAtStart;

            // so no self links to remove, check for the need to add bold
            string articleTextNoTemplates = WikiRegexes.NestedTemplates.Replace(articleText, "");

            // first quick check: ignore articles with some bold in first 5% of article, ignoring infoboxes, dablinks etc.
            int fivepc = articleTextNoTemplates.Length / 20;

            if (articleTextNoTemplates.Substring(0, fivepc).Contains("'''"))
                return articleTextAtStart;

            // the title (first letter as given or lower case) not directly preceded by "[" and followed by a space or one of ,.:;
            Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

            zerothSectionHidden = Hider3.HideMore(zerothSection);
            zerothSectionHiddenOriginal = zerothSectionHidden;

            // first try title with brackets removed
            // (only the first occurrence is emboldened: the replace count is 1)
            zerothSectionHidden = regexBoldNoBrackets.Replace(zerothSectionHidden, "$1'''$2'''$3", 1);

            zerothSection = Hider3.AddBackMore(zerothSectionHidden);

            articleText = zerothSection + restOfArticle;

            // check that the bold added is the first bit in bold in the main body of the article
            if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden) && AddedBoldIsValid(articleText, escTitleNoBrackets))
            {
                noChange = false;
                return articleText;
            }

            return articleTextAtStart;
        }
Exemplo n.º 10
0
        public void HideMore()
        {
            // a plain wikilink must come back fully hidden from HideMore and be restored intact by AddBackMore
            const string wikilink = "[[foo]]";

            Hider = new HideText(true, false, true);

            string hidden = Hider.HideMore(wikilink, false, true);
            RegexAssert.IsMatch(AllHidden, hidden);

            string restored = Hider.AddBackMore(hidden);
            Assert.AreEqual("[[foo]]", restored);
        }
Exemplo n.º 11
0
        // Checks Article.Unicodify: HTML entities in plain text are converted to characters,
        // entities inside a template call are left untouched, and text without entities is unchanged
        public void Unicodify()
        {
            Parsers Parser = new Parsers();
            HideText RemoveText = new HideText(false, true, false);

            // case 1: entities in plain article text
            Article a = new Article("a", @"'''test'''. z &amp; a&Dagger; &dagger;.

            {{DEFAULTSORT:Hello test}}
            [[Category:Test pages]]
            ");

            // NOTE(review): the meaning of the first argument (true) is not visible from here - confirm against Article.Unicodify
            a.Unicodify(true, Parser, RemoveText);

            // &amp; &Dagger; &dagger; are expected to become & ‡ †
            Assert.AreEqual(@"'''test'''. z & a‡ †.

            {{DEFAULTSORT:Hello test}}
            [[Category:Test pages]]
            ", a.ArticleText, "Text unicodified");

            // case 2: the same entities, but &Dagger; and &dagger; now sit inside a template call
            a = new Article("a", @"'''test'''. z &amp; {{t|a&Dagger; &dagger;}}.

            {{DEFAULTSORT:Hello test}}
            [[Category:Test pages]]
            ");

            a.Unicodify(true, Parser, RemoveText);

            // only the &amp; outside the template is expected to change
            Assert.AreEqual(@"'''test'''. z & {{t|a&Dagger; &dagger;}}.

            {{DEFAULTSORT:Hello test}}
            [[Category:Test pages]]
            ", a.ArticleText, "Text unicodified, hidemore used");

            // case 3: nothing to convert
            a = new Article("a", @"ABC");
            a.Unicodify(true, Parser, RemoveText);
            Assert.AreEqual(@"ABC", a.ArticleText, "No change");
        }
Exemplo n.º 12
0
 /// <summary>
 /// Sets the date (month and year) for undated cleanup tags that take a date.
 /// A matched tag that already has a non-empty "Date" parameter just gets that parameter renamed to "date".
 /// Avoids changing tags in unformatted text areas (wiki comments etc.)
 /// Note: bugzilla 2700 means {{ssubst}} within ref tags doesn't work, AWB doesn't do anything about it
 /// </summary>
 /// <param name="articleText">The wiki text of the article.</param>
 /// <returns>The updated article text</returns>
 public static string TagUpdater(string articleText)
 {
     HideText hider = new HideText();
     string text = hider.HideUnformatted(articleText);

     foreach (KeyValuePair<Regex, string> tagger in RegexTagger)
     {
         text = tagger.Key.Replace(text, m =>
         {
             // capitalised Date parameter already filled in: only fix the parameter name
             if (Tools.GetTemplateParameterValue(m.Value, "Date").Length > 0)
                 return Tools.RenameTemplateParameter(m.Value, "Date", "date");

             // otherwise use the tagger's replacement text with the first capture group filled in
             return tagger.Value.Replace("$1", m.Groups[1].Value);
         });
     }

     return hider.AddBackUnformatted(text);
 }
Exemplo n.º 13
0
        /// <summary>
        /// Applies the loaded typo groups to the article text.
        /// The text is returned as-is when no typos are loaded or when IgnoreRegex matches it.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="noChange">True when nothing was attempted, or when the typo groups left the working text unchanged</param>
        /// <param name="summary">"" when nothing was attempted, otherwise the typo summary tag followed by the trimmed fix summary</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>The article text after typo fixing</returns>
        public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
        {
            // kept so the typo groups can also see the text as it was before anything was hidden
            string textBeforeHiding = articleText;
            summary = "";

            bool skip = (TypoCount == 0) || IgnoreRegex.IsMatch(articleText);
            if (skip)
            {
                noChange = true;
                return articleText;
            }

            HideText hider = new HideText(true, false, true);
            string working = hider.HideMore(articleText, true);

            // cut off whatever RemoveTail matches at the bottom so the typo regexes are not run over it
            Match tailMatch = RemoveTail.Match(working);
            string tail = tailMatch.Value;

            if (!string.IsNullOrEmpty(tail))
            {
                working = working.Remove(tailMatch.Index);
            }

            string snapshot = working;
            string fixSummary = "";

            foreach (TypoGroup typoGroup in Groups)
            {
                typoGroup.FixTypos(ref working, ref fixSummary, articleTitle, textBeforeHiding);
            }

            noChange = snapshot.Equals(working);
            summary = Variables.TypoSummaryTag + fixSummary.Trim();

            // re-attach the tail, then restore the hidden parts
            string restored = hider.AddBackMore(working + tail);
            return restored;
        }
Exemplo n.º 14
0
        // Covered by: LinkTests.TestBulletExternalLinks()
        /// <summary>
        /// Adds bullet points to external links after the last "external links" (or "links") header.
        /// Only the text from that header onwards is changed; text without such a header is returned as-is.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <returns>The modified article text.</returns>
        public static string BulletExternalLinks(string ArticleText)
        {
            // right-to-left search, so this is the last such header in the article
            Match header = Regex.Match(ArticleText, @"=\s*(?:external)?\s*links\s*=", RegexOptions.IgnoreCase | RegexOptions.RightToLeft);

            if (!header.Success)
                return ArticleText;

            int splitAt = header.Index;
            string fromHeader = ArticleText.Substring(splitAt);
            string beforeHeader = ArticleText.Substring(0, splitAt);

            HideText hider = new HideText(false, true, false);
            fromHeader = hider.HideMore(fromHeader);

            // a line starting with http or [http gets "* " put in front of it; one extra preceding line break is dropped
            fromHeader = Regex.Replace(fromHeader, "(\r\n|\n)?(\r\n|\n)(\\[?http)", "$2* $3");

            fromHeader = hider.AddBackMore(fromHeader);

            return beforeHeader + fromHeader;
        }
Exemplo n.º 15
0
        /// <summary>
        /// Pulls the interwiki links and the interwiki featured article links out of the article text.
        /// Interwikis in comments/nowiki tags are ignored.
        /// </summary>
        /// <param name="articleText">The wiki text of the article; on return the extracted links have been removed from it</param>
        /// <returns>The featured article links followed by the interwiki links</returns>
        public string Interwikis(ref string articleText)
        {
            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Interwiki_links_moved_out_of_comment
            HideText hider = new HideText(false, true, false);
            articleText = hider.Hide(articleText);

            // featured article links are removed first, then the plain interwikis
            string extracted = ListToString(RemoveLinkFGAs(ref articleText));
            extracted = extracted + ListToString(RemoveInterWikis(ref articleText));

            articleText = hider.AddBack(articleText);

            return extracted;
        }
Exemplo n.º 16
0
        /// <summary>
        /// Checks that the bold just added to the article is the first bold in the article, and that it's within the first 5% of the HideMore article OR immediately after the infobox
        /// </summary>
        /// <param name="articleText">The wiki text of the article, with the bold already added</param>
        /// <param name="escapedTitle">The regex-escaped title that was emboldened</param>
        /// <returns>True when either check passes</returns>
        private bool AddedBoldIsValid(string articleText, string escapedTitle)
        {
            HideText hider = new HideText(true, true, true);
            string unhiddenText = articleText;

            // everything up to and including the first ''' + title, minus the title itself
            Regex addedBold = new Regex(@"^(.*?)'''" + escapedTitle, RegexOptions.Singleline);
            int titleLength = Regex.Unescape(escapedTitle).Length;
            int boldAddedPos = addedBold.Match(unhiddenText).Length - titleLength;

            int firstBoldPos = RegexFirstBold.Match(unhiddenText).Length;

            // first check: bold within the first 5% of the HideMore article, and nothing bold before the added one
            string hiddenText = hider.HideMore(unhiddenText);
            string firstFivePercent = hiddenText.Substring(0, hiddenText.Length / 20);
            bool inFirst5Percent = firstFivePercent.Contains("'''");

            if (inFirst5Percent && boldAddedPos <= firstBoldPos)
            {
                return true;
            }

            // second check: bold just after infobox, tested against the text as it was before hiding
            Regex boldAfterInfobox = new Regex(WikiRegexes.InfoBox + @"\s*'''" + escapedTitle);

            return boldAfterInfobox.IsMatch(unhiddenText);
        }
Exemplo n.º 17
0
 /// <summary>
 /// Sets the date (month and year) for undated cleanup tags that take a date,
 /// by applying each RegexTagger regex with its replacement text in turn.
 /// Avoids changing tags in unformatted text areas (wiki comments etc.)
 /// </summary>
 /// <param name="articleText">The wiki text of the article.</param>
 /// <returns>The updated article text</returns>
 public static string TagUpdater(string articleText)
 {
     HideText hider = new HideText();
     string text = hider.HideUnformatted(articleText);

     foreach (KeyValuePair<Regex, string> tagger in RegexTagger)
     {
         Regex pattern = tagger.Key;
         string replacement = tagger.Value;

         text = pattern.Replace(text, replacement);
     }

     // put the unformatted areas back
     return hider.AddBackUnformatted(text);
 }
Exemplo n.º 18
0
        // Covered by: BoldTitleTests
        /// <summary>
        /// '''Emboldens''' the first occurrence of the article title, if not already bold.
        /// Bolded self links are cleaned up first. A plain self wikilink in the zeroth section is then
        /// converted to bold; if that changes anything, the result is returned straight away.
        /// Otherwise the first suitable occurrence of the title is emboldened, provided
        /// AddedBoldIsValid accepts the result.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The title of the article.</param>
        /// <param name="noChange">Value that indicated whether no change was made. Only set to false when a self link was converted or bold was added.</param>
        /// <returns>The modified article text.</returns>
        public string BoldTitle(string articleText, string articleTitle, out bool noChange)
        {
            HideText Hider2 = new HideText();
            HideText Hider3 = new HideText(true, true, true);
            // clean up bolded self links first
            articleText = BoldedSelfLinks(articleTitle, articleText);

            noChange = true;
            string escTitle = Regex.Escape(articleTitle);
            string escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));

            // what every "nothing further to do" exit below returns; it already includes the bolded self link clean up
            // NOTE(review): noChange is still true at those exits even when that clean up altered the text - confirm callers expect that
            string articleTextAtStart = articleText;

            string zerothSection = WikiRegexes.ZerothSection.Match(articleText).Value;
            string restOfArticle = articleText.Remove(0, zerothSection.Length);

            // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
            string zerothSectionHidden = Hider2.HideMore(zerothSection, false, false, false);
            string zerothSectionHiddenOriginal = zerothSectionHidden;

            // first check for any self links and no bold title, if found just convert first link to bold and return
            Regex r1 = new Regex(@"\[\[\s*" + escTitle + @"\s*\]\]");
            Regex r2 = new Regex(@"\[\[\s*" + Tools.TurnFirstToLower(escTitle) + @"\s*\]\]");

            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
            // don't apply if bold in lead section already or some noinclude transclusion business
            // The transclusion guard covers both the title-case and the lower-case self link, so a
            // lower-case self link is no longer converted on pages using noinclude/includeonly
            if (!WikiRegexes.Noinclude.IsMatch(articleText) && !WikiRegexes.Includeonly.IsMatch(articleText))
            {
                // "$" is doubled because the title is inserted through a regex replacement pattern,
                // where a bare "$" would start a substitution ($1, $&, ...)
                if (!Regex.IsMatch(zerothSection, "'''" + escTitle + "'''"))
                    zerothSectionHidden = r1.Replace(zerothSectionHidden, "'''" + articleTitle.Replace("$", "$$") + @"'''");

                // only try the lower-case form when the title-case form changed nothing
                if (zerothSectionHiddenOriginal == zerothSectionHidden && !Regex.IsMatch(zerothSection, @"'''" + Tools.TurnFirstToLower(escTitle) + @"'''"))
                    zerothSectionHidden = r2.Replace(zerothSectionHidden, "'''" + Tools.TurnFirstToLower(articleTitle).Replace("$", "$$") + @"'''");
            }

            zerothSection = Hider2.AddBackMore(zerothSectionHidden);

            if (zerothSectionHiddenOriginal != zerothSectionHidden)
            {
                noChange = false;
                return (zerothSection + restOfArticle);
            }

            // ignore date articles (date in American or international format)
            if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle))
                return articleTextAtStart;

            Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
            Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

            //if title in bold already exists in article, or page starts with something in bold, don't change anything
            if (boldTitleAlready1.IsMatch(articleText) || boldTitleAlready2.IsMatch(articleText)
                || BoldTitleAlready3.IsMatch(articleText))
                return articleTextAtStart;

            // so no self links to remove, check for the need to add bold
            string articleTextHidden = Hider3.HideMore(articleText);

            // first quick check: ignore articles with some bold in first 5% of hidemore article
            int fivepc = articleTextHidden.Length / 20;

            if (articleTextHidden.Substring(0, fivepc).Contains("'''"))
                return articleTextAtStart;

            Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

            // nothing to embolden: stop here, so that an unchanged article is never reported as changed
            if (!regexBoldNoBrackets.IsMatch(articleTextHidden))
                return articleTextAtStart;

            // first try title with brackets removed (only the first occurrence is emboldened)
            articleTextHidden = regexBoldNoBrackets.Replace(articleTextHidden, "$1'''$2'''$3", 1);

            articleText = Hider3.AddBackMore(articleTextHidden);

            // check that the bold added is the first bit in bold in the main body of the article
            if (AddedBoldIsValid(articleText, escTitleNoBrackets))
            {
                noChange = false;
                return articleText;
            }

            return articleTextAtStart;
        }
Exemplo n.º 19
0
        public void HideImages()
        {
            // each row: wiki text whose image name ("foo") must not survive hiding, and the assertion message
            string[,] hiddenNameCases =
            {
                { @"[[File:foo.jpg]]", "Standard case" },
                { @"[[File:foo with space and 0004.jpg]]", "with space" },
                { @"[[File:foo.jpeg]]", "jpeg" },
                { @"[[File:foo.JPEG]]", "JPEG" },
                { @"[[Image:foo with space and 0004.jpeg]]", "space and jpeg" },
                { @"[[Image:foo.jpeg]]", "Image jpeg" },
                { @"[[Image:foo with space and 0004.jpg]]", "image jpeg space" },
                { @"[[File:foo.jpg|", "To pipe" },
                { @"[[File:foo with space and 0004.jpg|", "Space to pipe" },
                { @"[[File:foo.jpeg|", "Standard case" },
                { @"[[Image:foo with space and 0004.jpeg|", "Standard case" },
                { @"[[Image:foo.jpeg|", "Standard case" },
                { @"[[Image:foo with SPACE() and 0004.jpg|", "Standard case" },
                { @"[[File:foo.gif|", "Standard case" },
                { @"[[Image:foo with space and 0004.gif|", "Standard case" },
                { @"[[Image:foo.gif|", "Standard case" },
                { @"[[Image:foo with SPACE() and 0004.gif|", "Standard case" },
                { @"[[File:foo.png|", "Standard case" },
                { @"[[Image:foo with space and 0004.png|", "Standard case" },
                { @"[[Image:foo_here.png|", "Standard case" },
                { @"[[Image:foo with SPACE() and 0004.png|", "Standard case" }
            };

            for (int row = 0; row < hiddenNameCases.GetLength(0); row++)
            {
                Assert.IsFalse(Hide(hiddenNameCases[row, 0]).Contains("foo"), hiddenNameCases[row, 1]);
            }

            // image name containing dots
            Assert.IsFalse(Hide(@"[[Image:westminster.tube.station.jubilee.arp.jpg|").Contains("westminster.tube.station.jubilee.arp"), "Dot name");

            // the opening brackets and everything from the first image option onwards stay visible
            const string captioned = @"[[File:foo.jpg|thumb|140px|[[Jo]] Assistant [[Ge]]]]";
            Assert.IsTrue(Hide(captioned).StartsWith("[["), "Retain starting brackets");
            Assert.IsTrue(Hide(captioned).Contains(@"thumb|140px|[[Jo]] Assistant [[Ge]]]]"), "Retain ending brackets");

            AssertAllHidden(@"<imagemap>
            File:Blogs001.jpeg|Description
            File:Blogs002.jpeg|Description
            </imagemap>");

            // category sort keys
            const string categoryWithKey = @"[[Category:Foo|abc]]";
            Assert.IsFalse(HideMore(categoryWithKey, false).Contains("abc"), "Category sort key always hidden if hiding wikilinks and not leaving target");
            Assert.IsFalse(HideMore(categoryWithKey, true).Contains("abc"), "Category sort key hidden even if keeping targets");

            HideText keepWikilinks = new HideText(true, false, false);
            Assert.IsTrue(keepWikilinks.HideMore(categoryWithKey, false, false).Contains("abc"), "Category sort key kept if keeping wikilinks");
        }
Exemplo n.º 20
0
        //TODO:Needs re-write
        /// <summary>
        /// Adds and removes maintenance tags on an article where the checks below call for it:
        /// stub, expand, dead end, wikify, uncategorized / uncategorized stub, and
        /// unreferenced --> refimprove renaming. Orphan, ibid and empty-section tagging are
        /// delegated to TagOrphans, TagRefsIbid and TagEmptySection.
        /// Tags added/removed are recorded in tagsAdded / tagsRemoved (both cleared at the start of a run).
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The article title.</param>
        /// <param name="restrictOrphanTagging">Passed through unchanged to TagOrphans.</param>
        /// <param name="summary">Set to the result of PrepareTaggerEditSummary() when tagging runs;
        /// left untouched when the method returns early without tagging.</param>
        /// <returns>The tagged article, or the unmodified input text when the early-exit check applies.</returns>
        public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
        {
            // don't tag redirects/outside article namespace/no tagging changes
            if (!Namespace.IsMainSpace(articleTitle) || Tools.IsRedirect(articleText) || WikiRegexes.Wi.IsMatch(articleText))
                return articleText;

            tagsRemoved.Clear();
            tagsAdded.Clear();

            // working copies used for the checks below; articleText itself keeps comments and categories
            string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            // return value is discarded: only the by-ref change to commentsStripped is kept.
            // commentsCategoriesStripped was derived before this call, so it is not affected by it
            Sorter.Interwikis(ref commentsStripped);

            // bulleted or indented text should weigh less than simple text.
            // for example, actor stubs may contain large filmographies
            string crapStripped = WikiRegexes.BulletedText.Replace(commentsCategoriesStripped, "");
            // average of the word count with and without bulleted text (integer division)
            int words = (Tools.WordCount(commentsCategoriesStripped) + Tools.WordCount(crapStripped))/2;

            // remove stub tags from long articles
            if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();
                tagsRemoved.Add("stub");
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

            // on en wiki, remove expand template when a stub template exists
            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests/Archive_5#Remove_.7B.7Bexpand.7D.7D_when_a_stub_template_exists
            if (Variables.LangCode == "en" && WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                WikiRegexes.Expand.IsMatch(commentsCategoriesStripped))
            {
                articleText = WikiRegexes.Expand.Replace(articleText, "");
                tagsRemoved.Add("expand");
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            
            // do orphan tagging before template analysis for categorisation tags
            articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

            articleText = TagRefsIbid(articleText);

            articleText = TagEmptySection(articleText);

            int totalCategories;
            // counted on the snapshot taken at the last refresh, i.e. before the three Tag* calls above ran
            int linkCount = Tools.LinkCount(commentsStripped);

            // In DEBUG/UNITTEST builds the braced block below is the else-branch of the unit test check;
            // in other builds the #if section is compiled out and the braces form a plain block that always runs
            #if DEBUG || UNITTEST
            if (Globals.UnitTestMode)
            {
                totalCategories = Globals.UnitTestIntValue;
            }
            else
                #endif
            {
                // stubs add non-hidden stub categories, don't count these in categories count
                List<Article> Cats = CategoryProv.MakeList(new[] {articleTitle});
                List<Article> CatsNotStubs = new List<Article>();

                foreach (Article a in Cats)
                {
                    if (!a.Name.EndsWith(" stubs") && !a.Name.EndsWith(":Stubs"))
                        CatsNotStubs.Add(a);
                }
                totalCategories = CatsNotStubs.Count;
            }

            // article has links, so an existing dead end tag is passed through SectionTagME;
            // the removal is only reported when no dead end tag is left afterwards
            if (linkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText))
            {
                articleText = WikiRegexes.DeadEnd.Replace(articleText, new MatchEvaluator(SectionTagME));
                
                if(!WikiRegexes.DeadEnd.IsMatch(articleText))
                    tagsRemoved.Add("deadend");
            }

            // discount persondata along with comments and categories from wikify and stub evaluation
            int length = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "").Length + 1;
            // underlinked: fewer than 0.0025 links per character, i.e. less than one link per 400 characters
            bool underlinked = (linkCount < 0.0025*length);

            if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped))
            {
                // add stub tag
                articleText += Tools.Newline("{{stub}}", 3);
                tagsAdded.Add("stub");
                // refresh so the Stub checks on commentsStripped further down see the tag just added
                commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            }

            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
            // nl wiki doesn't use {{Uncategorized}} template
            // prevent wictionary redirects from being tagged as uncategorised
            if (words > 6 && totalCategories == 0
                && !WikiRegexes.Uncat.IsMatch(articleText)
                && Variables.LangCode != "nl"
                && !Tools.NestedTemplateRegex("cat improve").IsMatch(articleText)
                // category count is from API; don't add uncat tag if genfixes added person categories
                && !WikiRegexes.DeathsOrLivingCategory.IsMatch(articleText)
                && !WikiRegexes.BirthsCategory.IsMatch(articleText))
            {
                if (WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // add uncategorized stub tag
                    articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter +
                        @"}}";
                    tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
                }
                else
                {
                    // add uncategorized tag
                    articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                    tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
                }
            }

            // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
            // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
            if (WikiRegexes.Uncat.IsMatch(articleText))
            {
                if (totalCategories > 0)
                {
                    articleText = WikiRegexes.Uncat.Replace(articleText, "");
                    tagsRemoved.Add("uncategorised");
                }
                else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // template name is taken from capture group 1 of the first Uncat match;
                    // the rename is not recorded in tagsAdded/tagsRemoved
                    string uncatname = WikiRegexes.Uncat.Match(articleText).Groups[1].Value;
                    if (!uncatname.Contains("stub"))
                        articleText = Tools.RenameTemplate(articleText, uncatname, "Uncategorized stub");
                }
            }

            // no links at all: tag as dead end unless already tagged (directly or within multiple issues); never on sv wiki
            if (linkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && Variables.LangCode != "sv"
                && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower(), @"\bdead ?end\b"))
            {
                // add dead-end tag
                articleText = "{{dead end|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                tagsAdded.Add("[[:Category:Dead-end pages|deadend]]");
            }

            // note: linkCount == 3 satisfies neither branch below, so wikify tags are neither added nor removed then
            if (linkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
                && !WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower().Contains("wikify"))
            {
                // add wikify tag
                articleText = "{{Wikify|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                tagsAdded.Add("[[WP:WFY|wikify]]");
            }
            else if (linkCount > 3 && !underlinked &&
                     WikiRegexes.Wikify.IsMatch(articleText))
            {
                // existing wikify tag is passed through SectionTagME; removal only reported when none is left
                articleText = WikiRegexes.Wikify.Replace(articleText, new MatchEvaluator(SectionTagME));
                
                if(!WikiRegexes.Wikify.IsMatch(articleText))
                    tagsRemoved.Add("wikify");
            }

            // rename unreferenced --> refimprove if has existing refs
            if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
                && WikiRegexes.Refs.Matches(commentsCategoriesStripped).Count > 0)
            {
                articleText = Tools.RenameTemplate(articleText, "unreferenced", "refimprove", true);
                
                // also rename the parameter inside a multiple issues template, if there is one
                Match m = WikiRegexes.MultipleIssues.Match(articleText);
                if(m.Success)
                {
                    string newValue = Tools.RenameTemplateParameter(m.Value,  "unreferenced", "refimprove");
                    if(!newValue.Equals(m.Value))
                        articleText = articleText.Replace(m.Value, newValue);
                }
            }

            // clean-up pass only when a tag was added or removed; the unreferenced rename above does not trigger it on its own
            if (tagsAdded.Count > 0 || tagsRemoved.Count > 0)
            {
                Parsers p = new Parsers();
                HideText ht = new HideText();

                // MultipleIssues and Conversions run on the text between HideUnformatted and AddBackUnformatted
                articleText = ht.HideUnformatted(articleText);

                articleText = p.MultipleIssues(articleText);
                articleText = Conversions(articleText);
                articleText = ht.AddBackUnformatted(articleText);

                // sort again in case tag removal requires whitespace cleanup
                articleText = p.Sorter.Sort(articleText, articleTitle);
            }

            summary = PrepareTaggerEditSummary();

            return articleText;
        }
Exemplo n.º 21
0
        /// <summary>
        /// Pulls the interwiki links and the interwiki featured article links out of the article text
        /// and hands them back as one string. The text is run through a HideText instance while the links
        /// are collected, so interwikis in comments/nowiki tags are ignored.
        /// </summary>
        /// <param name="articleText">Article text; on return the interwiki comment, the interwiki featured
        /// article links and the interwiki links have been removed from it</param>
        /// <returns>The interwiki featured article links, then the interwiki comment (if one was found)
        /// followed by a line break, then the interwiki links</returns>
        public string Interwikis(ref string articleText)
        {
            // lift the interwiki comment out first; it is re-emitted between the two groups of links
            string langComment = "";
            var commentMatch = InterLangRegex.Match(articleText);

            if (commentMatch.Success)
            {
                langComment = commentMatch.Value;
                articleText = articleText.Replace(langComment, "");
            }

            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Interwiki_links_moved_out_of_comment
            HideText concealer = new HideText(false, true, false);
            articleText = concealer.Hide(articleText);

            // both removals modify articleText by ref, so their order must stay: featured article links first
            string featuredLinks = ListToString(RemoveLinkFGAs(ref articleText));
            string commentLine = langComment.Length > 0 ? langComment + "\r\n" : "";
            string languageLinks = ListToString(RemoveInterWikis(ref articleText));

            articleText = concealer.AddBack(articleText);

            return featuredLinks + commentLine + languageLinks;
        }
Exemplo n.º 22
0
        //TODO:Needs re-write
        /// <summary>
        /// If necessary, adds/removes wikify or stub tag
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The article title.</param>
        /// <param name="restrictOrphanTagging"></param>
        /// <param name="summary"></param>
        /// <returns>The tagged article.</returns>
        public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
        {
 			if(!TaggerPermitted(articleText, articleTitle))
 				return articleText;

            tagsRemoved.Clear();
            tagsAdded.Clear();
            int tagsrenamed = 0;

            string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            Sorter.Interwikis(ref commentsStripped);

            // bulleted or indented text should weigh less than simple text.
            // for example, actor stubs may contain large filmographies
            string crapStripped = BulletedText.Replace(commentsCategoriesStripped, "");
            int words = (Tools.WordCount(commentsCategoriesStripped, 999) + Tools.WordCount(crapStripped, 999)) / 2;

            // remove stub tags from long articles, don't move section stubs
            if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();

                if (Variables.LangCode.Equals("ar"))
                {
                    tagsRemoved.Add("بذرة");
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    tagsRemoved.Add("تقاوى");
                }
                else if (Variables.LangCode.Equals("hy"))
                {
                    tagsRemoved.Add("Անավարտ");
                }
                else
                {
                    tagsRemoved.Add("stub");
                }
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

            //remove disambiguation if disambiguation cleanup exists (en-wiki only)
            if (Variables.LangCode.Equals("en") && WikiRegexes.DisambigsCleanup.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.DisambigsGeneral.Replace(articleText, "").Trim();
            }

            // do orphan tagging before template analysis for categorisation tags
            articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

            articleText = TagRefsIbid(articleText);

            articleText = TagEmptySection(articleText);

            int totalCategories;
            // ignore commented out wikilinks, and any in {{Proposed deletion/dated}}
            int wikiLinkCount = Tools.LinkCount(ProposedDeletionDatedEndorsed.Replace(commentsStripped, ""));

            #if DEBUG || UNITTEST
            if (Globals.UnitTestMode)
            {
                totalCategories = Globals.UnitTestIntValue;
            }
            else
                #endif
            {
                // stubs add non-hidden stub categories, don't count these in categories count
                // also don't count "Proposed deletion..." cats
                // limitation: in the unlikely event that the article has only redlinked cats then it is {{uncat}} but we won't tag it as such
                totalCategories = RegularCategories(articleText).Count;

                // templates may add categories to page that are not [[Category...]] links, so use API call for accurate Category count
                if(totalCategories == 0)
                    totalCategories = RegularCategories(CategoryProv.MakeList(new[] { articleTitle })).Count;
            }

            // remove dead end if > 0 wikilinks on page
            if (wikiLinkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, "");
                else
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                if (!WikiRegexes.DeadEnd.IsMatch(articleText))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else
                    {
                        tagsRemoved.Add("deadend");
                    }
                }
            }

            // discount persondata, comments, infoboxes and categories from wikify/underlinked and stub evaluation
            string lengthtext = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "");
            lengthtext = WikiRegexes.InfoBox.Replace(lengthtext, "");
            lengthtext = Drugbox.Replace(lengthtext, "");

            int length = lengthtext.Length + 1;
            bool underlinked = (wikiLinkCount < 0.0025 * length);

            if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(commentsCategoriesStripped))
            {
                // add stub tag. Exclude pages their title starts with "List of..."
                if (!ListOf.IsMatch(articleTitle) && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة}}", 3);
                        tagsAdded.Add("بذرة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى}}", 3);
                        tagsAdded.Add("تقاوى");
                    }
                    else if (Variables.LangCode.Equals("hy"))
                    {
                        articleText += Tools.Newline("{{Անավարտ}}", 3);
                        tagsAdded.Add("Անավարտ");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{stub}}", 3);
                        tagsAdded.Add("stub");
                    }
                    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
                }
            }

            // rename existing {{improve categories}} else add uncategorized tag
            if (totalCategories == 0 && ImproveCategories.IsMatch(articleText))
                articleText = Tools.RenameTemplate(articleText, "improve categories", "Uncategorized");

            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
            // nl wiki doesn't use {{Uncategorized}} template
            // prevent wictionary redirects from being tagged as uncategorised
            if (words > 6 && totalCategories == 0
                && !WikiRegexes.Uncat.IsMatch(articleText)
                && Variables.LangCode != "nl")
            {
                if (WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // add uncategorized stub tag
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[تصنيف:مقالات غير مصنفة|غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[قالب:تقاوى مش متصنفه|تقاوى مش متصنفه]]");
                    }
                    else if(Variables.LangCode.Equals("hy")) // same template for uncat and uncat stub
                    {
                        articleText += Tools.Newline("{{Կատեգորիա չկա|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("Կատեգորիա չկա");
                    }
                    else if(Variables.LangCode.Equals("sv")) // same template for uncat and uncat stub
                    {
                        articleText += Tools.Newline("{{Okategoriserad|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[Mall:Okategoriserad|okategoriserad]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
                    }
                }
                else
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مقالات غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مش متصنفه]]");
                    }
                    else if(Variables.LangCode.Equals("el"))
                    {
                        articleText += Tools.Newline("{{Ακατηγοριοποίητο|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[Πρότυπο:Ακατηγοριοποίητο|ακατηγοριοποίητο]]");
                    }
                    else if(Variables.LangCode.Equals("hy"))
                    {
                        articleText += Tools.Newline("{{Կատեգորիա չկա|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("Կատեգորիա չկա");
                    }
                    else if(Variables.LangCode.Equals("sv"))
                    {
                        articleText += Tools.Newline("{{Okategoriserad|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[Mall:Okategoriserad|okategoriserad]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
                    }
                }
            }

            // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
            // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
            if (WikiRegexes.Uncat.IsMatch(articleText))
            {
                if (totalCategories > 0)
                {
                    articleText = WikiRegexes.Uncat.Replace(articleText, "").TrimStart();
                    if (Variables.LangCode.Equals("ar"))
                        tagsRemoved.Add("غير مصنفة");
                    else if (Variables.LangCode.Equals("arz"))
                        tagsRemoved.Add("مش متصنفه");
                    else
                        tagsRemoved.Add("uncategorised");
                    
                }
                else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                   // rename uncat to uncat stub if no uncat stub. If uncat and uncat stub, remove uncat.
                    bool uncatstub = false;
                    foreach(Match u in WikiRegexes.Uncat.Matches(articleText))
                    {
                        if(WikiRegexes.Stub.IsMatch(u.Value))
                        {
                            uncatstub = true;
                            break;
                        }
                    }

                    articleText = WikiRegexes.Uncat.Replace(articleText, u2 => {
                                                                if (!uncatstub) // rename
                                                                {
                                                                    tagsrenamed++;
                                                                    if (Variables.LangCode.Equals("ar"))
                                                                        return Tools.RenameTemplate(u2.Value, "بذرة غير مصنفة");
                                                                    else if (Variables.LangCode.Equals("arz"))
                                                                        return Tools.RenameTemplate(u2.Value, "تقاوى مش متصنفه");
                                                                    else if (Variables.LangCode.Equals("en") || Variables.LangCode.Equals("simple"))
                                                                        return Tools.RenameTemplate(u2.Value, "Uncategorized stub");
                                                                }
                                                                else // already uncat stub so remove plain uncat
                                                                {
                                                                    if(!WikiRegexes.Stub.IsMatch(u2.Value))
                                                                    {
                                                                        if (Variables.LangCode.Equals("ar"))
                                                                            tagsRemoved.Add("غير مصنفة");
                                                                        else if (Variables.LangCode.Equals("arz"))
                                                                            tagsRemoved.Add("مش متصنفه");
                                                                        else
                                                                            tagsRemoved.Add("uncategorised");
                                                                        return "";
                                                                    }
                                                                }
                                                                return u2.Value;
                                                            });
                }
            }

            if (wikiLinkCount == 0 &&
                !WikiRegexes.DeadEnd.IsMatch(articleText) &&
                !WikiRegexes.SIAs.IsMatch(articleText) &&
                !WikiRegexes.NonDeadEndPageTemplates.IsMatch(articleText) &&
                !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle)
               )
            {
                // add dead-end tag
                // no blank line between dead end and orphan tags for ar/arz
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{نهاية مسدودة|" + WikiRegexes.DateYearMonthParameter + "}}\r\n" + (WikiRegexes.Orphan.IsMatch(articleText) ? "" : "\r\n") + articleText;
                    tagsAdded.Add("[[:تصنيف:مقالات نهاية مسدودة|نهاية مسدودة]]");
                    // if dead end then remove underlinked/wikify
                    if(WikiRegexes.Wikify.IsMatch(articleText))
                    {
                        articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكي");
                    }
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{نهايه مسدوده|" + WikiRegexes.DateYearMonthParameter + "}}\r\n" + articleText;
                    tagsAdded.Add("[[:قالب:نهايه مسدوده|نهايه مسدوده]]");
                    // if dead end then remove underlinked
                    if(WikiRegexes.Wikify.IsMatch(articleText))
                    {
                        articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكى");
                    }
                }
                else if (Variables.LangCode != "sv" && !WikiRegexes.Centuryinbox.IsMatch(articleText)
                         && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower(), @"\bdead ?end\b")
                         && !MinorPlanetListFooter.IsMatch(articleText))
                {
                    // Don't add excess newlines between new tags
                    articleText = "{{Dead end|" + WikiRegexes.DateYearMonthParameter + "}}" + (tagsAdded.Count > 0 ? "\r\n" : "\r\n\r\n") + articleText;
                    tagsAdded.Add("[[CAT:DE|deadend]]");
                    // if dead end then remove underlinked
                    if(articleText.IndexOf("underlinked", StringComparison.OrdinalIgnoreCase) > -1)
                    {
                        articleText = Tools.NestedTemplateRegex("underlinked").Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("underlinked");
                    }
                }
            }
            // add underlinked/wikify tag, don't add underlinked/wikify if {{dead end}} already present
            // Dont' tag SIA pages, may create wikilinks from templates
            else if (wikiLinkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
                     && !WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower().Contains("wikify")
                     && !WikiRegexes.DeadEnd.IsMatch(articleText)
                     && !WikiRegexes.SIAs.IsMatch(articleText)
                     && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(articleText)
                     && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
            {
                // Avoid excess newlines between templates
                string templateEnd = "}}\r\n" + (articleText.TrimStart().StartsWith(@"{{") ? "" : "\r\n");
                
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{ويكي|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText.TrimStart();
                    tagsAdded.Add("[[وب:ويكي|ويكي]]");
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{ويكى|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[قالب:ويكى|ويكى]]");
                }
                else if (Variables.LangCode.Equals("sv"))
                {
                    articleText = "{{Ickewiki|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[WP:PW|ickewiki]]");
                }
                else
                {
                    articleText = "{{Underlinked|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[CAT:UL|underlinked]]");
                }
            }
            else if (wikiLinkCount > 3 && !underlinked &&
                     WikiRegexes.Wikify.IsMatch(articleText))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.Wikify.Replace(articleText, "");
                else
                    // remove wikify, except section templates or wikify tags with reason parameter specified
                    articleText = WikiRegexes.Wikify.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                if (!WikiRegexes.Wikify.IsMatch(articleText))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsRemoved.Add("ويكي");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        tagsRemoved.Add("ويكى");
                    }
                    else
                    {
                        tagsRemoved.Add("underlinked");
                    }
                }
            }

            // rename unreferenced --> refimprove if has existing refs, update date
            if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
                && (TotalRefsNotGrouped(commentsCategoriesStripped) + Tools.NestedTemplateRegex("sfn").Matches(articleText).Count) > 0)
            {
                articleText = Unreferenced.Replace(articleText, m2 => Tools.UpdateTemplateParameterValue(Tools.RenameTemplate(m2.Value, "refimprove"), "date", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}"));

                // update tag in old-style multiple issues
                Match m = WikiRegexes.MultipleIssues.Match(articleText);
                if (m.Success && Tools.GetTemplateParameterValue(m.Value, "unreferenced").Length > 0)
                {
                    string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "refimprove", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}");
                    if (!newValue.Equals(m.Value))
                        articleText = articleText.Replace(m.Value, newValue);
                }
            }

            if (tagsAdded.Count > 0 || tagsRemoved.Count > 0 || tagsrenamed > 0)
            {
                Parsers p = new Parsers();
                HideText hider = new HideText();

                articleText = hider.HideUnformatted(articleText);

                articleText = p.MultipleIssues(articleText);
                articleText = Conversions(articleText);
                articleText = hider.AddBackUnformatted(articleText);

                // sort again in case tag removal requires whitespace cleanup
                // Don't sort interwikis, we can't specify the correct InterWikiSortOrder
                p.SortInterwikis = false;
                articleText = p.Sorter.Sort(articleText, articleTitle);
            }

            summary = PrepareTaggerEditSummary();

            return articleText;
        }
Exemplo n.º 23
0
        /// <summary>
        /// Performs typo fixes against the article text in multi-threaded mode (one thread per typo group).
        /// Typo fixes not performed if no typos loaded or the text matches IgnoreRegex (any sic tags on page).
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="noChange">True if no typos fixed</param>
        /// <param name="summary">Edit summary: Variables.TypoSummaryTag followed by the trimmed fix summary; empty string when fixes are skipped</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>Updated article text</returns>
        public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
        {
            string originalArticleText = articleText;

            summary = "";
            if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
            {
                noChange = true;
                return articleText;
            }

            HideText removeText = new HideText(true, false, true);

            articleText = removeText.HideMore(articleText, true);

            // remove newlines, whitespace and hide tokens from bottom
            // to avoid running 2K regexps on them; the tail is re-appended before un-hiding
            Match m = RemoveTail.Match(articleText);
            string tail = m.Value;

            if (!string.IsNullOrEmpty(tail))
            {
                articleText = articleText.Remove(m.Index);
            }

            string originalText = articleText;
            string strSummary = "";

            /* Run typos threaded, one thread per group for better performance
             * http://stackoverflow.com/questions/13776846/pass-paramters-through-parameterizedthreadstart
             * http://www.dotnetperls.com/parameterizedthreadstart
             * http://stackoverflow.com/questions/831009/thread-with-multiple-parameters */
            resultSummary.Clear();
            resultArticleText.Clear();

            Thread[] workers = new Thread[Groups.Count];
            int i = 0;

            foreach (TypoGroup tg in Groups)
            {
                // Copy the iteration variable: compilers older than C# 5 share one foreach
                // variable across iterations, so a closure over 'tg' could run the wrong group.
                TypoGroup group = tg;

                workers[i] = new Thread(delegate(object unused) { group.FixTypos2(articleText, strSummary, articleTitle, originalArticleText); });
                workers[i].Start(i);
                i++;
            }

            // Join all the threads: wait for all to complete
            foreach (Thread worker in workers)
            {
                worker.Join();
            }

            // Merge per-group results (looked up by GroupSize) in group order
            foreach (TypoGroup tg in Groups)
            {
                string groupSummary = null, groupArticleText = null;

                bool haveResult = resultSummary.TryGetValue(tg.GroupSize, out groupSummary)
                                  && resultArticleText.TryGetValue(tg.GroupSize, out groupArticleText)
                                  && groupSummary != null
                                  && groupArticleText != null;

                if (!haveResult)
                {
                    // No stored result for this group: previously this dereferenced null.
                    // Run the group sequentially on the current text instead.
                    tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
                    continue;
                }

                if (groupSummary.Length > 0)
                {
                    if (strSummary.Length > 0)
                    {
                        // earlier thread had changes, so need to re-run this one
                        tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
                    }
                    else
                    {
                        strSummary = groupSummary;
                        articleText = groupArticleText;
                    }
                }
            }

            noChange = originalText.Equals(articleText);

            summary = Variables.TypoSummaryTag + strSummary.Trim();

            return removeText.AddBackMore(articleText + tail);
        }
 private string HideMore(string text, bool HideOnlyTargetOfWikilink)
 {
     // Replace the shared Hider with a default-constructed instance, then hide with it
     HideText freshHider = new HideText();
     Hider = freshHider;

     string hidden = freshHider.HideMore(text, HideOnlyTargetOfWikilink);
     return hidden;
 }
Exemplo n.º 25
0
        // Covered by: LinkTests.FixDates()
        /// <summary>
        /// Fixes date and decade formatting errors, then tidies repeated line breaks,
        /// all while the text is protected by HideMore.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <returns>The modified article text.</returns>
        public string FixDates(string ArticleText)
        {
            HideText hider = new HideText();
            string working = hider.HideMore(ArticleText);

            working = FixDatesRaw(working);

            // Remove 2 or more <br />'s.
            // This piece's existence here is counter-intuitive, but it requires HideMore()
            // and I don't want to call this slow function yet another time --MaxSem
            working = SyntaxRemoveBr.Replace(working, "\r\n");
            working = SyntaxRemoveParagraphs.Replace(working, "\r\n\r\n");

            return hider.AddBackMore(working);
        }
 private string Hide(string text, bool HideExternalLinks, bool LeaveMetaHeadings, bool HideImages)
 {
     // Build a hider with the requested options and keep it in the shared Hider member
     HideText configured = new HideText(HideExternalLinks, LeaveMetaHeadings, HideImages);
     Hider = configured;

     string hidden = configured.Hide(text);
     return hidden;
 }
Exemplo n.º 27
0
 public void PerformUniversalGeneralFixes()
 {
     HideText hider = new HideText();
     MockSkipOptions skipOptions = new MockSkipOptions();

     // Input has a leading space; the expected output below does not
     Article article = new Article("Hello", " '''Hello''' world text");

     article.PerformUniversalGeneralFixes();
     article.PerformGeneralFixes(parser, hider, skipOptions, false, false, false);

     string actual = article.ArticleText;
     Assert.AreEqual("'''Hello''' world text", actual);
 }
Exemplo n.º 28
0
        /// <summary>
        /// Runs every typo group over the article text, one after another.
        /// Nothing is done when no typos are loaded or the text matches IgnoreRegex.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <param name="NoChange">True if the (hidden) text was left unchanged by the typo groups</param>
        /// <param name="Summary">Edit summary; empty when no fix produced a summary</param>
        /// <returns>Updated article text</returns>
        public string PerformTypoFixes(string ArticleText, out bool NoChange, out string Summary)
        {
            Summary = "";

            // Skip entirely: no typos available, or the page asks to be ignored
            if (TyposCount == 0 || IgnoreRegex.IsMatch(ArticleText))
            {
                NoChange = true;
                return ArticleText;
            }

            HideText hider = new HideText(true, false, true);
            ArticleText = hider.HideMore(ArticleText);

            // remove newlines, whitespace and hide tokens from bottom
            // to avoid running 2K regexps on them
            Match tailMatch = RemoveTail.Match(ArticleText);
            string trailing = tailMatch.Value;

            if (trailing.Length > 0)
            {
                ArticleText = ArticleText.Remove(tailMatch.Index);
            }

            string beforeFixes = ArticleText;
            string fixSummary = "";

            foreach (TypoGroup typoGroup in Groups)
            {
                typoGroup.FixTypos(ref ArticleText, ref fixSummary);
            }

            NoChange = string.Equals(beforeFixes, ArticleText);

            // Re-attach the stripped tail before restoring the hidden content
            ArticleText = hider.AddBackMore(ArticleText + trailing);

            if (!string.IsNullOrEmpty(fixSummary))
            {
                Summary = Variables.TypoSummaryTag + fixSummary.Trim();
            }

            return ArticleText;
        }
 private string Hide(string text)
 {
     // Note: despite the name, this helper calls HideMore on a default-constructed hider
     HideText fresh = new HideText();
     hider = fresh;

     string hidden = fresh.HideMore(text);
     return hidden;
 }
 private string HideMore(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
 {
     // Build a hider with the requested options and keep it in the shared Hider member
     HideText configured = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);
     Hider = configured;

     string hidden = configured.HideMore(text);
     return hidden;
 }
Exemplo n.º 31
0
 private void AssertBothHidden(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
 {
     // Install a hider with the requested options before running both checks
     // (presumably the assertion helpers read Hider - confirm against their definitions)
     HideText configured = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);
     Hider = configured;

     AssertAllHidden(text);
     AssertAllHiddenMore(text);
 }
Exemplo n.º 32
0
        // Covered by: BoldTitleTests
        /// <summary>
        /// '''Emboldens''' the first occurrence of the article title, if not already bold
        /// 1) Cleans up bolded self wikilinks
        /// 2) Cleans up self wikilinks
        /// 3) '''Emboldens''' the first occurrence of the article title
        /// Returns the input untouched when the article uses a NoBoldTitle template.
        /// Steps 2 and 3 operate on the zeroth section only (as returned by Tools.GetZerothSection).
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The title of the article.</param>
        /// <param name="noChange">Value that indicated whether no change was made: set to false only when step 2 or step 3 changed the text.</param>
        /// <returns>The modified article text.</returns>
        public string BoldTitle(string articleText, string articleTitle, out bool noChange)
        {
            noChange = true;
            List <string> alltemplates = GetAllTemplates(articleText);

            // a NoBoldTitle template on the page disables this fix entirely
            if (TemplateExists(alltemplates, NoBoldTitle))
            {
                return(articleText);
            }

            // Hider2: default options, used for the self-link clean up (step 2)
            // Hider3: all three constructor options enabled, used when adding bold (step 3)
            HideText Hider2 = new HideText(), Hider3 = new HideText(true, true, true);

            // 1) clean up bolded self links first, provided no noinclude use in article
            string afterSelfLinks = BoldedSelfLinks(articleTitle, articleText);

            if (!afterSelfLinks.Equals(articleText) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
            {
                articleText = afterSelfLinks;
            }

            // 2) Clean up self wikilinks
            // articleTextAtStart is taken AFTER step 1, so the "no further change" returns below still carry step 1's edit.
            // NOTE(review): noChange is not set to false when only step 1 changed the text - confirm this is intended.
            string articleTextAtStart = articleText, zerothSection = Tools.GetZerothSection(articleText);
            string restOfArticle = articleText.Substring(zerothSection.Length);
            string zerothSectionHidden, zerothSectionHiddenOriginal;

            // first check for any self links and no bold title, if found just convert first link to bold and return
            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
            // don't apply if bold in lead section already or some noinclude transclusion business
            if (!SelfLinks(zerothSection, articleTitle).Equals(zerothSection) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
            {
                // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
                zerothSectionHidden         = Hider2.HideMore(zerothSection, false, false, false);
                zerothSectionHiddenOriginal = zerothSectionHidden;
                zerothSectionHidden         = SelfLinks(zerothSectionHidden, articleTitle);
                zerothSection = Hider2.AddBackMore(zerothSectionHidden);

                // only report a change if SelfLinks altered the hidden text; otherwise fall through to step 3
                if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden))
                {
                    noChange = false;
                    return(zerothSection + restOfArticle);
                }
            }

            // Performance check: if article title not in zeroth section have nothing further to do
            // (the title is compared case-insensitively, after BracketedAtEndOfLine matches are removed from it)
            if (zerothSection.IndexOf(BracketedAtEndOfLine.Replace(articleTitle, ""), StringComparison.OrdinalIgnoreCase) < 0)
            {
                return(articleTextAtStart);
            }

            // 3) '''Emboldens''' the first occurrence of the article title

            // ignore date articles (date in American or international format), nihongo title
            if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle) ||
                TemplateExists(alltemplates, NihongoTitle))
            {
                return(articleTextAtStart);
            }

            // regexes matching the title (with or without its bracketed suffix, first letter as given or lowercased) already wrapped in '''
            string escTitle = Regex.Escape(articleTitle), escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));
            Regex  boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
            Regex  boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

            // if title in bold already exists in article, or paragraph starts with something in bold, don't change anything
            // ignore any bold in infoboxes
            // (infobox matches are blanked with spaces before testing; a DfnTag match in the zeroth section also stops processing)
            if (BoldTitleAlready4.IsMatch(Tools.ReplaceWithSpaces(zerothSection, WikiRegexes.InfoBox.Matches(zerothSection))) || DfnTag.IsMatch(zerothSection))
            {
                return(articleTextAtStart);
            }

            // same infobox blanking, this time over the whole article
            string articleTextNoInfobox = Tools.ReplaceWithSpaces(articleText, WikiRegexes.InfoBox.Matches(articleText));

            if (boldTitleAlready1.IsMatch(articleTextNoInfobox) || boldTitleAlready2.IsMatch(articleTextNoInfobox) ||
                BoldTitleAlready3.IsMatch(articleTextNoInfobox))
            {
                return(articleTextAtStart);
            }

            // so no self links to remove, check for the need to add bold
            string articleTextNoTemplates = WikiRegexes.NestedTemplates.Replace(articleText, "");

            // first quick check: ignore articles with some bold in first 5% of article, ignoring infoboxes, dablinks etc.
            int fivepc = articleTextNoTemplates.Length / 20;

            if (articleTextNoTemplates.Substring(0, fivepc).Contains("'''"))
            {
                return(articleTextAtStart);
            }

            // title (brackets removed) not directly preceded by '[' and followed by a space or one of , . : ;
            Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

            zerothSectionHidden         = Hider3.HideMore(zerothSection);
            zerothSectionHiddenOriginal = zerothSectionHidden;

            // first try title with brackets removed
            // (the final argument 1 limits the replacement to the first match)
            zerothSectionHidden = regexBoldNoBrackets.Replace(zerothSectionHidden, "$1'''$2'''$3", 1);

            zerothSection = Hider3.AddBackMore(zerothSectionHidden);

            articleText = zerothSection + restOfArticle;

            // check that the bold added is the first bit in bold in the main body of the article
            if (!zerothSectionHiddenOriginal.Equals(zerothSectionHidden) && AddedBoldIsValid(articleText, escTitleNoBrackets))
            {
                noChange = false;
                return(articleText);
            }

            // bold was not added, or was added somewhere AddedBoldIsValid rejects: discard it
            return(articleTextAtStart);
        }
Exemplo n.º 33
0
        /// <summary>
        /// Checks that image/file links (complete, or cut off at the first pipe) and imagemap
        /// content are hidden, and how category sort keys are treated by HideMore.
        /// </summary>
        public void HideImages()
        {
            // Inputs expected to be entirely hidden, checked in this order
            string[] allHiddenInputs = new string[]
            {
                @"[[File:foo.jpg]]",
                @"[[File:foo with space and 0004.jpg]]",
                @"[[File:foo.jpeg]]",
                @"[[File:foo.JPEG]]",
                @"[[Image:foo with space and 0004.jpeg]]",
                @"[[Image:foo.jpeg]]",
                @"[[Image:foo with space and 0004.jpg]]",
                @"[[File:foo.jpg|",
                @"[[File:foo with space and 0004.jpg|",
                @"[[File:foo.jpeg|",
                @"[[Image:foo with space and 0004.jpeg|",
                @"[[Image:foo.jpeg|",
                @"[[Image:foo with SPACE() and 0004.jpg|",
                @"[[File:foo.gif|",
                @"[[Image:foo with space and 0004.gif|",
                @"[[Image:foo.gif|",
                @"[[Image:foo with SPACE() and 0004.gif|",
                @"[[File:foo.png|",
                @"[[Image:foo with space and 0004.png|",
                @"[[Image:foo_here.png|",
                @"[[Image:foo with SPACE() and 0004.png|",
                @"[[Image:westminster.tube.station.jubilee.arp.jpg|",
                @"<imagemap>
            File:Blogs001.jpeg|Description
            File:Blogs002.jpeg|Description
            </imagemap>"
            };

            foreach (string input in allHiddenInputs)
            {
                AssertAllHidden(input);
            }

            // Inputs checked through AssertBothHidden
            string[] bothHiddenInputs = new string[]
            {
                @"[[File:foo.jpg]]",
                @"[[Image:foo with space and 0004.png|",
                @"[[Image:foo_here.png|"
            };

            foreach (string input in bothHiddenInputs)
            {
                AssertBothHidden(input);
            }

            // Category sort keys
            string hiddenWholeLink = HideMore(@"[[Category:Foo|abc]]", false);
            Assert.IsFalse(hiddenWholeLink.Contains("abc"), "Category sort key always hidden if hiding wikilinks and not leaving target");

            string hiddenTargetOnly = HideMore(@"[[Category:Foo|abc]]", true);
            Assert.IsFalse(hiddenTargetOnly.Contains("abc"), "Category sort key hidden even if keeping targets");

            HideText keepLinksHider = new HideText(true, false, false);
            string keptLinks = keepLinksHider.HideMore(@"[[Category:Foo|abc]]", false, false);
            Assert.IsTrue(keptLinks.Contains("abc"), "Category sort key kept if keeping wikilinks");
        }
Exemplo n.º 34
0
        /// <summary>
        /// Performs typo fixes against the article text in multi-threaded mode (one thread per typo group).
        /// Typo fixes not performed if no typos loaded or the text matches IgnoreRegex (any sic tags on page).
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="noChange">True if no typos fixed</param>
        /// <param name="summary">Edit summary: Variables.TypoSummaryTag followed by the trimmed fix summary; empty string when fixes are skipped</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>Updated article text</returns>
        public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
        {
            string originalArticleText = articleText;
            summary = "";
            if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
            {
                noChange = true;
                return articleText;
            }

            HideText removeText = new HideText(true, false, true);

            articleText = removeText.HideMore(articleText, true);

            // remove newlines, whitespace and hide tokens from bottom
            // to avoid running 2K regexps on them; the tail is re-appended before un-hiding
            Match m = RemoveTail.Match(articleText);
            string tail = m.Value;
            if (!string.IsNullOrEmpty(tail))
                articleText = articleText.Remove(m.Index);

            string originalText = articleText;
            string strSummary = "";
            /* Run typos threaded, one thread per group for better performance
             * http://stackoverflow.com/questions/13776846/pass-paramters-through-parameterizedthreadstart
             * http://www.dotnetperls.com/parameterizedthreadstart
             * http://stackoverflow.com/questions/831009/thread-with-multiple-parameters */
            resultSummary.Clear();
            resultArticleText.Clear();

            Thread[] array = new Thread[Groups.Count];
            int i = 0;
            foreach (TypoGroup tg in Groups)
            {
                // Per-iteration copy: before C# 5 the foreach variable is shared by all
                // iterations, so capturing 'tg' directly could make a thread run the wrong group.
                TypoGroup current = tg;

                array[i] =
                    new Thread(
                        delegate()
                        {
                            current.FixTypos2(articleText, strSummary, articleTitle, originalArticleText);
                        });
                array[i].Start();
                i++;
            }

            // Join all the threads: wait for all to complete
            foreach (Thread t in array)
            {
                t.Join();
            }

            // Merge per-group results (looked up by GroupSize) in group order
            foreach (TypoGroup tg in Groups)
            {
                string groupSummary;
                string groupArticleText = null;

                bool found = resultSummary.TryGetValue(tg.GroupSize, out groupSummary)
                             && resultArticleText.TryGetValue(tg.GroupSize, out groupArticleText);

                if (!found || groupSummary == null || groupArticleText == null)
                {
                    // The thread left no usable result for this group (this used to end in a
                    // NullReferenceException): process the group sequentially on the current text.
                    tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
                    continue;
                }

                if (groupSummary.Length > 0)
                {
                    if (strSummary.Length > 0)
                    {
                        // earlier thread had changes, so need to re-run this one
                        tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
                    }
                    else
                    {
                        strSummary = groupSummary;
                        articleText = groupArticleText;
                    }
                }
            }

            noChange = originalText.Equals(articleText);

            summary = Variables.TypoSummaryTag + strSummary.Trim();

            return removeText.AddBackMore(articleText + tail);
        }
Exemplo n.º 35
0
        /// <summary>
        /// Checks that the bold just added to the article is the first bold in the article, and that it's within the first 5% of the HideMore article OR immediately after the infobox
        /// </summary>
        /// <param name="articleText">Article text after the bold title was inserted</param>
        /// <param name="escapedTitle">The title, already escaped for use inside a regular expression</param>
        /// <returns>True when the added bold is not later than the first bold found by RegexFirstBold and
        /// bold markup occurs in the leading window of the template-stripped, hidden text</returns>
        private bool AddedBoldIsValid(string articleText, string escapedTitle)
        {
            Regex RegexBoldAdded = new Regex(@"^(.*?)'''(" + escapedTitle + @")", RegexOptions.Singleline | RegexOptions.IgnoreCase);

            // Both positions are measured on the full, un-hidden text
            int boldAddedPos = RegexBoldAdded.Match(articleText).Groups[2].Index;

            int firstBoldPos = RegexFirstBold.Match(articleText).Length;

            // The result is a conjunction, so skip the slow hiding pass when this half already fails
            if (boldAddedPos > firstBoldPos)
                return false;

            HideText Hider2 = new HideText(true, true, true);

            articleText = WikiRegexes.NestedTemplates.Replace(articleText, "");

            articleText = Hider2.HideMore(articleText);

            // was bold added in first 5% of article?
            bool inFirst5Percent = false;

            int articlelength = articleText.Length;

            if (articlelength > 5)
            {
                // The window size is derived from the untrimmed length (at least 5 characters) but
                // is applied to the trimmed text, so clamp it: trimming can leave fewer characters
                // than the window, which previously made Substring throw ArgumentOutOfRangeException.
                string trimmed = articleText.Trim();
                int window = Math.Min(trimmed.Length, Math.Max(articlelength / 20, 5));

                inFirst5Percent = trimmed.Substring(0, window).Contains("'''");
            }

            // check that the bold added is the first bit in bold in the main body of the article, and in first 5% of HideMore article
            // (the hidden text is a local working copy and is not un-hidden again, as it is never used afterwards)
            return inFirst5Percent;
        }
Exemplo n.º 36
0
        /// <summary>
        /// Checks for known typos on the page.
        /// Runs the typo groups in order on a hidden copy of the text and stops at the first group that reports a fix.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>whether there are typos on the page; always false if no typos are loaded or the text matches IgnoreRegex</returns>
        public bool DetectTypo(string articleText, string articleTitle)
        {
            string originalArticleText = articleText;
            if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
                return false;

            HideText removeText = new HideText(true, false, true);

            articleText = removeText.HideMore(articleText, true);

            // remove newlines, whitespace and hide tokens from bottom
            // to avoid running 2K regexps on them
            Match m = RemoveTail.Match(articleText);

            // Require a non-empty match, like PerformTypoFixes does: a successful but empty match
            // at the very end gives m.Index == articleText.Length, and String.Remove rejects that
            // index on .NET Framework. There is nothing to strip in that case anyway.
            if (m.Success && m.Length > 0)
                articleText = articleText.Remove(m.Index);

            string strSummary = "";

            foreach (TypoGroup grp in Groups)
            {
                grp.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);

                // a non-empty summary means this group fixed something: no need to run the rest
                if (strSummary.Length > 0)
                    return true;
            }

            return false;
        }
Exemplo n.º 37
0
        /// <summary>
        /// Fixes date and decade formatting errors, then collapses runs of line-break tags,
        /// all while the text is protected by HideMore.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <returns>The modified article text.</returns>
        public string FixDates(string ArticleText)
        {
            HideText hider = new HideText();
            string working = hider.HideMore(ArticleText);

            working = FixDatesRaw(working);

            // Remove 2 or more <br />'s (the text is trimmed first).
            // This piece's existence here is counter-intuitive, but it requires HideMore()
            // and I don't want to call this slow function yet another time --MaxSem
            string trimmed = working.Trim();
            working = Regex.Replace(trimmed, @"(<br[\s/]*> *){2,}", "\r\n", RegexOptions.IgnoreCase);

            return hider.AddBackMore(working);
        }
Exemplo n.º 38
0
        //TODO:Needs re-write
        /// <summary>
        /// If necessary, adds/removes wikify or stub tag
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The article title.</param>
        /// <param name="restrictOrphanTagging"></param>
        /// <param name="summary"></param>
        /// <returns>The tagged article.</returns>
        public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
        {
            // don't tag redirects/outside article namespace/no tagging changes
            if (!Namespace.IsMainSpace(articleTitle) || Tools.IsRedirect(articleText) || WikiRegexes.Wi.IsMatch(articleText))
                return articleText;

            tagsRemoved.Clear();
            tagsAdded.Clear();

            string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            Sorter.Interwikis(ref commentsStripped);

            // bulleted or indented text should weigh less than simple text.
            // for example, actor stubs may contain large filmographies
            string crapStripped = WikiRegexes.BulletedText.Replace(commentsCategoriesStripped, "");
            int words = (Tools.WordCount(commentsCategoriesStripped) + Tools.WordCount(crapStripped)) / 2;

            // remove stub tags from long articles, don't move section stubs
            if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();
                tagsRemoved.Add("stub");
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

            // do orphan tagging before template analysis for categorisation tags
            articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

            articleText = TagRefsIbid(articleText);

            articleText = TagEmptySection(articleText);

            int totalCategories;
            // ignore commented out wikilinks, and any in {{Proposed deletion/dated}}
            int wikiLinkCount = Tools.LinkCount(ProposedDeletionDatedEndorsed.Replace(commentsStripped, ""));

#if DEBUG || UNITTEST
            if (Globals.UnitTestMode)
            {
                totalCategories = Globals.UnitTestIntValue;
            }
            else
#endif
            {
                // stubs add non-hidden stub categories, don't count these in categories count
                // also don't count "Proposed deletion..." cats
                List<Article> Cats = CategoryProv.MakeList(new[] { articleTitle });
                totalCategories = RegularCategories(Cats).Count;

                // cats may have been added to page by genfixes, F&R or user (when reparsing) so check cats on page if API says zero
                // so we correctly count for uncat tagging
                if(totalCategories == 0)
                    totalCategories = RegularCategories(articleText).Count;
            }

            // remove dead end when wikilinks on page, but not for en-wiki where dead end can mean "not enough" links
            if (wikiLinkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText)
                && !Variables.LangCode.Equals("en"))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, "");
                else
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                if (!WikiRegexes.DeadEnd.IsMatch(articleText))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else
                    {
                        tagsRemoved.Add("deadend");
                    }
                }
            }

            // discount persondata, comments, infoboxes and categories from wikify and stub evaluation
            string lengthtext = commentsCategoriesStripped;
            lengthtext = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "");
            lengthtext = WikiRegexes.InfoBox.Replace(lengthtext, "");
            lengthtext = Drugbox.Replace(lengthtext, "");

            int length = lengthtext.Length + 1;
            bool underlinked = (wikiLinkCount < 0.0025 * length);

            if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped))
            {
                // add stub tag. Exclude pages whose title starts with "List of..."
                if (!ListOf.IsMatch(articleTitle))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة}}", 3);
                        tagsAdded.Add("بذرة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى}}", 3);
                        tagsAdded.Add("تقاوى");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{stub}}", 3);
                        tagsAdded.Add("stub");
                    }
                    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
                }
            }

            // rename existing {{improve categories}} else add uncategorized tag
            if (totalCategories == 0 && ImproveCategories.IsMatch(articleText))
                articleText = Tools.RenameTemplate(articleText, "improve categories", "Uncategorized");

            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
            // nl wiki doesn't use {{Uncategorized}} template
            // prevent Wiktionary redirects from being tagged as uncategorised
            if (words > 6 && totalCategories == 0
                && !WikiRegexes.Uncat.IsMatch(articleText)
                && Variables.LangCode != "nl")
            {
                if (WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // add uncategorized stub tag
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[تصنيف:مقالات غير مصنفة|غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[قالب:تقاوى مش متصنفه|تقاوى مش متصنفه]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
                    }
                }
                else
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مقالات غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مش متصنفه]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
                    }
                }
            }

            // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
            // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
            if (WikiRegexes.Uncat.IsMatch(articleText))
            {
                if (totalCategories > 0)
                {
                    articleText = WikiRegexes.Uncat.Replace(articleText, "").TrimStart();
                    	if (Variables.LangCode.Equals("ar"))
	                    	tagsRemoved.Add("غير مصنفة");
                    	else if (Variables.LangCode.Equals("arz"))
	                    	tagsRemoved.Add("مش متصنفه");
                    	else
	                    	tagsRemoved.Add("uncategorised");
                    		
                }
                else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    string uncatname = WikiRegexes.Uncat.Match(articleText).Groups[1].Value;
                    if (!uncatname.Contains("stub"))
                    {
                    	if (Variables.LangCode.Equals("ar"))
                    	    articleText = Tools.RenameTemplate(articleText, uncatname, "بذرة غير مصنفة");
                    	else
                    	    articleText = Tools.RenameTemplate(articleText, uncatname, "Uncategorized stub");
                    		
                    }
                }
            }

            if (wikiLinkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && !WikiRegexes.SIAs.IsMatch(articleText))
            {
                // add dead-end tag
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{نهاية مسدودة|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                    tagsAdded.Add("[[:تصنيف:مقالات نهاية مسدودة|نهاية مسدودة]]");
                    // if dead end then remove underlinked
                    if(WikiRegexes.Wikify.IsMatch(articleText))
                    {
		                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكي");
                    }
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{نهايه مسدوده|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                    tagsAdded.Add("[[:قالب:نهايه مسدوده|نهايه مسدوده]]");
                    // if dead end then remove underlinked
                    if(WikiRegexes.Wikify.IsMatch(articleText))
                    {
		                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكى");
                    }
                }
                else if (Variables.LangCode != "sv" && !WikiRegexes.Centuryinbox.IsMatch(articleText)  && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower(), @"\bdead ?end\b"))
                {
                    // Don't add excess newlines between new tags
                    articleText = "{{dead end|" + WikiRegexes.DateYearMonthParameter + "}}" + (tagsAdded.Count > 0 ? "\r\n" : "\r\n\r\n") + articleText;
                    tagsAdded.Add("[[CAT:DE|deadend]]");
                    // if dead end then remove underlinked
                    if(articleText.IndexOf("underlinked", StringComparison.OrdinalIgnoreCase) > -1)
                    {
                        articleText = Tools.NestedTemplateRegex("underlinked").Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("underlinked");
                    }
                }
            }
            // add wikify tag, don't add underlinked/wikify if {{dead end}} already present
            // Don't tag SIA pages, may create wikilinks from templates
            else if (wikiLinkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
                     && !WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower().Contains("wikify")
                     && !WikiRegexes.DeadEnd.IsMatch(articleText)
                     && !WikiRegexes.SIAs.IsMatch(articleText))
            {
                // Avoid excess newlines between templates
                string templateEnd = "}}\r\n" + (articleText.StartsWith(@"{{") ? "" : "\r\n");
                
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{ويكي|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[وب:ويكي|ويكي]]");
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{ويكى|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[قالب:ويكى|ويكى]]");
                }
                else if (Variables.LangCode.Equals("sv"))
                {
                    articleText = "{{Wikify|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[WP:WFY|wikify]]");
                }
                else
                {
                    articleText = "{{Underlinked|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[CAT:UL|underlinked]]");
                }
            }
            else if (wikiLinkCount > 3 && !underlinked &&
                     WikiRegexes.Wikify.IsMatch(articleText))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.Wikify.Replace(articleText, "");
                else
                    // remove wikify, except section templates or wikify tags with reason parameter specified
                    articleText = WikiRegexes.Wikify.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                if (!WikiRegexes.Wikify.IsMatch(articleText))
                {
	                if (Variables.LangCode.Equals("ar"))
	                {
	                	tagsRemoved.Add("ويكي");
                	}     
	                else if (Variables.LangCode.Equals("arz"))
	                {
	                	tagsRemoved.Add("ويكى");
                	}
	                else
	                {
	                	tagsRemoved.Add("underlinked");
                	}
                }
            }

            // rename unreferenced --> refimprove if has existing refs, update date
            if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
                && (TotalRefsNotGrouped(commentsCategoriesStripped) + Tools.NestedTemplateRegex("sfn").Matches(articleText).Count) > 0)
            {
                articleText = Unreferenced.Replace(articleText, m2 => Tools.UpdateTemplateParameterValue(Tools.RenameTemplate(m2.Value, "refimprove"), "date", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}"));

                // update tag in old-style multiple issues
                Match m = WikiRegexes.MultipleIssues.Match(articleText);
                if (m.Success && Tools.GetTemplateParameterValue(m.Value, "unreferenced").Length > 0)
                {
                    string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "refimprove", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}");
                    if (!newValue.Equals(m.Value))
                        articleText = articleText.Replace(m.Value, newValue);
                }
            }

            if (tagsAdded.Count > 0 || tagsRemoved.Count > 0)
            {
                Parsers p = new Parsers();
                HideText ht = new HideText();

                articleText = ht.HideUnformatted(articleText);

                articleText = p.MultipleIssues(articleText);
                articleText = Conversions(articleText);
                articleText = ht.AddBackUnformatted(articleText);

                // sort again in case tag removal requires whitespace cleanup
                // Don't sort interwikis, we can't specify the correct InterWikiSortOrder
                p.SortInterwikis = false;
                articleText = p.Sorter.Sort(articleText, articleTitle);
            }

            summary = PrepareTaggerEditSummary();

            return articleText;
        }