This class provides functions for 'hiding' certain syntax by replacing it with unique tokens, and then adding it back after an operation has been performed on the text.
 /// <summary>
 /// Runs HideMore on the text with a freshly constructed default HideText (stored in Hider),
 /// and asserts that AddBackMore restores the original text
 /// </summary>
 /// <param name="text">Text to hide</param>
 /// <param name="hideOnlyTargetOfWikilink">Passed through unchanged to HideText.HideMore</param>
 /// <returns>The text as returned by HideMore</returns>
 private string HideMore(string text, bool hideOnlyTargetOfWikilink)
 {
     Hider = new HideText();
     string hidden = Hider.HideMore(text, hideOnlyTargetOfWikilink);

     // the round trip must reproduce the input exactly
     string restored = Hider.AddBackMore(hidden);
     Assert.AreEqual(text, restored);

     return hidden;
 }
 /// <summary>
 /// Runs Hide on the text with a HideText built from the given options (stored in Hider),
 /// and asserts that AddBack restores the original text
 /// </summary>
 /// <param name="text">Text to hide</param>
 /// <param name="hideExternalLinks">First HideText constructor argument</param>
 /// <param name="leaveMetaHeadings">Second HideText constructor argument</param>
 /// <param name="hideImages">Third HideText constructor argument</param>
 /// <returns>The text as returned by Hide</returns>
 private string Hide(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
 {
     Hider = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);
     string hidden = Hider.Hide(text);

     // the round trip must reproduce the input exactly
     string restored = Hider.AddBack(hidden);
     Assert.AreEqual(text, restored);

     return hidden;
 }
 /// <summary>
 /// Runs HideMore on the text with a HideText built from the given options (stored in Hider).
 /// Unlike the other helpers here, no round-trip assertion is made
 /// </summary>
 /// <param name="text">Text to hide</param>
 /// <param name="hideExternalLinks">First HideText constructor argument</param>
 /// <param name="leaveMetaHeadings">Second HideText constructor argument</param>
 /// <param name="hideImages">Third HideText constructor argument</param>
 /// <returns>The text as returned by HideMore</returns>
 private string HideMore(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
 {
     Hider = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);
     string hidden = Hider.HideMore(text);
     return hidden;
 }
Example #4
0
        /// <summary>
        /// Checks that the bold just added to the article is the first bold in the article, and that bold markup
        /// occurs within the first 5% (minimum 5 characters) of the article once WikiRegexes.NestedTemplates matches
        /// have been removed and the remainder passed through HideMore
        /// </summary>
        /// <param name="articleText">The wiki text of the article, after the bold was added</param>
        /// <param name="escapedTitle">The article title; it is concatenated straight into a regular expression,
        /// so the caller is expected to have escaped it already</param>
        /// <returns>True if bold markup is found in the leading part of the processed text and the added bold
        /// is not preceded by other bold text</returns>
        private bool AddedBoldIsValid(string articleText, string escapedTitle)
        {
            HideText Hider2 = new HideText(true, true, true);
            Regex RegexBoldAdded = new Regex(@"^(.*?)'''(" + escapedTitle + @")", RegexOptions.Singleline | RegexOptions.IgnoreCase);

            // both positions are measured on the unmodified article text
            int boldAddedPos = RegexBoldAdded.Match(articleText).Groups[2].Index;

            int firstBoldPos = RegexFirstBold.Match(articleText).Length;

            articleText = WikiRegexes.NestedTemplates.Replace(articleText, "");

            articleText = Hider2.HideMore(articleText);

            // was bold added in first 5% of article?
            bool inFirst5Percent = false;

            int articlelength = articleText.Length;

            if (articlelength > 5)
            {
                // The window size is based on the untrimmed length, but the search runs on the trimmed text.
                // Clamp the window so text that is mostly whitespace cannot push Substring past the end.
                string trimmed = articleText.Trim();
                int windowLength = Math.Min(trimmed.Length, Math.Max(articlelength / 20, 5));

                inFirst5Percent = trimmed.Substring(0, windowLength).Contains("'''");
            }

            // check that the bold added is the first bit in bold in the main body of the article, and in first 5% of HideMore article
            return inFirst5Percent && boldAddedPos <= firstBoldPos;
        }
Example #5
0
        /// <summary>
        /// Checks that image links (complete, or just the opening up to the pipe) and imagemap blocks are hidden,
        /// and checks when a category sort key is hidden by HideMore
        /// </summary>
        public void HideImages()
        {
            // inputs that must be hidden in their entirety, checked in this order
            string[] fullyHidden =
            {
                @"[[File:foo.jpg]]",
                @"[[File:foo with space and 0004.jpg]]",
                @"[[File:foo.jpeg]]",
                @"[[File:foo.JPEG]]",
                @"[[Image:foo with space and 0004.jpeg]]",
                @"[[Image:foo.jpeg]]",
                @"[[Image:foo with space and 0004.jpg]]",
                @"[[File:foo.jpg|",
                @"[[File:foo with space and 0004.jpg|",
                @"[[File:foo.jpeg|",
                @"[[Image:foo with space and 0004.jpeg|",
                @"[[Image:foo.jpeg|",
                @"[[Image:foo with SPACE() and 0004.jpg|",
                @"[[File:foo.gif|",
                @"[[Image:foo with space and 0004.gif|",
                @"[[Image:foo.gif|",
                @"[[Image:foo with SPACE() and 0004.gif|",
                @"[[File:foo.png|",
                @"[[Image:foo with space and 0004.png|",
                @"[[Image:foo_here.png|",
                @"[[Image:foo with SPACE() and 0004.png|",
                @"[[Image:westminster.tube.station.jubilee.arp.jpg|",
                @"<imagemap>
            File:Blogs001.jpeg|Description
            File:Blogs002.jpeg|Description
            </imagemap>"
            };

            foreach (string wikiText in fullyHidden)
                AssertAllHidden(wikiText);

            // inputs checked through AssertBothHidden as well
            string[] hiddenBothWays =
            {
                @"[[File:foo.jpg]]",
                @"[[Image:foo with space and 0004.png|",
                @"[[Image:foo_here.png|"
            };

            foreach (string wikiText in hiddenBothWays)
                AssertBothHidden(wikiText);

            // category sort key handling
            string hiddenWholeLink = HideMore(@"[[Category:Foo|abc]]", false);
            Assert.IsFalse(hiddenWholeLink.Contains("abc"), "Category sort key always hidden if hiding wikilinks and not leaving target");

            string hiddenTargetOnly = HideMore(@"[[Category:Foo|abc]]", true);
            Assert.IsFalse(hiddenTargetOnly.Contains("abc"), "Category sort key hidden even if keeping targets");

            HideText wikilinkKeepingHider = new HideText(true, false, false);
            string wikilinksKept = wikilinkKeepingHider.HideMore(@"[[Category:Foo|abc]]", false, false);
            Assert.IsTrue(wikilinksKept.Contains("abc"), "Category sort key kept if keeping wikilinks");
        }
Example #6
0
 /// <summary>
 /// Replaces Hider with a HideText built from the given options, then runs both the
 /// AssertAllHidden and AssertAllHiddenMore checks on the text
 /// </summary>
 /// <param name="text">Text expected to be hidden</param>
 /// <param name="hideExternalLinks">First HideText constructor argument</param>
 /// <param name="leaveMetaHeadings">Second HideText constructor argument</param>
 /// <param name="hideImages">Third HideText constructor argument</param>
 private void AssertBothHidden(string text, bool hideExternalLinks, bool leaveMetaHeadings, bool hideImages)
 {
     // the hider is assigned before either check runs
     Hider = new HideText(hideExternalLinks, leaveMetaHeadings, hideImages);

     AssertAllHidden(text);
     AssertAllHiddenMore(text);
 }
 /// <summary>
 /// Checks that running the universal general fixes followed by the general fixes on an article
 /// whose text starts with a space leaves the text without that leading space
 /// </summary>
 public void PerformUniversalGeneralFixes()
 {
     HideText hider = new HideText();
     MockSkipOptions skipOptions = new MockSkipOptions();
     Article article = new Article("Hello", " '''Hello''' world text");

     article.PerformUniversalGeneralFixes();
     article.PerformGeneralFixes(parser, hider, skipOptions, false, false, false);

     Assert.AreEqual("'''Hello''' world text", article.ArticleText);
 }
Example #8
0
        // Covered by: LinkTests.FixDates()
        /// <summary>
        /// Fixes date and decade formatting errors, then applies the br/paragraph cleanup regexes,
        /// all while the text is hidden by HideMore.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <returns>The modified article text.</returns>
        public string FixDates(string ArticleText)
        {
            HideText hider = new HideText();
            string working = hider.HideMore(ArticleText);

            working = FixDatesRaw(working);

            // Remove 2 or more <br />'s
            // This piece's existence here is counter-intuitive, but it requires HideMore()
            // and I don't want to call this slow function yet another time --MaxSem
            working = SyntaxRemoveBr.Replace(working, "\r\n");
            working = SyntaxRemoveParagraphs.Replace(working, "\r\n\r\n");

            return hider.AddBackMore(working);
        }
Example #9
0
        //TODO:Needs re-write
        /// <summary>
        /// If necessary, adds/removes maintenance tags: stub, expand, uncategorized, dead end and wikify are handled
        /// directly; orphan, ibid-reference and empty-section tagging is delegated to TagOrphans, TagRefsIbid and
        /// TagEmptySection. Also renames unreferenced to refimprove when the article has refs.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The article title.</param>
        /// <param name="restrictOrphanTagging">Passed through unchanged to TagOrphans.</param>
        /// <param name="summary">Set to the result of PrepareTaggerEditSummary(); left untouched when the method returns early.</param>
        /// <returns>The tagged article; the input text unchanged if the title is outside the main namespace,
        /// the text is a redirect, or WikiRegexes.Wi matches.</returns>
        public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
        {
            // don't tag redirects/outside article namespace/no tagging changes
            if (!Namespace.IsMainSpace(articleTitle) || Tools.IsRedirect(articleText) || WikiRegexes.Wi.IsMatch(articleText))
                return articleText;

            // tagsRemoved/tagsAdded are members shared with the rest of the class; start this run with empty lists
            tagsRemoved.Clear();
            tagsAdded.Clear();

            string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            // NOTE(review): only commentsStripped is passed by ref here; commentsCategoriesStripped was derived
            // on the line above, so it is not affected by this call
            Sorter.Interwikis(ref commentsStripped);

            // bulleted or indented text should weigh less than simple text.
            // for example, actor stubs may contain large filmographies
            string crapStripped = WikiRegexes.BulletedText.Replace(commentsCategoriesStripped, "");
            // average of the word count with and without bulleted text (integer division)
            int words = (Tools.WordCount(commentsCategoriesStripped) + Tools.WordCount(crapStripped))/2;

            // remove stub tags from long articles
            if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();
                tagsRemoved.Add("stub");
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

            // on en wiki, remove expand template when a stub template exists
            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests/Archive_5#Remove_.7B.7Bexpand.7D.7D_when_a_stub_template_exists
            if (Variables.LangCode == "en" && WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                WikiRegexes.Expand.IsMatch(commentsCategoriesStripped))
            {
                articleText = WikiRegexes.Expand.Replace(articleText, "");
                tagsRemoved.Add("expand");
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            
            // do orphan tagging before template analysis for categorisation tags
            articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

            articleText = TagRefsIbid(articleText);

            articleText = TagEmptySection(articleText);

            int totalCategories;
            // link count is taken from the snapshot refreshed above, i.e. before the three tagging calls just made
            int linkCount = Tools.LinkCount(commentsStripped);

            #if DEBUG || UNITTEST
            // in unit test mode the category count is supplied by the test rather than looked up
            if (Globals.UnitTestMode)
            {
                totalCategories = Globals.UnitTestIntValue;
            }
            else
                #endif
            {
                // stubs add non-hidden stub categories, don't count these in categories count
                List<Article> Cats = CategoryProv.MakeList(new[] {articleTitle});
                List<Article> CatsNotStubs = new List<Article>();

                foreach (Article a in Cats)
                {
                    if (!a.Name.EndsWith(" stubs") && !a.Name.EndsWith(":Stubs"))
                        CatsNotStubs.Add(a);
                }
                totalCategories = CatsNotStubs.Count;
            }

            // article has links: run existing dead end tags through SectionTagME
            if (linkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText))
            {
                articleText = WikiRegexes.DeadEnd.Replace(articleText, new MatchEvaluator(SectionTagME));
                
                // only report the removal if no dead end tag is left after the replacement
                if(!WikiRegexes.DeadEnd.IsMatch(articleText))
                    tagsRemoved.Add("deadend");
            }

            // discount persondata along with comments and categories from wikify and stub evaluation
            // the + 1 means length is always at least 1
            int length = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "").Length + 1;
            // underlinked: fewer than 0.0025 links per character, i.e. under one link per 400 characters
            bool underlinked = (linkCount < 0.0025*length);

            if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped))
            {
                // add stub tag
                articleText += Tools.Newline("{{stub}}", 3);
                tagsAdded.Add("stub");
                // only commentsStripped is refreshed here, so the stub checks on it below see the new tag;
                // commentsCategoriesStripped keeps its earlier value
                commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            }

            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
            // nl wiki doesn't use {{Uncategorized}} template
            // prevent wictionary redirects from being tagged as uncategorised
            if (words > 6 && totalCategories == 0
                && !WikiRegexes.Uncat.IsMatch(articleText)
                && Variables.LangCode != "nl"
                && !Tools.NestedTemplateRegex("cat improve").IsMatch(articleText)
                // category count is from API; don't add uncat tag if genfixes added person categories
                && !WikiRegexes.DeathsOrLivingCategory.IsMatch(articleText)
                && !WikiRegexes.BirthsCategory.IsMatch(articleText))
            {
                if (WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // add uncategorized stub tag
                    articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter +
                        @"}}";
                    tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
                }
                else
                {
                    // add uncategorized tag
                    articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                    tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
                }
            }

            // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
            // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
            if (WikiRegexes.Uncat.IsMatch(articleText))
            {
                if (totalCategories > 0)
                {
                    articleText = WikiRegexes.Uncat.Replace(articleText, "");
                    tagsRemoved.Add("uncategorised");
                }
                else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // group 1 of the Uncat match is used as the template name to rename
                    string uncatname = WikiRegexes.Uncat.Match(articleText).Groups[1].Value;
                    if (!uncatname.Contains("stub"))
                        articleText = Tools.RenameTemplate(articleText, uncatname, "Uncategorized stub");
                }
            }

            // no links at all: add a dead end tag unless one exists already, either standalone or
            // mentioned inside the multiple issues template; not done when the language code is "sv"
            if (linkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && Variables.LangCode != "sv"
                && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower(), @"\bdead ?end\b"))
            {
                // add dead-end tag
                articleText = "{{dead end|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                tagsAdded.Add("[[:Category:Dead-end pages|deadend]]");
            }

            // wikify is added below 3 links and considered for removal above 3 links;
            // with exactly 3 links neither branch runs
            if (linkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
                && !WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower().Contains("wikify"))
            {
                // add wikify tag
                articleText = "{{Wikify|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                tagsAdded.Add("[[WP:WFY|wikify]]");
            }
            else if (linkCount > 3 && !underlinked &&
                     WikiRegexes.Wikify.IsMatch(articleText))
            {
                articleText = WikiRegexes.Wikify.Replace(articleText, new MatchEvaluator(SectionTagME));
                
                // only report the removal if no wikify tag is left after the replacement
                if(!WikiRegexes.Wikify.IsMatch(articleText))
                    tagsRemoved.Add("wikify");
            }

            // rename unreferenced --> refimprove if has existing refs
            if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
                && WikiRegexes.Refs.Matches(commentsCategoriesStripped).Count > 0)
            {
                articleText = Tools.RenameTemplate(articleText, "unreferenced", "refimprove", true);
                
                // also rename the unreferenced parameter inside the multiple issues template, if present
                Match m = WikiRegexes.MultipleIssues.Match(articleText);
                if(m.Success)
                {
                    string newValue = Tools.RenameTemplateParameter(m.Value,  "unreferenced", "refimprove");
                    if(!newValue.Equals(m.Value))
                        articleText = articleText.Replace(m.Value, newValue);
                }
            }

            // the follow-up passes below run only when this call recorded a tag addition or removal
            if (tagsAdded.Count > 0 || tagsRemoved.Count > 0)
            {
                Parsers p = new Parsers();
                HideText ht = new HideText();

                // MultipleIssues and Conversions run between HideUnformatted and AddBackUnformatted
                articleText = ht.HideUnformatted(articleText);

                articleText = p.MultipleIssues(articleText);
                articleText = Conversions(articleText);
                articleText = ht.AddBackUnformatted(articleText);

                // sort again in case tag removal requires whitespace cleanup
                articleText = p.Sorter.Sort(articleText, articleTitle);
            }

            summary = PrepareTaggerEditSummary();

            return articleText;
        }
        /// <summary>
        /// Pulls the interwiki and interwiki featured article links out of the article text and returns them
        /// Links inside text hidden by HideText.Hide (e.g. comments/nowiki tags) are left where they are
        /// </summary>
        /// <param name="articleText">The wiki text of the article; on return, the same text with the InterLangRegex match and the extracted links removed</param>
        /// <returns>string of interwiki featured article links, then the InterLangRegex match (if any) on its own line, then the interwiki links</returns>
        public string Interwikis(ref string articleText)
        {
            // take the InterLangRegex match out of the text first and remember it for the result
            Match commentMatch = InterLangRegex.Match(articleText);
            string interWikiComment = "";

            if (commentMatch.Success)
            {
                interWikiComment = commentMatch.Value;
                articleText = articleText.Replace(interWikiComment, "");
            }

            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Interwiki_links_moved_out_of_comment
            HideText hider = new HideText(false, true, false);
            articleText = hider.Hide(articleText);

            // featured article links are extracted before the plain interwiki links; both calls modify articleText
            string linkFGAs = ListToString(RemoveLinkFGAs(ref articleText));
            string commentLine = interWikiComment.Length > 0 ? interWikiComment + "\r\n" : "";
            string interWikiLinks = ListToString(RemoveInterWikis(ref articleText));

            articleText = hider.AddBack(articleText);

            return linkFGAs + commentLine + interWikiLinks;
        }
Example #11
0
        //TODO:Needs re-write
        /// <summary>
        /// If necessary, adds/removes wikify or stub tag
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The article title.</param>
        /// <param name="restrictOrphanTagging"></param>
        /// <param name="summary"></param>
        /// <returns>The tagged article.</returns>
        public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
        {
            // don't tag redirects/outside article namespace/no tagging changes
            if (!Namespace.IsMainSpace(articleTitle) || Tools.IsRedirect(articleText) || WikiRegexes.Wi.IsMatch(articleText))
                return articleText;

            tagsRemoved.Clear();
            tagsAdded.Clear();

            string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            Sorter.Interwikis(ref commentsStripped);

            // bulleted or indented text should weigh less than simple text.
            // for example, actor stubs may contain large filmographies
            string crapStripped = WikiRegexes.BulletedText.Replace(commentsCategoriesStripped, "");
            int words = (Tools.WordCount(commentsCategoriesStripped) + Tools.WordCount(crapStripped)) / 2;

            // remove stub tags from long articles, don't move section stubs
            if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();
                tagsRemoved.Add("stub");
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

            // do orphan tagging before template analysis for categorisation tags
            articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

            articleText = TagRefsIbid(articleText);

            articleText = TagEmptySection(articleText);

            int totalCategories;
            // ignore commented out wikilinks, and any in {{Proposed deletion/dated}}
            int wikiLinkCount = Tools.LinkCount(ProposedDeletionDatedEndorsed.Replace(commentsStripped, ""));

#if DEBUG || UNITTEST
            if (Globals.UnitTestMode)
            {
                totalCategories = Globals.UnitTestIntValue;
            }
            else
#endif
            {
                // stubs add non-hidden stub categories, don't count these in categories count
                // also don't count "Proposed deletion..." cats
                List<Article> Cats = CategoryProv.MakeList(new[] { articleTitle });
                totalCategories = RegularCategories(Cats).Count;

                // cats may have been added to page by genfixes, F&R or user (when reparsing) so check cats on page if API says zero
                // so we correctly count for uncat tagging
                if(totalCategories == 0)
                    totalCategories = RegularCategories(articleText).Count;
            }

            // remove dead end when wikilinks on page, but not for en-wiki where dead end can mean "not enough" links
            if (wikiLinkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText)
                && !Variables.LangCode.Equals("en"))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, "");
                else
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                if (!WikiRegexes.DeadEnd.IsMatch(articleText))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else
                    {
                        tagsRemoved.Add("deadend");
                    }
                }
            }

            // discount persondata, comments, infoboxes and categories from wikify and stub evaluation
            string lengthtext = commentsCategoriesStripped;
            lengthtext = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "");
            lengthtext = WikiRegexes.InfoBox.Replace(lengthtext, "");
            lengthtext = Drugbox.Replace(lengthtext, "");

            int length = lengthtext.Length + 1;
            bool underlinked = (wikiLinkCount < 0.0025 * length);

            if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped))
            {
                // add stub tag. Exclude pages their title starts with "List of..."
                if (!ListOf.IsMatch(articleTitle))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة}}", 3);
                        tagsAdded.Add("بذرة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى}}", 3);
                        tagsAdded.Add("تقاوى");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{stub}}", 3);
                        tagsAdded.Add("stub");
                    }
                    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
                }
            }

            // rename existing {{improve categories}} else add uncategorized tag
            if (totalCategories == 0 && ImproveCategories.IsMatch(articleText))
                articleText = Tools.RenameTemplate(articleText, "improve categories", "Uncategorized");

            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
            // nl wiki doesn't use {{Uncategorized}} template
            // prevent wictionary redirects from being tagged as uncategorised
            if (words > 6 && totalCategories == 0
                && !WikiRegexes.Uncat.IsMatch(articleText)
                && Variables.LangCode != "nl")
            {
                if (WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // add uncategorized stub tag
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[تصنيف:مقالات غير مصنفة|غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[قالب:تقاوى مش متصنفه|تقاوى مش متصنفه]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
                    }
                }
                else
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مقالات غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مش متصنفه]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
                    }
                }
            }

            // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
            // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
            if (WikiRegexes.Uncat.IsMatch(articleText))
            {
                if (totalCategories > 0)
                {
                    articleText = WikiRegexes.Uncat.Replace(articleText, "").TrimStart();
                    	if (Variables.LangCode.Equals("ar"))
	                    	tagsRemoved.Add("غير مصنفة");
                    	else if (Variables.LangCode.Equals("arz"))
	                    	tagsRemoved.Add("مش متصنفه");
                    	else
	                    	tagsRemoved.Add("uncategorised");
                    		
                }
                else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    string uncatname = WikiRegexes.Uncat.Match(articleText).Groups[1].Value;
                    if (!uncatname.Contains("stub"))
                    {
                    	if (Variables.LangCode.Equals("ar"))
                    	    articleText = Tools.RenameTemplate(articleText, uncatname, "بذرة غير مصنفة");
                    	else
                    	    articleText = Tools.RenameTemplate(articleText, uncatname, "Uncategorized stub");
                    		
                    }
                }
            }

            if (wikiLinkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && !WikiRegexes.SIAs.IsMatch(articleText))
            {
                // add dead-end tag
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{نهاية مسدودة|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                    tagsAdded.Add("[[:تصنيف:مقالات نهاية مسدودة|نهاية مسدودة]]");
                    // if dead end then remove underlinked
                    if(WikiRegexes.Wikify.IsMatch(articleText))
                    {
		                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكي");
                    }
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{نهايه مسدوده|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                    tagsAdded.Add("[[:قالب:نهايه مسدوده|نهايه مسدوده]]");
                    // if dead end then remove underlinked
                    if(WikiRegexes.Wikify.IsMatch(articleText))
                    {
		                articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكى");
                    }
                }
                else if (Variables.LangCode != "sv" && !WikiRegexes.Centuryinbox.IsMatch(articleText)  && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower(), @"\bdead ?end\b"))
                {
                    // Don't add excess newlines between new tags
                    articleText = "{{dead end|" + WikiRegexes.DateYearMonthParameter + "}}" + (tagsAdded.Count > 0 ? "\r\n" : "\r\n\r\n") + articleText;
                    tagsAdded.Add("[[CAT:DE|deadend]]");
                    // if dead end then remove underlinked
                    if(articleText.IndexOf("underlinked", StringComparison.OrdinalIgnoreCase) > -1)
                    {
                        articleText = Tools.NestedTemplateRegex("underlinked").Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("underlinked");
                    }
                }
            }
            // add wikify tag, don't add underlinked/wikify if {{dead end}} already present
            // Dont' tag SIA pages, may create wikilinks from templates
            else if (wikiLinkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
                     && !WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower().Contains("wikify")
                     && !WikiRegexes.DeadEnd.IsMatch(articleText)
                     && !WikiRegexes.SIAs.IsMatch(articleText))
            {
                // Avoid excess newlines between templates
                string templateEnd = "}}\r\n" + (articleText.StartsWith(@"{{") ? "" : "\r\n");
                
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{ويكي|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[وب:ويكي|ويكي]]");
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{ويكى|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[قالب:ويكى|ويكى]]");
                }
                else if (Variables.LangCode.Equals("sv"))
                {
                    articleText = "{{Wikify|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[WP:WFY|wikify]]");
                }
                else
                {
                    articleText = "{{Underlinked|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[CAT:UL|underlinked]]");
                }
            }
            else if (wikiLinkCount > 3 && !underlinked &&
                     WikiRegexes.Wikify.IsMatch(articleText))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.Wikify.Replace(articleText, "");
                else
                    // remove wikify, except section templates or wikify tags with reason parameter specified
                    articleText = WikiRegexes.Wikify.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                if (!WikiRegexes.Wikify.IsMatch(articleText))
                {
	                if (Variables.LangCode.Equals("ar"))
	                {
	                	tagsRemoved.Add("ويكي");
                	}     
	                else if (Variables.LangCode.Equals("arz"))
	                {
	                	tagsRemoved.Add("ويكى");
                	}
	                else
	                {
	                	tagsRemoved.Add("underlinked");
                	}
                }
            }

            // rename unreferenced --> refimprove if has existing refs, update date
            if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
                && (TotalRefsNotGrouped(commentsCategoriesStripped) + Tools.NestedTemplateRegex("sfn").Matches(articleText).Count) > 0)
            {
                articleText = Unreferenced.Replace(articleText, m2 => Tools.UpdateTemplateParameterValue(Tools.RenameTemplate(m2.Value, "refimprove"), "date", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}"));

                // update tag in old-style multiple issues
                Match m = WikiRegexes.MultipleIssues.Match(articleText);
                if (m.Success && Tools.GetTemplateParameterValue(m.Value, "unreferenced").Length > 0)
                {
                    string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "refimprove", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}");
                    if (!newValue.Equals(m.Value))
                        articleText = articleText.Replace(m.Value, newValue);
                }
            }

            if (tagsAdded.Count > 0 || tagsRemoved.Count > 0)
            {
                Parsers p = new Parsers();
                HideText ht = new HideText();

                articleText = ht.HideUnformatted(articleText);

                articleText = p.MultipleIssues(articleText);
                articleText = Conversions(articleText);
                articleText = ht.AddBackUnformatted(articleText);

                // sort again in case tag removal requires whitespace cleanup
                // Don't sort interwikis, we can't specify the correct InterWikiSortOrder
                p.SortInterwikis = false;
                articleText = p.Sorter.Sort(articleText, articleTitle);
            }

            summary = PrepareTaggerEditSummary();

            return articleText;
        }
Example #12
0
        /// <summary>
        /// Performs typo fixes against the article text in multi-threaded mode:
        /// every typo group is first run on its own thread against the same hidden text,
        /// then the per-group results are merged in group order.
        /// Typo fixes not performed if no typos loaded or the ignore regex matches the page
        /// (per the original note: any sic tags on page)
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="noChange">True if no typos fixed</param>
        /// <param name="summary">Edit summary</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>Updated article text</returns>
        public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
        {
            string originalArticleText = articleText;
            summary = "";
            if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
            {
                noChange = true;
                return articleText;
            }

            HideText removeText = new HideText(true, false, true);

            articleText = removeText.HideMore(articleText, true);

            // remove newlines, whitespace and hide tokens from bottom
            // to avoid running 2K regexps on them
            Match m = RemoveTail.Match(articleText);
            string tail = m.Value;
            if (!string.IsNullOrEmpty(tail))
                articleText = articleText.Remove(m.Index);

            // snapshot of the hidden, tail-less text: the input for every worker and the
            // baseline for the noChange comparison
            string originalText = articleText;
            string strSummary = "";

            /* Run typos threaded, one thread per group for better performance
             * http://stackoverflow.com/questions/13776846/pass-paramters-through-parameterizedthreadstart
             * http://www.dotnetperls.com/parameterizedthreadstart
             * http://stackoverflow.com/questions/831009/thread-with-multiple-parameters */
            resultSummary.Clear();
            resultArticleText.Clear();

            Thread[] workers = new Thread[Groups.Count];
            int i = 0;
            foreach (TypoGroup tg in Groups)
            {
                // Copy the loop variable before capturing it: compilers older than C# 5 share a
                // single foreach variable between iterations, so a thread that starts late
                // could otherwise run the wrong group.
                TypoGroup workerGroup = tg;

                workers[i] =
                    new Thread(
                        delegate()
                        {
                            // presumably FixTypos2 stores its output in resultSummary / resultArticleText
                            // keyed by GroupSize, as that is where the merge loop below reads it from
                            workerGroup.FixTypos2(originalText, "", articleTitle, originalArticleText);
                        });
                workers[i].Start();
                i++;
            }

            // Join all the threads: wait for all to complete
            foreach (Thread t in workers)
            {
                t.Join();
            }

            foreach (TypoGroup tg in Groups)
            {
                string groupSummary;
                string groupArticleText;
                bool haveSummary = resultSummary.TryGetValue(tg.GroupSize, out groupSummary) && groupSummary != null;
                bool haveText = resultArticleText.TryGetValue(tg.GroupSize, out groupArticleText) && groupArticleText != null;

                // group ran and found nothing to fix on the original text
                if (haveSummary && groupSummary.Length == 0)
                    continue;

                if (haveSummary && haveText && strSummary.Length == 0)
                {
                    // first group with changes: its threaded result applies to the unchanged text as-is
                    strSummary = groupSummary;
                    articleText = groupArticleText;
                }
                else
                {
                    // either an earlier group already changed the text, so this group's threaded
                    // result is stale, or the worker left no usable result: run the group
                    // serially on the current text instead of dereferencing a missing entry
                    tg.FixTypos(ref articleText, ref strSummary, articleTitle, originalArticleText);
                }
            }

            noChange = originalText.Equals(articleText);

            summary = Variables.TypoSummaryTag + strSummary.Trim();

            return removeText.AddBackMore(articleText + tail);
        }
Example #13
0
        /// <summary>
        /// Reports whether any loaded typo group would fix something on the page.
        /// Always false when no typos are loaded or the ignore regex matches the text.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>true as soon as one typo group reports a fix, otherwise false</returns>
        public bool DetectTypo(string articleText, string articleTitle)
        {
            if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
            {
                return false;
            }

            string unhiddenText = articleText;

            HideText hider = new HideText(true, false, true);
            string workingText = hider.HideMore(articleText, true);

            // drop the trailing newlines, whitespace and hide tokens so the
            // typo regexes do not have to be run over them
            Match tailMatch = RemoveTail.Match(workingText);
            if (tailMatch.Success)
            {
                workingText = workingText.Remove(tailMatch.Index);
            }

            string fixSummary = "";

            foreach (TypoGroup grp in Groups)
            {
                grp.FixTypos(ref workingText, ref fixSummary, articleTitle, unhiddenText);

                // a non-empty summary means this group changed something: no need to go on
                if (fixSummary.Length != 0)
                {
                    return true;
                }
            }

            return false;
        }
 /// <summary>
 /// Replaces <c>hider</c> with a default-constructed HideText and returns
 /// <paramref name="text"/> after that instance's HideMore pass.
 /// </summary>
 private string Hide(string text)
 {
     hider = new HideText();

     string hiddenText = hider.HideMore(text);
     return hiddenText;
 }
Example #15
0
 /// <summary>
 /// Sets the date (month & year) for undated cleanup tags that take a date
 /// by applying every pattern/replacement pair held in RegexTagger.
 /// A matched tag that already carries a non-empty "Date" parameter only has that parameter renamed to "date".
 /// Avoids changing tags in unformatted text areas (wiki comments etc.)
 /// Note: bugzilla 2700 means {{ssubst}} within ref tags doesn't work, AWB doesn't do anything about it
 /// </summary>
 /// <param name="articleText">The wiki text of the article.</param>
 /// <returns>The updated article text</returns>
 public static string TagUpdater(string articleText)
 {
     HideText unformattedHider = new HideText();
     string working = unformattedHider.HideUnformatted(articleText);

     foreach (KeyValuePair<Regex, string> rule in RegexTagger)
     {
         string replacementPattern = rule.Value;

         working = rule.Key.Replace(working, hit =>
         {
             // capitalised Date parameter with a value: keep the value, just lower-case the name
             if (Tools.GetTemplateParameterValue(hit.Value, "Date").Length > 0)
             {
                 return Tools.RenameTemplateParameter(hit.Value, "Date", "date");
             }

             // otherwise expand the rule's replacement by hand, filling in capture group 1
             return replacementPattern.Replace("$1", hit.Groups[1].Value);
         });
     }

     return unformattedHider.AddBackUnformatted(working);
 }
 /// <summary>
 /// Rebuilds <c>Hider</c> with the supplied options and returns
 /// <paramref name="text"/> after that instance's Hide pass.
 /// </summary>
 private string Hide(string text, bool HideExternalLinks, bool LeaveMetaHeadings, bool HideImages)
 {
     Hider = new HideText(HideExternalLinks, LeaveMetaHeadings, HideImages);

     string hiddenText = Hider.Hide(text);
     return hiddenText;
 }
        /// <summary>
        /// Runs every typo group over the article text and reports what was fixed.
        /// Nothing is done when no typos are loaded or the ignore regex matches the text.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <param name="NoChange">True if the typo groups left the text unchanged</param>
        /// <param name="Summary">Typo summary tag plus the trimmed fix summary; empty when nothing was fixed</param>
        /// <returns>The updated article text</returns>
        public string PerformTypoFixes(string ArticleText, out bool NoChange, out string Summary)
        {
            Summary = "";

            if (TyposCount == 0 || IgnoreRegex.IsMatch(ArticleText))
            {
                NoChange = true;
                return ArticleText;
            }

            HideText hider = new HideText(true, false, true);
            string working = hider.HideMore(ArticleText);

            // strip newlines, whitespace and hide tokens from the bottom so the
            // ~2K typo regexes are not run over them; they are re-appended below
            Match tailMatch = RemoveTail.Match(working);
            string tail = tailMatch.Value;
            if (!string.IsNullOrEmpty(tail))
            {
                working = working.Remove(tailMatch.Index);
            }

            string beforeFixes = working;
            string fixSummary = "";

            foreach (TypoGroup grp in Groups)
            {
                grp.FixTypos(ref working, ref fixSummary);
            }

            NoChange = (beforeFixes == working);

            string restored = hider.AddBackMore(working + tail);

            if (!string.IsNullOrEmpty(fixSummary))
            {
                Summary = Variables.TypoSummaryTag + fixSummary.Trim();
            }

            return restored;
        }
        /// <summary>
        /// Checks that image/file names are hidden while the surrounding link markup is retained,
        /// that imagemap blocks are fully hidden, and how category sort keys are treated by HideMore.
        /// </summary>
        public void HideImages()
        {
            // { wikitext, assertion message }: the hidden form of each must not contain "foo"
            string[,] fileNameCases =
            {
                { @"[[File:foo.jpg]]", "Standard case" },
                { @"[[File:foo with space and 0004.jpg]]", "with space" },
                { @"[[File:foo.jpeg]]", "jpeg" },
                { @"[[File:foo.JPEG]]", "JPEG" },
                { @"[[Image:foo with space and 0004.jpeg]]", "space and jpeg" },
                { @"[[Image:foo.jpeg]]", "Image jpeg" },
                { @"[[Image:foo with space and 0004.jpg]]", "image jpeg space" },
                { @"[[File:foo.jpg|", "To pipe" },
                { @"[[File:foo with space and 0004.jpg|", "Space to pipe" },
                { @"[[File:foo.jpeg|", "Standard case" },
                { @"[[Image:foo with space and 0004.jpeg|", "Standard case" },
                { @"[[Image:foo.jpeg|", "Standard case" },
                { @"[[Image:foo with SPACE() and 0004.jpg|", "Standard case" },
                { @"[[File:foo.gif|", "Standard case" },
                { @"[[Image:foo with space and 0004.gif|", "Standard case" },
                { @"[[Image:foo.gif|", "Standard case" },
                { @"[[Image:foo with SPACE() and 0004.gif|", "Standard case" },
                { @"[[File:foo.png|", "Standard case" },
                { @"[[Image:foo with space and 0004.png|", "Standard case" },
                { @"[[Image:foo_here.png|", "Standard case" },
                { @"[[Image:foo with SPACE() and 0004.png|", "Standard case" }
            };

            for (int row = 0; row < fileNameCases.GetLength(0); row++)
            {
                Assert.IsFalse(Hide(fileNameCases[row, 0]).Contains("foo"), fileNameCases[row, 1]);
            }

            Assert.IsFalse(Hide(@"[[Image:westminster.tube.station.jubilee.arp.jpg|").Contains("westminster.tube.station.jubilee.arp"), "Dot name");

            // only the file name is hidden: the link brackets and the caption stay visible
            string captionedImage = @"[[File:foo.jpg|thumb|140px|[[Jo]] Assistant [[Ge]]]]";
            Assert.IsTrue(Hide(captionedImage).StartsWith("[["), "Retain starting brackets");
            Assert.IsTrue(Hide(captionedImage).Contains(@"thumb|140px|[[Jo]] Assistant [[Ge]]]]"), "Retain ending brackets");

            AssertAllHidden(@"<imagemap>
            File:Blogs001.jpeg|Description
            File:Blogs002.jpeg|Description
            </imagemap>");

            string sortKeyedCategory = @"[[Category:Foo|abc]]";
            Assert.IsFalse(HideMore(sortKeyedCategory, false).Contains("abc"), "Category sort key always hidden if hiding wikilinks and not leaving target");
            Assert.IsFalse(HideMore(sortKeyedCategory, true).Contains("abc"), "Category sort key hidden even if keeping targets");

            HideText keepLinksHider = new HideText(true, false, false);
            Assert.IsTrue(keepLinksHider.HideMore(sortKeyedCategory, false, false).Contains("abc"), "Category sort key kept if keeping wikilinks");
        }
Example #19
0
        // Covered by: LinkTests.TestBulletExternalLinks()
        /// <summary>
        /// Adds bullet points to external links that follow the last "external links" (or plain "links") heading.
        /// The heading is matched case-insensitively; text before it is left untouched.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <returns>The modified article text, or the input unchanged when no such heading exists.</returns>
        public static string BulletExternalLinks(string ArticleText)
        {
            // RightToLeft: search from the end so the last matching heading is the one found
            Match heading = Regex.Match(ArticleText, @"=\s*(?:external)?\s*links\s*=", RegexOptions.IgnoreCase | RegexOptions.RightToLeft);

            if (!heading.Success)
            {
                return ArticleText;
            }

            int splitAt = heading.Index;
            string fromHeading = ArticleText.Substring(splitAt);
            string beforeHeading = ArticleText.Substring(0, splitAt);

            HideText hider = new HideText(false, true, false);
            string hidden = hider.HideMore(fromHeading);

            // a line starting with http or [http gets "* " in front of it; when two newlines
            // precede the link only the second one is kept
            string bulleted = Regex.Replace(hidden, "(\r\n|\n)?(\r\n|\n)(\\[?http)", "$2* $3");

            return beforeHeading + hider.AddBackMore(bulleted);
        }
Example #20
0
        // Covered by: BoldTitleTests
        /// <summary>
        /// '''Emboldens''' the first occurrence of the article title, if not already bold.
        /// Works in two stages: first, self links to the title in the zeroth section are turned into bold text;
        /// only if that changed nothing is the first plain occurrence of the title (trailing bracketed part removed) emboldened,
        /// and that edit is kept only when AddedBoldIsValid accepts it.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The title of the article.</param>
        /// <param name="noChange">Value that indicated whether no change was made.</param>
        /// <returns>The modified article text, or the text as it was after the bolded-self-link cleanup when no bold was added.</returns>
        public string BoldTitle(string articleText, string articleTitle, out bool noChange)
        {
            // Hider2 (default options) is used for the zeroth section only; Hider3 hides more aggressively for the whole-article pass
            HideText Hider2 = new HideText();
            HideText Hider3 = new HideText(true, true, true);
            // clean up bolded self links first
            articleText = BoldedSelfLinks(articleTitle, articleText);

            noChange = true;
            string escTitle = Regex.Escape(articleTitle);
            // title with whatever BracketedAtEndOfLine matches removed, regex-escaped
            string escTitleNoBrackets = Regex.Escape(BracketedAtEndOfLine.Replace(articleTitle, ""));

            // snapshot returned by every "no change" exit below (already includes the BoldedSelfLinks cleanup)
            string articleTextAtStart = articleText;

            string zerothSection = WikiRegexes.ZerothSection.Match(articleText).Value;
            string restOfArticle = articleText.Remove(0, zerothSection.Length);

            // There's a limitation here in that we can't hide image descriptions that may be above lead sentence without hiding the self links we are looking to correct
            string zerothSectionHidden = Hider2.HideMore(zerothSection, false, false, false);
            string zerothSectionHiddenOriginal = zerothSectionHidden;

            // first check for any self links and no bold title, if found just convert first link to bold and return
            // NOTE(review): Regex.Replace is called without a count, so every matching self link in the zeroth section is converted, not only the first
            Regex r1 = new Regex(@"\[\[\s*" + escTitle + @"\s*\]\]");
            Regex r2 = new Regex(@"\[\[\s*" + Tools.TurnFirstToLower(escTitle) + @"\s*\]\]");

            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
            // don't apply if bold in lead section already or some noinclude transclusion business
            if (!Regex.IsMatch(zerothSection, "'''" + escTitle + "'''") && !WikiRegexes.Noinclude.IsMatch(articleText) && !WikiRegexes.Includeonly.IsMatch(articleText))
                zerothSectionHidden = r1.Replace(zerothSectionHidden, "'''" + articleTitle + @"'''");
            // the first-letter-lowercase variant is only tried when the exact-case pass changed nothing
            if (zerothSectionHiddenOriginal == zerothSectionHidden && !Regex.IsMatch(zerothSection, @"'''" + Tools.TurnFirstToLower(escTitle) + @"'''"))
                zerothSectionHidden = r2.Replace(zerothSectionHidden, "'''" + Tools.TurnFirstToLower(articleTitle) + @"'''");

            zerothSection = Hider2.AddBackMore(zerothSectionHidden);

            // a self link was converted: report the change and stop here
            if (zerothSectionHiddenOriginal != zerothSectionHidden)
            {
                noChange = false;
                return (zerothSection + restOfArticle);
            }

            // ignore date articles (date in American or international format)
            if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle))
                return articleTextAtStart;

            // bold title in either case of the first letter, with and without the trailing bracketed part
            Regex boldTitleAlready1 = new Regex(@"'''\s*(" + escTitle + "|" + Tools.TurnFirstToLower(escTitle) + @")\s*'''");
            Regex boldTitleAlready2 = new Regex(@"'''\s*(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + @")\s*'''");

            //if title in bold already exists in article, or page starts with something in bold, don't change anything
            if (boldTitleAlready1.IsMatch(articleText) || boldTitleAlready2.IsMatch(articleText)
                || BoldTitleAlready3.IsMatch(articleText))
                return articleTextAtStart;

            // so no self links to remove, check for the need to add bold
            string articleTextHidden = Hider3.HideMore(articleText);

            // first quick check: ignore articles with some bold in first 5% of hidemore article
            int fivepc = articleTextHidden.Length / 20;

            if (articleTextHidden.Substring(0, fivepc).Contains("'''"))
            {
                //articleText = Hider3.AddBackMore(articleTextHidden);
                return articleTextAtStart;
            }

            // title (either first-letter case) not directly preceded by '[' and followed by a space or one of , . : ;
            Regex regexBoldNoBrackets = new Regex(@"([^\[]|^)(" + escTitleNoBrackets + "|" + Tools.TurnFirstToLower(escTitleNoBrackets) + ")([ ,.:;])");

            // first try title with brackets removed
            // only the first match is emboldened (count argument of 1)
            if (regexBoldNoBrackets.IsMatch(articleTextHidden))
                articleTextHidden = regexBoldNoBrackets.Replace(articleTextHidden, "$1'''$2'''$3", 1);

            articleText = Hider3.AddBackMore(articleTextHidden);

            // check that the bold added is the first bit in bold in the main body of the article
            if (AddedBoldIsValid(articleText, escTitleNoBrackets))
            {
                noChange = false;
                return articleText;
            }

            // the added bold was rejected: discard it
            return articleTextAtStart;
        }
        /// <summary>
        /// Performs typo fixes against the article text, running the typo groups one after another.
        /// Typo fixes not performed if no typos loaded or the ignore regex matches the page
        /// (per the original note: any sic tags on page)
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="noChange">True if the typo groups left the text unchanged</param>
        /// <param name="summary">Empty when fixing was skipped, otherwise the typo summary tag plus the trimmed fix summary</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <returns>The updated article text</returns>
        public string PerformTypoFixes(string articleText, out bool noChange, out string summary, string articleTitle)
        {
            summary = "";

            if (TypoCount == 0 || IgnoreRegex.IsMatch(articleText))
            {
                noChange = true;
                return articleText;
            }

            string unhiddenText = articleText;

            HideText hider = new HideText(true, false, true);
            string working = hider.HideMore(articleText, true);

            // strip newlines, whitespace and hide tokens from the bottom so the
            // ~2K typo regexes are not run over them; they are re-appended on return
            Match tailMatch = RemoveTail.Match(working);
            string tail = tailMatch.Value;
            if (!string.IsNullOrEmpty(tail))
            {
                working = working.Remove(tailMatch.Index);
            }

            string beforeFixes = working;
            string fixSummary = "";

            foreach (TypoGroup grp in Groups)
            {
                grp.FixTypos(ref working, ref fixSummary, articleTitle, unhiddenText);
            }

            noChange = beforeFixes.Equals(working);
            summary = Variables.TypoSummaryTag + fixSummary.Trim();

            return hider.AddBackMore(working + tail);
        }
Example #22
0
        /// <summary>
        /// Checks that the bold just added to the article is the first bold in the article, and that it's within the first 5% of the HideMore article OR immediately after the infobox
        /// </summary>
        /// <param name="articleText">Article text with the bold title already added</param>
        /// <param name="escapedTitle">Regex-escaped title that was emboldened</param>
        /// <returns>true when either check passes</returns>
        private bool AddedBoldIsValid(string articleText, string escapedTitle)
        {
            HideText hider = new HideText(true, true, true);

            // length of everything up to and including the ''' that opens the bolded title
            Regex boldedTitle = new Regex(@"^(.*?)'''" + escapedTitle, RegexOptions.Singleline);
            int boldAddedPos = boldedTitle.Match(articleText).Length - Regex.Unescape(escapedTitle).Length;

            int firstBoldPos = RegexFirstBold.Match(articleText).Length;

            // the 5% window is measured on the hidden text; the infobox check below uses the unhidden text
            string hiddenText = hider.HideMore(articleText);
            string leadingFivePercent = hiddenText.Substring(0, hiddenText.Length / 20);

            // first check: some bold within the first 5%, and the added bold is not after the first bold
            if (leadingFivePercent.Contains("'''") && boldAddedPos <= firstBoldPos)
            {
                return true;
            }

            // second check: bold just after infobox
            Regex boldAfterInfobox = new Regex(WikiRegexes.InfoBox + @"\s*'''" + escapedTitle);

            return boldAfterInfobox.IsMatch(articleText);
        }
Example #23
0
 /// <summary>
 /// Sets the date (month & year) for undated cleanup tags that take a date
 /// by applying every pattern/replacement pair held in RegexTagger in turn.
 /// Avoids changing tags in unformatted text areas (wiki comments etc.)
 /// </summary>
 /// <param name="articleText">The wiki text of the article.</param>
 /// <returns>The updated article text</returns>
 public static string TagUpdater(string articleText)
 {
     HideText unformattedHider = new HideText();
     string working = unformattedHider.HideUnformatted(articleText);

     foreach (KeyValuePair<Regex, string> rule in RegexTagger)
     {
         Regex tagPattern = rule.Key;
         string replacement = rule.Value;

         working = tagPattern.Replace(working, replacement);
     }

     return unformattedHider.AddBackUnformatted(working);
 }
        /// <summary>
        /// Extracts all of the interwiki and interwiki featured article links from the article text
        /// Ignores interwikis in comments/nowiki tags
        /// </summary>
        /// <param name="articleText">Article text; passed by reference and handed on to the two removal helpers</param>
        /// <returns>string of interwiki featured article links followed by the interwiki links</returns>
        public string Interwikis(ref string articleText)
        {
            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_12#Interwiki_links_moved_out_of_comment
            HideText commentHider = new HideText(false, true, false);
            articleText = commentHider.Hide(articleText);

            // order matters: the link FA/GA pass runs before the plain interwiki pass,
            // and both receive articleText by reference
            string linkFGAs = ListToString(RemoveLinkFGAs(ref articleText));
            string plainInterwikis = ListToString(RemoveInterWikis(ref articleText));

            articleText = commentHider.AddBack(articleText);

            return linkFGAs + plainInterwikis;
        }
Example #25
0
        /// <summary>
        /// Checks Article.Unicodify: entities in plain text are converted, entities inside a template are left alone,
        /// and text without entities is unchanged.
        /// </summary>
        public void Unicodify()
        {
            // trailing part shared by the first two articles and their expected results
            const string footer = @"

            {{DEFAULTSORT:Hello test}}
            [[Category:Test pages]]
            ";

            Parsers parser = new Parsers();
            HideText hider = new HideText(false, true, false);

            Article article = new Article("a", @"'''test'''. z &amp; a&Dagger; &dagger;." + footer);
            article.Unicodify(true, parser, hider);
            Assert.AreEqual(@"'''test'''. z & a‡ †." + footer, article.ArticleText, "Text unicodified");

            // entities inside the template must survive
            article = new Article("a", @"'''test'''. z &amp; {{t|a&Dagger; &dagger;}}." + footer);
            article.Unicodify(true, parser, hider);
            Assert.AreEqual(@"'''test'''. z & {{t|a&Dagger; &dagger;}}." + footer, article.ArticleText, "Text unicodified, hidemore used");

            article = new Article("a", @"ABC");
            article.Unicodify(true, parser, hider);
            Assert.AreEqual(@"ABC", article.ArticleText, "No change");
        }
Example #26
0
        //TODO:Needs re-write
        /// <summary>
        /// If necessary, adds/removes wikify or stub tag
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The article title.</param>
        /// <param name="restrictOrphanTagging"></param>
        /// <param name="summary"></param>
        /// <returns>The tagged article.</returns>
        public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
        {
 			if(!TaggerPermitted(articleText, articleTitle))
 				return articleText;

            tagsRemoved.Clear();
            tagsAdded.Clear();
            int tagsrenamed = 0;

            string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            Sorter.Interwikis(ref commentsStripped);

            // bulleted or indented text should weigh less than simple text.
            // for example, actor stubs may contain large filmographies
            string crapStripped = BulletedText.Replace(commentsCategoriesStripped, "");
            int words = (Tools.WordCount(commentsCategoriesStripped, 999) + Tools.WordCount(crapStripped, 999)) / 2;

            // remove stub tags from long articles, don't move section stubs
            if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();

                if (Variables.LangCode.Equals("ar"))
                {
                    tagsRemoved.Add("بذرة");
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    tagsRemoved.Add("تقاوى");
                }
                else if (Variables.LangCode.Equals("hy"))
                {
                    tagsRemoved.Add("Անավարտ");
                }
                else
                {
                    tagsRemoved.Add("stub");
                }
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

            //remove disambiguation if disambiguation cleanup exists (en-wiki only)
            if (Variables.LangCode.Equals("en") && WikiRegexes.DisambigsCleanup.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.DisambigsGeneral.Replace(articleText, "").Trim();
            }

            // do orphan tagging before template analysis for categorisation tags
            articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

            articleText = TagRefsIbid(articleText);

            articleText = TagEmptySection(articleText);

            int totalCategories;
            // ignore commented out wikilinks, and any in {{Proposed deletion/dated}}
            int wikiLinkCount = Tools.LinkCount(ProposedDeletionDatedEndorsed.Replace(commentsStripped, ""));

            #if DEBUG || UNITTEST
            if (Globals.UnitTestMode)
            {
                totalCategories = Globals.UnitTestIntValue;
            }
            else
                #endif
            {
                // stubs add non-hidden stub categories, don't count these in categories count
                // also don't count "Proposed deletion..." cats
                // limitation: in the unlikely event that the article has only redlinked cats then it is {{uncat}} but we won't tag it as such
                totalCategories = RegularCategories(articleText).Count;

                // templates may add categories to page that are not [[Category...]] links, so use API call for accurate Category count
                if(totalCategories == 0)
                    totalCategories = RegularCategories(CategoryProv.MakeList(new[] { articleTitle })).Count;
            }

            // remove dead end if > 0 wikilinks on page
            if (wikiLinkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, "");
                else
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                if (!WikiRegexes.DeadEnd.IsMatch(articleText))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else
                    {
                        tagsRemoved.Add("deadend");
                    }
                }
            }

            // discount persondata, comments, infoboxes and categories from wikify/underlinked and stub evaluation
            string lengthtext = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "");
            lengthtext = WikiRegexes.InfoBox.Replace(lengthtext, "");
            lengthtext = Drugbox.Replace(lengthtext, "");

            int length = lengthtext.Length + 1;
            bool underlinked = (wikiLinkCount < 0.0025 * length);

            if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(commentsCategoriesStripped))
            {
                // add stub tag. Exclude pages their title starts with "List of..."
                if (!ListOf.IsMatch(articleTitle) && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة}}", 3);
                        tagsAdded.Add("بذرة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى}}", 3);
                        tagsAdded.Add("تقاوى");
                    }
                    else if (Variables.LangCode.Equals("hy"))
                    {
                        articleText += Tools.Newline("{{Անավարտ}}", 3);
                        tagsAdded.Add("Անավարտ");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{stub}}", 3);
                        tagsAdded.Add("stub");
                    }
                    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
                }
            }

            // rename existing {{improve categories}} else add uncategorized tag
            if (totalCategories == 0 && ImproveCategories.IsMatch(articleText))
                articleText = Tools.RenameTemplate(articleText, "improve categories", "Uncategorized");

            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
            // nl wiki doesn't use {{Uncategorized}} template
            // prevent wictionary redirects from being tagged as uncategorised
            if (words > 6 && totalCategories == 0
                && !WikiRegexes.Uncat.IsMatch(articleText)
                && Variables.LangCode != "nl")
            {
                if (WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // add uncategorized stub tag
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[تصنيف:مقالات غير مصنفة|غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[قالب:تقاوى مش متصنفه|تقاوى مش متصنفه]]");
                    }
                    else if(Variables.LangCode.Equals("hy")) // same template for uncat and uncat stub
                    {
                        articleText += Tools.Newline("{{Կատեգորիա չկա|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("Կատեգորիա չկա");
                    }
                    else if(Variables.LangCode.Equals("sv")) // same template for uncat and uncat stub
                    {
                        articleText += Tools.Newline("{{Okategoriserad|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[Mall:Okategoriserad|okategoriserad]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
                    }
                }
                else
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مقالات غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مش متصنفه]]");
                    }
                    else if(Variables.LangCode.Equals("el"))
                    {
                        articleText += Tools.Newline("{{Ακατηγοριοποίητο|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[Πρότυπο:Ακατηγοριοποίητο|ακατηγοριοποίητο]]");
                    }
                    else if(Variables.LangCode.Equals("hy"))
                    {
                        articleText += Tools.Newline("{{Կատեգորիա չկա|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("Կատեգորիա չկա");
                    }
                    else if(Variables.LangCode.Equals("sv"))
                    {
                        articleText += Tools.Newline("{{Okategoriserad|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[Mall:Okategoriserad|okategoriserad]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
                    }
                }
            }

            // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
            // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
            if (WikiRegexes.Uncat.IsMatch(articleText))
            {
                if (totalCategories > 0)
                {
                    articleText = WikiRegexes.Uncat.Replace(articleText, "").TrimStart();
                    if (Variables.LangCode.Equals("ar"))
                        tagsRemoved.Add("غير مصنفة");
                    else if (Variables.LangCode.Equals("arz"))
                        tagsRemoved.Add("مش متصنفه");
                    else
                        tagsRemoved.Add("uncategorised");
                    
                }
                else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                   // rename uncat to uncat stub if no uncat stub. If uncat and uncat stub, remove uncat.
                    bool uncatstub = false;
                    foreach(Match u in WikiRegexes.Uncat.Matches(articleText))
                    {
                        if(WikiRegexes.Stub.IsMatch(u.Value))
                        {
                            uncatstub = true;
                            break;
                        }
                    }

                    articleText = WikiRegexes.Uncat.Replace(articleText, u2 => {
                                                                if (!uncatstub) // rename
                                                                {
                                                                    tagsrenamed++;
                                                                    if (Variables.LangCode.Equals("ar"))
                                                                        return Tools.RenameTemplate(u2.Value, "بذرة غير مصنفة");
                                                                    else if (Variables.LangCode.Equals("arz"))
                                                                        return Tools.RenameTemplate(u2.Value, "تقاوى مش متصنفه");
                                                                    else if (Variables.LangCode.Equals("en") || Variables.LangCode.Equals("simple"))
                                                                        return Tools.RenameTemplate(u2.Value, "Uncategorized stub");
                                                                }
                                                                else // already uncat stub so remove plain uncat
                                                                {
                                                                    if(!WikiRegexes.Stub.IsMatch(u2.Value))
                                                                    {
                                                                        if (Variables.LangCode.Equals("ar"))
                                                                            tagsRemoved.Add("غير مصنفة");
                                                                        else if (Variables.LangCode.Equals("arz"))
                                                                            tagsRemoved.Add("مش متصنفه");
                                                                        else
                                                                            tagsRemoved.Add("uncategorised");
                                                                        return "";
                                                                    }
                                                                }
                                                                return u2.Value;
                                                            });
                }
            }

            if (wikiLinkCount == 0 &&
                !WikiRegexes.DeadEnd.IsMatch(articleText) &&
                !WikiRegexes.SIAs.IsMatch(articleText) &&
                !WikiRegexes.NonDeadEndPageTemplates.IsMatch(articleText) &&
                !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle)
               )
            {
                // add dead-end tag
                // no blank line between dead end and orphan tags for ar/arz
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{نهاية مسدودة|" + WikiRegexes.DateYearMonthParameter + "}}\r\n" + (WikiRegexes.Orphan.IsMatch(articleText) ? "" : "\r\n") + articleText;
                    tagsAdded.Add("[[:تصنيف:مقالات نهاية مسدودة|نهاية مسدودة]]");
                    // if dead end then remove underlinked/wikify
                    if(WikiRegexes.Wikify.IsMatch(articleText))
                    {
                        articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكي");
                    }
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{نهايه مسدوده|" + WikiRegexes.DateYearMonthParameter + "}}\r\n" + articleText;
                    tagsAdded.Add("[[:قالب:نهايه مسدوده|نهايه مسدوده]]");
                    // if dead end then remove underlinked
                    if(WikiRegexes.Wikify.IsMatch(articleText))
                    {
                        articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكى");
                    }
                }
                else if (Variables.LangCode != "sv" && !WikiRegexes.Centuryinbox.IsMatch(articleText)
                         && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower(), @"\bdead ?end\b")
                         && !MinorPlanetListFooter.IsMatch(articleText))
                {
                    // Don't add excess newlines between new tags
                    articleText = "{{Dead end|" + WikiRegexes.DateYearMonthParameter + "}}" + (tagsAdded.Count > 0 ? "\r\n" : "\r\n\r\n") + articleText;
                    tagsAdded.Add("[[CAT:DE|deadend]]");
                    // if dead end then remove underlinked
                    if(articleText.IndexOf("underlinked", StringComparison.OrdinalIgnoreCase) > -1)
                    {
                        articleText = Tools.NestedTemplateRegex("underlinked").Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("underlinked");
                    }
                }
            }
            // add underlinked/wikify tag, don't add underlinked/wikify if {{dead end}} already present
            // Dont' tag SIA pages, may create wikilinks from templates
            else if (wikiLinkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
                     && !WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower().Contains("wikify")
                     && !WikiRegexes.DeadEnd.IsMatch(articleText)
                     && !WikiRegexes.SIAs.IsMatch(articleText)
                     && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(articleText)
                     && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
            {
                // Avoid excess newlines between templates
                string templateEnd = "}}\r\n" + (articleText.TrimStart().StartsWith(@"{{") ? "" : "\r\n");
                
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{ويكي|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText.TrimStart();
                    tagsAdded.Add("[[وب:ويكي|ويكي]]");
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{ويكى|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[قالب:ويكى|ويكى]]");
                }
                else if (Variables.LangCode.Equals("sv"))
                {
                    articleText = "{{Ickewiki|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[WP:PW|ickewiki]]");
                }
                else
                {
                    articleText = "{{Underlinked|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[CAT:UL|underlinked]]");
                }
            }
            else if (wikiLinkCount > 3 && !underlinked &&
                     WikiRegexes.Wikify.IsMatch(articleText))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.Wikify.Replace(articleText, "");
                else
                    // remove wikify, except section templates or wikify tags with reason parameter specified
                    articleText = WikiRegexes.Wikify.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                if (!WikiRegexes.Wikify.IsMatch(articleText))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsRemoved.Add("ويكي");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        tagsRemoved.Add("ويكى");
                    }
                    else
                    {
                        tagsRemoved.Add("underlinked");
                    }
                }
            }

            // rename unreferenced --> refimprove if has existing refs, update date
            if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
                && (TotalRefsNotGrouped(commentsCategoriesStripped) + Tools.NestedTemplateRegex("sfn").Matches(articleText).Count) > 0)
            {
                articleText = Unreferenced.Replace(articleText, m2 => Tools.UpdateTemplateParameterValue(Tools.RenameTemplate(m2.Value, "refimprove"), "date", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}"));

                // update tag in old-style multiple issues
                Match m = WikiRegexes.MultipleIssues.Match(articleText);
                if (m.Success && Tools.GetTemplateParameterValue(m.Value, "unreferenced").Length > 0)
                {
                    string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "refimprove", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}");
                    if (!newValue.Equals(m.Value))
                        articleText = articleText.Replace(m.Value, newValue);
                }
            }

            if (tagsAdded.Count > 0 || tagsRemoved.Count > 0 || tagsrenamed > 0)
            {
                Parsers p = new Parsers();
                HideText hider = new HideText();

                articleText = hider.HideUnformatted(articleText);

                articleText = p.MultipleIssues(articleText);
                articleText = Conversions(articleText);
                articleText = hider.AddBackUnformatted(articleText);

                // sort again in case tag removal requires whitespace cleanup
                // Don't sort interwikis, we can't specify the correct InterWikiSortOrder
                p.SortInterwikis = false;
                articleText = p.Sorter.Sort(articleText, articleTitle);
            }

            summary = PrepareTaggerEditSummary();

            return articleText;
        }
Example #27
0
        /// <summary>
        /// Round-trips a simple wikilink through HideMore / AddBackMore: the hidden form must match AllHidden
        /// and adding the hidden content back must reproduce the input exactly
        /// </summary>
        public void HideMore()
        {
            const string wikilink = "[[foo]]";

            // constructor arguments are: hideExternalLinks, leaveMetaHeadings, hideImages
            Hider = new HideText(true, false, true);

            string hidden = Hider.HideMore(wikilink, false, true);
            RegexAssert.IsMatch(AllHidden, hidden);

            string restored = Hider.AddBackMore(hidden);
            Assert.AreEqual(wikilink, restored);
        }
 /// <summary>
 /// Assigns a default-constructed HideText to Hider, then returns the result of its HideMore for the given text.
 /// Unlike a round-trip check, nothing is added back here; callers can inspect the hidden form directly.
 /// </summary>
 private string HideMore(string text, bool HideOnlyTargetOfWikilink)
 {
     Hider = new HideText();

     string hiddenText = Hider.HideMore(text, HideOnlyTargetOfWikilink);
     return hiddenText;
 }
Example #29
0
        // Covered by: BoldTitleTests
        /// <summary>
        /// Puts the first occurrence of the article title in the zeroth (lead) section in '''bold''', if the title is not already bold
        /// 1) Cleans up bolded self wikilinks
        /// 2) Cleans up self wikilinks in the zeroth section
        /// 3) '''Emboldens''' the first occurrence of the article title
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The title of the article.</param>
        /// <param name="noChange">Set to false only when step 2 or step 3 changed the text; true on every other return path
        /// (including when only step 1 altered the text that is returned).</param>
        /// <returns>The modified article text.</returns>
        public string BoldTitle(string articleText, string articleTitle, out bool noChange)
        {
            noChange = true;
            List<string> templatesInArticle = GetAllTemplates(articleText);

            // articles carrying a "no bold title" template are left completely alone
            if (TemplateExists(templatesInArticle, NoBoldTitle))
            {
                return articleText;
            }

            HideText selfLinkHider = new HideText();
            HideText boldHider = new HideText(true, true, true);

            // 1) bolded self links are cleaned first, but only when the article has no includeonly/noinclude use
            string selfLinksCleaned = BoldedSelfLinks(articleTitle, articleText);

            if (!selfLinksCleaned.Equals(articleText) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
            {
                articleText = selfLinksCleaned;
            }

            // 2) Clean up self wikilinks
            // Every "nothing further to do" exit below returns this snapshot, i.e. the text as it stands after step 1
            string textAfterStepOne = articleText;
            string leadSection = Tools.GetZerothSection(articleText);
            string remainder = articleText.Substring(leadSection.Length);

            // If the lead has self links, convert them and return straight away
            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
            // Not applied when there is some noinclude transclusion business
            if (!SelfLinks(leadSection, articleTitle).Equals(leadSection) && !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText))
            {
                // Limitation: image descriptions that may sit above the lead sentence can't be hidden here
                // without also hiding the self links we are looking to correct
                string hiddenLead = selfLinkHider.HideMore(leadSection, false, false, false);
                string hiddenLeadFixed = SelfLinks(hiddenLead, articleTitle);
                leadSection = selfLinkHider.AddBackMore(hiddenLeadFixed);

                if (!hiddenLead.Equals(hiddenLeadFixed))
                {
                    noChange = false;
                    return (leadSection + remainder);
                }
            }

            // Performance check: when the title (less any trailing bracketed part) is absent from the lead, stop here
            string titleNoBrackets = BracketedAtEndOfLine.Replace(articleTitle, "");

            if (leadSection.IndexOf(titleNoBrackets, StringComparison.OrdinalIgnoreCase) < 0)
            {
                return textAfterStepOne;
            }

            // 3) '''Emboldens''' the first occurrence of the article title

            // date articles (American or international format) and nihongo title articles are skipped
            if (WikiRegexes.Dates2.IsMatch(articleTitle) || WikiRegexes.Dates.IsMatch(articleTitle)
                || TemplateExists(templatesInArticle, NihongoTitle))
            {
                return textAfterStepOne;
            }

            // each alternation matches the escaped title either as given or with its first character lower-cased
            string escapedTitle = Regex.Escape(articleTitle);
            string escapedTitleNoBrackets = Regex.Escape(titleNoBrackets);
            string titleAlternation = escapedTitle + "|" + Tools.TurnFirstToLower(escapedTitle);
            string titleNoBracketsAlternation = escapedTitleNoBrackets + "|" + Tools.TurnFirstToLower(escapedTitleNoBrackets);

            Regex boldFullTitle = new Regex(@"'''\s*(" + titleAlternation + @")\s*'''");
            Regex boldTitleNoBrackets = new Regex(@"'''\s*(" + titleNoBracketsAlternation + @")\s*'''");

            // Leave the article alone when the lead already has bold text matching BoldTitleAlready4 (bold inside infoboxes is ignored),
            // or when the lead matches DfnTag
            string leadInfoboxBlanked = Tools.ReplaceWithSpaces(leadSection, WikiRegexes.InfoBox.Matches(leadSection));

            if (BoldTitleAlready4.IsMatch(leadInfoboxBlanked) || DfnTag.IsMatch(leadSection))
            {
                return textAfterStepOne;
            }

            // Same idea across the whole article: the title already in bold anywhere outside an infobox means no change
            string articleInfoboxBlanked = Tools.ReplaceWithSpaces(articleText, WikiRegexes.InfoBox.Matches(articleText));

            if (boldFullTitle.IsMatch(articleInfoboxBlanked) || boldTitleNoBrackets.IsMatch(articleInfoboxBlanked)
                || BoldTitleAlready3.IsMatch(articleInfoboxBlanked))
            {
                return textAfterStepOne;
            }

            // No self links to remove, so see whether bold needs adding
            string articleWithoutTemplates = WikiRegexes.NestedTemplates.Replace(articleText, "");

            // quick check: any bold within the first 5% of the template-free text (so infoboxes, dablinks etc. don't count) means no change
            int fivePercentLength = articleWithoutTemplates.Length / 20;

            if (articleWithoutTemplates.Substring(0, fivePercentLength).Contains("'''"))
            {
                return textAfterStepOne;
            }

            Regex unboldedTitle = new Regex(@"([^\[]|^)(" + titleNoBracketsAlternation + ")([ ,.:;])");

            string hiddenLeadBeforeBold = boldHider.HideMore(leadSection);

            // try the title with brackets removed, emboldening the first match only
            string hiddenLeadWithBold = unboldedTitle.Replace(hiddenLeadBeforeBold, "$1'''$2'''$3", 1);

            leadSection = boldHider.AddBackMore(hiddenLeadWithBold);
            articleText = leadSection + remainder;

            // Discard the attempt unless something was emboldened and that bold is the first bit of bold in the main body of the article
            if (hiddenLeadBeforeBold.Equals(hiddenLeadWithBold) || !AddedBoldIsValid(articleText, escapedTitleNoBrackets))
            {
                return textAfterStepOne;
            }

            noChange = false;
            return articleText;
        }
Example #30
0
        /// <summary>
        /// Fixes date and decade formatting errors, and also collapses runs of two or more &lt;br&gt; tags into a single line break.
        /// The text is passed through HideMore before the fixes run and the hidden content is added back afterwards.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <returns>The modified article text.</returns>
        public string FixDates(string ArticleText)
        {
            HideText hider = new HideText();
            string hiddenText = hider.HideMore(ArticleText);

            string datesFixed = FixDatesRaw(hiddenText);

            // Remove 2 or more <br />'s
            // This piece's existence here is counter-intuitive, but it requires HideMore()
            // and I don't want to call this slow function yet another time --MaxSem
            string breaksCollapsed = Regex.Replace(datesFixed.Trim(), @"(<br[\s/]*> *){2,}", "\r\n", RegexOptions.IgnoreCase);

            return hider.AddBackMore(breaksCollapsed);
        }