AddBackUnformatted() public method

Restores the text that was previously removed (hidden) by HideUnformatted.
public AddBackUnformatted ( string articleText ) : string
articleText string The wiki text of the article.
return string
Example #1
0
        /// <summary>
        /// Applies the configured template substitutions to the article text.
        /// When the "ignore unformatted" option is checked, unformatted text is hidden
        /// before the substitution and put back afterwards.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <param name="ArticleTitle">The article title; passed on to Tools.ExpandTemplate.</param>
        /// <returns>The article text after substitution; returned as given when no regexes are configured.</returns>
        public string SubstituteTemplates(string ArticleText, string ArticleTitle)
        {
            // nothing to substitute
            if (Regexes.Count == 0)
                return ArticleText;

            if (chkIgnoreUnformatted.Checked)
                ArticleText = RemoveUnformatted.HideUnformatted(ArticleText);

            if (chkUseExpandTemplates.Checked)
            {
                // let the expand-templates helper perform the substitution
                ArticleText = Tools.ExpandTemplate(ArticleText, ArticleTitle, Regexes, chkIncludeComment.Checked);
            }
            else
            {
                // plain regex substitution, one configured pattern after another
                foreach (KeyValuePair<Regex, string> substitution in Regexes)
                    ArticleText = substitution.Key.Replace(ArticleText, substitution.Value);
            }

            // restore whatever was hidden above
            if (chkIgnoreUnformatted.Checked)
                ArticleText = RemoveUnformatted.AddBackUnformatted(ArticleText);

            return ArticleText;
        }
Example #2
0
        //TODO:Needs re-write
        /// <summary>
        /// If necessary, adds/removes maintenance tags: stub, dead end, uncategorized (and uncategorized stub),
        /// underlinked/wikify, and renames unreferenced to refimprove when the page already has references.
        /// Orphan, "refs ibid" and empty-section tagging are delegated to TagOrphans, TagRefsIbid and TagEmptySection.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The article title.</param>
        /// <param name="restrictOrphanTagging">Passed through unchanged to TagOrphans.</param>
        /// <param name="summary">Receives the value of PrepareTaggerEditSummary(). Not assigned when the method
        /// returns early because TaggerPermitted rejected the page.</param>
        /// <returns>The tagged article.</returns>
        public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
        {
            if(!TaggerPermitted(articleText, articleTitle))
                return articleText;

            tagsRemoved.Clear();
            tagsAdded.Clear();
            int tagsrenamed = 0;

            string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            Sorter.Interwikis(ref commentsStripped);

            // bulleted or indented text should weigh less than simple text.
            // for example, actor stubs may contain large filmographies
            string crapStripped = BulletedText.Replace(commentsCategoriesStripped, "");
            // word count is the average of the count with and without bulleted text
            int words = (Tools.WordCount(commentsCategoriesStripped, 999) + Tools.WordCount(crapStripped, 999)) / 2;

            // remove stub tags from long articles, don't move section stubs
            if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();

                if (Variables.LangCode.Equals("ar"))
                {
                    tagsRemoved.Add("بذرة");
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    tagsRemoved.Add("تقاوى");
                }
                else if (Variables.LangCode.Equals("hy"))
                {
                    tagsRemoved.Add("Անավարտ");
                }
                else
                {
                    tagsRemoved.Add("stub");
                }
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

            //remove disambiguation if disambiguation cleanup exists (en-wiki only)
            if (Variables.LangCode.Equals("en") && WikiRegexes.DisambigsCleanup.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.DisambigsGeneral.Replace(articleText, "").Trim();
            }

            // do orphan tagging before template analysis for categorisation tags
            articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

            articleText = TagRefsIbid(articleText);

            articleText = TagEmptySection(articleText);

            int totalCategories;
            // ignore commented out wikilinks, and any in {{Proposed deletion/dated}}
            int wikiLinkCount = Tools.LinkCount(ProposedDeletionDatedEndorsed.Replace(commentsStripped, ""));

            #if DEBUG || UNITTEST
            if (Globals.UnitTestMode)
            {
                totalCategories = Globals.UnitTestIntValue;
            }
            else
                #endif
            {
                // stubs add non-hidden stub categories, don't count these in categories count
                // also don't count "Proposed deletion..." cats
                // limitation: in the unlikely event that the article has only redlinked cats then it is {{uncat}} but we won't tag it as such
                totalCategories = RegularCategories(articleText).Count;

                // templates may add categories to page that are not [[Category...]] links, so use API call for accurate Category count
                if(totalCategories == 0)
                    totalCategories = RegularCategories(CategoryProv.MakeList(new[] { articleTitle })).Count;
            }

            // remove dead end if > 0 wikilinks on page
            if (wikiLinkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, "");
                else
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                // only report the removal when no dead end tag is left at all
                if (!WikiRegexes.DeadEnd.IsMatch(articleText))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        // NOTE(review): same label as "ar", although the arz tag added below is spelled differently - confirm intended
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else
                    {
                        tagsRemoved.Add("deadend");
                    }
                }
            }

            // discount persondata, comments, infoboxes and categories from wikify/underlinked and stub evaluation
            string lengthtext = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "");
            lengthtext = WikiRegexes.InfoBox.Replace(lengthtext, "");
            lengthtext = Drugbox.Replace(lengthtext, "");

            int length = lengthtext.Length + 1;
            bool underlinked = (wikiLinkCount < 0.0025 * length);

            if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(commentsCategoriesStripped))
            {
                // add stub tag. Exclude pages whose title starts with "List of..."
                if (!ListOf.IsMatch(articleTitle) && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة}}", 3);
                        tagsAdded.Add("بذرة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى}}", 3);
                        tagsAdded.Add("تقاوى");
                    }
                    else if (Variables.LangCode.Equals("hy"))
                    {
                        articleText += Tools.Newline("{{Անավարտ}}", 3);
                        tagsAdded.Add("Անավարտ");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{stub}}", 3);
                        tagsAdded.Add("stub");
                    }
                    // refresh so the stub checks below see the tag just added
                    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
                }
            }

            // rename existing {{improve categories}} else add uncategorized tag
            if (totalCategories == 0 && ImproveCategories.IsMatch(articleText))
                articleText = Tools.RenameTemplate(articleText, "improve categories", "Uncategorized");

            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
            // nl wiki doesn't use {{Uncategorized}} template
            // prevent Wiktionary redirects from being tagged as uncategorised
            if (words > 6 && totalCategories == 0
                && !WikiRegexes.Uncat.IsMatch(articleText)
                && Variables.LangCode != "nl")
            {
                if (WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // add uncategorized stub tag
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[تصنيف:مقالات غير مصنفة|غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[قالب:تقاوى مش متصنفه|تقاوى مش متصنفه]]");
                    }
                    else if(Variables.LangCode.Equals("hy")) // same template for uncat and uncat stub
                    {
                        articleText += Tools.Newline("{{Կատեգորիա չկա|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("Կատեգորիա չկա");
                    }
                    else if(Variables.LangCode.Equals("sv")) // same template for uncat and uncat stub
                    {
                        articleText += Tools.Newline("{{Okategoriserad|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[Mall:Okategoriserad|okategoriserad]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
                    }
                }
                else
                {
                    // add plain uncategorized tag
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مقالات غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مش متصنفه]]");
                    }
                    else if(Variables.LangCode.Equals("el"))
                    {
                        articleText += Tools.Newline("{{Ακατηγοριοποίητο|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[Πρότυπο:Ακατηγοριοποίητο|ακατηγοριοποίητο]]");
                    }
                    else if(Variables.LangCode.Equals("hy"))
                    {
                        articleText += Tools.Newline("{{Կատեգորիա չկա|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("Կատեգորիա չկա");
                    }
                    else if(Variables.LangCode.Equals("sv"))
                    {
                        articleText += Tools.Newline("{{Okategoriserad|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[Mall:Okategoriserad|okategoriserad]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
                    }
                }
            }

            // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
            // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
            if (WikiRegexes.Uncat.IsMatch(articleText))
            {
                if (totalCategories > 0)
                {
                    articleText = WikiRegexes.Uncat.Replace(articleText, "").TrimStart();
                    if (Variables.LangCode.Equals("ar"))
                        tagsRemoved.Add("غير مصنفة");
                    else if (Variables.LangCode.Equals("arz"))
                        tagsRemoved.Add("مش متصنفه");
                    else
                        tagsRemoved.Add("uncategorised");

                }
                else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // rename uncat to uncat stub if no uncat stub. If uncat and uncat stub, remove uncat.
                    bool uncatstub = false;
                    foreach(Match u in WikiRegexes.Uncat.Matches(articleText))
                    {
                        if(WikiRegexes.Stub.IsMatch(u.Value))
                        {
                            uncatstub = true;
                            break;
                        }
                    }

                    articleText = WikiRegexes.Uncat.Replace(articleText, u2 => {
                                                                if (!uncatstub) // rename
                                                                {
                                                                    // count a rename only for languages that actually have an uncat stub
                                                                    // template name here; other languages keep the tag untouched and must
                                                                    // not trigger the post-processing below on their own
                                                                    if (Variables.LangCode.Equals("ar"))
                                                                    {
                                                                        tagsrenamed++;
                                                                        return Tools.RenameTemplate(u2.Value, "بذرة غير مصنفة");
                                                                    }
                                                                    else if (Variables.LangCode.Equals("arz"))
                                                                    {
                                                                        tagsrenamed++;
                                                                        return Tools.RenameTemplate(u2.Value, "تقاوى مش متصنفه");
                                                                    }
                                                                    else if (Variables.LangCode.Equals("en") || Variables.LangCode.Equals("simple"))
                                                                    {
                                                                        tagsrenamed++;
                                                                        return Tools.RenameTemplate(u2.Value, "Uncategorized stub");
                                                                    }
                                                                }
                                                                else // already uncat stub so remove plain uncat
                                                                {
                                                                    if(!WikiRegexes.Stub.IsMatch(u2.Value))
                                                                    {
                                                                        if (Variables.LangCode.Equals("ar"))
                                                                            tagsRemoved.Add("غير مصنفة");
                                                                        else if (Variables.LangCode.Equals("arz"))
                                                                            tagsRemoved.Add("مش متصنفه");
                                                                        else
                                                                            tagsRemoved.Add("uncategorised");
                                                                        return "";
                                                                    }
                                                                }
                                                                return u2.Value;
                                                            });
                }
            }

            if (wikiLinkCount == 0 &&
                !WikiRegexes.DeadEnd.IsMatch(articleText) &&
                !WikiRegexes.SIAs.IsMatch(articleText) &&
                !WikiRegexes.NonDeadEndPageTemplates.IsMatch(articleText) &&
                !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle)
               )
            {
                // add dead-end tag
                // no blank line between dead end and orphan tags for ar/arz
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{نهاية مسدودة|" + WikiRegexes.DateYearMonthParameter + "}}\r\n" + (WikiRegexes.Orphan.IsMatch(articleText) ? "" : "\r\n") + articleText;
                    tagsAdded.Add("[[:تصنيف:مقالات نهاية مسدودة|نهاية مسدودة]]");
                    // if dead end then remove underlinked/wikify
                    if(WikiRegexes.Wikify.IsMatch(articleText))
                    {
                        articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكي");
                    }
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{نهايه مسدوده|" + WikiRegexes.DateYearMonthParameter + "}}\r\n" + articleText;
                    tagsAdded.Add("[[:قالب:نهايه مسدوده|نهايه مسدوده]]");
                    // if dead end then remove underlinked
                    if(WikiRegexes.Wikify.IsMatch(articleText))
                    {
                        articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكى");
                    }
                }
                // culture-invariant lowering: the current culture (e.g. Turkish casing rules) must not affect tag detection
                else if (Variables.LangCode != "sv" && !WikiRegexes.Centuryinbox.IsMatch(articleText)
                         && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value.ToLowerInvariant(), @"\bdead ?end\b")
                         && !MinorPlanetListFooter.IsMatch(articleText))
                {
                    // Don't add excess newlines between new tags
                    articleText = "{{Dead end|" + WikiRegexes.DateYearMonthParameter + "}}" + (tagsAdded.Count > 0 ? "\r\n" : "\r\n\r\n") + articleText;
                    tagsAdded.Add("[[CAT:DE|deadend]]");
                    // if dead end then remove underlinked
                    if(articleText.IndexOf("underlinked", StringComparison.OrdinalIgnoreCase) > -1)
                    {
                        articleText = Tools.NestedTemplateRegex("underlinked").Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("underlinked");
                    }
                }
            }
            // add underlinked/wikify tag, don't add underlinked/wikify if {{dead end}} already present
            // Don't tag SIA pages, may create wikilinks from templates
            // "wikify" inside multiple issues is looked up ordinal/case-insensitively so that the
            // current culture's casing rules (Turkish dotless i) cannot hide an existing tag
            else if (wikiLinkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
                     && WikiRegexes.MultipleIssues.Match(articleText).Value.IndexOf("wikify", StringComparison.OrdinalIgnoreCase) < 0
                     && !WikiRegexes.DeadEnd.IsMatch(articleText)
                     && !WikiRegexes.SIAs.IsMatch(articleText)
                     && !WikiRegexes.NonDeadEndPageTemplates.IsMatch(articleText)
                     && !WikiRegexes.MeaningsOfMinorPlanetNames.IsMatch(articleTitle))
            {
                // Avoid excess newlines between templates
                string templateEnd = "}}\r\n" + (articleText.TrimStart().StartsWith(@"{{") ? "" : "\r\n");

                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{ويكي|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText.TrimStart();
                    tagsAdded.Add("[[وب:ويكي|ويكي]]");
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{ويكى|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[قالب:ويكى|ويكى]]");
                }
                else if (Variables.LangCode.Equals("sv"))
                {
                    articleText = "{{Ickewiki|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[WP:PW|ickewiki]]");
                }
                else
                {
                    articleText = "{{Underlinked|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[CAT:UL|underlinked]]");
                }
            }
            else if (wikiLinkCount > 3 && !underlinked &&
                     WikiRegexes.Wikify.IsMatch(articleText))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.Wikify.Replace(articleText, "");
                else
                    // remove wikify, except section templates or wikify tags with reason parameter specified
                    articleText = WikiRegexes.Wikify.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                // only report the removal when no wikify/underlinked tag is left at all
                if (!WikiRegexes.Wikify.IsMatch(articleText))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsRemoved.Add("ويكي");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        tagsRemoved.Add("ويكى");
                    }
                    else
                    {
                        tagsRemoved.Add("underlinked");
                    }
                }
            }

            // rename unreferenced --> refimprove if has existing refs, update date
            if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
                && (TotalRefsNotGrouped(commentsCategoriesStripped) + Tools.NestedTemplateRegex("sfn").Matches(articleText).Count) > 0)
            {
                articleText = Unreferenced.Replace(articleText, m2 => Tools.UpdateTemplateParameterValue(Tools.RenameTemplate(m2.Value, "refimprove"), "date", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}"));

                // update tag in old-style multiple issues
                Match m = WikiRegexes.MultipleIssues.Match(articleText);
                if (m.Success && Tools.GetTemplateParameterValue(m.Value, "unreferenced").Length > 0)
                {
                    string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "refimprove", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}");
                    if (!newValue.Equals(m.Value))
                        articleText = articleText.Replace(m.Value, newValue);
                }
            }

            // post-process only when this method actually added, removed or renamed a tag
            if (tagsAdded.Count > 0 || tagsRemoved.Count > 0 || tagsrenamed > 0)
            {
                Parsers p = new Parsers();
                HideText hider = new HideText();

                // keep unformatted text out of the way while the tag clean-up runs
                articleText = hider.HideUnformatted(articleText);

                articleText = p.MultipleIssues(articleText);
                articleText = Conversions(articleText);
                articleText = hider.AddBackUnformatted(articleText);

                // sort again in case tag removal requires whitespace cleanup
                // Don't sort interwikis, we can't specify the correct InterWikiSortOrder
                p.SortInterwikis = false;
                articleText = p.Sorter.Sort(articleText, articleTitle);
            }

            summary = PrepareTaggerEditSummary();

            return articleText;
        }
Example #3
0
 /// <summary>
 /// Runs every pattern/replacement pair in RegexTagger over the article text
 /// (sets the month &amp; year on undated cleanup tags that take a date).
 /// Unformatted text areas (wiki comments etc.) are hidden first and restored
 /// afterwards, so tags inside them are left unchanged.
 /// </summary>
 /// <param name="articleText">The wiki text of the article.</param>
 /// <returns>The updated article text</returns>
 public static string TagUpdater(string articleText)
 {
     HideText hider = new HideText();

     // work on a copy of the text with the unformatted areas hidden
     string text = hider.HideUnformatted(articleText);

     foreach (KeyValuePair<Regex, string> rule in RegexTagger)
         text = rule.Key.Replace(text, rule.Value);

     // restore the hidden areas before handing the text back
     text = hider.AddBackUnformatted(text);
     return text;
 }
Example #4
0
        //TODO:Needs re-write
        /// <summary>
        /// If necessary, adds/removes wikify or stub tag
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The article title.</param>
        /// <param name="restrictOrphanTagging"></param>
        /// <param name="summary"></param>
        /// <returns>The tagged article.</returns>
        public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
        {
            // don't tag redirects/outside article namespace/no tagging changes
            if (!Namespace.IsMainSpace(articleTitle) || Tools.IsRedirect(articleText) || WikiRegexes.Wi.IsMatch(articleText))
                return articleText;

            tagsRemoved.Clear();
            tagsAdded.Clear();

            string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            Sorter.Interwikis(ref commentsStripped);

            // bulleted or indented text should weigh less than simple text.
            // for example, actor stubs may contain large filmographies
            string crapStripped = WikiRegexes.BulletedText.Replace(commentsCategoriesStripped, "");
            int words = (Tools.WordCount(commentsCategoriesStripped) + Tools.WordCount(crapStripped))/2;

            // remove stub tags from long articles
            if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();
                tagsRemoved.Add("stub");
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

            // on en wiki, remove expand template when a stub template exists
            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests/Archive_5#Remove_.7B.7Bexpand.7D.7D_when_a_stub_template_exists
            if (Variables.LangCode == "en" && WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                WikiRegexes.Expand.IsMatch(commentsCategoriesStripped))
            {
                articleText = WikiRegexes.Expand.Replace(articleText, "");
                tagsRemoved.Add("expand");
            }

            // refresh
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            
            // do orphan tagging before template analysis for categorisation tags
            articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

            articleText = TagRefsIbid(articleText);

            articleText = TagEmptySection(articleText);

            int totalCategories;
            int linkCount = Tools.LinkCount(commentsStripped);

            #if DEBUG || UNITTEST
            if (Globals.UnitTestMode)
            {
                totalCategories = Globals.UnitTestIntValue;
            }
            else
                #endif
            {
                // stubs add non-hidden stub categories, don't count these in categories count
                List<Article> Cats = CategoryProv.MakeList(new[] {articleTitle});
                List<Article> CatsNotStubs = new List<Article>();

                foreach (Article a in Cats)
                {
                    if (!a.Name.EndsWith(" stubs") && !a.Name.EndsWith(":Stubs"))
                        CatsNotStubs.Add(a);
                }
                totalCategories = CatsNotStubs.Count;
            }

            if (linkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText))
            {
                articleText = WikiRegexes.DeadEnd.Replace(articleText, new MatchEvaluator(SectionTagME));
                
                if(!WikiRegexes.DeadEnd.IsMatch(articleText))
                    tagsRemoved.Add("deadend");
            }

            // discount persondata along with comments and categories from wikify and stub evaluation
            int length = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "").Length + 1;
            bool underlinked = (linkCount < 0.0025*length);

            if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped))
            {
                // add stub tag
                articleText += Tools.Newline("{{stub}}", 3);
                tagsAdded.Add("stub");
                commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            }

            // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
            // nl wiki doesn't use {{Uncategorized}} template
            // prevent wictionary redirects from being tagged as uncategorised
            if (words > 6 && totalCategories == 0
                && !WikiRegexes.Uncat.IsMatch(articleText)
                && Variables.LangCode != "nl"
                && !Tools.NestedTemplateRegex("cat improve").IsMatch(articleText)
                // category count is from API; don't add uncat tag if genfixes added person categories
                && !WikiRegexes.DeathsOrLivingCategory.IsMatch(articleText)
                && !WikiRegexes.BirthsCategory.IsMatch(articleText))
            {
                if (WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // add uncategorized stub tag
                    articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter +
                        @"}}";
                    tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
                }
                else
                {
                    // add uncategorized tag
                    articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                    tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
                }
            }

            // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
            // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
            if (WikiRegexes.Uncat.IsMatch(articleText))
            {
                if (totalCategories > 0)
                {
                    articleText = WikiRegexes.Uncat.Replace(articleText, "");
                    tagsRemoved.Add("uncategorised");
                }
                else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    string uncatname = WikiRegexes.Uncat.Match(articleText).Groups[1].Value;
                    if (!uncatname.Contains("stub"))
                        articleText = Tools.RenameTemplate(articleText, uncatname, "Uncategorized stub");
                }
            }

            if (linkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && Variables.LangCode != "sv"
                && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower(), @"\bdead ?end\b"))
            {
                // add dead-end tag
                articleText = "{{dead end|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                tagsAdded.Add("[[:Category:Dead-end pages|deadend]]");
            }

            if (linkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
                && !WikiRegexes.MultipleIssues.Match(articleText).Value.ToLower().Contains("wikify"))
            {
                // add wikify tag
                articleText = "{{Wikify|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                tagsAdded.Add("[[WP:WFY|wikify]]");
            }
            else if (linkCount > 3 && !underlinked &&
                     WikiRegexes.Wikify.IsMatch(articleText))
            {
                articleText = WikiRegexes.Wikify.Replace(articleText, new MatchEvaluator(SectionTagME));
                
                if(!WikiRegexes.Wikify.IsMatch(articleText))
                    tagsRemoved.Add("wikify");
            }

            // rename unreferenced --> refimprove if has existing refs
            if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
                && WikiRegexes.Refs.Matches(commentsCategoriesStripped).Count > 0)
            {
                articleText = Tools.RenameTemplate(articleText, "unreferenced", "refimprove", true);
                
                Match m = WikiRegexes.MultipleIssues.Match(articleText);
                if(m.Success)
                {
                    string newValue = Tools.RenameTemplateParameter(m.Value,  "unreferenced", "refimprove");
                    if(!newValue.Equals(m.Value))
                        articleText = articleText.Replace(m.Value, newValue);
                }
            }

            if (tagsAdded.Count > 0 || tagsRemoved.Count > 0)
            {
                Parsers p = new Parsers();
                HideText ht = new HideText();

                articleText = ht.HideUnformatted(articleText);

                articleText = p.MultipleIssues(articleText);
                articleText = Conversions(articleText);
                articleText = ht.AddBackUnformatted(articleText);

                // sort again in case tag removal requires whitespace cleanup
                articleText = p.Sorter.Sort(articleText, articleTitle);
            }

            summary = PrepareTaggerEditSummary();

            return articleText;
        }
Example #5
0
        //TODO:Needs re-write
        /// <summary>
        /// Adds, removes or renames maintenance tags (stub, uncategorized, dead end,
        /// underlinked/wikify, unreferenced/refimprove) based on the word count,
        /// wikilink count and category count of the article.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">The article title.</param>
        /// <param name="restrictOrphanTagging">Passed through unchanged to TagOrphans.</param>
        /// <param name="summary">Set to the result of PrepareTaggerEditSummary(); left untouched when the method returns early without tagging.</param>
        /// <returns>The tagged article text, or the input unchanged when the page is not eligible for tagging.</returns>
        public string Tagger(string articleText, string articleTitle, bool restrictOrphanTagging, ref string summary)
        {
            // don't tag redirects/outside article namespace/no tagging changes
            if (!Namespace.IsMainSpace(articleTitle) || Tools.IsRedirect(articleText) || WikiRegexes.Wi.IsMatch(articleText))
                return articleText;

            tagsRemoved.Clear();
            tagsAdded.Clear();

            string commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            string commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");
            Sorter.Interwikis(ref commentsStripped);

            // bulleted or indented text should weigh less than simple text.
            // for example, actor stubs may contain large filmographies
            string crapStripped = WikiRegexes.BulletedText.Replace(commentsCategoriesStripped, "");
            int words = (Tools.WordCount(commentsCategoriesStripped) + Tools.WordCount(crapStripped)) / 2;

            // remove stub tags from long articles, don't move section stubs
            if ((words > StubMaxWordCount) && WikiRegexes.Stub.IsMatch(commentsStripped))
            {
                articleText = WikiRegexes.Stub.Replace(articleText, StubChecker).Trim();
                tagsRemoved.Add("stub");
            }

            // refresh the stripped copies, the stub removal above may have changed articleText
            commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
            commentsCategoriesStripped = WikiRegexes.Category.Replace(commentsStripped, "");

            // do orphan tagging before template analysis for categorisation tags
            articleText = TagOrphans(articleText, articleTitle, restrictOrphanTagging);

            articleText = TagRefsIbid(articleText);

            articleText = TagEmptySection(articleText);

            int totalCategories;
            // ignore commented out wikilinks, and any in {{Proposed deletion/dated}}
            int wikiLinkCount = Tools.LinkCount(ProposedDeletionDatedEndorsed.Replace(commentsStripped, ""));

#if DEBUG || UNITTEST
            if (Globals.UnitTestMode)
            {
                totalCategories = Globals.UnitTestIntValue;
            }
            else
#endif
            {
                // stubs add non-hidden stub categories, don't count these in categories count
                // also don't count "Proposed deletion..." cats
                List<Article> Cats = CategoryProv.MakeList(new[] { articleTitle });
                totalCategories = RegularCategories(Cats).Count;

                // cats may have been added to page by genfixes, F&R or user (when reparsing) so check cats on page if API says zero
                // so we correctly count for uncat tagging
                if (totalCategories == 0)
                    totalCategories = RegularCategories(articleText).Count;
            }

            // remove dead end when wikilinks on page, but not for en-wiki where dead end can mean "not enough" links
            if (wikiLinkCount > 0 && WikiRegexes.DeadEnd.IsMatch(articleText)
                && !Variables.LangCode.Equals("en"))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, "");
                else
                    // keep section templates and tags that give a reason, otherwise keep only capture group 1
                    articleText = WikiRegexes.DeadEnd.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                // only report the removal if no dead end tag survived the replace
                if (!WikiRegexes.DeadEnd.IsMatch(articleText))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        tagsRemoved.Add("نهاية مسدودة");
                    }
                    else
                    {
                        tagsRemoved.Add("deadend");
                    }
                }
            }

            // discount persondata, comments, infoboxes and categories from wikify and stub evaluation
            string lengthtext = WikiRegexes.Persondata.Replace(commentsCategoriesStripped, "");
            lengthtext = WikiRegexes.InfoBox.Replace(lengthtext, "");
            lengthtext = Drugbox.Replace(lengthtext, "");

            int length = lengthtext.Length + 1;
            bool underlinked = (wikiLinkCount < 0.0025 * length);

            if (length <= 300 && !WikiRegexes.Stub.IsMatch(commentsCategoriesStripped) &&
                !WikiRegexes.Disambigs.IsMatch(commentsCategoriesStripped) && !WikiRegexes.SIAs.IsMatch(commentsCategoriesStripped))
            {
                // add stub tag. Exclude pages whose title starts with "List of..."
                if (!ListOf.IsMatch(articleTitle))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة}}", 3);
                        tagsAdded.Add("بذرة");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى}}", 3);
                        tagsAdded.Add("تقاوى");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{stub}}", 3);
                        tagsAdded.Add("stub");
                    }
                    // refresh so the stub checks below see the tag just added
                    commentsStripped = WikiRegexes.Comments.Replace(articleText, "");
                }
            }

            // rename existing {{improve categories}} else add uncategorized tag
            if (totalCategories == 0 && ImproveCategories.IsMatch(articleText))
                articleText = Tools.RenameTemplate(articleText, "improve categories", "Uncategorized");

            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Archive_19#AWB_problems
            // nl wiki doesn't use {{Uncategorized}} template
            // prevent wiktionary redirects from being tagged as uncategorised
            if (words > 6 && totalCategories == 0
                && !WikiRegexes.Uncat.IsMatch(articleText)
                && Variables.LangCode != "nl")
            {
                if (WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    // add uncategorized stub tag
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{بذرة غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[تصنيف:مقالات غير مصنفة|غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{تقاوى مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[قالب:تقاوى مش متصنفه|تقاوى مش متصنفه]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized stub|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCATSTUBS|uncategorised]]");
                    }
                }
                else
                {
                    // add uncategorized tag
                    if (Variables.LangCode.Equals("ar"))
                    {
                        articleText += Tools.Newline("{{غير مصنفة|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مقالات غير مصنفة]]");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        articleText += Tools.Newline("{{مش متصنفه|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|مش متصنفه]]");
                    }
                    else
                    {
                        articleText += Tools.Newline("{{Uncategorized|", 2) + WikiRegexes.DateYearMonthParameter + @"}}";
                        tagsAdded.Add("[[CAT:UNCAT|uncategorised]]");
                    }
                }
            }

            // remove {{Uncategorized}} if > 0 real categories (stub categories not counted)
            // rename {{Uncategorized}} to {{Uncategorized stub}} if stub with zero categories (stub categories not counted)
            if (WikiRegexes.Uncat.IsMatch(articleText))
            {
                if (totalCategories > 0)
                {
                    articleText = WikiRegexes.Uncat.Replace(articleText, "").TrimStart();
                    if (Variables.LangCode.Equals("ar"))
                        tagsRemoved.Add("غير مصنفة");
                    else if (Variables.LangCode.Equals("arz"))
                        tagsRemoved.Add("مش متصنفه");
                    else
                        tagsRemoved.Add("uncategorised");
                }
                else if (totalCategories == 0 && WikiRegexes.Stub.IsMatch(commentsStripped))
                {
                    string uncatname = WikiRegexes.Uncat.Match(articleText).Groups[1].Value;
                    if (!uncatname.Contains("stub"))
                    {
                        // use the same localised "uncategorized stub" template names as the add path above
                        if (Variables.LangCode.Equals("ar"))
                            articleText = Tools.RenameTemplate(articleText, uncatname, "بذرة غير مصنفة");
                        else if (Variables.LangCode.Equals("arz"))
                            articleText = Tools.RenameTemplate(articleText, uncatname, "تقاوى مش متصنفه");
                        else
                            articleText = Tools.RenameTemplate(articleText, uncatname, "Uncategorized stub");
                    }
                }
            }

            if (wikiLinkCount == 0 && !WikiRegexes.DeadEnd.IsMatch(articleText) && !WikiRegexes.SIAs.IsMatch(articleText))
            {
                // add dead-end tag
                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{نهاية مسدودة|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                    tagsAdded.Add("[[:تصنيف:مقالات نهاية مسدودة|نهاية مسدودة]]");
                    // if dead end then remove underlinked
                    if (WikiRegexes.Wikify.IsMatch(articleText))
                    {
                        articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكي");
                    }
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{نهايه مسدوده|" + WikiRegexes.DateYearMonthParameter + "}}\r\n\r\n" + articleText;
                    tagsAdded.Add("[[:قالب:نهايه مسدوده|نهايه مسدوده]]");
                    // if dead end then remove underlinked
                    if (WikiRegexes.Wikify.IsMatch(articleText))
                    {
                        articleText = WikiRegexes.Wikify.Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("ويكى");
                    }
                }
                // case-insensitive, culture-independent check for a dead end entry inside {{multiple issues}}
                else if (Variables.LangCode != "sv" && !WikiRegexes.Centuryinbox.IsMatch(articleText)
                         && !Regex.IsMatch(WikiRegexes.MultipleIssues.Match(articleText).Value, @"\bdead ?end\b", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
                {
                    // Don't add excess newlines between new tags
                    articleText = "{{dead end|" + WikiRegexes.DateYearMonthParameter + "}}" + (tagsAdded.Count > 0 ? "\r\n" : "\r\n\r\n") + articleText;
                    tagsAdded.Add("[[CAT:DE|deadend]]");
                    // if dead end then remove underlinked
                    if (articleText.IndexOf("underlinked", StringComparison.OrdinalIgnoreCase) > -1)
                    {
                        articleText = Tools.NestedTemplateRegex("underlinked").Replace(articleText, "").TrimStart();
                        tagsRemoved.Add("underlinked");
                    }
                }
            }
            // add wikify tag, don't add underlinked/wikify if {{dead end}} already present
            // Don't tag SIA pages, may create wikilinks from templates
            // the {{multiple issues}} check is ordinal and case-insensitive so the current culture cannot affect it
            else if (wikiLinkCount < 3 && underlinked && !WikiRegexes.Wikify.IsMatch(articleText)
                     && WikiRegexes.MultipleIssues.Match(articleText).Value.IndexOf("wikify", StringComparison.OrdinalIgnoreCase) < 0
                     && !WikiRegexes.DeadEnd.IsMatch(articleText)
                     && !WikiRegexes.SIAs.IsMatch(articleText))
            {
                // Avoid excess newlines between templates
                string templateEnd = "}}\r\n" + (articleText.StartsWith(@"{{") ? "" : "\r\n");

                if (Variables.LangCode.Equals("ar"))
                {
                    articleText = "{{ويكي|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[وب:ويكي|ويكي]]");
                }
                else if (Variables.LangCode.Equals("arz"))
                {
                    articleText = "{{ويكى|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[قالب:ويكى|ويكى]]");
                }
                else if (Variables.LangCode.Equals("sv"))
                {
                    articleText = "{{Wikify|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[WP:WFY|wikify]]");
                }
                else
                {
                    articleText = "{{Underlinked|" + WikiRegexes.DateYearMonthParameter + templateEnd + articleText;
                    tagsAdded.Add("[[CAT:UL|underlinked]]");
                }
            }
            else if (wikiLinkCount > 3 && !underlinked &&
                     WikiRegexes.Wikify.IsMatch(articleText))
            {
                if (Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz"))
                    articleText = WikiRegexes.Wikify.Replace(articleText, "");
                else
                    // remove wikify, except section templates or wikify tags with reason parameter specified
                    articleText = WikiRegexes.Wikify.Replace(articleText, m => Tools.IsSectionOrReasonTemplate(m.Value, articleText) ? m.Value : m.Groups[1].Value).TrimStart();

                // only report the removal if no wikify tag survived the replace
                if (!WikiRegexes.Wikify.IsMatch(articleText))
                {
                    if (Variables.LangCode.Equals("ar"))
                    {
                        tagsRemoved.Add("ويكي");
                    }
                    else if (Variables.LangCode.Equals("arz"))
                    {
                        tagsRemoved.Add("ويكى");
                    }
                    else
                    {
                        tagsRemoved.Add("underlinked");
                    }
                }
            }

            // rename unreferenced --> refimprove if has existing refs, update date
            if (WikiRegexes.Unreferenced.IsMatch(commentsCategoriesStripped)
                && (TotalRefsNotGrouped(commentsCategoriesStripped) + Tools.NestedTemplateRegex("sfn").Matches(articleText).Count) > 0)
            {
                articleText = Unreferenced.Replace(articleText, m2 => Tools.UpdateTemplateParameterValue(Tools.RenameTemplate(m2.Value, "refimprove"), "date", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}"));

                // update tag in old-style multiple issues
                Match m = WikiRegexes.MultipleIssues.Match(articleText);
                if (m.Success && Tools.GetTemplateParameterValue(m.Value, "unreferenced").Length > 0)
                {
                    string newValue = Tools.RenameTemplateParameter(m.Value, "unreferenced", "refimprove");
                    newValue = Tools.UpdateTemplateParameterValue(newValue, "refimprove", "{{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}");
                    if (!newValue.Equals(m.Value))
                        articleText = articleText.Replace(m.Value, newValue);
                }
            }

            // tidy up only when this run actually added or removed a tag
            if (tagsAdded.Count > 0 || tagsRemoved.Count > 0)
            {
                Parsers p = new Parsers();
                HideText ht = new HideText();

                // hide unformatted text while the parser passes run, then restore it
                articleText = ht.HideUnformatted(articleText);

                articleText = p.MultipleIssues(articleText);
                articleText = Conversions(articleText);
                articleText = ht.AddBackUnformatted(articleText);

                // sort again in case tag removal requires whitespace cleanup
                // Don't sort interwikis, we can't specify the correct InterWikiSortOrder
                p.SortInterwikis = false;
                articleText = p.Sorter.Sort(articleText, articleTitle);
            }

            summary = PrepareTaggerEditSummary();

            return articleText;
        }
Example #6
0
 /// <summary>
 /// Dates (month and year) the undated cleanup tags matched by the RegexTagger entries.
 /// Unformatted text is hidden before the replacements run and restored afterwards.
 /// A tag that already carries a "Date" parameter has that parameter renamed to "date".
 /// Note: bugzilla 2700 means {{subst:}} within ref tags doesn't work; nothing is done about that here.
 /// </summary>
 /// <param name="articleText">The wiki text of the article.</param>
 /// <returns>The updated article text</returns>
 public static string TagUpdater(string articleText)
 {
     HideText hider = new HideText();
     string text = hider.HideUnformatted(articleText);

     foreach (KeyValuePair<Regex, string> tagger in RegexTagger)
     {
         string replacement = tagger.Value;

         text = tagger.Key.Replace(text, tag =>
         {
             // a capitalised Date parameter is already present: only rename it
             if (Tools.GetTemplateParameterValue(tag.Value, "Date").Length > 0)
             {
                 return Tools.RenameTemplateParameter(tag.Value, "Date", "date");
             }

             // otherwise substitute capture group 1 into the replacement pattern
             return replacement.Replace("$1", tag.Groups[1].Value);
         });
     }

     return hider.AddBackUnformatted(text);
 }