/// <summary>
/// Shows this form as a modal dialog and asks the selected special-page provider for a list of articles.
/// </summary>
/// <param name="searchCriteria">
/// Overwritten inside the method with the pipe-separated contents of the pages text box
/// once the dialog has been confirmed.
/// </param>
/// <returns>
/// <c>null</c> when the form is already visible; otherwise the provider's list, or an empty list
/// when the dialog was not confirmed or the provider needs pages and none were entered.
/// </returns>
public List <Article> MakeList(params string[] searchCriteria)
        {
            // The form is already on screen, so do not open it a second time.
            if (Visible)
            {
                return null;
            }

            txtPages.Text = "";

            List<Article> articles = new List<Article>();

            if (ShowDialog() == DialogResult.OK)
            {
                string enteredPages = txtPages.Text;
                searchCriteria = enteredPages.Split(new[] { '|' });

                ISpecialPageProvider provider = (ISpecialPageProvider)cmboSourceSelect.SelectedItem;
                bool hasPages = !string.IsNullOrEmpty(enteredPages);

                if (!hasPages && provider.PagesNeeded)
                {
                    // Nothing was entered, but this provider cannot work without pages.
                    MessageBox.Show("Pages needed!");
                }
                else
                {
                    // With no input, a single empty criterion is passed to the provider.
                    string[] criteria = hasPages ? searchCriteria : new[] { "" };
                    articles = provider.MakeList(Namespace.Determine(cboNamespace.Text), criteria);
                }
            }

            Hide();

            return articles;
        }
Ejemplo n.º 2
0
        // Covered by: RecategorizerTests.Addition()
        /// <summary>
        /// Appends a category link to the article text, unless a link to that category is already present.
        /// </summary>
        /// <param name="newCategory">The name of the category to add.</param>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">Title of the article.</param>
        /// <returns>
        /// The untouched input text when the category is already linked; otherwise the text with the
        /// category appended and passed through <c>SortMetaData</c>.
        /// </returns>
        public string AddCategory(string newCategory, string articleText, string articleTitle)
        {
            string originalText = articleText;

            articleText = FixCategories(articleText);

            // Matches "[[<category namespace><newCategory>" followed by either "|" or "]"
            string existingLinkPattern = @"\[\["
                                         + Variables.NamespacesCaseInsensitive[Namespace.Category]
                                         + Regex.Escape(newCategory) + @"[\|\]]";

            if (Regex.IsMatch(articleText, existingLinkPattern))
            {
                // Already categorised: hand back the text exactly as it was received
                return originalText;
            }

            string categoryLink = Tools.Newline("[[" + Variables.Namespaces[Namespace.Category] + newCategory + "]]");

            categoryLink = Tools.ApplyKeyWords(articleTitle, categoryLink);

            // On template pages the category is wrapped in <noinclude> tags
            bool isTemplate = Namespace.Determine(articleTitle) == Namespace.Template;

            articleText += isTemplate
                ? "<noinclude>" + categoryLink + Tools.Newline("</noinclude>")
                : categoryLink;

            // Sort metadata ordering so general fixes do not need to be enabled
            return SortMetaData(articleText, articleTitle, false);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Shows this form as a modal dialog, asks the selected special-page provider for a list of
        /// articles and returns that list after it has been passed through <c>Tools.FilterSomeArticles</c>.
        /// </summary>
        /// <param name="searchCriteria">
        /// Overwritten inside the method with the pipe-separated contents of the pages text box
        /// once the dialog has been confirmed.
        /// </param>
        /// <returns>The filtered article list; built from an empty list when nothing was fetched.</returns>
        public List <Article> MakeList(params string[] searchCriteria)
        {
            txtPages.Text = "";

            List<Article> found = new List<Article>();

            // Dialog was not confirmed: nothing to fetch, filter the empty list and leave.
            if (ShowDialog() != DialogResult.OK)
            {
                return Tools.FilterSomeArticles(found);
            }

            searchCriteria = txtPages.Text.Split(new[] { '|' });

            ISpecialPageProvider provider = (ISpecialPageProvider)cmboSourceSelect.SelectedItem;

            //TODO: There must be a better way to do the NS determination??
            if (!string.IsNullOrEmpty(txtPages.Text))
            {
                found = provider.MakeList(Namespace.Determine(cboNamespace.Text), searchCriteria);
            }
            else
            {
                if (provider.PagesNeeded)
                {
                    MessageBox.Show("Pages needed!");
                }
                else
                {
                    // No pages entered and none required: query with a single empty criterion.
                    found = provider.MakeList(Namespace.Determine(cboNamespace.Text), new[] { "" });
                }
            }

            return Tools.FilterSomeArticles(found);
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Checks that <c>Namespace.Determine</c> copes with non-canonical spellings of a namespace prefix.
 /// </summary>
 public void DetermineDeviations()
 {
     // Spaces around the colon
     var spacedFile = Namespace.Determine("File : foo");
     Assert.AreEqual(Namespace.File, spacedFile);

     // Lower-case prefix
     var lowerCaseUser = Namespace.Determine("user:foo");
     Assert.AreEqual(Namespace.User, lowerCaseUser);

     // Underscore in place of the space
     var underscoredUserTalk = Namespace.Determine("user_talk:foo");
     Assert.AreEqual(Namespace.UserTalk, underscoredUserTalk);

     // URL-encoded space
     var encodedUserTalk = Namespace.Determine("user%20talk:foo");
     Assert.AreEqual(Namespace.UserTalk, encodedUserTalk);
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks the namespace returned by <c>Namespace.Determine</c> for a set of page titles.
        /// </summary>
        public void Determine()
        {
            // Titles expected to resolve to namespace 0, including ones that merely contain a colon
            Assert.AreEqual(0, Namespace.Determine("test"));
            Assert.AreEqual(0, Namespace.Determine(":test"));
            Assert.AreEqual(0, Namespace.Determine("test:test"));
            Assert.AreEqual(0, Namespace.Determine("My Project:Foo"));
            // NOTE(review): the next line looks mangled by a credential scrubber (the "******" run);
            // it appears to be several assertions fused together and will not compile as written.
            // Restore it from version control rather than guessing at the missing text.
            Assert.AreEqual(0, Namespace.Determine("User:"******"Talk:foo"));
            Assert.AreEqual(Namespace.UserTalk, Namespace.Determine("User talk:bar"));

            // Both "File:" and "Image:" prefixes are expected to resolve to the File namespace
            Assert.AreEqual(Namespace.File, Namespace.Determine("File:foo"));
            Assert.AreEqual(Namespace.File, Namespace.Determine("Image:foo"));

            // Both "Wikipedia:" and "Project:" prefixes are expected to resolve to the Project namespace
            Assert.AreEqual(Namespace.Project, Namespace.Determine("Wikipedia:Foo"));
            Assert.AreEqual(Namespace.Project, Namespace.Determine("Project:Foo"));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Parses the XML returned by the server, checks it for error conditions and throws
        /// the matching exception when one is found.
        /// </summary>
        /// <param name="xml">Server output</param>
        /// <param name="action">
        /// The action performed; pass null or an empty string to skip the action-specific checks
        /// (the generic error-element check is still carried out)
        /// </param>
        /// <returns>The parsed response document when no error condition was detected</returns>
        private XmlDocument CheckForErrors(string xml, string action)
        {
            if (string.IsNullOrEmpty(xml))
            {
                throw new ApiBlankException(this);
            }

            var doc = new XmlDocument();

            try
            {
                // Dispose the reader deterministically once the document has been loaded
                using (var reader = new StringReader(xml))
                {
                    doc.Load(reader);
                }
            }
            catch (XmlException xe)
            {
                Tools.WriteDebug("ApiEdit::CheckForErrors", xml);

                string postParams = "";
                if (lastPostParameters != null)
                {
                    // Blank out sensitive values before they end up in the exception.
                    // Note that this overwrites the stored lastPostParameters in place.
                    int length = lastPostParameters.GetUpperBound(0);
                    for (int i = 0; i <= length; i++)
                    {
                        if (lastPostParameters[i, 0].Contains("password") || lastPostParameters[i, 0].Contains("token"))
                        {
                            lastPostParameters[i, 1] = "<removed>";
                        }
                    }
                    postParams = BuildQuery(lastPostParameters);
                }
                throw new ApiXmlException(this, xe, lastGetUrl, postParams, xml);
            }

            //TODO: can't figure out the best time for this check
            bool prevMessages = User.HasMessages;

            User.Update(doc);

            // Only throw when the "has messages" flag has just flipped from false to true
            if (action != "login" &&
                action != "userinfo" &&
                NewMessageThrows &&
                User.HasMessages &&
                !prevMessages)
            {
                throw new NewMessagesException(this);
            }

            var errors = doc.GetElementsByTagName("error");

            if (errors.Count > 0)
            {
                // Only the first <error> element is examined
                var    error        = errors[0];
                string errorCode    = error.Attributes["code"].Value;
                string errorMessage = error.Attributes["info"].Value;

                // Invariant lower-casing: culture-sensitive ToLower() can change the result under
                // some locales (e.g. Turkish dotted/dotless i), so codes containing 'i' would not match
                switch (errorCode.ToLowerInvariant())
                {
                case "maxlag":     //guessing
                    int maxlag;
                    int.TryParse(MaxLag.Match(xml).Groups[1].Value, out maxlag);
                    throw new MaxlagException(this, maxlag, 10);

                case "wrnotloggedin":
                    throw new LoggedOffException(this);

                case "spamdetected":
                    throw new SpamlistException(this, errorMessage);

                //case "confirmemail":
                //
                default:
                    if (errorCode.Contains("disabled"))
                    {
                        throw new FeatureDisabledException(this, errorCode, errorMessage);
                    }

                    throw new ApiErrorException(this, errorCode, errorMessage);
                }
            }

            if (string.IsNullOrEmpty(action))
            {
                return(doc);                              // no action to check
            }
            var api = doc["api"];

            if (api == null)
            {
                return(doc);
            }

            var redirects = api.GetElementsByTagName("r");

            if (action == "query" && redirects.Count >= 1) //We have redirects
            {
                // Workaround for https://bugzilla.wikimedia.org/show_bug.cgi?id=39492
                // Only the target of the last redirect element is checked
                if (Namespace.IsSpecial(Namespace.Determine(redirects[redirects.Count - 1].Attributes["to"].Value)))
                {
                    throw new RedirectToSpecialPageException(this);
                }
            }

            //FIXME: Awful code is awful
            var page = api.GetElementsByTagName("page");

            // The first <page> element carrying an empty "invalid" attribute signals an invalid title
            if (page.Count > 0 && page[0].Attributes != null && page[0].Attributes["invalid"] != null &&
                page[0].Attributes["invalid"].Value == "")
            {
                throw new InvalidTitleException(this, page[0].Attributes["title"].Value);
            }

            if (api.GetElementsByTagName("interwiki").Count > 0)
            {
                throw new InterwikiException(this);
            }

            // Checks below inspect the child element of <api> named after the action
            var actionElement = api[action];

            if (actionElement == null)
            {
                return(doc);                       // or shall we explode?
            }
            if (actionElement.HasAttribute("assert"))
            {
                string what = actionElement.GetAttribute("assert");
                if (what == "user")
                {
                    throw new LoggedOffException(this);
                }
                throw new AssertionFailedException(this, what);
            }

            if (actionElement.HasAttribute("spamblacklist"))
            {
                throw new SpamlistException(this, actionElement.GetAttribute("spamblacklist"));
            }

            if (actionElement.GetElementsByTagName("captcha").Count > 0)
            {
                throw new CaptchaException(this);
            }

            string result = actionElement.GetAttribute("result");

            // A missing or empty result attribute is not treated as a failure
            if (!string.IsNullOrEmpty(result) && result != "Success")
            {
                throw new OperationFailedException(this, action, result, xml);
            }

            return(doc);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Tells whether the namespace determined from the article's title is in <c>Namespaces</c>.
 /// </summary>
 /// <param name="article">The article whose title is examined.</param>
 /// <returns>true when <c>Namespaces</c> contains the title's namespace.</returns>
 public override bool Check(ArticleInfo article)
 {
     var titleNamespace = Namespace.Determine(article.Title);

     return Namespaces.Contains(titleNamespace);
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Re-orders the metadata of an article (person data, disambiguation tags, categories, stubs
        /// and interwikis) at the end of the text, in the order used by the current language's wiki.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <param name="fixOptionalWhitespace">Whether to request optional excess whitespace to be fixed</param>
        /// <returns>
        /// The updated article text; the input text unchanged for template pages or when an
        /// exception occurs while sorting
        /// </returns>
        internal string Sort(string articleText, string articleTitle, bool fixOptionalWhitespace)
        {
            // Don't really want to be fooling around with templates
            if (Namespace.Determine(articleTitle) == Namespace.Template)
            {
                return articleText;
            }

            string untouchedText = articleText;

            try
            {
                articleText = Regex.Replace(articleText, "<!-- ?\\[\\[en:.*?\\]\\] ?-->", "");

                // articleText is passed by ref to each helper below, so the order of these calls matters
                string personDataBlock = Tools.Newline(RemovePersonData(ref articleText));
                string disambigBlock   = Tools.Newline(RemoveDisambig(ref articleText));
                string categoryBlock   = Tools.Newline(RemoveCats(ref articleText, articleTitle));
                string interwikiBlock  = Tools.Newline(Interwikis(ref articleText));

                // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests#Move_orphan_tags_on_the_top
                // Dablinks above orphan tags per [[WP:LAYOUT]]
                if (Variables.LangCode == "en")
                {
                    articleText = MoveMaintenanceTags(articleText);
                }

                articleText = MoveDablinks(articleText);

                // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests#Placement_of_portal_template
                // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests.html#Move_nofootnotes_to_the_references_section
                if (Variables.LangCode == "en")
                {
                    articleText = MovePortalTemplates(articleText);
                    articleText = MoveSisterlinks(articleText);
                    articleText = MoveTemplateToReferencesSection(articleText, WikiRegexes.Ibid);
                    articleText = MoveExternalLinks(articleText);
                    articleText = MoveSeeAlso(articleText);
                }

                // two newlines here per http://en.wikipedia.org/w/index.php?title=Wikipedia_talk:AutoWikiBrowser&oldid=243224092#Blank_lines_before_stubs
                // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Two_empty_lines_before_stub-templates
                // Russian wiki uses only one newline
                int newlinesBeforeStub = Variables.LangCode == "ru" ? 1 : 2;
                string stubBlock = Tools.Newline(RemoveStubs(ref articleText), newlinesBeforeStub);

                //filter out excess white space and remove "----" from end of article
                articleText = Parsers.RemoveWhiteSpace(articleText, fixOptionalWhitespace) + "\r\n" + disambigBlock;

                string lang = Variables.LangCode;

                if (lang == "de" || lang == "sl")
                {
                    articleText += stubBlock + categoryBlock + personDataBlock;

                    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser#Removal_of_blank_lines
                    // on de wiki a blank line is desired between persondata and interwikis
                    if (lang == "de" && personDataBlock.Length > 0 && interwikiBlock.Length > 0)
                    {
                        articleText += "\r\n";
                    }
                }
                else if (lang == "pl" || lang == "ru" || lang == "simple")
                {
                    articleText += personDataBlock + stubBlock + categoryBlock;
                }
                else
                {
                    articleText += personDataBlock + categoryBlock + stubBlock;
                }

                string sorted = articleText + interwikiBlock;
                return sorted.TrimEnd();
            }
            catch (Exception ex)
            {
                // Exceptions whose message mentions DEFAULTSORT are not reported; every failure restores the input
                bool mentionsDefaultSort = ex.Message.Contains("DEFAULTSORT");
                if (!mentionsDefaultSort)
                {
                    ErrorHandler.Handle(ex);
                }
                return untouchedText;
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Performs some cleanup operations on dablinks
        /// Merges some for and about dablinks
        /// Merges multiple distinguish into one
        /// Only the zeroth section of the article is processed; the rest is re-appended unchanged.
        /// Does nothing unless Variables.LangCode is "en".
        /// </summary>
        /// <param name="articleText">The article text</param>
        /// <returns>The updated article text</returns>
        public static string Dablinks(string articleText)
        {
            if (!Variables.LangCode.Equals("en"))
            {
                return(articleText);
            }

            // Split the article: all work below is done on the zeroth section only
            string zerothSection = Tools.GetZerothSection(articleText);
            string restOfArticle = articleText.Substring(zerothSection.Length);

            articleText = zerothSection;

            // conversions

            // otheruses4 rename - Wikipedia only
            if (Variables.IsWikipediaEN)
            {
                articleText = Tools.RenameTemplate(articleText, "otheruses4", "about");
            }

            // "{{about|about x..." --> "{{about|x..."
            foreach (Match m in Tools.NestedTemplateRegex("about").Matches(articleText))
            {
                if (m.Groups[3].Value.TrimStart("| ".ToCharArray()).StartsWith("about", StringComparison.OrdinalIgnoreCase))
                {
                    articleText = articleText.Replace(m.Value, m.Groups[1].Value + m.Groups[2].Value + Regex.Replace(m.Groups[3].Value, @"^\|\s*[Aa]bout\s*", "|"));
                }
            }

            // merging

            // multiple same about into one
            // Repeats until a full pass leaves the text unchanged; each pass performs at most one merge
            string oldArticleText = "";

            while (oldArticleText != articleText)
            {
                oldArticleText = articleText;
                bool doneAboutMerge = false;
                foreach (Match m in Tools.NestedTemplateRegex("about").Matches(articleText))
                {
                    string firstarg = Tools.GetTemplateArgument(m.Value, 1);

                    foreach (Match m2 in Tools.NestedTemplateRegex("about").Matches(articleText))
                    {
                        // skip self, and any other template call with identical text
                        if (m2.Value == m.Value)
                        {
                            continue;
                        }

                        // match when reason is the same, not matching on self
                        if (Tools.GetTemplateArgument(m2.Value, 1).Equals(firstarg))
                        {
                            // argument 2 length > 0
                            if (Tools.GetTemplateArgument(m.Value, 2).Length > 0 && Tools.GetTemplateArgument(m2.Value, 2).Length > 0)
                            {
                                articleText    = articleText.Replace(m.Value, m.Value.TrimEnd('}') + @"|" + Tools.GetTemplateArgument(m2.Value, 2) + @"|" + Tools.GetTemplateArgument(m2.Value, 3) + @"}}");
                                doneAboutMerge = true;
                            }

                            // argument 2 is empty in both
                            if (Tools.GetTemplateArgument(m.Value, 2).Length == 0 && Tools.GetTemplateArgument(m2.Value, 2).Length == 0)
                            {
                                articleText    = articleText.Replace(m.Value, m.Value.TrimEnd('}') + @"|and|" + Tools.GetTemplateArgument(m2.Value, 3) + @"}}");
                                doneAboutMerge = true;
                            }
                        }
                        // match when the reason of the second template is empty
                        else if (Tools.GetTemplateArgument(m2.Value, 1).Length == 0)
                        {
                            // argument 2 length > 0
                            if (Tools.GetTemplateArgument(m.Value, 2).Length > 0 && Tools.GetTemplateArgument(m2.Value, 2).Length > 0)
                            {
                                articleText    = articleText.Replace(m.Value, m.Value.TrimEnd('}') + @"|" + Tools.GetTemplateArgument(m2.Value, 2) + @"|" + Tools.GetTemplateArgument(m2.Value, 3) + @"}}");
                                doneAboutMerge = true;
                            }
                        }

                        // the second template's content now lives in the first one, so drop it
                        if (doneAboutMerge)
                        {
                            articleText = articleText.Replace(m2.Value, "");
                            break;
                        }
                    }
                    // the matches were taken from the pre-merge text; restart the scan via the while loop
                    if (doneAboutMerge)
                    {
                        break;
                    }
                }
            }

            // multiple for into about: rename a 2-argument for into an about with no reason value
            if (Tools.NestedTemplateRegex("for").Matches(articleText).Count > 1 && Tools.NestedTemplateRegex("about").Matches(articleText).Count == 0)
            {
                foreach (Match m in Tools.NestedTemplateRegex("for").Matches(articleText))
                {
                    // only the first {{for}} without a third argument is converted
                    if (Tools.GetTemplateArgument(m.Value, 3).Length == 0)
                    {
                        articleText = articleText.Replace(m.Value, Tools.RenameTemplate(m.Value, "about|"));
                        break;
                    }
                }
            }

            // for into existing about, when about has >=2 arguments
            if (Tools.NestedTemplateRegex("about").Matches(articleText).Count == 1 &&
                Tools.GetTemplateArgument(Tools.NestedTemplateRegex("about").Match(articleText).Value, 2).Length > 0)
            {
                foreach (Match m in Tools.NestedTemplateRegex("for").Matches(articleText))
                {
                    // re-read the {{about}} on every iteration, as earlier iterations may have extended it
                    string about = Tools.NestedTemplateRegex("about").Match(articleText).Value;

                    // about supports up to 9 arguments
                    if (Tools.GetTemplateArgument(about, 9).Length > 0)
                    {
                        continue;
                    }

                    string extra = "";

                    // where about has 2 arguments need extra pipe
                    if (Tools.GetTemplateArgument(Tools.NestedTemplateRegex("about").Match(articleText).Value, 3).Length == 0 &&
                        Tools.GetTemplateArgument(Tools.NestedTemplateRegex("about").Match(articleText).Value, 4).Length == 0)
                    {
                        extra = @"|";
                    }

                    // append {{for}} value to the {{about}}
                    if (Tools.GetTemplateArgument(m.Value, 3).Length == 0)
                    {
                        articleText = articleText.Replace(about, about.TrimEnd('}') + extra + m.Groups[3].Value);
                    }
                    else if (Tools.GetTemplateArgument(m.Value, 4).Length == 0)  // where for has 3 arguments need extra and
                    {
                        articleText = articleText.Replace(about, about.TrimEnd('}') + extra + m.Groups[3].Value.Insert(m.Groups[3].Value.LastIndexOf('|') + 1, "and|"));
                    }

                    // if there are 4 arguments do nothing
                    // remove the old {{for}}
                    if (Tools.GetTemplateArgument(m.Value, 4).Length == 0)
                    {
                        articleText = articleText.Replace(m.Value, "");
                    }
                }

                // if for with blank first argument copied over then now need to put "other uses" as the argument
                articleText = Tools.NestedTemplateRegex("about").Replace(articleText, m2 => {
                    string res = m2.Value;
                    // argument 7 present but argument 6 empty: fill argument 6
                    if (Tools.GetTemplateArgument(res, 7).Length > 0 && Tools.GetTemplateArgument(res, 6).Length == 0)
                    {
                        res = res.Insert(Tools.GetTemplateArgumentIndex(res, 6), "other uses");
                    }
                    return(res);
                });
            }

            // non-mainspace links need escaping in {{about}}
            foreach (Match m in Tools.NestedTemplateRegex("about").Matches(articleText))
            {
                string aboutcall = m.Value;
                for (int a = 1; a <= Tools.GetTemplateArgumentCount(m.Value); a++)
                {
                    string arg = Tools.GetTemplateArgument(aboutcall, a);
                    // prefix the argument with a colon when it is not determined to be in mainspace
                    if (arg.Length > 0 && Namespace.Determine(arg) != Namespace.Mainspace)
                    {
                        aboutcall = aboutcall.Replace(arg, @":" + arg);
                    }
                }

                if (!m.Value.Equals(aboutcall))
                {
                    articleText = articleText.Replace(m.Value, aboutcall);
                }
            }

            // multiple {{distinguish}} into one
            // Same fixed-point approach as the about merge: one merge per pass until nothing changes
            oldArticleText = "";
            while (oldArticleText != articleText)
            {
                oldArticleText = articleText;
                bool doneDistinguishMerge = false;
                foreach (Match m in Tools.NestedTemplateRegex("distinguish").Matches(articleText))
                {
                    foreach (Match m2 in Tools.NestedTemplateRegex("distinguish").Matches(articleText))
                    {
                        // skip self, and any other template call with identical text
                        if (m2.Value.Equals(m.Value))
                        {
                            continue;
                        }

                        // append the second template's group 3 to the first, then remove the second
                        articleText = articleText.Replace(m.Value, m.Value.TrimEnd('}') + m2.Groups[3].Value);

                        doneDistinguishMerge = true;
                        articleText          = articleText.Replace(m2.Value, "");
                        break;
                    }

                    if (doneDistinguishMerge)
                    {
                        break;
                    }
                }
            }

            // put the untouched remainder of the article back after the processed zeroth section
            return(articleText + restOfArticle);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Re-orders the metadata of an article (person data, disambiguation tags, categories, stubs
        /// and interwikis) at the end of the text, in the order used by the current language's wiki.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">Title of the article.</param>
        /// <returns>
        /// The updated article text; the input text unchanged when the title's namespace is 10 or
        /// when an exception occurs while sorting.
        /// </returns>
        internal string Sort(string articleText, string articleTitle)
        {
            //Dont really want to be fooling around with templates
            if (Namespace.Determine(articleTitle) == 10)
            {
                return articleText;
            }

            string backup = articleText;

            try
            {
                articleText = Regex.Replace(articleText, "<!-- ?\\[\\[en:.*?\\]\\] ?-->", "");

                // articleText is passed by ref to each helper below, so the order of these calls matters
                string personData = Newline(RemovePersonData(ref articleText));
                string disambig   = Newline(RemoveDisambig(ref articleText));
                string categories = Newline(RemoveCats(ref articleText, articleTitle));
                string interwikis = Newline(Interwikis(ref articleText));

                // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests#Move_orphan_tags_on_the_top
                // Dablinks above orphan tags per [[WP:LAYOUT]]
                if (Variables.LangCode == LangCodeEnum.en)
                {
                    articleText = MoveOrphanTags(articleText);
                }

                articleText = MoveDablinks(articleText);

                // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests#Placement_of_portal_template
                // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Feature_requests.html#Move_nofootnotes_to_the_references_section
                if (Variables.LangCode == LangCodeEnum.en)
                {
                    articleText = MovePortalTemplates(articleText);
                    articleText = MoveMoreNoFootnotes(articleText);
                    articleText = MoveExternalLinks(articleText);
                    articleText = MoveSeeAlso(articleText);
                }

                // two newlines here per http://en.wikipedia.org/w/index.php?title=Wikipedia_talk:AutoWikiBrowser&oldid=243224092#Blank_lines_before_stubs
                // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs#Two_empty_lines_before_stub-templates
                // Russian wiki uses only one newline
                int stubNewlines = Variables.LangCode == LangCodeEnum.ru ? 1 : 2;
                string stubs = Newline(RemoveStubs(ref articleText), stubNewlines);

                //filter out excess white space and remove "----" from end of article
                articleText = Parsers.RemoveWhiteSpace(articleText) + "\r\n" + disambig;

                // Assemble the trailing metadata in the order the current wiki expects
                var lang = Variables.LangCode;
                string footer;

                if (lang == LangCodeEnum.de || lang == LangCodeEnum.sl)
                {
                    footer = stubs + categories + personData;

                    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser#Removal_of_blank_lines
                    // on de wiki a blank line is desired between persondata and interwikis
                    if (lang == LangCodeEnum.de && personData.Length > 0 && interwikis.Length > 0)
                    {
                        footer += "\r\n";
                    }
                }
                else if (lang == LangCodeEnum.pl || lang == LangCodeEnum.ru || lang == LangCodeEnum.simple)
                {
                    footer = personData + stubs + categories;
                }
                else
                {
                    footer = personData + categories + stubs;
                }

                return articleText + footer + interwikis;
            }
            catch (Exception ex)
            {
                // Exceptions whose message mentions DEFAULTSORT are not reported; every failure restores the input
                bool mentionsDefaultSort = ex.Message.Contains("DEFAULTSORT");
                if (!mentionsDefaultSort)
                {
                    ErrorHandler.Handle(ex);
                }
                return backup;
            }
        }
Ejemplo n.º 11
0
 /// <summary>
 /// Tells whether the namespace determined from the article title is in <c>namespaces</c>.
 /// Only <paramref name="ArticleTitle"/> is read; the other arguments are not used.
 /// </summary>
 /// <returns>true when <c>namespaces</c> contains the title's namespace.</returns>
 public override bool Check(ref string ArticleText, ref string ArticleTitle, string ArticleTimestamp, string ArticleRestrictions)
 {
     var titleNamespace = Namespace.Determine(ArticleTitle);

     return namespaces.Contains(titleNamespace);
 }
Ejemplo n.º 12
0
        /// <summary>
        /// Sorts article meta data
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <param name="fixOptionalWhitespace">Whether to request optional excess whitespace to be fixed</param>
        /// <returns>The updated article text</returns>
        internal string Sort(string articleText, string articleTitle, bool fixOptionalWhitespace)
        {
            if (Namespace.Determine(articleTitle) == Namespace.Template)         // Don't sort on templates
            {
                return(articleText);
            }

            // short pages monitor check for en-wiki: keep at very end of article if present
            // See [[Template:Long comment/doc]]
            // SPM regex quick check for performance on long pages
            string shortPagesMonitor = "";

            if (Variables.LangCode.Equals("en") && SPMQuick.IsMatch(articleText))
            {
                Match spm = WikiRegexes.ShortPagesMonitor.Match(articleText);

                if (spm.Success)
                {
                    articleText       = WikiRegexes.ShortPagesMonitor.Replace(articleText, "").TrimEnd();
                    shortPagesMonitor = spm.Value.TrimEnd();
                }
            }

            articleText = CommentedOutEnInterwiki.Replace(articleText, "");

            string personData = Tools.Newline(RemovePersonData(ref articleText));
            string disambig   = Tools.Newline(RemoveDisambig(ref articleText));
            string categories = Tools.Newline(RemoveCats(ref articleText, articleTitle));
            string interwikis = Tools.Newline(Interwikis(ref articleText));

            if (Namespace.IsMainSpace(articleTitle))
            {
                // maintenance templates above infoboxes etc., zeroth section only
                if (Variables.LangCode.Equals("en"))
                {
                    string zerothSection = WikiRegexes.ZerothSection.Match(articleText).Value;
                    string restOfArticle = articleText.Substring(zerothSection.Length);
                    articleText = MoveMaintenanceTags(zerothSection) + restOfArticle;
                }

                // Dablinks above maintance tags per [[WP:LAYOUT]]
                articleText = MoveDablinks(articleText);

                if (Variables.LangCode.Equals("en"))
                {
                    articleText = MovePortalTemplates(articleText);
                    articleText = MoveTemplateToSeeAlsoSection(articleText, WikiRegexes.WikipediaBooks);
                    articleText = MoveSisterlinks(articleText);
                    articleText = MoveTemplateToReferencesSection(articleText, WikiRegexes.Ibid);
                    articleText = MoveExternalLinks(articleText);
                    articleText = MoveSeeAlso(articleText);
                }
            }
            // two newlines here per https://en.wikipedia.org/w/index.php?title=Wikipedia_talk:AutoWikiBrowser&oldid=243224092#Blank_lines_before_stubs
            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Two_empty_lines_before_stub-templates
            // ru, sl, ar, arz wikis use only one newline
            string strStub = "";

            // Category: can use {{Verylargestub}}/{{popstub}} which is not a stub template, don't do stub sorting
            if (!Namespace.Determine(articleTitle).Equals(Namespace.Category))
            {
                strStub = Tools.Newline(RemoveStubs(ref articleText), (Variables.LangCode.Equals("ru") || Variables.LangCode.Equals("sl") || Variables.LangCode.Equals("ar") || Variables.LangCode.Equals("arz")) ? 1 : 2);
            }

            // filter out excess white space and remove "----" from end of article
            articleText  = Parsers.RemoveWhiteSpace(articleText, fixOptionalWhitespace) + "\r\n";
            articleText += disambig;
            articleText  = WikiRegexes.MultipleIssues.Replace(articleText, m => Regex.Replace(m.Value, "(\r\n)+", "\r\n"));

            switch (Variables.LangCode)
            {
            case "de":
            case "sl":
                articleText += strStub + categories + personData;

                // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser#Removal_of_blank_lines
                // on de wiki a blank line is desired between persondata and interwikis
                if (Variables.LangCode.Equals("de") && personData.Length > 0 && interwikis.Length > 0)
                {
                    articleText += "\r\n";
                }
                break;

            case "ar":
            case "arz":
            case "cs":
            case "el":
            case "pl":
            case "ru":
            case "simple":
                articleText += personData + strStub + categories;
                break;

            case "it":
                if (Variables.Project == ProjectEnum.wikiquote)
                {
                    articleText += personData + strStub + categories;
                }
                else
                {
                    articleText += personData + categories + strStub;
                }
                break;

            default:
                articleText += personData + categories + strStub;
                break;
            }
            articleText = (articleText + interwikis);

            // Only trim start on Category namespace, restore any saved short page monitor text
            return((Namespace.Determine(articleTitle) == Namespace.Category ?  articleText.Trim() : articleText.TrimEnd()) + shortPagesMonitor);
        }
Ejemplo n.º 13
0
        // Covered by: LinkTests.TestSimplifyLinks()
        /// <summary>
        /// Shortens piped wikilinks whose target and display text are effectively the same,
        /// e.g. [[Dog|Dogs]] becomes [[Dog]]s and [[dog|(dog)]] becomes ([[dog]]).
        /// Where required, whitespace around the pipe is tidied as well.
        /// Fixes CHECKWIKI error 64
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <returns>The article text with the simplified links.</returns>
        public static string SimplifyLinks(string articleText)
        {
            // Performance: look at each distinct piped link once only; string.Replace below covers every occurrence
            List<string> pipedLinks = Tools.DeduplicateList(GetAllWikiLinks(articleText)).FindAll(wl => wl.Contains("|"));

            // Name of the category namespace, with a fixed fallback if it is not in the namespace table
            string categoryNS;
            if (Variables.Namespaces.ContainsKey(Namespace.Category))
            {
                categoryNS = Variables.Namespaces[Namespace.Category];
            }
            else
            {
                categoryNS = "Category:";
            }

            // Performance: decide once whether any link has whitespace next to the pipe or before the closing brackets.
            // A space after the pipe does not count for links containing the category namespace name.
            bool whitespaceTrimNeeded = pipedLinks.Any(s =>
                (s.Contains("| ") && !s.Contains(categoryNS))
                || s.Contains(" |")
                || (s.Contains(" ]]") && !s.Contains("| ]]")));

            // Loop invariants
            char[] encodedTitleChars = "&%_".ToCharArray();
            string[] trailingPunctuation = { ".", "," };

            foreach (string pipedlink in pipedLinks)
            {
                Match m = WikiRegexes.PipedWikiLink.Match(pipedlink);

                // skip when the regex covers only part of the link, e.g. an [[Image:...]] link with nested wikilinks
                if (m.Length != pipedlink.Length)
                {
                    continue;
                }

                string target = m.Groups[1].Value.Trim();
                string text   = m.Groups[2].Value;

                if (whitespaceTrimNeeded)
                {
                    // Category links must keep any space directly after the pipe, so only trailing spaces go
                    if (Namespace.Determine(target) == Namespace.Category)
                    {
                        text = text.TrimEnd(' ');
                    }
                    else
                    {
                        text = text.Trim();
                    }

                    if (text.Length == 0)
                    {
                        continue;
                    }
                }

                string lowerText   = Tools.TurnFirstToLower(text);
                string lowerTarget = Tools.TurnFirstToLower(target);

                // Canonicalization is only attempted when the link holds a character it could change (performance)
                if (pipedlink.IndexOfAny(encodedTitleChars) > -1)
                {
                    string canonText   = CanonicalizeTitle(text);
                    string canonTarget = CanonicalizeTitle(target);

                    // target and text the same after cleanup and case conversion e.g. [[A|a]] or [[Foo_bar|Foo bar]]
                    bool sameAfterCleanup = canonText.Equals(target) || canonText.Equals(lowerTarget)
                                            || canonTarget.Equals(text) || canonTarget.Equals(lowerText);

                    if (sameAfterCleanup)
                    {
                        articleText = articleText.Replace(pipedlink, "[[" + text.Replace("_", " ") + "]]");
                    }
                }

                // text is the target plus one trailing punctuation mark: move the mark outside the link
                foreach (string punct in trailingPunctuation)
                {
                    if (lowerText.Equals(lowerTarget + punct))
                    {
                        articleText = articleText.Replace(pipedlink, "[[" + text.Substring(0, text.Length - 1) + "]]" + punct);
                    }
                }

                // [[dog|(dog)]] --> ([[dog]])
                if (lowerText.Equals("(" + lowerTarget + ")"))
                {
                    articleText = articleText.Replace(pipedlink, "([[" + text.Substring(1, text.Length - 2) + "]])");
                }

                if (lowerText.StartsWith(lowerTarget, StringComparison.Ordinal))
                {
                    // target is a prefix of the text e.g. [[Dog|Dogs]] --> [[Dog]]s,
                    // applied only when everything after the prefix is lowercase letters
                    string suffix = text.Remove(0, target.Length);

                    if (suffix.All(ch => char.IsLower(ch)))
                    {
                        articleText = articleText.Replace(pipedlink, "[[" + text.Substring(0, target.Length) + "]]" + suffix);
                    }
                }
                else if (whitespaceTrimNeeded)
                {
                    // rebuild the link from the trimmed parts
                    string newlink = "[[" + target + "|" + text + "]]";

                    // file/image links get whitespace removed around every pipe
                    if (WikiRegexes.FileNamespaceLink.IsMatch(newlink))
                    {
                        newlink = Regex.Replace(newlink, @" *\| *", @"|");
                    }

                    if (!newlink.Equals(pipedlink))
                    {
                        articleText = articleText.Replace(pipedlink, newlink);
                    }
                }
            }

            return articleText;
        }
Ejemplo n.º 14
0
        // Partially covered by FixMainArticleTests.SelfLinkRemoval()
        /// <summary>
        /// Cleans up wikilink syntax: {{!}} used as a pipe, encoded characters, bold/italic
        /// markup inside piped links, stray leading or trailing pipes, links from the page to itself
        /// and self interwiki links.
        /// Underscores not removed from link where page in [[Category:Articles with underscores in the title]]
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <param name="noChange">Set to true when the returned text equals the input text.</param>
        /// <returns>The modified article text.</returns>
        public static string FixLinks(string articleText, string articleTitle, out bool noChange)
        {
            string originalText = articleText;
            string escTitle     = Regex.Escape(articleTitle);

            if (TemplateExists(GetAllTemplates(articleText), InfoBoxSingleAlbum))
            {
                articleText = FixLinksInfoBoxSingleAlbum(articleText, articleTitle);
            }

            // Distinct wikilinks on the page; this snapshot drives all of the checks below
            List<string> wikiLinks = Tools.DeduplicateList(GetAllWikiLinks(articleText));

            // Replace {{!}} with a standard pipe, only in links that have no real pipe already
            if (GetAllTemplates(articleText).Contains("!"))
            {
                foreach (string bangLink in wikiLinks)
                {
                    if (bangLink.Contains(@"{{!}}") && !bangLink.Contains("|"))
                    {
                        articleText = articleText.Replace(bangLink, bangLink.Replace(@"{{!}}", "|"));
                    }
                }
            }

            // Note now whether any link mentions this wiki's own language code; acted on at the end
            string ownLanguagePrefix    = Variables.LangCode + ":";
            bool   hasAnySelfInterwikis = wikiLinks.Any(wl => wl.Contains(ownLanguagePrefix));

            // Performance: on pages with many links only run the canonicalization regex against
            // links holding a character that canonicalization could change
            char[] encodedTitleChars = "&%_".ToCharArray();

            foreach (string candidate in wikiLinks)
            {
                if (candidate.IndexOfAny(encodedTitleChars) < 0)
                {
                    continue;
                }

                string canonical = WikiRegexes.WikiLink.Replace(candidate, FixLinksWikilinkCanonicalizeME);

                if (canonical != candidate)
                {
                    articleText = articleText.Replace(candidate, canonical);
                }
            }

            // First test is for performance, second avoids a (dodgy) apostrophe directly after a link
            bool anyItalicPipedText = wikiLinks.Any(wl => wl.Contains("|''"));

            if (anyItalicPipedText && !articleText.Contains(@"']]'"))
            {
                articleText = WikiRegexes.PipedWikiLink.Replace(articleText, FixLinksWikilinkBoldItalicsME);
            }

            // fix excess trailing pipe; the TrailingPipe regex pre-check is for performance
            if (wikiLinks.Any(wl => wl.Contains("|") && TrailingPipe.IsMatch(wl)))
            {
                articleText = WikiRegexes.PipedWikiLink.Replace(articleText, m =>
                {
                    string linkText = m.Groups[2].Value.Trim();

                    if (!linkText.EndsWith("|"))
                    {
                        return m.Value;
                    }

                    return "[[" + m.Groups[1].Value + "|" + linkText.TrimEnd('|').Trim() + "]]";
                });
            }

            // fix excess leading pipe in piped link e.g. [[|foo|bar]], avoid malformatted image links
            foreach (string leadingPipeLink in wikiLinks)
            {
                if (!leadingPipeLink.Substring(2, 1).Equals("|"))
                {
                    continue;
                }

                string afterPipe = leadingPipeLink.Substring(3);

                if (afterPipe.Contains("|") && !afterPipe.StartsWith("thumb"))
                {
                    articleText = articleText.Replace(leadingPipeLink, @"[[" + afterPipe.TrimStart());
                }
            }

            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Your_code_creates_page_errors_inside_imagemap_tags.
            // don't apply if there's an imagemap on the page or some noinclude transclusion business
            // https://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Includes_and_selflinks
            // TODO, better to not apply to text within imagemaps
            string titlePattern = Tools.CaseInsensitive(escTitle);
            bool   hasSelfLink  = Regex.IsMatch(articleText, @"\[\[\s*(" + titlePattern + @")\s*(?:\]|\|)");

            if (hasSelfLink &&
                !WikiRegexes.ImageMap.IsMatch(articleText) &&
                !WikiRegexes.IncludeonlyNoinclude.IsMatch(articleText) &&
                !TaxoboxColour.IsMatch(articleText))
            {
                // remove any self-links, but not other links with different capitalisation e.g. [[Foo]] vs [[FOO]]
                articleText = Regex.Replace(articleText, @"\[\[\s*(" + titlePattern + @")\s*\]\]", "$1");

                // piped self links: keep just the piped text, except on category pages where the whole link is dropped
                string pipedReplacement;
                if (Namespace.Determine(articleTitle) == Namespace.Category)
                {
                    pipedReplacement = "";
                }
                else
                {
                    pipedReplacement = "$1";
                }

                articleText = Regex.Replace(articleText, @"\[\[\s*" + titlePattern + @"\s*\|\s*([^\]]+)\s*\]\]", pipedReplacement);
            }

            // fix for self interwiki links, not for monolingual projects
            if (hasAnySelfInterwikis && !Variables.IsWikimediaMonolingualProject)
            {
                articleText = FixSelfInterwikis(articleText);
            }

            noChange = articleText.Equals(originalText);
            return articleText;
        }
        /// <summary>
        /// Sorts article meta data: person data, disambiguation tags, categories, stubs and
        /// interwikis are taken out of the text and re-appended at the end in an order that
        /// depends on the wiki language. Template pages are returned untouched.
        /// </summary>
        /// <param name="articleText">The wiki text of the article.</param>
        /// <param name="articleTitle">Title of the article</param>
        /// <param name="fixOptionalWhitespace">Whether to request optional excess whitespace to be fixed</param>
        /// <returns>The updated article text, or the original text if sorting threw an exception</returns>
        internal string Sort(string articleText, string articleTitle, bool fixOptionalWhitespace)
        {
            // Don't sort on templates
            if (Namespace.Determine(articleTitle) == Namespace.Template)
            {
                return articleText;
            }

            // Kept so the untouched text can be handed back if anything below throws
            string unsortedText = articleText;

            try
            {
                articleText = Regex.Replace(articleText, "<!-- ?\\[\\[en:.*?\\]\\] ?-->", "");

                // Each call receives the text by ref; its result is re-appended further down
                string personData = Tools.Newline(RemovePersonData(ref articleText));
                string disambig   = Tools.Newline(RemoveDisambig(ref articleText));
                string categories = Tools.Newline(RemoveCats(ref articleText, articleTitle));
                string interwikis = Tools.Newline(Interwikis(ref articleText));

                bool isEnglish = Variables.LangCode.Equals("en");

                if (isEnglish)
                {
                    articleText = MoveMaintenanceTags(articleText);
                }

                // Dablinks above orphan tags per [[WP:LAYOUT]]
                articleText = MoveDablinks(articleText);

                if (isEnglish)
                {
                    articleText = MovePortalTemplates(articleText);
                    articleText = MoveTemplateToSeeAlsoSection(articleText, Tools.NestedTemplateRegex("Wikipedia-Books"));
                    articleText = MoveSisterlinks(articleText);
                    articleText = MoveTemplateToReferencesSection(articleText, WikiRegexes.Ibid);
                    articleText = MoveExternalLinks(articleText);
                    articleText = MoveSeeAlso(articleText);
                }

                // Category: can use {{Verylargestub}}/{{popstub}} which is not a stub template, don't do stub sorting
                string strStub = "";

                if (!Namespace.Determine(articleTitle).Equals(Namespace.Category))
                {
                    string stubs = RemoveStubs(ref articleText);

                    // two newlines here per http://en.wikipedia.org/w/index.php?title=Wikipedia_talk:AutoWikiBrowser&oldid=243224092#Blank_lines_before_stubs
                    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser/Bugs/Archive_11#Two_empty_lines_before_stub-templates
                    // Russian wiki uses only one newline
                    int stubNewlines = Variables.LangCode.Equals("ru") ? 1 : 2;

                    strStub = Tools.Newline(stubs, stubNewlines);
                }

                // filter out excess white space and remove "----" from end of article, then re-attach disambig tags
                articleText = Parsers.RemoveWhiteSpace(articleText, fixOptionalWhitespace) + "\r\n" + disambig;

                // Language-specific order of the trailing meta data
                string trailer;

                switch (Variables.LangCode)
                {
                case "de":
                case "sl":
                    trailer = strStub + categories + personData;

                    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser#Removal_of_blank_lines
                    // on de wiki a blank line is desired between persondata and interwikis
                    if (Variables.LangCode.Equals("de") && personData.Length > 0 && interwikis.Length > 0)
                    {
                        trailer += "\r\n";
                    }
                    break;

                case "pl":
                case "ru":
                case "simple":
                    trailer = personData + strStub + categories;
                    break;

                case "it":
                    // Italian wikiquote puts stubs before categories; other Italian projects use the default order
                    trailer = (Variables.Project == ProjectEnum.wikiquote)
                        ? personData + strStub + categories
                        : personData + categories + strStub;
                    break;

                default:
                    trailer = personData + categories + strStub;
                    break;
                }

                articleText = articleText + trailer + interwikis;

                // Category pages are trimmed at both ends, everything else only at the end
                bool isCategoryPage = Namespace.Determine(articleTitle) == Namespace.Category;

                return isCategoryPage ? articleText.Trim() : articleText.TrimEnd();
            }
            catch (Exception ex)
            {
                // Exceptions mentioning DEFAULTSORT are deliberately not reported; either way the text is left unsorted
                if (!ex.Message.Contains("DEFAULTSORT"))
                {
                    ErrorHandler.Handle(ex);
                }

                return unsortedText;
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Sorts article meta data: person data, disambiguation tags, categories, interwikis and
        /// stubs are collected from the text and re-appended at the end in an order that depends
        /// on the wiki language. Pages in namespace 10 are returned untouched.
        /// </summary>
        /// <param name="ArticleText">The wiki text of the article.</param>
        /// <param name="ArticleTitle">Title of the article, used to determine its namespace.</param>
        /// <returns>The re-ordered article text, or the original text if sorting threw an exception.</returns>
        internal string Sort(string ArticleText, string ArticleTitle)
        {
            // Namespace 10: per the original author's note, templates are not to be fooled around with
            if (Namespace.Determine(ArticleTitle) == 10)
            {
                return ArticleText;
            }

            // Handed back unchanged if anything below throws
            string strSave = ArticleText;

            try
            {
                ArticleText = Regex.Replace(ArticleText, "<!-- ?\\[\\[en:.*?\\]\\] ?-->", "");

                // Each helper takes the text by ref (presumably stripping its item from it -- not visible here)
                // and its result is re-appended below
                string strPersonData = Newline(removePersonData(ref ArticleText));
                string strDisambig   = Newline(removeDisambig(ref ArticleText));
                string strCategories = Newline(removeCats(ref ArticleText, ArticleTitle));
                string strInterwikis = Newline(interwikis(ref ArticleText));

                ArticleText = moveDablinks(ArticleText);

                // two newlines here per http://en.wikipedia.org/w/index.php?title=Wikipedia_talk:AutoWikiBrowser&oldid=243224092#Blank_lines_before_stubs
                string strStub = Newline(removeStubs(ref ArticleText), 2);

                // filter out excess white space and remove "----" from end of article, then re-attach disambig tags
                ArticleText = Parsers.RemoveWhiteSpace(ArticleText) + "\r\n" + strDisambig;

                // Language-specific order of the trailing meta data
                bool   isGerman = Variables.LangCode == LangCodeEnum.de;
                string trailer;

                if (isGerman || Variables.LangCode == LangCodeEnum.sl)
                {
                    trailer = strStub + strCategories + strPersonData;

                    // http://en.wikipedia.org/wiki/Wikipedia_talk:AutoWikiBrowser#Removal_of_blank_lines
                    // on de wiki a blank line is desired between persondata and interwikis
                    if (isGerman && strPersonData.Length > 0 && strInterwikis.Length > 0)
                    {
                        trailer += "\r\n";
                    }
                }
                else if (Variables.LangCode == LangCodeEnum.pl
                         || Variables.LangCode == LangCodeEnum.ru
                         || Variables.LangCode == LangCodeEnum.simple)
                {
                    trailer = strPersonData + strStub + strCategories;
                }
                else
                {
                    trailer = strPersonData + strCategories + strStub;
                }

                return ArticleText + trailer + strInterwikis;
            }
            catch (Exception ex)
            {
                // Exceptions mentioning DEFAULTSORT are deliberately not reported; either way the text is left unsorted
                if (!ex.Message.Contains("DEFAULTSORT"))
                {
                    ErrorHandler.Handle(ex);
                }

                return strSave;
            }
        }