Example #1
0
 /// <summary>Gets page titles and page text from local XML dump.
 /// This function consumes much resources.</summary>
 /// <param name="filePathName">The path to and name of the XML dump file as string.</param>
 public void FillAndLoadFromXmlDump(string filePathName)
 {
     Console.WriteLine(Bot.Msg("Loading pages from XML dump..."));
     // "using" guarantees the reader (and the underlying file handle) is released
     // even if ParsePageXml throws part-way through the dump; the original only
     // closed the reader on the success path.
     using (XmlReader reader = XmlReader.Create(filePathName)) {
         // Each <page> element of the dump becomes one Page object in this list.
         while (reader.ReadToFollowing("page")) {
             Page p = new Page(site);
             p.ParsePageXml(reader.ReadOuterXml());
             pages.Add(p);
         }
     }
     Console.WriteLine(Bot.Msg("XML dump loaded successfully."));
 }
Example #2
0
        /// <summary>Loads texts and metadata (revision ID, timestamp, last comment,
        /// last contributor, minor edit mark) for pages in this PageList via
        /// Special:Export. Non-existent pages will be automatically removed from
        /// the PageList.
        /// Please, don't use this function when going to edit big amount of pages on
        /// popular public wikis, as it compromises edit conflict detection. In that case,
        /// each page's text should be loaded individually right before its processing
        /// and saving.</summary>
        /// <exception cref="WikiBotException">Thrown when the PageList is empty.</exception>
        public void LoadWithMetadata()
        {
            if (IsEmpty())
                throw new WikiBotException(Bot.Msg("The PageList is empty. Nothing to load."));
            Console.WriteLine(Bot.Msg("Loading {0} pages..."), pages.Count);

            string res = site.indexPath + "?title=Special:Export&action=submit";
            string postData = "curonly=True&pages=";
            foreach (Page page in pages)
                postData += HttpUtility.UrlEncode(page.title) + "\r\n";
            string src = site.PostDataAndGetResult(res, postData);
            // Primary path: stream the exported XML and parse each <page> element.
            // "using" guarantees disposal even if ParsePageXml throws.
            PageList pl = new PageList(site);
            using (XmlReader reader = XmlReader.Create(new StringReader(src))) {
                while (reader.ReadToFollowing("page")) {
                    Page p = new Page(site);
                    p.ParsePageXml(reader.ReadOuterXml());
                    pl.Add(p);
                }
            }
            // BUGFIX: decide on the PARSED result "pl", not on "pages" — "pages" is
            // guaranteed non-empty by the IsEmpty() check above, so the original
            // condition was always true and the XPath fallback below was dead code.
            if (pl.pages.Count > 0) {
                Clear();
                pages = pl.pages;
                return;
            }
            else {    // FALLBACK, use alternative parsing way, XPath
                Console.WriteLine(
                    Bot.Msg("XML parsing failed, switching to alternative parser..."));
                src = Bot.RemoveXMLRootAttributes(src);
                StringReader strReader = new StringReader(src);
                XPathDocument doc = new XPathDocument(strReader);
                strReader.Close();
                XPathNavigator nav = doc.CreateNavigator();
                foreach (Page page in pages) {
                    if (page.title.Contains("'")) {    // There's no good way to escape "'" in XPath
                        page.LoadWithMetadata();
                        continue;
                    }
                    string query = "//page[title='" + page.title + "']/";
                    try {
                        page.text =
                            nav.SelectSingleNode(query + "revision/text").InnerXml;
                    }
                    catch (System.NullReferenceException) {
                        // Page not present in the export — skip it (it is removed
                        // from the list by the caller's contract, not here).
                        continue;
                    }
                    page.text = HttpUtility.HtmlDecode(page.text);
                    page.pageId = nav.SelectSingleNode(query + "id").InnerXml;
                    try {
                        page.lastUser = nav.SelectSingleNode(query +
                            "revision/contributor/username").InnerXml;
                        page.lastUserId = nav.SelectSingleNode(query +
                            "revision/contributor/id").InnerXml;
                    }
                    catch (System.NullReferenceException) {
                        // Anonymous contributor: only an IP address is exported.
                        page.lastUser = nav.SelectSingleNode(query +
                            "revision/contributor/ip").InnerXml;
                    }
                    page.lastUser = HttpUtility.HtmlDecode(page.lastUser);
                    page.revision = nav.SelectSingleNode(query + "revision/id").InnerXml;
                    // <minor/> is present only when the last edit was a minor edit.
                    page.lastMinorEdit =
                        nav.SelectSingleNode(query + "revision/minor") != null;
                    try {
                        page.comment = nav.SelectSingleNode(query + "revision/comment").InnerXml;
                        page.comment = HttpUtility.HtmlDecode(page.comment);
                    }
                    catch (System.NullReferenceException) {
                        // Empty edit summary: <comment> element absent, leave as is.
                    }
                    page.timestamp =
                        nav.SelectSingleNode(query + "revision/timestamp").ValueAsDateTime;
                }

                if (string.IsNullOrEmpty(pages[0].text)) {    // FALLBACK 2, load pages one-by-one
                    foreach (Page page in pages)
                        page.LoadWithMetadata();
                }
            }
        }