/// <summary>Gets page titles and page text from a local XML dump.
/// This function is resource-intensive.</summary>
/// <param name="filePathName">The path to and name of the XML dump file as a string.</param>
public void FillAndLoadFromXmlDump(string filePathName)
{
    Console.WriteLine(Bot.Msg("Loading pages from XML dump..."));
    XmlReader reader = XmlReader.Create(filePathName);
    while (reader.ReadToFollowing("page")) {
        Page p = new Page(site);
        p.ParsePageXml(reader.ReadOuterXml());
        pages.Add(p);
    }
    reader.Close();
    Console.WriteLine(Bot.Msg("XML dump loaded successfully."));
}
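// Example (illustrative sketch, not part of the library): assuming "mySite" is an already
// constructed Site instance, "dump.xml" is a local MediaWiki XML export file, and the
// "pages" list field is publicly accessible, a PageList could be filled from the dump and
// inspected like this:
//
//     PageList dumpPages = new PageList(mySite);
//     dumpPages.FillAndLoadFromXmlDump("dump.xml");
//     foreach (Page p in dumpPages.pages)
//         Console.WriteLine(p.title);
//
// "mySite" and "dump.xml" are placeholder names used only for illustration.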
/// <summary>Loads texts and metadata (revision ID, timestamp, last comment,
/// last contributor, minor edit mark) for pages in this PageList.
/// Non-existent pages will be automatically removed from the PageList.
/// Please don't use this function when you are going to edit a large number of pages
/// on popular public wikis, as it compromises edit conflict detection. In that case
/// each page's text should be loaded individually right before it is processed
/// and saved.</summary>
public void LoadWithMetadata()
{
    if (IsEmpty())
        throw new WikiBotException(Bot.Msg("The PageList is empty. Nothing to load."));
    Console.WriteLine(Bot.Msg("Loading {0} pages..."), pages.Count);

    string res = site.indexPath + "?title=Special:Export&action=submit";
    string postData = "curonly=True&pages=";
    foreach (Page page in pages)
        postData += HttpUtility.UrlEncode(page.title) + "\r\n";
    string src = site.PostDataAndGetResult(res, postData);
    XmlReader reader = XmlReader.Create(new StringReader(src));
    PageList pl = new PageList(site);
    while (reader.ReadToFollowing("page")) {
        Page p = new Page(site);
        p.ParsePageXml(reader.ReadOuterXml());
        pl.Add(p);
    }
    reader.Close();
    if (pl.pages.Count > 0) {
        Clear();
        pages = pl.pages;
        return;
    }
    else {    // FALLBACK: use an alternative parsing method, XPath
        Console.WriteLine(
            Bot.Msg("XML parsing failed, switching to alternative parser..."));
        src = Bot.RemoveXMLRootAttributes(src);
        StringReader strReader = new StringReader(src);
        XPathDocument doc = new XPathDocument(strReader);
        strReader.Close();
        XPathNavigator nav = doc.CreateNavigator();
        foreach (Page page in pages) {
            if (page.title.Contains("'")) {    // there's no good way to escape "'" in XPath
                page.LoadWithMetadata();
                continue;
            }
            string query = "//page[title='" + page.title + "']/";
            try {
                page.text = nav.SelectSingleNode(query + "revision/text").InnerXml;
            }
            catch (System.NullReferenceException) {
                continue;
            }
            page.text = HttpUtility.HtmlDecode(page.text);
            page.pageId = nav.SelectSingleNode(query + "id").InnerXml;
            try {
                page.lastUser = nav.SelectSingleNode(query +
                    "revision/contributor/username").InnerXml;
                page.lastUserId = nav.SelectSingleNode(query +
                    "revision/contributor/id").InnerXml;
            }
            catch (System.NullReferenceException) {
                page.lastUser = nav.SelectSingleNode(query +
                    "revision/contributor/ip").InnerXml;
            }
            page.lastUser = HttpUtility.HtmlDecode(page.lastUser);
            page.revision = nav.SelectSingleNode(query + "revision/id").InnerXml;
            page.lastMinorEdit =
                nav.SelectSingleNode(query + "revision/minor") != null;
            try {
                page.comment = nav.SelectSingleNode(query + "revision/comment").InnerXml;
                page.comment = HttpUtility.HtmlDecode(page.comment);
            }
            catch (System.NullReferenceException) { }
            page.timestamp =
                nav.SelectSingleNode(query + "revision/timestamp").ValueAsDateTime;
        }
        if (string.IsNullOrEmpty(pages[0].text)) {
            // FALLBACK 2: load pages one by one
            foreach (Page page in pages)
                page.LoadWithMetadata();
        }
    }
}
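// Example (illustrative sketch, not part of the library): LoadWithMetadata() fetches the
// current revision of every listed page in a single Special:Export request, so it suits
// read-only batch analysis better than edit workflows. Assuming "mySite" is a configured
// Site instance, the "pages" list field is publicly accessible, and the PageList was filled
// beforehand (e.g. from a category), usage might look like this:
//
//     PageList batch = new PageList(mySite);
//     batch.FillFromCategory("Physics");    // any fill method could be used here
//     batch.LoadWithMetadata();
//     foreach (Page p in batch.pages)
//         Console.WriteLine(p.title + " | last edited by " + p.lastUser + " at " + p.timestamp);
//
// "mySite" and the category name are placeholders. For editing large batches on busy wikis,
// load each page's text individually right before saving, as the summary above advises.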