/// <summary>
/// Picks the sections of <paramref name="page"/> that are ready for archiving, removes them
/// from the page and distributes them over numbered archive pages.
/// </summary>
/// <param name="wiki">Wiki connection used to query, enumerate and download archive pages.</param>
/// <param name="page">Page being archived; every archived section is removed from its section list.</param>
/// <param name="diffSize">Receives the UTF-8 byte count of all archived sections (0 when nothing is archived).</param>
/// <param name="topicsArchived">Receives the number of archived sections (0 when nothing is archived).</param>
/// <returns>Archive page title mapped to its new text; empty when nothing is archived.</returns>
public override Dictionary<string, string> Process(Wiki wiki, WikiPage page, ref int diffSize, ref int topicsArchived)
{
    List<WikiPageSection> archivedSections = new List<WikiPageSection>();
    foreach (WikiPageSection section in page.Sections)
    {
        // Tracks whether this section is already queued, so that a section which is both
        // stale and "moved" is not archived (and counted) twice.
        bool queued = false;

        string lowerText = section.Text.ToLower();
        WikiPageSection result = section.Subsections.FirstOrDefault(ss => ss.Title.Trim().ToLower() == "итог");
        bool forceArchivation = LookForLines.Any(s => lowerText.Contains(s.ToLower()));
        if (!OnHold.Any(s => lowerText.Contains(s.ToLower())) &&
            ((result != null && !string.IsNullOrEmpty(result.SectionText.Trim())) || forceArchivation || !CheckForResult))
        {
            // The newest timestamp found in the (quote-filtered) text is treated as the last reply.
            DateTime lastReply = DateTime.MinValue;
            foreach (Match match in timeRE.Matches(FilterQuotes(section.Text)))
            {
                DateTime time = DateTime.Parse(match.Groups[1].Value, L10i.Culture, DateTimeStyles.AssumeUniversal);
                if (time > lastReply)
                {
                    lastReply = time;
                }
            }

            if (lastReply != DateTime.MinValue)
            {
                double idleHours = (DateTime.Today - lastReply).TotalHours;
                if ((forceArchivation && idleHours >= ForcedArchivationDelay) || idleHours >= Delay)
                {
                    archivedSections.Add(section);
                    queued = true;
                }
            }
        }

        if (IsMovedSection(section))
        {
            // Moved sections get a signature line appended before they are archived.
            section.SectionText = section.SectionText.Trim(new char[] { ' ', '\t', '\n' }) + "\n~~~~\n";
            if (!queued)
            {
                archivedSections.Add(section);
            }
        }
    }

    Dictionary<string, string> archiveTexts = new Dictionary<string, string>();
    if (archivedSections.Count == 0)
    {
        diffSize = 0;
        topicsArchived = 0;
        return archiveTexts;
    }

    // Resolve the namespace of the main page so existing archive pages can be enumerated by prefix.
    var parameters = new ParameterCollection()
    {
        { "prop", "info" },
    };
    XmlDocument xml = wiki.Query(QueryBy.Titles, parameters, MainPage);
    int ns = int.Parse(xml.SelectSingleNode("//page").Attributes["ns"].Value);
    string prefix = wiki.GetNamespace(ns) + ":";

    parameters = new ParameterCollection()
    {
        { "list", "allpages" },
        { "apprefix", MainPage.Substring(prefix.Length) },
        { "apnamespace", ns.ToString() }
    };
    xml = wiki.Enumerate(parameters, true);

    // Highest archive number among existing pages whose title matches Format; starts at 1.
    int maxNumber = 1;
    string format = Format.Replace("{0}", "");
    foreach (XmlNode p in xml.SelectNodes("//p"))
    {
        string title = p.Attributes["title"].Value;
        if (!title.StartsWith(format))
        {
            continue;
        }

        int number;
        if (int.TryParse(title.Substring(format.Length), out number) && number > maxNumber)
        {
            maxNumber = number;
        }
    }

    int index = 0;
    string pageName = string.Format(Format, maxNumber);
    parameters.Clear();
    parameters.Add("prop", "info");
    xml = wiki.Query(QueryBy.Titles, parameters, new string[] { pageName });
    XmlNode node = xml.SelectSingleNode("//page");
    if (node.Attributes["missing"] == null)
    {
        string pageFileName = _cacheDir + Cache.GenerateCachePath(pageName);
        string text = Cache.LoadPageFromCache(pageFileName, node.Attributes["lastrevid"].Value, pageName);
        if (string.IsNullOrEmpty(text))
        {
            Console.Out.WriteLine("Downloading " + pageName + "...");
            text = wiki.LoadText(pageName);
            Cache.CachePage(pageName, _cacheDir, node.Attributes["lastrevid"].Value, text);
        }

        // Fill the current archive page up to Topics sections.
        WikiPage archivePage = WikiPage.Parse(pageName, text);
        if (archivePage.Sections.Count < Topics)
        {
            int topics = Topics - archivePage.Sections.Count;
            for (int i = 0; i < topics && index < archivedSections.Count; ++i, ++index)
            {
                WikiPageSection section = archivedSections[index];
                section.Title = ProcessSectionTitle(section.Title);
                archivePage.Sections.Add(section);
            }

            if (NewSectionsDown)
            {
                archivePage.Sections.Sort(SectionsDown);
            }
            else
            {
                archivePage.Sections.Sort(SectionsUp);
            }

            if (!string.IsNullOrEmpty(RemoveFromText))
            {
                archivePage.Text = archivePage.Text.Replace(RemoveFromText, "");
            }
            archiveTexts.Add(pageName, archivePage.Text);
        }
    }

    if (index < archivedSections.Count)
    {
        // Everything that did not fit goes to the next archive page, started from Header.
        // NOTE(review): when page number maxNumber does not exist, the remainder still goes to
        // maxNumber + 1, and this page is always sorted with SectionsUp regardless of
        // NewSectionsDown - confirm both are intended.
        string text = Header;
        pageName = string.Format(Format, maxNumber + 1);
        WikiPage archivePage = WikiPage.Parse(pageName, text);
        for (; index < archivedSections.Count; ++index)
        {
            WikiPageSection section = archivedSections[index];
            section.Title = ProcessSectionTitle(section.Title);
            archivePage.Sections.Add(section);
        }
        archivePage.Sections.Sort(SectionsUp);
        if (!string.IsNullOrEmpty(RemoveFromText))
        {
            archivePage.Text = archivePage.Text.Replace(RemoveFromText, "");
        }
        archiveTexts.Add(pageName, archivePage.Text);
    }

    topicsArchived = 0;
    diffSize = 0;
    foreach (var section in archivedSections)
    {
        diffSize += Encoding.UTF8.GetByteCount(section.Text);
        ++topicsArchived;
        page.Sections.Remove(section);
    }
    return archiveTexts;
}
/// <summary>
/// Adds <paramref name="date"/> to the notification template in section 0 of the talk page of
/// <paramref name="title"/>, creating the template when it is not there yet, and saves the section.
/// </summary>
/// <param name="wiki">Wiki connection used to read and save the talk page.</param>
/// <param name="title">Title of the page whose talk page (namespace 1) is updated.</param>
/// <param name="date">Value appended as a template parameter.</param>
/// <remarks>A <c>WikiException</c> is caught and reported on the console, not rethrown.</remarks>
private void PutNotification(Wiki wiki, string title, string date)
{
    string talkPage = wiki.GetNamespace(1) + ":" + title;
    Console.Out.WriteLine("Updating " + talkPage + "...");
    try
    {
        ParameterCollection parameters = new ParameterCollection();
        parameters.Add("rvprop", "content");
        // Section number only; the previous value "0)" carried a stray parenthesis.
        parameters.Add("rvsection", "0");
        parameters.Add("prop", "revisions");
        XmlDocument xml = wiki.Query(QueryBy.Titles, parameters, new string[] { talkPage });

        XmlNode node = xml.SelectSingleNode("//rev");
        string content = (node != null && node.FirstChild != null) ? node.FirstChild.Value : "";

        string notification = "\n{{" + _l10i.NotificationTemplate + "|" + date + "}}\n";
        int index = content.IndexOf("{{" + _l10i.NotificationTemplate + "|", StringComparison.CurrentCultureIgnoreCase);
        if (index != -1)
        {
            // Template already present: append the date as one more parameter.
            int endIndex = content.IndexOf("}}", index);
            if (endIndex != -1)
            {
                content = content.Insert(endIndex, "|" + date);
            }
        }
        else
        {
            // Otherwise place a new template right after the first talk-header template found,
            // or at the very top when there is none.
            string[] headerTemplates = { "{{talkheader", "{{заголовок обсуждения" };
            bool headerFound = false;
            foreach (string header in headerTemplates)
            {
                index = content.IndexOf(header, StringComparison.CurrentCultureIgnoreCase);
                if (index == -1)
                {
                    continue;
                }

                headerFound = true;
                int endIndex = content.IndexOf("}}", index);
                if (endIndex != -1)
                {
                    content = content.Insert(endIndex + 2, notification);
                }
                break;
            }

            if (!headerFound)
            {
                content = content.Insert(0, notification);
            }
        }

        wiki.SaveSection(talkPage, "0", content, _l10i.MainPageUpdateComment);
    }
    catch (WikiException e)
    {
        Console.Out.WriteLine("Failed to update " + talkPage + ":" + e.Message);
    }
}
/// <summary>
/// Walks the daily discussion pages listed in the configured category, strikes out sections
/// that have a result, adds an automatic result for pages that have been deleted, optionally
/// notifies talk pages, and saves every page whose text changed.
/// </summary>
/// <param name="wiki">Wiki connection used for all queries, downloads and saves.</param>
/// <remarks>
/// Page texts are cached per day in gzip-compressed files under <c>_cacheDir</c>; the first line of
/// a cache file is the revision id it belongs to. A <c>WikiException</c> raised while saving
/// a page is reported on the console and processing continues with the next page.
/// </remarks>
public void UpdatePages(Wiki wiki)
{
    Console.Out.WriteLine("Updating articles for deletion...");
    Regex wikiLinkRE = new Regex(@"\[{2}(.+?)(\|.+?)?]{2}");

    // True when a subsection is one of the configured result sections.
    Func<WikiPageSection, bool> isResult = candidate =>
        _l10i.Processor != null
            ? _l10i.Results.Any(r => r == _l10i.Processor(candidate).Trim())
            : _l10i.Results.Any(r => r == candidate.Title.Trim());
    // True when a subsection is the "challenged result" section.
    Func<WikiPageSection, bool> isChallenged = candidate => candidate.Title.Trim() == _l10i.ChallengedResult;

    ParameterCollection parameters = new ParameterCollection();
    parameters.Add("generator", "categorymembers");
    parameters.Add("gcmtitle", _l10i.Category);
    parameters.Add("gcmlimit", "max");
    parameters.Add("gcmnamespace", "4");
    parameters.Add("prop", "info|revisions");
    parameters.Add("intoken", "edit");
    XmlDocument doc = wiki.Enumerate(parameters, true);

    foreach (XmlNode page in doc.SelectNodes("//page"))
    {
        int results = 0;
        string prefix = _l10i.MainPage + "/";
        string pageName = page.Attributes["title"].Value;
        string basetimestamp = page.FirstChild.FirstChild.Attributes["timestamp"].Value;
        string editToken = page.Attributes["edittoken"].Value;
        if (pageName.Length < prefix.Length)
        {
            continue;
        }

        // The part of the title after the main page prefix must parse as a date.
        string date = pageName.Substring(prefix.Length);
        Day day = new Day();
        if (!DateTime.TryParse(date, CultureInfo.CreateSpecificCulture(_l10i.Culture), DateTimeStyles.AssumeUniversal, out day.Date))
        {
            continue;
        }

        // Use the cached text only when it belongs to the current revision.
        string text = "";
        string fileName = _cacheDir + date + ".bin";
        if (File.Exists(fileName))
        {
            using (FileStream fs = new FileStream(fileName, FileMode.Open))
            using (GZipStream gs = new GZipStream(fs, CompressionMode.Decompress))
            using (TextReader sr = new StreamReader(gs))
            {
                string cachedRevId = sr.ReadLine();
                if (cachedRevId == page.Attributes["lastrevid"].Value)
                {
                    Console.Out.WriteLine("Loading " + pageName + "...");
                    text = sr.ReadToEnd();
                }
            }
        }

        if (string.IsNullOrEmpty(text))
        {
            try
            {
                Console.Out.WriteLine("Downloading " + pageName + "...");
                text = wiki.LoadText(pageName);
            }
            catch (WikiPageNotFound)
            {
                continue;
            }

            using (FileStream fs = new FileStream(fileName, FileMode.Create))
            using (GZipStream gs = new GZipStream(fs, CompressionMode.Compress))
            using (StreamWriter sw = new StreamWriter(gs))
            {
                sw.WriteLine(page.Attributes["lastrevid"].Value);
                sw.Write(text);
            }
        }

        // titles: link target -> open sections (no result yet) that discuss it.
        // titlesWithResults: link targets of sections whose heading contains "<s>".
        List<string> titlesWithResults = new List<string>();
        Dictionary<string, List<WikiPageSection>> titles = new Dictionary<string, List<WikiPageSection>>();
        Action<string, WikiPageSection> track = (linkTarget, owner) =>
        {
            List<WikiPageSection> bucket;
            if (!titles.TryGetValue(linkTarget, out bucket))
            {
                bucket = new List<WikiPageSection>();
                titles.Add(linkTarget, bucket);
            }
            bucket.Add(owner);
        };

        day.Page = WikiPage.Parse(pageName, text);
        foreach (WikiPageSection section in day.Page.Sections)
        {
            ReplaceEmptyResults(section);
            RemoveStrikeOut(section);
            StrikeOutSection(section);

            Match sectionLink = wikiLinkRE.Match(section.Title);
            if (sectionLink.Success)
            {
                string sectionTarget = sectionLink.Groups[1].Value.Trim();
                if (!section.Subsections.Any(isResult) && !section.Subsections.Any(isChallenged))
                {
                    track(sectionTarget, section);
                }
                if (section.Title.Contains("<s>"))
                {
                    titlesWithResults.Add(sectionTarget);
                }
            }

            List<WikiPageSection> nested = new List<WikiPageSection>();
            section.Reduce(nested, SubsectionsList);
            foreach (WikiPageSection subsection in nested)
            {
                Match subsectionLink = wikiLinkRE.Match(subsection.Title);
                if (!subsectionLink.Success)
                {
                    continue;
                }

                string subsectionTarget = subsectionLink.Groups[1].Value.Trim();
                if (subsection.Title.Contains("<s>"))
                {
                    titlesWithResults.Add(subsectionTarget);
                }
                else if (!subsection.Subsections.Any(isChallenged) && !subsection.Subsections.Any(isResult))
                {
                    track(subsectionTarget, subsection);
                }
            }
        }

        parameters.Clear();
        parameters.Add("prop", "info");
        // Maps the normalized title reported by the API back to the title that was asked for.
        Dictionary<string, string> normalizedTitles = new Dictionary<string, string>();
        XmlDocument xml = wiki.Query(QueryBy.Titles, parameters, titles.Keys);
        foreach (XmlNode node in xml.SelectNodes("//n"))
        {
            normalizedTitles.Add(node.Attributes["to"].Value, node.Attributes["from"].Value);
        }

        List<string> notificationList = new List<string>();
        foreach (XmlNode node in xml.SelectNodes("//page"))
        {
            string title = node.Attributes["title"].Value;
            List<WikiPageSection> sections;
            if (!titles.TryGetValue(title, out sections))
            {
                sections = titles[normalizedTitles[title]];
            }

            if (node.Attributes["missing"] == null)
            {
                continue;
            }

            // The page is missing: look for deletion log entries since the day of the discussion.
            parameters.Clear();
            parameters.Add("list", "logevents");
            parameters.Add("letype", "delete");
            // The limit parameter of list=logevents is "lelimit" (was misspelled "lemlimit").
            parameters.Add("lelimit", "max");
            parameters.Add("lestart", day.Date.ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ"));
            parameters.Add("ledir", "newer");
            parameters.Add("letitle", title);
            parameters.Add("leprop", "comment|type|user|timestamp");
            XmlDocument log = wiki.Enumerate(parameters, true);

            List<DeleteLogEvent> events = new List<DeleteLogEvent>();
            foreach (XmlNode item in log.SelectNodes("//item"))
            {
                DeleteLogEvent ev = new DeleteLogEvent();
                ev.Comment = item.Attributes["comment"].Value;
                ev.Deleted = item.Attributes["action"].Value == "delete";
                ev.User = item.Attributes["user"].Value;
                ev.Timestamp = DateTime.Parse(item.Attributes["timestamp"].Value, null, DateTimeStyles.AssumeUniversal);
                events.Add(ev);
            }
            events.Sort(CompareDeleteLogEvents);

            // Only act when the first entry after sorting is a deletion older than two hours.
            if (events.Count > 0 && events[0].Deleted && (DateTime.Now - events[0].Timestamp).TotalHours > 2)
            {
                string comment = FilterWikiMarkup(events[0].Comment);
                string message = string.Format(_l10i.AutoResultMessage,
                    events[0].User,
                    events[0].Timestamp.ToUniversalTime().ToString(_l10i.DateFormat, CultureInfo.CreateSpecificCulture(_l10i.Culture)),
                    comment);
                foreach (WikiPageSection section in sections)
                {
                    WikiPageSection verdict = new WikiPageSection(" " + _l10i.AutoResultSection + " ", section.Level + 1, message);
                    section.AddSubsection(verdict);
                    StrikeOutSection(section);
                    ++results;
                }
            }
        }

        if (_l10i.Culture != "ru-RU")
        {
            // Existing, non-redirect main-namespace pages with a result are candidates for a
            // talk page notification.
            parameters.Clear();
            parameters.Add("prop", "info");
            xml = wiki.Query(QueryBy.Titles, parameters, titlesWithResults);
            foreach (XmlNode node in xml.SelectNodes("//page"))
            {
                if (node.Attributes["missing"] == null &&
                    node.Attributes["redirect"] == null &&
                    node.Attributes["ns"].Value == "0")
                {
                    notificationList.Add(node.Attributes["title"].Value);
                }
            }

            if (notificationList.Count > 0)
            {
                parameters.Clear();
                parameters.Add("list", "backlinks");
                parameters.Add("bltitle", pageName);
                parameters.Add("blfilterredir", "nonredirects");
                parameters.Add("blnamespace", "1");
                parameters.Add("bllimit", "max");
                XmlDocument backlinks = wiki.Enumerate(parameters, true);
                foreach (string title in notificationList)
                {
                    // Talk pages that already link to this discussion page are skipped.
                    string talkPage = wiki.GetNamespace(1) + ":" + title;
                    if (backlinks.SelectSingleNode("//bl[@title=" + GenerateConcatForXPath(talkPage) + "]") == null)
                    {
                        PutNotification(wiki, title, date);
                    }
                }
            }
        }

        string newText = day.Page.Text;
        if (newText.Trim() == text.Trim())
        {
            continue;
        }

        try
        {
            Console.Out.WriteLine("Updating " + pageName + "...");
            // NOTE(review): a start timestamp used to be computed here but was never passed on;
            // the empty-string argument after basetimestamp is kept unchanged - confirm.
            string newRevId = wiki.Save(pageName,
                "",
                newText,
                _l10i.StrikeOutComment + (results > 0 ? _l10i.AutoResultComment : ""),
                MinorFlags.Minor,
                CreateFlags.NoCreate,
                WatchFlags.None,
                SaveFlags.Replace,
                true,
                basetimestamp,
                "",
                editToken);

            using (FileStream fs = new FileStream(fileName, FileMode.Create))
            using (GZipStream gs = new GZipStream(fs, CompressionMode.Compress))
            using (StreamWriter sw = new StreamWriter(gs))
            {
                sw.WriteLine(newRevId);
                sw.Write(newText);
            }
        }
        catch (WikiException e)
        {
            // Report the failure instead of dropping it silently; the remaining pages are still processed.
            Console.Out.WriteLine("Failed to update " + pageName + ":" + e.Message);
        }
    }
}
/// <summary>
/// Builds the text of a "new pages" list from the cached category listings and, when fewer
/// than <c>MaxItems</c> entries were produced, tops it up with entries from the previous text.
/// </summary>
/// <param name="wiki">Wiki connection used to resolve namespace names and load page information.</param>
/// <param name="text">Previous text of the list; its entries are reused to fill the list.</param>
/// <returns><c>Header</c>, the entries joined by <c>Delimeter</c>, then <c>Footer</c>.</returns>
public virtual string ProcessData(Wiki wiki, string text)
{
    string cacheFolder = "Cache\\" + Module.Language + "\\NewPages\\";
    string namespaceId = Namespace.ToString();
    char[] tab = new char[] { '\t' };

    // Titles listed in any of the ignored categories (for the configured namespace only).
    HashSet<string> excluded = new HashSet<string>();
    foreach (var ignoredCategory in CategoriesToIgnore)
    {
        using (TextReader reader = new StreamReader(cacheFolder + Cache.EscapePath(ignoredCategory) + ".txt"))
        {
            for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
            {
                string[] columns = row.Split(tab);
                if (columns[0] != namespaceId)
                {
                    continue;
                }
                excluded.Add(columns[1].Replace('_', ' '));
            }
        }
    }

    // One placeholder entry per distinct title; the sixth column is the first edit id.
    var candidates = new List<Cache.PageInfo>();
    var seenTitles = new HashSet<string>();
    foreach (var category in Categories)
    {
        string listingPath = cacheFolder + Cache.EscapePath(category) + ".txt";
        Console.Out.WriteLine("Processing data of " + category);
        using (TextReader reader = new StreamReader(listingPath))
        {
            for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
            {
                string[] columns = row.Split(tab);
                if (columns[0] != namespaceId)
                {
                    continue;
                }

                string title = columns[1].Replace('_', ' ');
                if (excluded.Contains(title))
                {
                    continue;
                }
                if (Namespace != 0)
                {
                    title = wiki.GetNamespace(Namespace) + ":" + title;
                }

                Cache.PageInfo stub = new Cache.PageInfo(title, "", DateTime.MinValue, long.Parse(columns[5]));
                if (seenTitles.Add(stub.Title))
                {
                    candidates.Add(stub);
                }
            }
        }
    }
    candidates.Sort(ComparePages);

    // Load full information in sorted order until MaxItems entries are collected.
    var entries = new List<string>();
    foreach (Cache.PageInfo candidate in candidates)
    {
        if (entries.Count >= MaxItems)
        {
            break;
        }

        Cache.PageInfo info = Cache.LoadPageInformation(wiki, Module.Language, candidate.Title);
        if (info == null || UsersToIgnore.Contains(info.Author))
        {
            continue;
        }

        // Outside the main namespace the "<namespace>:" prefix is cut from the shown title.
        string shownTitle = info.Title;
        if (Namespace != 0)
        {
            shownTitle = info.Title.Substring(wiki.GetNamespace(Namespace).Length + 1);
        }
        string created = info.FirstEdit.ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ");
        string entry = string.Format(Format, shownTitle, info.Author, created);
        if (!entries.Contains(entry))
        {
            entries.Add(entry);
        }
    }

    if (entries.Count < MaxItems)
    {
        // Strip Header/Footer from the previous text and reuse its entries to fill the list.
        string previous = text;
        if (!string.IsNullOrEmpty(Header) && text.StartsWith(Header))
        {
            previous = previous.Substring(Header.Length);
        }
        if (!string.IsNullOrEmpty(Footer) && previous.EndsWith(Footer))
        {
            previous = previous.Substring(0, previous.Length - Footer.Length);
        }

        foreach (string oldEntry in previous.Split(new string[] { Delimeter }, StringSplitOptions.RemoveEmptyEntries))
        {
            if (entries.Count >= MaxItems)
            {
                break;
            }
            if (!entries.Contains(oldEntry))
            {
                entries.Add(oldEntry);
            }
        }
    }

    return Header + string.Join(Delimeter, entries.ToArray()) + Footer;
}
/// <summary>
/// Builds one "new pages" list per day for today and the six preceding days from the cached
/// category listings.
/// </summary>
/// <param name="wiki">Wiki connection used to resolve namespace names and load page information.</param>
/// <returns>
/// Page title (<c>Page</c> + "/" + the day formatted as "d MMMM yyyy") mapped to the list text;
/// days without any entry are left out.
/// </returns>
public Dictionary<string, string> Process(Wiki wiki)
{
    string cacheFolder = "Cache\\" + Module.Language + "\\NewPages\\";
    string namespaceId = Namespace.ToString();
    char[] tab = new char[] { '\t' };

    // Titles listed in any of the ignored categories (for the configured namespace only).
    HashSet<string> excluded = new HashSet<string>();
    foreach (var ignoredCategory in CategoriesToIgnore)
    {
        using (TextReader reader = new StreamReader(cacheFolder + Cache.EscapePath(ignoredCategory) + ".txt"))
        {
            for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
            {
                string[] columns = row.Split(tab);
                if (columns[0] != namespaceId)
                {
                    continue;
                }
                excluded.Add(columns[1].Replace('_', ' '));
            }
        }
    }

    // Page information for every distinct, non-ignored title of the listed categories.
    var candidates = new List<Cache.PageInfo>();
    var seenTitles = new HashSet<string>();
    foreach (var category in Categories)
    {
        string listingPath = cacheFolder + Cache.EscapePath(category) + ".txt";
        Console.Out.WriteLine("Processing data of " + category);
        using (TextReader reader = new StreamReader(listingPath))
        {
            for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
            {
                string[] columns = row.Split(tab);
                if (columns[0] != namespaceId)
                {
                    continue;
                }

                string title = columns[1].Replace('_', ' ');
                if (excluded.Contains(title))
                {
                    continue;
                }
                if (Namespace != 0)
                {
                    title = wiki.GetNamespace(Namespace) + ":" + title;
                }

                Cache.PageInfo info = Cache.LoadPageInformation(wiki, Module.Language, title);
                if (info != null && seenTitles.Add(info.Title))
                {
                    candidates.Add(info);
                }
            }
        }
    }
    candidates.Sort(CompareTime);

    Dictionary<string, string> wikiPages = new Dictionary<string, string>();
    for (int daysBack = 0; daysBack < 7; ++daysBack)
    {
        // Half-open window [dayStart, dayEnd) compared against the first edit time in UTC.
        DateTime dayEnd = DateTime.Today.AddDays(1 - daysBack);
        DateTime dayStart = DateTime.Today.AddDays(-daysBack);

        var entries = new List<string>();
        foreach (var info in candidates)
        {
            var created = info.FirstEdit.ToUniversalTime();
            if (created < dayStart || created >= dayEnd)
            {
                continue;
            }

            // Outside the main namespace the "<namespace>:" prefix is cut from the shown title.
            string shownTitle = info.Title;
            if (Namespace != 0)
            {
                shownTitle = info.Title.Substring(wiki.GetNamespace(Namespace).Length + 1);
            }
            entries.Add(string.Format(Format, shownTitle, info.Author, created.ToString("yyyy-MM-ddTHH:mm:ssZ")));
        }

        if (entries.Count == 0)
        {
            continue;
        }

        string pageName = Page + "/" + dayStart.ToString("d MMMM yyyy");
        wikiPages.Add(pageName, Header + string.Join(Delimeter, entries.ToArray()) + Footer);
    }
    return wikiPages;
}
/// <summary>
/// Builds a list from the cached listing of the first configured category: for every listed
/// page, section 0 is downloaded (following redirects) and matched against <c>_regex</c>; pages
/// with a non-empty "fileName" group yield one entry. When fewer than <c>MaxItems</c> entries were
/// produced, the list is topped up with entries from the previous text.
/// </summary>
/// <param name="wiki">Wiki connection used to resolve namespace names and query page content.</param>
/// <param name="text">Previous text of the list; its entries are reused to fill the list.</param>
/// <returns><c>Header</c>, the entries joined by <c>Delimeter</c>, then <c>Footer</c>.</returns>
public override string ProcessCategory(Wiki wiki, string text)
{
    var result = new List<string>();
    // Resolved titles already handled; two listed titles can resolve to the same page
    // because redirects are followed.
    var pages = new HashSet<string>();
    string cacheFolder = "Cache\\" + Module.Language + "\\NewPages\\";
    string namespaceId = Namespace.ToString();
    char[] tab = new char[] { '\t' };

    // Titles listed in any of the ignored categories (for the configured namespace only).
    HashSet<string> ignore = new HashSet<string>();
    foreach (var category in CategoriesToIgnore)
    {
        using (TextReader streamReader = new StreamReader(cacheFolder + Cache.EscapePath(category) + ".txt"))
        {
            string line;
            while ((line = streamReader.ReadLine()) != null)
            {
                string[] groups = line.Split(tab);
                if (groups[0] == namespaceId)
                {
                    ignore.Add(groups[1].Replace('_', ' '));
                }
            }
        }
    }

    ParameterCollection parameters = new ParameterCollection();
    parameters.Add("redirects");
    parameters.Add("prop", "revisions");
    parameters.Add("rvprop", "content");
    parameters.Add("rvsection", "0");

    string file = cacheFolder + Cache.EscapePath(Categories[0]) + ".txt";
    Console.Out.WriteLine("Processing data of " + Categories[0]);
    using (TextReader streamReader = new StreamReader(file))
    {
        string line;
        while ((line = streamReader.ReadLine()) != null)
        {
            string[] groups = line.Split(tab);
            if (groups[0] == namespaceId)
            {
                string title = groups[1].Replace('_', ' ');
                if (ignore.Contains(title))
                {
                    continue;
                }

                string fullTitle = title;
                if (Namespace != 0)
                {
                    fullTitle = wiki.GetNamespace(Namespace) + ":" + title;
                }

                XmlDocument xml = wiki.Query(QueryBy.Titles, parameters, new string[] { fullTitle });
                XmlNode node = xml.SelectSingleNode("//rev");
                if (node != null)
                {
                    // Title as reported by the API, i.e. after redirect resolution.
                    fullTitle = xml.SelectSingleNode("//page").Attributes["title"].Value;
                    if (!pages.Add(fullTitle))
                    {
                        continue;
                    }

                    string content = node.FirstChild == null ? "" : node.FirstChild.Value;
                    Match m = _regex.Match(content);
                    if (m.Success)
                    {
                        string fileName = m.Groups["fileName"].Value.Trim();
                        if (string.IsNullOrEmpty(fileName))
                        {
                            continue;
                        }

                        // Cut the namespace prefix only when it is really there: a redirect
                        // target may live in another namespace.
                        string shownTitle = fullTitle;
                        if (Namespace != 0)
                        {
                            string namespacePrefix = wiki.GetNamespace(Namespace) + ":";
                            if (fullTitle.StartsWith(namespacePrefix, StringComparison.Ordinal))
                            {
                                shownTitle = fullTitle.Substring(namespacePrefix.Length);
                            }
                        }
                        result.Add(string.Format(Format, shownTitle, fileName));
                    }
                }
            }

            if (result.Count == MaxItems)
            {
                break;
            }
        }
    }

    if (result.Count < MaxItems)
    {
        // Strip Header/Footer from the previous text and reuse its entries to fill the list.
        string oldText = text;
        if (!string.IsNullOrEmpty(Header) && text.StartsWith(Header))
        {
            oldText = oldText.Substring(Header.Length);
        }
        if (!string.IsNullOrEmpty(Footer) && oldText.EndsWith(Footer))
        {
            oldText = oldText.Substring(0, oldText.Length - Footer.Length);
        }

        string[] items = oldText.Split(new string[] { Delimeter }, StringSplitOptions.RemoveEmptyEntries);
        for (int i = 0; i < items.Length && result.Count < MaxItems; ++i)
        {
            if (!result.Contains(items[i]))
            {
                result.Add(items[i]);
            }
        }
    }

    return Header + string.Join(Delimeter, result.ToArray()) + Footer;
}