/// <summary>
/// Downloads every comment page of the entry except the one we were given.
/// </summary>
/// <param name="commentPages">Paging info of the page already in hand (current index, total, nav URLs).</param>
/// <param name="clientData">Client state (e.g. auth cookie) passed through to the HTTP client.</param>
/// <returns>The other pages, ordered by page index (ascending).</returns>
public EntryPage[] LoadOtherCommentPages(CommentPages commentPages, ILJClientData clientData)
{
    int currentIndex = commentPages.Current;
    int totalCount = commentPages.Total;
    log.Info(string.Format("Loading other comment pages given page №{0} out of {1}.", currentIndex, totalCount));

    // Indices of all the pages we still have to fetch.
    int[] wanted = Enumerable
        .Range(1, totalCount)
        .Where(i => i != currentIndex)
        .ToArray();

    // Both maps are keyed by page index; SortedDictionary keeps keys ascending,
    // so the lowest eligible page index is always downloaded first.
    IDictionary<int, LiveJournalTarget> knownTargets = new SortedDictionary<int, LiveJournalTarget>();
    IDictionary<int, EntryPage> fetched = new SortedDictionary<int, EntryPage>();

    CommentPages navigation = commentPages;
    while (fetched.Count < wanted.Length)
    {
        int cursor = navigation.Current;

        // Harvest whatever navigation links the most recently seen page exposes.
        if (cursor != 1 && !string.IsNullOrWhiteSpace(navigation.FirstUrl))
        {
            knownTargets[1] = LiveJournalTarget.FromString(navigation.FirstUrl);
        }
        if (cursor != totalCount && !string.IsNullOrWhiteSpace(navigation.LastUrl))
        {
            knownTargets[totalCount] = LiveJournalTarget.FromString(navigation.LastUrl);
        }
        if (!string.IsNullOrWhiteSpace(navigation.PrevUrl))
        {
            knownTargets[cursor - 1] = LiveJournalTarget.FromString(navigation.PrevUrl);
        }
        if (!string.IsNullOrWhiteSpace(navigation.NextUrl))
        {
            knownTargets[cursor + 1] = LiveJournalTarget.FromString(navigation.NextUrl);
        }

        // Smallest page index that has a known target but no downloaded page yet.
        int pageIndex = knownTargets.Keys.First(z => z != currentIndex && !fetched.ContainsKey(z));
        log.Info(string.Format("Will download page №{0}.", pageIndex));

        // Fetch and parse that page; its own CommentPages drives the next round.
        string content = _client.GetContent(knownTargets[pageIndex], clientData);
        EntryPage parsed = _parser.ParseAsAnEntryPage(content);
        navigation = parsed.CommentPages;
        fetched[pageIndex] = parsed;
        log.Info(string.Format("Parsed page №{0}.", pageIndex));
    }

    return fetched.Values.ToArray();
}
/// <summary>
/// Merges a freshly downloaded entry page — plus all of its other comment
/// pages and all folded comment threads — into the accumulated dump.
/// </summary>
/// <param name="freshSource">The just-downloaded entry page.</param>
/// <param name="clientData">Client state passed through to subsequent downloads.</param>
/// <param name="dumpData">The accumulated dump; created here when null.</param>
/// <returns>True when anything in <paramref name="dumpData"/> actually changed.</returns>
public bool AbsorbAllData(EntryPage freshSource, ILJClientData clientData, ref EntryPage dumpData)
{
    bool appliedAnything = false;
    if (dumpData == null)
    {
        dumpData = new EntryPage();
        appliedAnything = true;
    }
    appliedAnything |= _entryPageHelper.AddData(dumpData, freshSource);

    // Pull in every other comment page of the entry.
    EntryPage[] otherPages = _otherPagesLoader.LoadOtherCommentPages(freshSource.CommentPages, clientData);
    foreach (EntryPage pageX in otherPages)
    {
        appliedAnything |= _entryPageHelper.AddData(dumpData, pageX);
    }

    // Expand folded comments one at a time; every merge may change the tree,
    // so re-enumerate after each pass until nothing is left.
    while (true)
    {
        // FIX: the original walked a raw IEnumerator<Comment> by hand to count
        // the items and grab the first one, but never disposed the enumerator.
        // ToList() consumes the same sequence fully (as the manual loop did)
        // and disposes the enumerator deterministically.
        List<Comment> foldedComments = _repliesHelper.EnumerateRequiringFullUp(dumpData.Replies).ToList();

        // How many comments left?
        log.Info(string.Format("Folded comments left: {0}.", foldedComments.Count));
        if (foldedComments.Count == 0)
        {
            break;
        }

        // Download the standalone page of the first folded comment.
        Comment c = foldedComments[0];
        LiveJournalTarget commentTarget = LiveJournalTarget.FromString(c.Url);
        EntryPage commentPage = GetFrom(commentTarget, clientData);
        Comment fullVersion = commentPage.Replies.Comments[0];
        if (!fullVersion.IsFull)
        {
            // This should be a suspended user.
            // NOTE(review): assumes EnumerateRequiringFullUp stops yielding
            // comments flagged IsSuspendedUser — confirm, otherwise this loop
            // would revisit the same comment forever.
            log.Info(string.Format("Comment {0} seems to be from a suspended user.", c));
            c.IsSuspendedUser = true;
            continue;
        }

        log.Info(string.Format("Merging comment data for comment {0}.", c));
        appliedAnything |= _repliesHelper.MergeFrom(c, fullVersion);
    }

    return appliedAnything;
}
/// <summary>
/// Downloads (or refreshes) one LiveJournal entry, merges it with any
/// previously saved dump under <paramref name="rootLocation"/>, and, when
/// anything changed, re-serializes the dump and stores the picked threads.
/// </summary>
/// <param name="URI">Address of the entry to extract.</param>
/// <param name="rootLocation">Root folder of the local archive.</param>
/// <param name="subFolderGetter">Maps an entry to its per-entry subfolder name.</param>
/// <param name="cookie">Auth cookie forwarded to the client.</param>
/// <returns>The merged entry page (never saved-but-null; may be the freshly created dump).</returns>
/// <exception cref="NotSupportedException">When no subfolder can be derived from the entry.</exception>
public EntryPage Work(string URI, string rootLocation, IFolderNamingStrategy subFolderGetter, string cookie)
{
    LiveJournalTarget t = LiveJournalTarget.FromString(URI);
    ILJClientData cookieData = _ext.Client.CreateDataObject(cookie);

    // Get fresh version.
    log.InfoFormat("Extracting {0}...", t);
    EntryPage freshSource = _ext.GetFrom(t, cookieData);

    string innerFolder;
    IEntryBase freshSourceEntry = freshSource.Entry;
    if (!subFolderGetter.TryGetSubfolderByEntry(freshSourceEntry, out innerFolder))
    {
        string error = string.Format("Cannot extract number from entry {0}, \"{1}\".", freshSourceEntry.Id, freshSourceEntry.Subject);
        throw new NotSupportedException(error);
    }

    // FIX: the subfolder was built with string.Format("{0}\\{1}", ...), hard-coding
    // the Windows separator; build it through the file-system abstraction instead,
    // consistent with every other path in this method.
    // NOTE(review): assumes Entry.Date is non-null for any entry that passed
    // TryGetSubfolderByEntry — confirm; a null Date threw here before this change too.
    string subFolder = _fs.Path.Combine(freshSource.Entry.Date.Value.Year.ToString(), innerFolder);
    string workLocation = _fs.Path.Combine(rootLocation, subFolder);
    log.Info("Will work from " + workLocation);

    // Load the previous dump, if one exists.
    EntryPage ep = null;
    string dumpFile = _fs.Path.Combine(workLocation, DumpFileName);
    if (_fs.File.Exists(dumpFile))
    {
        log.Info("File " + DumpFileName + " exists, will load it...");
        ep = _lp.ParseAsAnEntryPage(_fs.File.ReadAllText(dumpFile));
    }
    else
    {
        log.Info("File " + DumpFileName + " does not exist.");
    }

    bool needsSaving = _ext.AbsorbAllData(freshSource, cookieData, ref ep);
    log.Info("Will save changes: " + needsSaving + ".");
    if (needsSaving)
    {
        // Save the content as is (UTF-8 with BOM — UTF8Encoding(true)).
        string content = _lp.Serialize(ep);
        _fs.Directory.CreateDirectory(workLocation);
        UTF8Encoding enc = new UTF8Encoding(true);
        _fs.File.WriteAllText(dumpFile, content, enc);

        // Pick usable comments.
        List<Comment[]> comments = _scp.Pick(ep);
        log.Info("Picked threads: " + comments.Count + ".");

        // Everything we want to store: the entry itself plus all picked threads.
        var allData = new List<EntryBase>();
        allData.Add(ep.Entry);
        allData.AddRange(comments.SelectMany(a => a));
        log.Info("Making sure everything is saved.");
        _rds.EnsureAllIsSaved(allData, rootLocation, workLocation);
    }

    log.Info("Finished.");
    return ep;
}