public string GetContent(LiveJournalTarget target, ILJClientData data)
{
    Uri address = target.WithStyleMine(true).WithCutExpand().GetUri();
    log.Info("Downloading " + address + "...");

    CookieContainer cookieContainer = new CookieContainer();
    using (HttpClientHandler handler = new HttpClientHandler { CookieContainer = cookieContainer })
    using (HttpClient client = new HttpClient(handler) { BaseAddress = address })
    {
        LJClientCookieData cookieData = data as LJClientCookieData;
        if (cookieData != null)
        {
            Dictionary<string, string> cookies = cookieData.GetCookiesToUse();
            foreach (KeyValuePair<string, string> cookie in cookies)
            {
                log.DebugFormat("Using cookie {0}:{1}.", cookie.Key, cookie.Value);
                cookieContainer.Add(address, new Cookie(cookie.Key, cookie.Value));
            }
        }

        string result = DownloadString(client, address);
        return result;
    }
}
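// DownloadString is not shown in this section. A minimal sketch, assuming it is a
// synchronous wrapper over HttpClient; the name, signature, and blocking style below
// are illustrative assumptions, not the project's actual helper.
private static string DownloadString(HttpClient client, Uri address)
{
    // HttpClient only exposes asynchronous calls; blocking on the result here keeps
    // the synchronous call site in GetContent unchanged.
    using (HttpResponseMessage response = client.GetAsync(address).GetAwaiter().GetResult())
    {
        response.EnsureSuccessStatusCode();
        return response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
    }
}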
/// <summary>Gets a page from a URL.</summary>
public EntryPage GetFrom(LiveJournalTarget url, ILJClientData clientData)
{
    // Get the raw page content as a string.
    string content = Client.GetContent(url, clientData);

    // Parse the string as an entry page.
    EntryPage p = _parser.ParseAsAnEntryPage(content);
    return p;
}
public bool AbsorbAllData(EntryPage freshSource, ILJClientData clientData, ref EntryPage dumpData)
{
    bool appliedAnything = false;
    if (dumpData == null)
    {
        dumpData = new EntryPage();
        appliedAnything = true;
    }

    appliedAnything |= _entryPageHelper.AddData(dumpData, freshSource);

    // Try to get all the comments from the other comment pages.
    EntryPage[] otherPages = _otherPagesLoader.LoadOtherCommentPages(freshSource.CommentPages, clientData);
    foreach (EntryPage pageX in otherPages)
    {
        appliedAnything |= _entryPageHelper.AddData(dumpData, pageX);
    }

    while (true)
    {
        // Count the folded comments and remember the first one.
        IEnumerable<Comment> allFoldedComments = _repliesHelper.EnumerateRequiringFullUp(dumpData.Replies);
        int foldedCommentsLeft = 0;
        Comment c = null;
        foreach (Comment folded in allFoldedComments)
        {
            foldedCommentsLeft++;
            if (c == null)
            {
                c = folded;
            }
        }

        // Report how many folded comments remain.
        log.Info(string.Format("Folded comments left: {0}.", foldedCommentsLeft));
        if (foldedCommentsLeft == 0)
        {
            break;
        }

        // Expand the first folded comment by loading its own page.
        LiveJournalTarget commentTarget = LiveJournalTarget.FromString(c.Url);
        EntryPage commentPage = GetFrom(commentTarget, clientData);
        Comment fullVersion = commentPage.Replies.Comments[0];
        if (fullVersion.IsFull == false)
        {
            // No full version is available; this should be a suspended user.
            log.Info(string.Format("Comment {0} seems to be from a suspended user.", c));
            c.IsSuspendedUser = true;
            continue;
        }

        log.Info(string.Format("Merging comment data for comment {0}.", c));
        appliedAnything |= _repliesHelper.MergeFrom(c, fullVersion);
    }

    return appliedAnything;
}
public EntryPage[] LoadOtherCommentPages(CommentPages commentPages, ILJClientData clientData)
{
    int initialIndex = commentPages.Current;
    int total = commentPages.Total;
    log.Info(string.Format("Loading other comment pages given page №{0} out of {1}.", initialIndex, total));

    // The page numbers we still need to download.
    int[] need = Enumerable.Range(1, total).Where(i => i != initialIndex).ToArray();

    IDictionary<int, LiveJournalTarget> targets = new SortedDictionary<int, LiveJournalTarget>();
    IDictionary<int, EntryPage> pages = new SortedDictionary<int, EntryPage>();
    CommentPages latest = commentPages;
    while (pages.Count < need.Length)
    {
        // Collect the navigation links known from the most recently parsed page.
        int cur = latest.Current;
        if (cur != 1 && !string.IsNullOrWhiteSpace(latest.FirstUrl))
        {
            targets[1] = LiveJournalTarget.FromString(latest.FirstUrl);
        }

        if (cur != total && !string.IsNullOrWhiteSpace(latest.LastUrl))
        {
            targets[total] = LiveJournalTarget.FromString(latest.LastUrl);
        }

        if (!string.IsNullOrWhiteSpace(latest.PrevUrl))
        {
            targets[cur - 1] = LiveJournalTarget.FromString(latest.PrevUrl);
        }

        if (!string.IsNullOrWhiteSpace(latest.NextUrl))
        {
            targets[cur + 1] = LiveJournalTarget.FromString(latest.NextUrl);
        }

        // The first known target without a downloaded page. SortedDictionary keeps the
        // keys in ascending order, so this is always the lowest-numbered pending page.
        int keyToDownload = targets.Keys.First(z => z != initialIndex && !pages.ContainsKey(z));
        log.Info(string.Format("Will download page №{0}.", keyToDownload));
        LiveJournalTarget targetToDownload = targets[keyToDownload];

        // Download and parse the page.
        string content = _client.GetContent(targetToDownload, clientData);
        EntryPage p = _parser.ParseAsAnEntryPage(content);
        latest = p.CommentPages;
        pages[keyToDownload] = p;
        log.Info(string.Format("Parsed page №{0}.", keyToDownload));
    }

    EntryPage[] ret = pages.Values.ToArray();
    return ret;
}
public EntryPage Work(string URI, string rootLocation, IFolderNamingStrategy subFolderGetter, string cookie)
{
    LiveJournalTarget t = LiveJournalTarget.FromString(URI);
    ILJClientData cookieData = _ext.Client.CreateDataObject(cookie);

    // Get fresh version.
    log.InfoFormat("Extracting {0}...", t);
    EntryPage freshSource = _ext.GetFrom(t, cookieData);

    string innerFolder;
    IEntryBase freshSourceEntry = freshSource.Entry;
    if (!subFolderGetter.TryGetSubfolderByEntry(freshSourceEntry, out innerFolder))
    {
        string error = string.Format(
            "Cannot extract number from entry {0}, \"{1}\".",
            freshSourceEntry.Id,
            freshSourceEntry.Subject);
        throw new NotSupportedException(error);
    }

    string subFolder = string.Format("{0}\\{1}", freshSource.Entry.Date.Value.Year, innerFolder);
    string workLocation = _fs.Path.Combine(rootLocation, subFolder);
    log.Info("Will work from " + workLocation);

    EntryPage ep = null;
    string dumpFile = _fs.Path.Combine(workLocation, DumpFileName);
    if (_fs.File.Exists(dumpFile))
    {
        log.Info("File " + DumpFileName + " exists, will load it...");
        ep = _lp.ParseAsAnEntryPage(_fs.File.ReadAllText(dumpFile));
    }
    else
    {
        log.Info("File " + DumpFileName + " does not exist.");
    }

    bool needsSaving = _ext.AbsorbAllData(freshSource, cookieData, ref ep);
    log.Info("Will save changes: " + needsSaving + ".");
    if (needsSaving)
    {
        // Save the content as is.
        string content = _lp.Serialize(ep);
        _fs.Directory.CreateDirectory(workLocation);
        UTF8Encoding enc = new UTF8Encoding(true);
        _fs.File.WriteAllText(dumpFile, content, enc);

        // Pick usable comments.
        List<Comment[]> comments = _scp.Pick(ep);
        log.Info("Picked threads: " + comments.Count + ".");

        // Everything we want to store.
        var allData = new List<EntryBase>();
        allData.Add(ep.Entry);
        allData.AddRange(comments.SelectMany(a => a));

        log.Info("Making sure everything is saved.");
        _rds.EnsureAllIsSaved(allData, rootLocation, workLocation);
    }

    log.Info("Finished.");
    return ep;
}
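// Work() throws when the folder-naming strategy cannot derive a subfolder name from
// the entry. A minimal sketch of one possible IFolderNamingStrategy, assuming only the
// TryGetSubfolderByEntry member used above; the class name and the id-based naming
// scheme are illustrative assumptions, not the project's actual implementation.
// (Requires System.Globalization for CultureInfo.)
public sealed class EntryIdFolderNamingStrategy : IFolderNamingStrategy
{
    public bool TryGetSubfolderByEntry(IEntryBase entry, out string subFolder)
    {
        if (entry == null)
        {
            subFolder = null;
            return false;
        }

        // Work() prepends the entry year itself, so only the inner folder name is produced here.
        subFolder = string.Format(CultureInfo.InvariantCulture, "{0}", entry.Id);
        return !string.IsNullOrWhiteSpace(subFolder);
    }
}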