/// <summary>Downloads the raw page content for the given target, applying
/// any cookies supplied through <paramref name="data"/>.</summary>
/// <param name="target">The LiveJournal address to download.</param>
/// <param name="data">Optional client data; when it is an
/// <c>LJClientCookieData</c>, its cookies are attached to the request.</param>
/// <returns>The downloaded page as a string.</returns>
public string GetContent(LiveJournalTarget target, ILJClientData data)
{
    Uri address = target.WithStyleMine(true).WithCutExpand().GetUri();
    log.Info("Downloading " + address + "...");

    // A fresh handler/client pair per call so each request carries exactly
    // the cookies supplied with this call.
    CookieContainer cookieContainer = new CookieContainer();
    using (HttpClientHandler handler = new HttpClientHandler { CookieContainer = cookieContainer })
    using (HttpClient client = new HttpClient(handler) { BaseAddress = address })
    {
        LJClientCookieData cookieData = data as LJClientCookieData;
        if (cookieData != null)
        {
            foreach (KeyValuePair<string, string> cookie in cookieData.GetCookiesToUse())
            {
                log.DebugFormat("Using cookie {0}:{1}.", cookie.Key, cookie.Value);
                cookieContainer.Add(address, new Cookie(cookie.Key, cookie.Value));
            }
        }

        return DownloadString(client, address);
    }
}
/// <summary>Gets a page from a Url.</summary>
/// <param name="url">The address to fetch.</param>
/// <param name="clientData">Client data (e.g. cookies) forwarded to the client.</param>
/// <returns>The downloaded content parsed as an entry page.</returns>
public EntryPage GetFrom(LiveJournalTarget url, ILJClientData clientData)
{
    // Download the raw content, then hand it to the parser.
    string content = Client.GetContent(url, clientData);
    return _parser.ParseAsAnEntryPage(content);
}
/// <summary>
/// Verifies that OtherPagesLoader downloads every comment page except the one
/// it already has, using mocked client/parser pairs that round-trip the page
/// number through the "content" string.
/// </summary>
public void DownloadsCommentPagesCorrectly()
{
    int sourcePage = 4;
    int totalPages = 10;

    ILJClient clientMock = MockRepository.GenerateMock<ILJClient>();
    ILayerParser parserMock = MockRepository.GenerateMock<ILayerParser>();

    // Builds the CommentPages navigation object for a given page index.
    Func<int, CommentPages> makeCommentPages = pageIndex =>
    {
        CommentPages result = new CommentPages();
        result.Current = pageIndex;
        result.Total = totalPages;
        if (pageIndex != 1)
        {
            result.FirstUrl = new LiveJournalTarget("galkovsky", 1, page: 1).ToString();
            result.PrevUrl = new LiveJournalTarget("galkovsky", 1, page: pageIndex - 1).ToString();
        }
        if (pageIndex != totalPages)
        {
            result.LastUrl = new LiveJournalTarget("galkovsky", 1, page: totalPages).ToString();
            result.NextUrl = new LiveJournalTarget("galkovsky", 1, page: pageIndex + 1).ToString();
        }
        return result;
    };

    // The mocked client "downloads" a page by returning its page number as text.
    clientMock.Expect(z => z.GetContent(Arg<LiveJournalTarget>.Is.NotNull, Arg<ILJClientData>.Is.Null))
              .Return(null)
              .WhenCalled(invocation =>
              {
                  LiveJournalTarget requested = (LiveJournalTarget)invocation.Arguments[0];
                  invocation.ReturnValue = requested.Page.Value.ToString();
              });

    // The mocked parser turns that page number back into an EntryPage.
    parserMock.Expect(z => z.ParseAsAnEntryPage(Arg<string>.Is.Anything))
              .Return(null)
              .WhenCalled(invocation =>
              {
                  string content = (string)invocation.Arguments[0];
                  EntryPage page = new EntryPage();
                  page.CommentPages = makeCommentPages(int.Parse(content));
                  invocation.ReturnValue = page;
              });

    OtherPagesLoader loader = new OtherPagesLoader(parserMock, clientMock);

    // This is the source object we get from an entry page.
    CommentPages initial = makeCommentPages(sourcePage);
    EntryPage[] others = loader.LoadOtherCommentPages(initial, null);

    Assert.AreEqual(totalPages - 1, others.Length);
    IEnumerable<int> numbersWeExpect = Enumerable.Range(1, totalPages).Where(z => z != sourcePage);
    IEnumerable<int> numbersWeHave = others.Select(z => z.CommentPages.Current);
    CollectionAssert.AreEqual(numbersWeExpect, numbersWeHave);
}
/// <summary>
/// Absorbs all data from the fresh source page into the dump page: merges the
/// entry, downloads the other comment pages, then repeatedly unfolds comments
/// that still require their full version.
/// </summary>
/// <param name="freshSource">The freshly downloaded entry page.</param>
/// <param name="clientData">Client data (e.g. cookies) used for further downloads.</param>
/// <param name="dumpData">The accumulated dump; created here when null.</param>
/// <returns>True when anything new was applied to <paramref name="dumpData"/>.</returns>
public bool AbsorbAllData(EntryPage freshSource, ILJClientData clientData, ref EntryPage dumpData)
{
    bool appliedAnything = false;
    if (dumpData == null)
    {
        dumpData = new EntryPage();
        appliedAnything = true;
    }

    appliedAnything |= _entryPageHelper.AddData(dumpData, freshSource);

    // TryGet all comments.
    EntryPage[] otherPages = _otherPagesLoader.LoadOtherCommentPages(freshSource.CommentPages, clientData);
    foreach (EntryPage pageX in otherPages)
    {
        appliedAnything |= _entryPageHelper.AddData(dumpData, pageX);
    }

    while (true)
    {
        // Count the folded comments and remember the first one. foreach
        // guarantees the enumerator is disposed — the previous manual
        // GetEnumerator() loop leaked it (IEnumerator<T> is IDisposable).
        int foldedCommentsLeft = 0;
        Comment c = null;
        foreach (Comment folded in _repliesHelper.EnumerateRequiringFullUp(dumpData.Replies))
        {
            foldedCommentsLeft++;
            if (c == null)
            {
                c = folded;
            }
        }

        // How many comments left?
        log.Info(string.Format("Folded comments left: {0}.", foldedCommentsLeft));
        if (foldedCommentsLeft == 0)
        {
            break;
        }

        // Download the page that shows this comment in full.
        LiveJournalTarget commentTarget = LiveJournalTarget.FromString(c.Url);
        EntryPage commentPage = GetFrom(commentTarget, clientData);
        Comment fullVersion = commentPage.Replies.Comments[0];
        if (!fullVersion.IsFull)
        {
            // This should be a suspended user. NOTE(review): this assumes
            // EnumerateRequiringFullUp skips comments with IsSuspendedUser
            // set — otherwise this loop would never terminate; confirm.
            log.Info(string.Format("Comment {0} seems to be from a suspended user.", c));
            c.IsSuspendedUser = true;
            continue;
        }

        log.Info(string.Format("Merging comment data for comment {0}.", c));
        appliedAnything |= _repliesHelper.MergeFrom(c, fullVersion);
    }

    return(appliedAnything);
}
/// <summary>Builds a target from the given parts and renders its short form.</summary>
/// <returns>The target's short string representation.</returns>
public string ToShortStringIsFine(string username, long postId, long? commentId, int? page, bool styleMine)
{
    // Round-trip the arguments through a target object.
    LiveJournalTarget target = new LiveJournalTarget(username, postId, commentId, page, styleMine);
    return target.ToShortString();
}
/// <summary>
/// Downloads and parses every comment page other than the one already in hand,
/// by following the First/Prev/Next/Last navigation links that each newly
/// downloaded page exposes, until pages 1..Total are all collected.
/// </summary>
/// <param name="commentPages">Navigation info of the page we already have.</param>
/// <param name="clientData">Client data (e.g. cookies) forwarded to the client.</param>
/// <returns>The other pages, ordered by page number.</returns>
public EntryPage[] LoadOtherCommentPages(CommentPages commentPages, ILJClientData clientData)
{
    int initialIndex = commentPages.Current;
    int total = commentPages.Total;
    log.Info(
        string.Format(
            "Loading other comment pages given page №{0} out of {1}."
            , commentPages.Current
            , commentPages.Total
        )
    );

    // We need to download these.
    int[] need = Enumerable.Range(1, total)
                           .Where(i => i != initialIndex)
                           .ToArray();

    // Page number -> target mappings discovered so far, and the pages already
    // downloaded. SortedDictionary keeps both ordered by page number, which
    // also fixes the order of the returned array.
    IDictionary<int, LiveJournalTarget> targets = new SortedDictionary<int, LiveJournalTarget>();
    IDictionary<int, EntryPage> pages = new SortedDictionary<int, EntryPage>();
    EntryPage p;

    // Navigation links of the most recently seen page; each download reveals
    // more targets until every needed page is reachable.
    CommentPages latest = commentPages;
    while (pages.Count < need.Length)
    {
        int cur = latest.Current;

        // Harvest whatever navigation links the latest page exposes.
        if (cur != 1 && !string.IsNullOrWhiteSpace(latest.FirstUrl))
        {
            targets[1] = LiveJournalTarget.FromString(latest.FirstUrl);
        }
        if (cur != total && !string.IsNullOrWhiteSpace(latest.LastUrl))
        {
            targets[total] = LiveJournalTarget.FromString(latest.LastUrl);
        }
        if (!string.IsNullOrWhiteSpace(latest.PrevUrl))
        {
            targets[cur - 1] = LiveJournalTarget.FromString(latest.PrevUrl);
        }
        if (!string.IsNullOrWhiteSpace(latest.NextUrl))
        {
            targets[cur + 1] = LiveJournalTarget.FromString(latest.NextUrl);
        }

        // First target without a page.
        // NOTE(review): First() throws when no undownloaded target is known
        // yet — presumably every LJ comment page links at least its direct
        // neighbors, so progress is always possible; confirm.
        int keyToDownload = targets.Keys.First(z => z != initialIndex && !pages.ContainsKey(z));
        log.Info(string.Format("Will download page №{0}.", keyToDownload));
        LiveJournalTarget targetToDownload = targets[keyToDownload];

        // Download the page.
        string content = _client.GetContent(targetToDownload, clientData);
        p = _parser.ParseAsAnEntryPage(content);
        // The freshly parsed page becomes the new source of navigation links.
        latest = p.CommentPages;
        pages[keyToDownload] = p;
        log.Info(string.Format("Parsed page №{0}.", keyToDownload));
    }

    EntryPage[] ret = pages.Values.ToArray();
    return(ret);
}
/// <summary>
/// Extracts an entry page (with all its comments) and stores it under
/// <paramref name="rootLocation"/>, merging into a previously saved dump
/// when one exists.
/// </summary>
/// <param name="URI">Address of the entry to extract.</param>
/// <param name="rootLocation">Root folder of the archive.</param>
/// <param name="subFolderGetter">Strategy that names the entry's subfolder.</param>
/// <param name="cookie">Raw cookie string used for authenticated downloads.</param>
/// <returns>The merged entry page.</returns>
/// <exception cref="NotSupportedException">
/// Thrown when the subfolder strategy cannot handle the entry.
/// </exception>
public EntryPage Work(string URI, string rootLocation, IFolderNamingStrategy subFolderGetter, string cookie)
{
    LiveJournalTarget t = LiveJournalTarget.FromString(URI);
    ILJClientData cookieData = _ext.Client.CreateDataObject(cookie);

    // Get fresh version.
    log.InfoFormat("Extracting {0}...", t);
    EntryPage freshSource = _ext.GetFrom(t, cookieData);

    string innerFolder;
    IEntryBase freshSourceEntry = freshSource.Entry;
    if (!subFolderGetter.TryGetSubfolderByEntry(freshSourceEntry, out innerFolder))
    {
        string error = string.Format(
            "Cannot extract number from entry {0}, \"{1}\"."
            , freshSourceEntry.Id
            , freshSourceEntry.Subject
        );
        throw new NotSupportedException(error);
    }

    // Work under <root>/<year>/<innerFolder>. Built with Path.Combine
    // instead of a hard-coded "\\" so the separator matches the platform
    // the file-system abstraction runs on.
    string subFolder = _fs.Path.Combine(freshSource.Entry.Date.Value.Year.ToString(), innerFolder);
    string workLocation = _fs.Path.Combine(rootLocation, subFolder);
    log.Info("Will work from " + workLocation);

    // Load the previous dump, when present, so new data merges into it.
    EntryPage ep = null;
    string dumpFile = _fs.Path.Combine(workLocation, DumpFileName);
    if (_fs.File.Exists(dumpFile))
    {
        log.Info("File " + DumpFileName + " exists, will load it...");
        ep = _lp.ParseAsAnEntryPage(_fs.File.ReadAllText(dumpFile));
    }
    else
    {
        log.Info("File " + DumpFileName + " does not exist.");
    }

    bool needsSaving = _ext.AbsorbAllData(freshSource, cookieData, ref ep);
    log.Info("Will save changes: " + needsSaving + ".");
    if (needsSaving)
    {
        // Save the content as is.
        string content = _lp.Serialize(ep);
        _fs.Directory.CreateDirectory(workLocation);
        UTF8Encoding enc = new UTF8Encoding(true); // UTF-8 with a BOM.
        _fs.File.WriteAllText(dumpFile, content, enc);

        // Pick usable comments.
        List<Comment[]> comments = _scp.Pick(ep);
        log.Info("Picked threads: " + comments.Count + ".");

        // Everything we want to store.
        var allData = new List<EntryBase>();
        allData.Add(ep.Entry);
        allData.AddRange(comments.SelectMany(a => a));

        log.Info("Making sure everything is saved.");
        _rds.EnsureAllIsSaved(allData, rootLocation, workLocation);
    }

    log.Info("Finished.");
    return(ep);
}