public string GetContent(LiveJournalTarget target, ILJClientData data)
        {
            // Build the final address: force style=mine and expand all cuts.
            Uri address = target.WithStyleMine(true).WithCutExpand().GetUri();

            log.Info("Downloading " + address + "...");

            // A fresh handler/client pair is created per call because the
            // cookie jar is specific to this request's client data.
            CookieContainer jar = new CookieContainer();

            using (HttpClientHandler handler = new HttpClientHandler { CookieContainer = jar })
            using (HttpClient client = new HttpClient(handler) { BaseAddress = address })
            {
                // Feed any caller-supplied cookies into the jar before downloading.
                LJClientCookieData cookieData = data as LJClientCookieData;
                if (cookieData != null)
                {
                    foreach (KeyValuePair<string, string> pair in cookieData.GetCookiesToUse())
                    {
                        log.DebugFormat("Using cookie {0}:{1}.", pair.Key, pair.Value);
                        jar.Add(address, new Cookie(pair.Key, pair.Value));
                    }
                }

                return DownloadString(client, address);
            }
        }
// Esempio n. 2
// 0
        /// <summary>Gets a page from a Url.</summary>
        /// <param name="url">The target to download.</param>
        /// <param name="clientData">Opaque client state (e.g. cookies) passed through to the client.</param>
        /// <returns>The downloaded content parsed as an entry page.</returns>
        public EntryPage GetFrom(LiveJournalTarget url, ILJClientData clientData)
        {
            // Download the raw markup, then hand it straight to the parser.
            string markup = Client.GetContent(url, clientData);
            return _parser.ParseAsAnEntryPage(markup);
        }
        /// <summary>
        /// Merges a freshly downloaded entry page (and all of its other comment
        /// pages) into <paramref name="dumpData"/>, expanding folded comments
        /// one by one until none remain.
        /// </summary>
        /// <param name="freshSource">The page just downloaded.</param>
        /// <param name="clientData">Opaque client state passed through to downloads.</param>
        /// <param name="dumpData">The accumulated dump; created here when null.</param>
        /// <returns>True if any data was added or changed.</returns>
        public bool AbsorbAllData(EntryPage freshSource, ILJClientData clientData, ref EntryPage dumpData)
        {
            bool appliedAnything = false;

            // Creating the dump on first use counts as a change by itself.
            if(dumpData == null)
            {
                dumpData = new EntryPage();
                appliedAnything = true;
            }

            appliedAnything |= _entryPageHelper.AddData(dumpData, freshSource);

            // TryGet all comments.
            EntryPage[] otherPages = _otherPagesLoader.LoadOtherCommentPages(freshSource.CommentPages, clientData);

            foreach(EntryPage pageX in otherPages)
                appliedAnything |= _entryPageHelper.AddData(dumpData, pageX);

            while(true)
            {
                // Materialize the sequence: the original hand-rolled
                // MoveNext loop never disposed the enumerator, and a list
                // gives us both the count and the first element in one pass.
                List<Comment> folded = _repliesHelper.EnumerateRequiringFullUp(dumpData.Replies).ToList();

                // How many comments left?
                log.Info(String.Format("Folded comments left: {0}.", folded.Count));
                if(folded.Count == 0)
                    break;

                Comment c = folded[0];
                LiveJournalTarget commentTarget = LiveJournalTarget.FromString(c.Url);
                EntryPage commentPage = GetFrom(commentTarget, clientData);
                Comment fullVersion = commentPage.Replies.Comments[0];
                if(fullVersion.IsFull == false)
                {
                    // Still folded even on its own page — this should be a suspended user.
                    log.Info(String.Format("Comment {0} seems to be from a suspended user.", c));
                    c.IsSuspendedUser = true;
                    continue;
                }

                log.Info(String.Format("Merging comment data for comment {0}.", c));
                appliedAnything |= _repliesHelper.MergeFrom(c, fullVersion);
            }

            return appliedAnything;
        }
        /// <summary>
        /// Downloads and parses every comment page of an entry except the one
        /// already represented by <paramref name="commentPages"/>.
        /// </summary>
        /// <param name="commentPages">Paging info of the page already in hand.</param>
        /// <param name="clientData">Opaque client state passed to the downloader.</param>
        /// <returns>The other pages, in ascending page-number order.</returns>
        public EntryPage[] LoadOtherCommentPages(CommentPages commentPages, ILJClientData clientData)
        {
            int initialIndex = commentPages.Current;
            int total = commentPages.Total;

            log.Info(String.Format("Loading other comment pages given page №{0} out of {1}.", commentPages.Current, commentPages.Total));

            // Page numbers we still need to download (all but the current one).
            int[] need = Enumerable.Range(1, total).Where(i => i != initialIndex).ToArray();

            // Known page-number -> URL mappings, refined as pages come in.
            IDictionary<int, LiveJournalTarget> targets = new SortedDictionary<int, LiveJournalTarget>();
            // Downloaded pages; sorted keys give the return value page order.
            IDictionary<int, EntryPage> pages = new SortedDictionary<int, EntryPage>();

            CommentPages latest = commentPages;
            while(pages.Count < need.Length)
            {
                int cur = latest.Current;

                // Harvest every navigation link the latest page exposes.
                if(cur != 1 && !String.IsNullOrWhiteSpace(latest.FirstUrl))
                    targets[1] = LiveJournalTarget.FromString(latest.FirstUrl);
                if(cur != total && !String.IsNullOrWhiteSpace(latest.LastUrl))
                    targets[total] = LiveJournalTarget.FromString(latest.LastUrl);
                if(!String.IsNullOrWhiteSpace(latest.PrevUrl))
                    targets[cur - 1] = LiveJournalTarget.FromString(latest.PrevUrl);
                if(!String.IsNullOrWhiteSpace(latest.NextUrl))
                    targets[cur + 1] = LiveJournalTarget.FromString(latest.NextUrl);

                // First target without a page.
                // NOTE(review): First() throws if the navigation links never
                // cover a needed page — presumably the site always links
                // enough neighbours; confirm against real paging data.
                int keyToDownload = targets.Keys.First(z => z != initialIndex && !pages.ContainsKey(z));
                log.Info(String.Format("Will download page №{0}.", keyToDownload));
                LiveJournalTarget targetToDownload = targets[keyToDownload];

                // Download the page. The parsed page is scoped to the loop
                // (it was needlessly declared at method scope before).
                string content = _client.GetContent(targetToDownload, clientData);
                EntryPage p = _parser.ParseAsAnEntryPage(content);
                latest = p.CommentPages;
                pages[keyToDownload] = p;
                log.Info(String.Format("Parsed page №{0}.", keyToDownload));
            }

            EntryPage[] ret = pages.Values.ToArray();
            return ret;
        }
        /// <summary>Downloads the raw content of the given target, applying any cookies from the client data.</summary>
        public string GetContent(LiveJournalTarget target, ILJClientData data)
        {
            // Force style=mine and expand cuts before resolving the URI.
            Uri address = target.WithStyleMine(true)
                                .WithCutExpand()
                                .GetUri();

            log.Info("Downloading " + address + "...");

            // Handler and client are per-call: the cookie jar depends on "data".
            CookieContainer jar = new CookieContainer();
            using (HttpClientHandler handler = new HttpClientHandler { CookieContainer = jar })
            {
                using (HttpClient client = new HttpClient(handler) { BaseAddress = address })
                {
                    LJClientCookieData cookieData = data as LJClientCookieData;
                    if (cookieData != null)
                    {
                        Dictionary<string, string> cookies = cookieData.GetCookiesToUse();
                        foreach (KeyValuePair<string, string> pair in cookies)
                        {
                            log.DebugFormat("Using cookie {0}:{1}.", pair.Key, pair.Value);
                            jar.Add(address, new Cookie(pair.Key, pair.Value));
                        }
                    }

                    return DownloadString(client, address);
                }
            }
        }
// Esempio n. 6
// 0
        /// <summary>
        /// Absorbs a freshly downloaded entry page and all of its other comment
        /// pages into <paramref name="dumpData"/>, then expands folded comments
        /// until none require a full-up.
        /// </summary>
        /// <param name="freshSource">The page just downloaded.</param>
        /// <param name="clientData">Opaque client state passed through to downloads.</param>
        /// <param name="dumpData">The accumulated dump; created here when null.</param>
        /// <returns>True if any data was added or changed.</returns>
        public bool AbsorbAllData(EntryPage freshSource, ILJClientData clientData, ref EntryPage dumpData)
        {
            bool appliedAnything = false;

            // Creating the dump on first use is itself a change worth saving.
            if (dumpData == null)
            {
                dumpData = new EntryPage();
                appliedAnything = true;
            }

            appliedAnything |= _entryPageHelper.AddData(dumpData, freshSource);

            // TryGet all comments.
            EntryPage[] otherPages = _otherPagesLoader.LoadOtherCommentPages(freshSource.CommentPages, clientData);

            foreach (EntryPage pageX in otherPages)
            {
                appliedAnything |= _entryPageHelper.AddData(dumpData, pageX);
            }

            while (true)
            {
                // Materialize the folded comments into a list. The original
                // walked a raw IEnumerator by hand and never disposed it;
                // ToList enumerates once, disposes correctly, and yields both
                // the count and the first element.
                List<Comment> folded = _repliesHelper.EnumerateRequiringFullUp(dumpData.Replies).ToList();

                // How many comments left?
                log.Info(string.Format("Folded comments left: {0}.", folded.Count));
                if (folded.Count == 0)
                {
                    break;
                }

                Comment c = folded[0];
                LiveJournalTarget commentTarget = LiveJournalTarget.FromString(c.Url);
                EntryPage commentPage = GetFrom(commentTarget, clientData);
                Comment fullVersion = commentPage.Replies.Comments[0];
                if (fullVersion.IsFull == false)
                {
                    // Still folded on its own page — this should be a suspended user.
                    log.Info(string.Format("Comment {0} seems to be from a suspended user.", c));
                    c.IsSuspendedUser = true;
                    continue;
                }

                log.Info(string.Format("Merging comment data for comment {0}.", c));
                appliedAnything |= _repliesHelper.MergeFrom(c, fullVersion);
            }

            return(appliedAnything);
        }
// Esempio n. 7
// 0
        /// <summary>
        /// Downloads and parses all comment pages of the entry other than the
        /// one described by <paramref name="commentPages"/>, following the
        /// first/last/prev/next navigation links each fetched page exposes.
        /// </summary>
        /// <param name="commentPages">Paging info of the page already in hand.</param>
        /// <param name="clientData">Opaque client state passed to the downloader.</param>
        /// <returns>The other pages, in ascending page-number order.</returns>
        public EntryPage[] LoadOtherCommentPages(CommentPages commentPages, ILJClientData clientData)
        {
            int initialIndex = commentPages.Current;
            int total = commentPages.Total;

            log.Info(string.Format("Loading other comment pages given page №{0} out of {1}.", commentPages.Current, commentPages.Total));

            // Pages still needed: every index except the one we already have.
            int[] need = Enumerable.Range(1, total).Where(i => i != initialIndex).ToArray();

            // Known page-number -> URL mappings and pages fetched so far;
            // sorted keys make the result come out in page order.
            IDictionary<int, LiveJournalTarget> knownTargets = new SortedDictionary<int, LiveJournalTarget>();
            IDictionary<int, EntryPage> fetched = new SortedDictionary<int, EntryPage>();

            CommentPages latest = commentPages;
            while (fetched.Count < need.Length)
            {
                int cur = latest.Current;

                // Collect whatever navigation links the latest page offers.
                if (cur != 1 && !string.IsNullOrWhiteSpace(latest.FirstUrl))
                    knownTargets[1] = LiveJournalTarget.FromString(latest.FirstUrl);
                if (cur != total && !string.IsNullOrWhiteSpace(latest.LastUrl))
                    knownTargets[total] = LiveJournalTarget.FromString(latest.LastUrl);
                if (!string.IsNullOrWhiteSpace(latest.PrevUrl))
                    knownTargets[cur - 1] = LiveJournalTarget.FromString(latest.PrevUrl);
                if (!string.IsNullOrWhiteSpace(latest.NextUrl))
                    knownTargets[cur + 1] = LiveJournalTarget.FromString(latest.NextUrl);

                // Pick the first known target we have not fetched yet.
                int key = knownTargets.Keys.First(z => z != initialIndex && !fetched.ContainsKey(z));
                log.Info(string.Format("Will download page №{0}.", key));

                // Download and parse it; its own paging links feed the next round.
                string content = _client.GetContent(knownTargets[key], clientData);
                EntryPage parsed = _parser.ParseAsAnEntryPage(content);
                latest = parsed.CommentPages;
                fetched[key] = parsed;
                log.Info(string.Format("Parsed page №{0}.", key));
            }

            return fetched.Values.ToArray();
        }
        /// <summary>Gets a page from a Url.</summary>
        /// <param name="url">The target to download.</param>
        /// <param name="clientData">Opaque client state (e.g. cookies) passed through to the client.</param>
        /// <returns>The downloaded content parsed as an entry page.</returns>
        public EntryPage GetFrom(LiveJournalTarget url, ILJClientData clientData)
        {
            // Download the raw content and parse it in one go.
            return _parser.ParseAsAnEntryPage(Client.GetContent(url, clientData));
        }
// Esempio n. 9
// 0
        /// <summary>
        /// Extracts an entry (with all comments), merges it with any dump
        /// already on disk, and persists the result when anything changed.
        /// </summary>
        /// <param name="URI">Address of the entry to extract.</param>
        /// <param name="rootLocation">Root folder of the on-disk archive.</param>
        /// <param name="subFolderGetter">Strategy producing the per-entry subfolder name.</param>
        /// <param name="cookie">Raw cookie string used to authenticate the client.</param>
        /// <returns>The merged entry page.</returns>
        /// <exception cref="NotSupportedException">No subfolder name can be derived from the entry.</exception>
        public EntryPage Work(string URI, string rootLocation, IFolderNamingStrategy subFolderGetter, string cookie)
        {
            LiveJournalTarget t = LiveJournalTarget.FromString(URI);
            ILJClientData cookieData = _ext.Client.CreateDataObject(cookie);

            // Get fresh version.
            log.InfoFormat("Extracting {0}...", t);
            EntryPage freshSource = _ext.GetFrom(t, cookieData);

            string innerFolder;
            IEntryBase freshSourceEntry = freshSource.Entry;

            if (!subFolderGetter.TryGetSubfolderByEntry(freshSourceEntry, out innerFolder))
            {
                string error = string.Format(
                    "Cannot extract number from entry {0}, \"{1}\"."
                    , freshSourceEntry.Id
                    , freshSourceEntry.Subject
                    );
                throw new NotSupportedException(error);
            }

            // Layout: <root>/<year>/<innerFolder>. Built with Path.Combine
            // instead of the previous hard-coded "\\" separator so the path is
            // valid on any platform. NOTE(review): assumes Entry.Date is
            // non-null here — it throws otherwise, same as before; confirm
            // against the parser's guarantees.
            string year = freshSource.Entry.Date.Value.Year.ToString();
            string workLocation = _fs.Path.Combine(_fs.Path.Combine(rootLocation, year), innerFolder);

            log.Info("Will work from " + workLocation);

            EntryPage ep = null;
            string dumpFile = _fs.Path.Combine(workLocation, DumpFileName);

            // Load any previous dump so fresh data merges into it.
            if (_fs.File.Exists(dumpFile))
            {
                log.Info("File " + DumpFileName + " exists, will load it...");
                ep = _lp.ParseAsAnEntryPage(_fs.File.ReadAllText(dumpFile));
            }
            else
            {
                log.Info("File " + DumpFileName + " does not exist.");
            }

            bool needsSaving = _ext.AbsorbAllData(freshSource, cookieData, ref ep);

            log.Info("Will save changes: " + needsSaving + ".");
            if (needsSaving)
            {
                // Save the content as is.
                string content = _lp.Serialize(ep);
                _fs.Directory.CreateDirectory(workLocation);

                // UTF-8 with BOM, matching what the dump was written with before.
                UTF8Encoding enc = new UTF8Encoding(true);
                _fs.File.WriteAllText(dumpFile, content, enc);

                // Pick usable comments.
                List<Comment[]> comments = _scp.Pick(ep);
                log.Info("Picked threads: " + comments.Count + ".");

                // Everything we want to store: the entry plus every picked comment.
                var allData = new List<EntryBase>();
                allData.Add(ep.Entry);
                allData.AddRange(comments.SelectMany(a => a));

                log.Info("Making sure everything is saved.");
                _rds.EnsureAllIsSaved(allData, rootLocation, workLocation);
            }

            log.Info("Finished.");
            return ep;
        }