/// <summary>
/// Locates the title and body regions for <paramref name="postUrl"/> and generates an editing template from them.
/// </summary>
/// <param name="regionLocator">Strategy used to locate the post regions.</param>
/// <param name="progress">Progress host; receives a status message up front and a 100/100 update on success.</param>
/// <param name="postUrl">URL passed through to the region locator.</param>
/// <returns>
/// The generated template, or <c>null</c> when only a linked title (and no body) was found.
/// In that case <c>_nextTryPostUrl</c> is set to the URL of the post that should be parsed next.
/// </returns>
private BlogEditingTemplate ParseBlogPostIntoTemplate(BlogPostRegionLocatorStrategy regionLocator, IProgressHost progress, string postUrl)
        {
            progress.UpdateProgress(Res.Get(StringId.ProgressCreatingEditingTemplate));

            BlogPostRegions regions            = regionLocator.LocateRegionsOnUIThread(progress, postUrl);
            IHTMLElement    primaryTitleRegion = GetPrimaryEditableTitleElement(regions.BodyRegion, regions.Document, regions.TitleRegions);

            // A title was found, no body was found, and the title is a link:
            // the page may only list titles, so try the linked post itself.
            // The tag name is compared ordinally so the check does not depend on the current culture.
            if (primaryTitleRegion != null
                && regions.BodyRegion == null
                && string.Equals(primaryTitleRegion.tagName, "a", StringComparison.OrdinalIgnoreCase))
            {
                const string aboutPrefix = "about:";

                string href       = (primaryTitleRegion as IHTMLAnchorElement)?.href;
                string newPostUrl = null;

                if (!string.IsNullOrEmpty(href))
                {
                    Uri absoluteHref;
                    if (href.StartsWith(aboutPrefix, StringComparison.Ordinal))
                    {
                        // HACK Somewhere the 'about:' protocol replaces http/https; put the homepage's
                        // scheme and authority back. GetLeftPart keeps a non-default port, which
                        // "{Scheme}://{Host}" would drop.
                        var homepageUri = new Uri(_blogHomepageUrl);
                        newPostUrl = homepageUri.GetLeftPart(UriPartial.Authority) + href.Substring(aboutPrefix.Length);
                    }
                    else if (Uri.TryCreate(href, UriKind.Absolute, out absoluteHref)
                             && (absoluteHref.Scheme == Uri.UriSchemeHttp || absoluteHref.Scheme == Uri.UriSchemeHttps))
                    {
                        // The link is already a complete http(s) URL; use it unchanged.
                        newPostUrl = href;
                    }
                }

                // Still flag unexpected link formats in debug builds, but never turn them into a bogus retry URL.
                Debug.Assert(newPostUrl != null, "Title link is not in an expected format: " + href);

                if (newPostUrl != null)
                {
                    // Setting _nextTryPostUrl signals that another page should be parsed.
                    _nextTryPostUrl = newPostUrl;
                    return null;
                }

                // No usable link target: fall through and build the template from what was found.
            }

            BlogEditingTemplate template = GenerateBlogTemplate((IHTMLDocument3)regions.Document, primaryTitleRegion, regions.TitleRegions, regions.BodyRegion);

            progress.UpdateProgress(100, 100);
            return template;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Generates an editing template from the regions reported by <paramref name="regionLocator"/>.
        /// </summary>
        /// <param name="regionLocator">Strategy used to locate the post regions.</param>
        /// <param name="progress">Progress host; receives a status message up front and a 100/100 update at the end.</param>
        /// <returns>The template produced by <c>GenerateBlogTemplate</c>.</returns>
        private BlogEditingTemplate ParseBlogPostIntoTemplate(BlogPostRegionLocatorStrategy regionLocator, IProgressHost progress)
        {
            progress.UpdateProgress(Res.Get(StringId.ProgressCreatingEditingTemplate));

            // Find the regions, then pick the title element that will be editable.
            var located = regionLocator.LocateRegionsOnUIThread(progress);
            var editableTitle = GetPrimaryEditableTitleElement(
                located.BodyRegion,
                located.Document,
                located.TitleRegions);

            var result = GenerateBlogTemplate(
                (IHTMLDocument3)located.Document,
                editableTitle,
                located.TitleRegions,
                located.BodyRegion);

            progress.UpdateProgress(100, 100);
            return result;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Loads the blog homepage contents into an HTML document and locates the elements
        /// containing the configured title and body text.
        /// </summary>
        /// <param name="progress">Progress host; polled for cancellation while loading and set to 100/100 on success.</param>
        /// <returns>The located title regions, the scrubbed body region and the document they belong to.</returns>
        /// <exception cref="BlogClientAbortGettingTemplateException">The document uses a dynamic template.</exception>
        /// <exception cref="Exception">No title or body element was found, or the first body match is smart content.</exception>
        public override BlogPostRegions LocateRegionsOnUIThread(IProgressHost progress)
        {
            const int documentLoadTimeoutMs = 10000;

            blogHomepageContents.Seek(0, SeekOrigin.Begin);
            IHTMLDocument2 doc2 = HTMLDocumentHelper.GetHTMLDocumentFromStream(blogHomepageContents, _blogHomepageUrl);

            // Ensure that the document is fully loaded.
            // If it is not fully loaded, then viewing its current style is non-deterministic.
            // A Stopwatch is used so the timeout is not affected by wall-clock changes (DST, clock sync).
            Stopwatch loadTimer = Stopwatch.StartNew();

            while (!progress.CancelRequested && !HTMLDocumentHelper.IsReady(doc2))
            {
                if (loadTimer.ElapsedMilliseconds > documentLoadTimeoutMs)
                {
                    // Timing out here is not fatal.
                    Trace.WriteLine("Timed out while loading blog homepage for theme detection.");
                    break;
                }

                Application.DoEvents();
            }

            // The Google/Blogger dynamic templates load the pages dynamically using Ajax, so we don't have any template to use.
            if (IsUsingDynamicTemplate(doc2))
            {
                throw new BlogClientAbortGettingTemplateException();
            }

            IHTMLElement[] titles = FindText(_titleText, doc2.body);
            IHTMLElement[] bodies = FindText(_bodyText, doc2.body);
            if (titles.Length == 0 || bodies.Length == 0)
            {
                throw new Exception("Unable to locate blog post elements using most recent post");
            }

            if (IsSmartContent(bodies[0]))
            {
                throw new Exception("Most recent post is smart content");
            }

            BlogPostRegions regions = new BlogPostRegions();

            regions.TitleRegions = titles;

            // Scrub the post body element to avoid improperly including extraneous parent elements.
            regions.BodyRegion = ScrubPostBodyRegionParentElements(bodies[0]);
            regions.Document   = doc2;

            progress.UpdateProgress(100, 100);

            return regions;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Parses the HTML in <paramref name="stream"/> and returns the regions holding the
        /// temporary post's title and body marker text.
        /// </summary>
        /// <param name="stream">HTML content to parse.</param>
        /// <param name="postSourceUrl">URL passed to the document loader together with the stream.</param>
        /// <param name="progress">Progress host; receives a status message up front and a 100/100 update on success.</param>
        /// <returns>The document, every element containing the title marker, and the body element (may be null).</returns>
        /// <exception cref="Exception">No element containing the title marker was found.</exception>
        private BlogPostRegions ParseBlogPostIntoTemplate(Stream stream, string postSourceUrl, IProgressHost progress)
        {
            progress.UpdateProgress(Res.Get(StringId.ProgressCreatingEditingTemplate));

            // Load the HTML and look up the marker elements.
            IHTMLDocument2 htmlDocument  = HTMLDocumentHelper.GetHTMLDocumentFromStream(stream, postSourceUrl);
            IHTMLDocument3 htmlDocument3 = (IHTMLDocument3)htmlDocument;

            IHTMLElement[] titleMatches = HTMLDocumentHelper.FindElementsContainingText(htmlDocument, TEMPORARY_POST_TITLE_GUID);
            IHTMLElement   postBody     = HTMLDocumentHelper.FindElementContainingText(htmlDocument, TEMPORARY_POST_BODY_GUID);

            bool bodyIsParagraph = postBody != null && postBody.tagName == "P";
            if (bodyIsParagraph)
            {
                // The body match is the planted <p>; swap it for a <div>, which is the
                // safest parent element for arbitrary post content.
                IHTMLElement replacementDiv = htmlDocument.createElement("div");
                IHTMLDOMNode parentNode     = postBody.parentElement as IHTMLDOMNode;
                parentNode.replaceChild(replacementDiv as IHTMLDOMNode, postBody as IHTMLDOMNode);
                postBody = replacementDiv;
            }

            // Without at least one title match there is nothing to return.
            if (titleMatches.Length <= 0)
            {
                throw new Exception("unable to access test post.");
            }

            BlogPostRegions located = new BlogPostRegions();
            located.Document     = (IHTMLDocument)htmlDocument3;
            located.TitleRegions = titleMatches;
            located.BodyRegion   = postBody;

            progress.UpdateProgress(100, 100);
            return located;
        }
        /// <summary>
        /// Loads the blog homepage contents into an HTML document and locates the elements
        /// containing the configured title and body text.
        /// </summary>
        /// <param name="progress">Progress host; polled for cancellation while loading and set to 100/100 on success.</param>
        /// <returns>The located title regions, the scrubbed body region and the document they belong to.</returns>
        /// <exception cref="Exception">No title or body element was found, or the first body match is smart content.</exception>
        public override BlogPostRegions LocateRegionsOnUIThread(IProgressHost progress)
        {
            const int documentLoadTimeoutMs = 10000;

            blogHomepageContents.Seek(0, SeekOrigin.Begin);
            IHTMLDocument2 doc2 = HTMLDocumentHelper.GetHTMLDocumentFromStream(blogHomepageContents, _blogHomepageUrl);

            // Ensure that the document is fully loaded.
            // If it is not fully loaded, then viewing its current style is non-deterministic.
            // A Stopwatch is used so the timeout is not affected by wall-clock changes (DST, clock sync).
            Stopwatch loadTimer = Stopwatch.StartNew();
            while (!progress.CancelRequested && !HTMLDocumentHelper.IsReady(doc2))
            {
                if (loadTimer.ElapsedMilliseconds > documentLoadTimeoutMs)
                {
                    // Timing out here is not fatal.
                    Trace.WriteLine("Timed out while loading blog homepage for theme detection.");
                    break;
                }

                Application.DoEvents();
            }

            IHTMLElement[] titles = FindText(_titleText, doc2.body);
            IHTMLElement[] bodies = FindText(_bodyText, doc2.body);
            if (titles.Length == 0 || bodies.Length == 0)
            {
                throw new Exception("Unable to locate blog post elements using most recent post");
            }

            if (IsSmartContent(bodies[0]))
            {
                throw new Exception("Most recent post is smart content");
            }

            BlogPostRegions regions = new BlogPostRegions();
            regions.TitleRegions = titles;

            // Scrub the post body element to avoid improperly including extraneous parent elements.
            regions.BodyRegion = ScrubPostBodyRegionParentElements(bodies[0]);
            regions.Document = doc2;

            progress.UpdateProgress(100, 100);

            return regions;
        }
        /// <summary>
        /// Builds a <c>BlogPostRegions</c> from the HTML in <paramref name="stream"/> by finding
        /// the elements that contain the temporary post's title and body marker text.
        /// </summary>
        /// <param name="stream">HTML content to parse.</param>
        /// <param name="postSourceUrl">URL passed to the document loader together with the stream.</param>
        /// <param name="progress">Progress host; receives a status message up front and a 100/100 update on success.</param>
        /// <returns>Regions holding the document, the title matches and the body element (may be null).</returns>
        /// <exception cref="Exception">No element containing the title marker was found.</exception>
        private BlogPostRegions ParseBlogPostIntoTemplate(Stream stream, string postSourceUrl, IProgressHost progress)
        {
            string statusText = Res.Get(StringId.ProgressCreatingEditingTemplate);
            progress.UpdateProgress(statusText);

            // Parse the stream into a document.
            IHTMLDocument2 parsedDoc = HTMLDocumentHelper.GetHTMLDocumentFromStream(stream, postSourceUrl);
            IHTMLDocument3 parsedDoc3 = (IHTMLDocument3)parsedDoc;
            IHTMLElement[] titleHits = HTMLDocumentHelper.FindElementsContainingText(parsedDoc, TEMPORARY_POST_TITLE_GUID);

            IHTMLElement bodyHit = HTMLDocumentHelper.FindElementContainingText(parsedDoc, TEMPORARY_POST_BODY_GUID);
            if (bodyHit != null)
            {
                if (bodyHit.tagName == "P")
                {
                    // The match is the planted <p>: replace it with a <div>, the safest
                    // element to act as parent of all post content.
                    IHTMLElement wrapper = parsedDoc.createElement("div");
                    IHTMLDOMNode container = bodyHit.parentElement as IHTMLDOMNode;
                    container.replaceChild(wrapper as IHTMLDOMNode, bodyHit as IHTMLDOMNode);
                    bodyHit = wrapper;
                }
            }

            // Fail when the title marker was not found anywhere in the document.
            if (!(titleHits.Length > 0))
            {
                throw new Exception("unable to access test post.");
            }

            BlogPostRegions found = new BlogPostRegions();
            found.Document = (IHTMLDocument)parsedDoc3;
            found.TitleRegions = titleHits;
            found.BodyRegion = bodyHit;

            progress.UpdateProgress(100, 100);
            return found;
        }