Exemple #1
0
            /// <summary>
            /// Creates a list item for <paramref name="postInfo"/> and precomputes the text
            /// stored in <c>SearchIndex</c>.
            /// </summary>
            /// <param name="postInfo">Post whose title and contents feed the search index.</param>
            /// <param name="listBox">List box that owns this item.</param>
            public PostInfoItem(PostInfo postInfo, BlogPostListBox listBox)
            {
                _postInfo = postInfo;
                _listbox = listBox;

                // The index is the lower-cased (UI culture) "title contents" pair converted to plain text.
                string loweredTitle = postInfo.Title.ToLower(CultureInfo.CurrentUICulture);
                string loweredContents = postInfo.Contents.ToLower(CultureInfo.CurrentUICulture);
                string combined = loweredTitle + " " + loweredContents;

                SearchIndex = HTMLDocumentHelper.HTMLToPlainText(combined, true);
            }
Exemple #2
0
        /// <summary>
        /// Downloads the weblog home page and, when the document exposes a title, uses that
        /// title (minus anything after a "|") as the default blog name.
        /// </summary>
        /// <param name="progressHost">Host that receives the progress notification.</param>
        /// <returns>The document produced by the download helper; this method itself guards against it being null.</returns>
        protected IHTMLDocument2 GetWeblogHomepageDOM(IProgressHost progressHost)
        {
            UpdateProgress(progressHost, 25, Res.Get(StringId.ProgressAnalyzingHomepage));

            // attempt the download; the helper also reports the URI that actually answered
            string actualUri;
            IHTMLDocument2 homepageDocument = HTMLDocumentHelper.SafeGetHTMLDocumentFromUrl(_homepageUrl, out actualUri);

            bool homepageUrlChanged = actualUri != null && actualUri != _homepageUrl;
            if (homepageUrlChanged)
            {
                _homepageUrl = actualUri;
            }

            // nothing to derive a name from
            if (homepageDocument == null || homepageDocument.title == null)
            {
                return homepageDocument;
            }

            // default the blog name to the document title
            _blogName = homepageDocument.title;

            // text to the right of a "|" is usually the site name, so keep only the left part
            int separatorPosition = _blogName.IndexOf("|", StringComparison.OrdinalIgnoreCase);
            if (separatorPosition > 0)
            {
                string leftOfSeparator = _blogName.Substring(0, separatorPosition).Trim();
                if (leftOfSeparator.Length != 0)
                {
                    _blogName = leftOfSeparator;
                }
            }

            return homepageDocument;
        }
Exemple #3
0
        /// <summary>
        /// Builds the HTML document to publish: the editor's body, wrapped in the default font
        /// and a direction div, substituted for the "{post-body}" token of the template HTML.
        /// </summary>
        /// <returns>The assembled document with its body "dir" attribute set.</returns>
        public IHTMLDocument2 GetPublishDocument()
        {
            string publishedBody = contentEditor.Publish(null);

            // Wrap everything in the user's default font. Text whose font came from the body style
            // while editing would otherwise lose it, because the body style cannot be sent in emails
            // (some email providers strip it).
            publishedBody = contentEditor.CurrentDefaultFont.ApplyFontToBody(publishedBody);

            // LTR/RTL is supported per paragraph but the default is inherited from the body, so the
            // content also gets an explicit direction wrapper.
            // NOTE: the body direction is set further down, so this wrapper may be redundant; it is
            // kept to avoid possible regressions with external mail providers.
            string direction;
            if (contentEditor.IsRTLTemplate)
            {
                direction = "rtl";
            }
            else
            {
                direction = "ltr";
            }

            publishedBody = string.Format(CultureInfo.InvariantCulture, "<div dir=\"{0}\">{1}</div>", direction, publishedBody);

            // Combine theme and body into one HTML string and turn it into an IHTMLDocument2
            // (needed for WLM so they can reuse packaging code).
            string completeHtml = _wysiwygHTML.Replace("{post-body}", publishedBody);
            IHTMLDocument2 result = HTMLDocumentHelper.StringToHTMLDoc(completeHtml, "");

            // WinLive 262662: the direction of the email as a whole is the one defined on the body,
            // regardless of the wrapping div.
            result.body.setAttribute("dir", direction, 1);

            return result;
        }
Exemple #4
0
        /// <summary>
        /// Removes elements around the located post body that belong to the post content itself
        /// (rather than to the template) and returns the element to use as the post body region.
        /// </summary>
        /// <param name="postBodyElement">Element located as the post body in the downloaded page.</param>
        /// <returns>
        /// The parent of the outermost removed element when something was removed; otherwise the
        /// current value of <paramref name="postBodyElement"/> (see the review note in the body).
        /// </returns>
        private IHTMLElement ScrubPostBodyRegionParentElements(IHTMLElement postBodyElement)
        {
            //Note: This method handles the case where the post content consists of a single line of text, in which
            //surrounding elements that are part of the post content (like <p> and <b>) would be improperly included in the template.

            //since the text-based find strategy returns the common parent element that contains the text,
            //it is possible for the element to be an inline element (like <b> or <i>) that entirely surrounds
            //the content.  It would be improper to include these inline elements in the template, so we delete all inline
            //elements above the post body.
            try
            {
                //parse the most recent post's own HTML into a separate document and locate its text there
                IHTMLDocument2 postContentDoc       = HTMLDocumentHelper.StringToHTMLDoc(mostRecentPost.Contents, this._blogHomepageUrl);
                IHTMLElement[] postContentsElements = FindText(HTMLDocumentHelper.HTMLToPlainText(postContentDoc.body.innerHTML), postContentDoc.body);
                if (postContentsElements.Length > 0)
                {
                    IHTMLElement postContentsElement = postContentsElements[0];
                    if (postContentsElement is IHTMLBodyElement)
                    {
                        //there are no extraneous surrounding tags to clean up, so just return the current body element
                        return(postBodyElement);
                    }

                    //postBodyElement and postContentsElement should now be the equivalent element in each document.
                    //Now delete the parent elements of postContentsElement that appear in the postBodyElement DOM since
                    //these will clearly be elements associated with the post, and not the template.
                    ArrayList markedForDelete = new ArrayList();
                    //walk both ancestor chains in lock-step until either one reaches its <body>,
                    //marking the page-side element whenever the tag names match at the same level
                    while (!(postContentsElement is IHTMLBodyElement) && !(postBodyElement is IHTMLBodyElement))
                    {
                        if (postContentsElement.tagName == postBodyElement.tagName)
                        {
                            markedForDelete.Add(postBodyElement);
                        }
                        postContentsElement = postContentsElement.parentElement;
                        postBodyElement     = postBodyElement.parentElement;
                    }

                    if (markedForDelete.Count > 0)
                    {
                        //delete all of the marked elements except the last one
                        //(removeNode(false): presumably removes the element but keeps its children - confirm against MSHTML docs)
                        for (int i = markedForDelete.Count - 2; i >= 0; i--)
                        {
                            IHTMLDOMNode domNode = (IHTMLDOMNode)markedForDelete[i];
                            domNode.removeNode(false);
                        }

                        //delete the last node and return its parent as the new post body region element
                        //(the parent is captured before the removal)
                        IHTMLElement lastMarkForDeleteElement = (IHTMLElement)markedForDelete[markedForDelete.Count - 1];
                        IHTMLElement newPostBodyElement       = lastMarkForDeleteElement.parentElement;
                        (lastMarkForDeleteElement as IHTMLDOMNode).removeNode(false);
                        return(newPostBodyElement);
                    }
                }
            }
            catch (Exception e)
            {
                //This is an error we should look at, but it should not abort template detection.
                Debug.Fail("Cleanup logic failed with an error", e.ToString());
            }
            //NOTE(review): the walk above reassigns postBodyElement to its ancestors, so when the loop ran but
            //nothing was marked (or an exception occurred mid-walk) this returns the ancestor reached by the
            //walk rather than the element originally passed in - confirm this is intended.
            return(postBodyElement);
        }
Exemple #5
0
        /// <summary>
        /// Creates an editor proxy from an existing HTML document. The document's body is replaced by
        /// the "{post-body}" token to form the template HTML, and the original body (when not empty)
        /// is queued as a delayed insert.
        /// </summary>
        /// <param name="htmlDocument">Document supplying both the template and the initial body; its body is overwritten.</param>
        /// <param name="options">Insert options used for the queued body content.</param>
        /// <param name="wpost">Path of an existing post file to load; when empty or missing, a new blog post is used.</param>
        public ContentEditorProxy(ContentEditorFactory factory, IContentEditorSite contentEditorSite, IInternetSecurityManager internetSecurityManager, IHTMLDocument2 htmlDocument, HtmlInsertOptions options, int dlControlFlags, string color, string wpost)
        {
            // Remember the body first, then swap it for the token and serialize the template.
            string originalBody = htmlDocument.body.innerHTML;
            htmlDocument.body.innerHTML = "{post-body}";
            string templateHtml = HTMLDocumentHelper.HTMLDocToString(htmlDocument);

            IBlogPostEditingContext context;
            bool postFileAvailable = !string.IsNullOrEmpty(wpost) && File.Exists(wpost);
            if (postFileAvailable)
            {
                // Load the saved post but start from empty contents.
                PostEditorFile postFile = PostEditorFile.GetExisting(new FileInfo(wpost));
                context = postFile.Load(false);
                context.BlogPost.Contents = "";
            }
            else
            {
                BlogPost freshPost = new BlogPost();
                context = new BlogPostEditingContext(ContentEditorAccountAdapter.AccountId, freshPost);
            }

            bool hasBodyToInsert = !string.IsNullOrEmpty(originalBody);
            if (hasBodyToInsert)
            {
                delayedInsertOperations.Enqueue(new DelayedInsert(originalBody, options));
            }

            ContentEditorProxyCore(factory, contentEditorSite, internetSecurityManager, templateHtml, null, context, new ContentEditorTemplateStrategy(), dlControlFlags, color);
        }
Exemple #6
0
        /// <summary>
        /// Creates a content editor from the HTML behind <paramref name="moniker"/>.
        /// </summary>
        /// <param name="moniker">Moniker the HTML is read from.</param>
        /// <param name="codepage">Requested codepage; passed through EmailShim.GetCodepage before use.</param>
        /// <param name="wpost">Post file path forwarded to the editor proxy.</param>
        /// <returns>A new <c>ContentEditorProxy</c> over the parsed document.</returns>
        public IContentEditor CreateEditorFromMoniker(IContentEditorSite contentEditorSite, IInternetSecurityManager internetSecurityManager, IMoniker moniker, uint codepage, HtmlInsertOptions options, string color, int dlControlFlags, string wpost)
        {
            codepage = EmailShim.GetCodepage(codepage);

            string documentName;
            string markup = HTMLDocumentHelper.MonikerToString(moniker, codepage, out documentName);

            // For an RTL codepage, switch the email content target (if that is what is configured) to RTL mode.
            EmailContentTarget emailTarget = CultureHelper.IsRtlCodepage(codepage)
                ? GlobalEditorOptions.ContentTarget as EmailContentTarget
                : null;
            if (emailTarget != null)
            {
                emailTarget.EnableRtlMode();
            }

            // Fall back to an empty document when the moniker yielded nothing.
            if (string.IsNullOrEmpty(markup))
            {
                markup = "<html><body></body></html>";
            }

            markup = EmailShim.GetContentHtml(documentName, markup);

            // The editor is loaded from an IHTMLDocument2 built out of the HTML.
            IHTMLDocument2 parsedDocument = HTMLDocumentHelper.StringToHTMLDoc(markup, documentName);

            return new ContentEditorProxy(this, contentEditorSite, internetSecurityManager, parsedDocument, options, dlControlFlags, color, wpost);
        }
        /// <summary>
        /// Runs HTML through the cleanup pipeline: sterilize, strip namespaced tags, parse into a DOM,
        /// thin, convert URLs to absolute, and balance.
        /// </summary>
        /// <param name="html">HTML to clean.</param>
        /// <param name="baseUrl">Base URL handed to the absolute-URL conversion.</param>
        /// <param name="preserveImages">Forwarded to the thinner.</param>
        /// <param name="strip">Forwarded to the thinner.</param>
        /// <param name="preserveTables">When true, the thinner is additionally given LightWeightHTMLThinner2.PreserveTables.</param>
        /// <returns>The balanced HTML, or an empty string when the parsed document has no body.</returns>
        public static string CleanupHtml(string html, string baseUrl, bool preserveImages, bool strip, bool preserveTables)
        {
            // sterilize with every flag except RemoveStyles
            string sterilized = UnsafeHtmlFragmentHelper.SterilizeHtml(html, UnsafeHtmlFragmentHelper.Flag.AllFlags ^ UnsafeHtmlFragmentHelper.Flag.RemoveStyles);
            string withoutNamespacedTags = StripNamespacedTags(sterilized);

            // round-trip through a DOM so that tags come out balanced
            IHTMLDocument2 parsed = HTMLDocumentHelper.StringToHTMLDoc(withoutNamespacedTags, null, false);
            if (parsed.body == null)
            {
                return string.Empty;
            }

            // thin the body markup
            string bodyMarkup = parsed.body.innerHTML;
            string thinned = preserveTables
                ? LightWeightHTMLThinner2.Thin(bodyMarkup, preserveImages, strip, LightWeightHTMLThinner2.PreserveTables)
                : LightWeightHTMLThinner2.Thin(bodyMarkup, preserveImages, strip);

            string absolutized = LightWeightHTMLUrlToAbsolute.ConvertToAbsolute(thinned, baseUrl, false, true, true);

            // balance and return
            return HTMLBalancer.Balance(absolutized);
        }
        /// <summary>
        /// Starts the capture: hides the form, decides which URL to load (the configured URL, or a
        /// temp file written from the configured HTML content) and navigates the browser control to it.
        /// Further processing happens in the DocumentComplete handler.
        /// </summary>
        public void DoCapture()
        {
            // show the form w/o activating then make it invisible
            User32.SetWindowPos(Handle, HWND.BOTTOM, -1, -1, 1, 1, SWP.NOACTIVATE);
            Visible = false;

            string target;
            if (_htmlScreenCaptureCore.HtmlUrl == null)
            {
                // No URL was given, so persist the HTML content to a temp file and load that.
                _contentFile = TempFileManager.Instance.CreateTempFile("content.htm");
                using (TextWriter contentWriter = new StreamWriter(_contentFile, false, Encoding.UTF8))
                {
                    // The "Mark Of The Web" makes the HTML execute in the Internet Zone; otherwise it
                    // would run in the Local Machine zone, which won't allow JavaScript or
                    // object/embed tags.
                    string markedHtml = HTMLDocumentHelper.AddMarkOfTheWeb(_htmlScreenCaptureCore.HtmlContent, "about:internet");
                    contentWriter.Write(markedHtml);
                }
                target = _contentFile;
            }
            else
            {
                target = _htmlScreenCaptureCore.HtmlUrl;
            }

            // navigate, then wait for document complete for further processing
            _browserControl.DocumentComplete += _browserControl_DocumentComplete;
            _browserControl.Navigate(target);
        }
        /// <summary>
        /// Builds a <c>LightWeightHTMLDocument</c> from an <c>IHTMLDocument2</c>, copying over its
        /// frames, style references, doc type and saved-from header before parsing.
        /// </summary>
        /// <param name="htmlDocument">Source document.</param>
        /// <param name="url">URL associated with the document; also used as the base for path conversion.</param>
        /// <param name="name">Name given to the new document.</param>
        /// <param name="escapePaths">When true, the HTML is passed through ConvertToAbsolute first.</param>
        /// <param name="escapeEmptyString">Forwarded to ConvertToAbsolute.</param>
        /// <returns>The parsed document, or null when the source cannot be serialized to a string.</returns>
        private static LightWeightHTMLDocument FromIHTMLDocument2(IHTMLDocument2 htmlDocument, string url, string name, bool escapePaths, bool escapeEmptyString)
        {
            string serialized = HTMLDocumentHelper.HTMLDocToString(htmlDocument);
            if (serialized == null)
            {
                return null;
            }

            string documentHtml = escapePaths
                ? LightWeightHTMLUrlToAbsolute.ConvertToAbsolute(serialized, url, true, escapeEmptyString)
                : serialized;

            LightWeightHTMLDocument result = new LightWeightHTMLDocument(documentHtml, url, name);

            // frames
            result.SetFrames(GetLightWeightDocumentForFrames(htmlDocument));

            // style references
            result.SetStyleReferences(HTMLDocumentHelper.GetStyleReferencesForDocument(htmlDocument, url));

            // doc type and saved-from header
            HTMLDocumentHelper.SpecialHeaders headers = HTMLDocumentHelper.GetSpecialHeaders(htmlDocument);
            result._docType = headers.DocType;
            result._savedFrom = headers.SavedFrom;

            result.Parse();
            return result;
        }
        /// <summary>
        /// Downloads the styles and resources referenced by an editing template into the blog
        /// template directory and rewrites the downloaded root file so its path token points at
        /// an absolute file:/// location.
        /// </summary>
        /// <param name="templateContents">HTML of the editing template.</param>
        /// <param name="templateUrl">URL the template is associated with; also used when applying credentials.</param>
        /// <param name="progress">Host that receives progress updates.</param>
        /// <returns>Full path of the downloaded template root file.</returns>
        private string DownloadTemplateFiles(string templateContents, string templateUrl, IProgressHost progress)
        {
            progress.UpdateProgress(Res.Get(StringId.ProgressDownloadingSupportingFiles));
            FileBasedSiteStorage files = new FileBasedSiteStorage(_blogTemplateDir);

            // convert the string to a stream
            MemoryStream templateStream = new MemoryStream();
            StreamWriter writer         = new StreamWriter(templateStream, Encoding.UTF8);

            writer.Write(templateContents);
            // flush so the bytes reach the MemoryStream, then rewind for reading
            writer.Flush();
            templateStream.Seek(0, SeekOrigin.Begin);

            //read the stream into a lightweight HTML.  Note that we use from LightWeightHTMLDocument.FromIHTMLDocument2
            //instead of LightWeightHTMLDocument.FromStream because from stream improperly shoves a saveFrom declaration
            //above the docType (bug 289357)
            IHTMLDocument2          doc  = HTMLDocumentHelper.StreamToHTMLDoc(templateStream, templateUrl, true);
            LightWeightHTMLDocument ldoc = LightWeightHTMLDocument.FromIHTMLDocument2(doc, templateUrl, true, false);

            PageDownloadContext downloadContext = new PageDownloadContext(0);

            ApplyCredentials(downloadContext, templateUrl);
            using (PageToDownloadFactory downloadFactory = new PageToDownloadFactory(ldoc, downloadContext, _parentControl))
            {
                //calculate the dependent styles and resources
                ProgressTick tick = new ProgressTick(progress, 50, 100);
                downloadFactory.CreatePagesToDownload(tick);
                tick.UpdateProgress(100, 100);

                //download the dependent styles and resources
                tick = new ProgressTick(progress, 50, 100);
                PageAndReferenceDownloader downloader = new PageAndReferenceDownloader(downloadFactory.PagesToDownload, files);
                this.ApplyCredentials(downloader, templateUrl);
                downloader.Download(tick);
                tick.UpdateProgress(100, 100);

                //Expand out the relative paths in the downloaded HTML file with absolute paths.
                //Note: this is necessary so that template resources are not improperly resolved relative
                //      to the location of the file the editor is editing.
                //The root file is first moved aside to "<file>.token", then re-created at its original
                //path with the path token replaced, and the ".token" copy is deleted.
                string blogTemplateFile = Path.Combine(_blogTemplateDir, files.RootFile);
                string origFile         = blogTemplateFile + ".token";
                File.Move(blogTemplateFile, origFile);
                string absPath = String.Format(CultureInfo.InvariantCulture, "file:///{0}/{1}", _blogTemplateDir.Replace('\\', '/'), downloader.PathToken);
                TextHelper.ReplaceInFile(origFile, downloader.PathToken, blogTemplateFile, absPath);
                File.Delete(origFile);

                //fix up the files
                FixupDownloadedFiles(blogTemplateFile, files, downloader.PathToken);

                //complete the progress.
                progress.UpdateProgress(100, 100);

                //record the absolute path next to the template in "<file>.path"
                File.WriteAllText(blogTemplateFile + ".path", absPath);
                return(blogTemplateFile);
            }
        }
        /// <summary>
        /// Fills in frames and style references from <paramref name="document"/>, but only for
        /// whichever of the two has not been set yet.
        /// </summary>
        /// <param name="document">Document to read frames and style references from.</param>
        /// <param name="baseUrl">Base URL passed to the style reference lookup.</param>
        public void UpdateBasedUponHTMLDocumentData(IHTMLDocument2 document, string baseUrl)
        {
            bool framesMissing = _frames == null;
            if (framesMissing)
            {
                SetFrames(GetLightWeightDocumentForFrames(document));
            }

            bool styleReferencesMissing = _styleResourceUrls == null;
            if (styleReferencesMissing)
            {
                SetStyleReferences(HTMLDocumentHelper.GetStyleReferencesForDocument(document, baseUrl));
            }
        }
        /// <summary>
        /// Creates a set of BlogTemplateFiles using a specific region locator strategy.
        /// </summary>
        /// <param name="progress">Host that receives progress updates; also the parent of the ticks used for cancel checks.</param>
        /// <param name="regionLocatorStrategy">Strategy whose regions are prepared before, and cleaned up after, the templates are generated.</param>
        /// <param name="templateStrategies">Template strategies, indexed in parallel with <paramref name="templateTypes"/>.</param>
        /// <param name="templateTypes">Template types to generate; one template file is attempted per entry.</param>
        /// <returns>The template files that were produced, or null when none could be produced.</returns>
        private BlogEditingTemplateFile[] GetBlogTemplateFiles(IProgressHost progress, BlogPostRegionLocatorStrategy regionLocatorStrategy, BlogEditingTemplateStrategy[] templateStrategies, BlogEditingTemplateType[] templateTypes)
        {
            BlogEditingTemplateFile[] blogTemplateFiles = null;
            try
            {
                regionLocatorStrategy.PrepareRegions(new ProgressTick(progress, 25, 100));

                ArrayList    templateFiles = new ArrayList();
                ProgressTick tick          = new ProgressTick(progress, 50, 100);
                for (int i = 0; i < templateTypes.Length; i++)
                {
                    ProgressTick parseTick = new ProgressTick(tick, 1, templateTypes.Length);
                    try
                    {
                        CheckCancelRequested(parseTick);
                        // select the strategy for this template type (stored in the templateStrategy member)
                        templateStrategy = templateStrategies[i];

                        // Parse the blog post HTML into an editing template.
                        // Note: we can't use MarkupServices to parse the document from a non-UI thread,
                        // so we have to execute the parsing portion of the template download operation on the UI thread.
                        string editingTemplate = ParseWebpageIntoEditingTemplate_OnUIThread(_parentControl, regionLocatorStrategy, new ProgressTick(parseTick, 1, 5));

                        // check for cancel
                        CheckCancelRequested(parseTick);

                        string baseUrl = HTMLDocumentHelper.GetBaseUrl(editingTemplate, _blogHomepageUrl);

                        // Download the template stylesheets and embedded resources (this lets the editing template load faster..and works offline!)
                        string templateFile = DownloadTemplateFiles(editingTemplate, baseUrl, new ProgressTick(parseTick, 4, 5));
                        templateFiles.Add(new BlogEditingTemplateFile(templateTypes[i], templateFile));
                    }
                    catch (BlogClientAbortGettingTemplateException)
                    {
                        // this exception stops the whole operation: log it and let it propagate
                        Trace.WriteLine(String.Format(CultureInfo.CurrentCulture, "Failed to download template {0}.  Aborting getting further templates", templateTypes[i].ToString()));
                        throw;
                    }
                    catch (Exception e)
                    {
                        // any other failure only skips this template type; the loop continues with the next one
                        Trace.WriteLine(String.Format(CultureInfo.CurrentCulture, "Failed to download template {0}: {1}", templateTypes[i].ToString(), e.ToString()));
                    }
                }
                if (templateFiles.Count > 0)
                {
                    blogTemplateFiles = (BlogEditingTemplateFile[])templateFiles.ToArray(typeof(BlogEditingTemplateFile));
                }
            }
            finally
            {
                // regions are cleaned up whether or not template generation succeeded
                regionLocatorStrategy.CleanupRegions(new ProgressTick(progress, 25, 100));
            }
            return(blogTemplateFiles);
        }
        /// <summary>
        /// Creates an editing template from <paramref name="template"/>, substituting the default
        /// template HTML when validation fails, and adds the Mark Of The Web to the result.
        /// </summary>
        /// <param name="template">Template HTML to use.</param>
        /// <param name="containsTitle">Stored in <c>ContainsTitle</c> and used for validation and for the default template.</param>
        public BlogEditingTemplate(string template, bool containsTitle)
        {
            ContainsTitle = containsTitle;

            bool templateIsValid = ValidateTemplate(template, ContainsTitle);
            string effectiveTemplate = template;
            if (!templateIsValid)
            {
                Trace.WriteLine("Invalid editing template detected");
                effectiveTemplate = GetDefaultTemplateHtml(containsTitle);
            }

            //sandbox the template in the Internet Security zone
            Template = HTMLDocumentHelper.AddMarkOfTheWeb(effectiveTemplate, "about:internet");
        }
Exemple #14
0
        /// <summary>
        /// Loads the downloaded blog homepage into a document and locates the title and body
        /// regions of the most recent post in it.
        /// </summary>
        /// <param name="progress">Host used for cancel checks and for the final progress update.</param>
        /// <returns>The located title regions, the scrubbed body region and the document they belong to.</returns>
        /// <exception cref="BlogClientAbortGettingTemplateException">The page uses a dynamic template.</exception>
        /// <exception cref="Exception">The title or body text could not be found, or the body is smart content.</exception>
        public override BlogPostRegions LocateRegionsOnUIThread(IProgressHost progress)
        {
            // Maximum time to wait for the document to finish loading.
            const long documentLoadTimeoutMs = 10000;

            blogHomepageContents.Seek(0, SeekOrigin.Begin);
            IHTMLDocument2 doc2 = HTMLDocumentHelper.GetHTMLDocumentFromStream(blogHomepageContents, _blogHomepageUrl);

            // Ensure that the document is fully loaded.
            // If it is not fully loaded, then viewing its current style is non-deterministic.
            // A Stopwatch is used instead of DateTime.Now so that the timeout is not affected by
            // wall-clock changes (DST transitions, manual or NTP clock adjustments).
            System.Diagnostics.Stopwatch loadTimer = System.Diagnostics.Stopwatch.StartNew();

            while (!progress.CancelRequested && !HTMLDocumentHelper.IsReady(doc2))
            {
                if (loadTimer.ElapsedMilliseconds > documentLoadTimeoutMs)
                {
                    // Timing out here is not fatal.
                    Trace.WriteLine("Timed out while loading blog homepage for theme detection.");
                    break;
                }

                // pump messages while waiting for the document to finish loading
                Application.DoEvents();
            }

            // The Google/Blogger dynamic templates load the pages dynamically using Ajax, so we don't have any template to use.
            if (IsUsingDynamicTemplate(doc2))
            {
                throw new BlogClientAbortGettingTemplateException();
            }

            IHTMLElement[] titles = FindText(_titleText, doc2.body);
            IHTMLElement[] bodies = FindText(_bodyText, doc2.body);
            if (titles.Length == 0 || bodies.Length == 0)
            {
                throw new Exception("Unable to locate blog post elements using most recent post");
            }

            if (IsSmartContent(bodies[0]))
            {
                throw new Exception("Most recent post is smart content");
            }

            BlogPostRegions regions = new BlogPostRegions();

            regions.TitleRegions = titles;

            // scrub the post body element to avoid improperly including extraneous parent elements
            regions.BodyRegion = ScrubPostBodyRegionParentElements(bodies[0]);
            regions.Document   = doc2;

            progress.UpdateProgress(100, 100);

            return(regions);
        }
        /// <summary>
        /// Formats the given HTML, adds the Mark Of The Web, writes it to the (lazily created)
        /// temp file and navigates the preview box to that file.
        /// </summary>
        /// <param name="html">HTML to display.</param>
        /// <param name="formatter">Callback applied to the HTML before it is written.</param>
        private void DisplayHtml(string html, Formatter formatter)
        {
            // the temp file is created on first use and reused afterwards
            if (htmlPath == null)
            {
                htmlPath = TempFileManager.Instance.CreateTempFile("video.html");
            }

            string formattedHtml = formatter(html);
            string markedHtml = HTMLDocumentHelper.AddMarkOfTheWeb(formattedHtml, "about:internet");

            FileHelper.WriteFile(htmlPath, markedHtml, false, Encoding.UTF8);
            previewBox.Navigate(htmlPath);
        }
Exemple #16
0
        /// <summary>
        /// Downloads a webpage from a blog and searches for TEMPORARY_POST_TITLE_GUID, retrying
        /// until the marker shows up or the attempts are exhausted.
        /// </summary>
        /// <param name="blogPageUrl">URL of the blog page to download.</param>
        /// <param name="progress">Host used for progress updates and cancel checks.</param>
        /// <returns>Stream (positioned at the start) containing document which contains TEMPORARY_POST_TITLE_GUID.</returns>
        /// <exception cref="OperationCancelledException">Cancel was requested between attempts.</exception>
        /// <exception cref="OperationTimedOutException">No attempt produced a page containing the marker.</exception>
        private Stream DownloadBlogPage(string blogPageUrl, IProgressHost progress)
        {
            ProgressTick   tick      = new ProgressTick(progress, 50, 100);
            MemoryStream   memStream = null;
            IHTMLDocument2 doc2      = null;

            // WinLive 221984: Theme detection timing out intermittently on WordPress.com
            // The temp post *often* takes more than a minute to show up on the blog home page.
            // The download progress dialog has a cancel button, we'll try a lot before giving up.
            for (int i = 0; i < 30 && doc2 == null; i++)
            {
                if (progress.CancelRequested)
                {
                    throw new OperationCancelledException();
                }
                tick.UpdateProgress(0, 0, Res.Get(StringId.ProgressDownloadingWeblogEditingStyle));
                // Sleep to give the post enough time to show up.
                // The first 10 attempts use a 1 second delay, the remaining 20 use a 10 second delay.
                // This means we wait for 210 seconds (10s + 200s), plus the time spent downloading,
                // before we consider the operation timed out.
                Thread.Sleep(i < 10 ? 1000 : 10000);

                // Add random parameter to URL to bypass cache
                var urlRandom = UrlHelper.AppendQueryParameters(blogPageUrl, new string[] { Guid.NewGuid().ToString() });

                // Each attempt gets a fresh buffer. The response is disposed as soon as its content has
                // been copied so that connections are not held open across the (up to 30) attempts.
                memStream = new MemoryStream();
                using (HttpWebResponse resp = _pageDownloader(urlRandom, 60000))
                using (Stream respStream = resp.GetResponseStream())
                {
                    StreamHelper.Transfer(respStream, memStream);
                }

                //read in the HTML file and determine if it contains the title element
                memStream.Seek(0, SeekOrigin.Begin);
                doc2 = HTMLDocumentHelper.GetHTMLDocumentFromStream(memStream, urlRandom);
                if (HTMLDocumentHelper.FindElementContainingText(doc2, TEMPORARY_POST_TITLE_GUID) == null)
                {
                    // marker not on the page yet; clear doc2 so the loop tries again
                    doc2 = null;
                }
            }
            if (doc2 == null)
            {
                throw new OperationTimedOutException();
            }
            tick.UpdateProgress(100, 100);

            //return the stream
            memStream.Seek(0, SeekOrigin.Begin);
            return(memStream);
        }
        /// <summary>
        /// Builds a parsed <c>LightWeightHTMLDocument</c> from an HTML string.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <param name="baseUrl">Base URL of the document; also used for the path conversion.</param>
        /// <param name="name">Name given to the new document.</param>
        /// <param name="escapePaths">When true, the HTML is passed through ConvertToAbsolute first.</param>
        /// <returns>The parsed document with its doc type and saved-from header populated.</returns>
        public static LightWeightHTMLDocument FromString(string html, string baseUrl, string name, bool escapePaths)
        {
            string documentHtml = escapePaths
                ? LightWeightHTMLUrlToAbsolute.ConvertToAbsolute(html, baseUrl)
                : html;

            LightWeightHTMLDocument result = new LightWeightHTMLDocument(documentHtml, baseUrl, name);

            // the special headers are read from the same HTML the document was built from
            HTMLDocumentHelper.SpecialHeaders headers = HTMLDocumentHelper.GetSpecialHeaders(documentHtml, baseUrl);
            result._docType = headers.DocType;
            result._savedFrom = headers.SavedFrom;

            result.Parse();
            return result;
        }
Exemple #18
0
        /// <summary>
        /// Requests TargetUrl, gives the header and content handlers a chance to cancel, then
        /// downloads the page and its references into DestinationPath.
        /// </summary>
        /// <param name="timeoutMs">Timeout, in milliseconds, used for the page request and for the reference download.</param>
        /// <returns>Path of the captured root page inside DestinationPath.</returns>
        /// <exception cref="OperationCancelledException">
        /// A header or content handler cleared the continue flag.
        /// </exception>
        public string Capture(int timeoutMs)
        {
            // flag indicating whether we should continue with the capture
            bool continueCapture = true;

            MemoryStream pageStream = new MemoryStream();

            // request the page; the response is disposed on every path, including cancellation
            // after the headers have been inspected, so the connection is always released
            using (HttpWebResponse response = RequestPage(TargetUrl, timeoutMs))
            {
                OnHeadersReceived(response.Headers, ref continueCapture);
                if (!continueCapture)
                {
                    throw new OperationCancelledException();
                }

                // transfer it to a stream
                using (Stream responseStream = response.GetResponseStream())
                {
                    StreamHelper.Transfer(responseStream, pageStream);
                }
            }
            pageStream.Seek(0, SeekOrigin.Begin);

            // allow filter on content
            // (the reader is intentionally not disposed: disposing it would also close pageStream,
            // which is read again below)
            OnContentReceived(new StreamReader(pageStream).ReadToEnd(), ref continueCapture);
            if (!continueCapture)
            {
                throw new OperationCancelledException();
            }
            pageStream.Seek(0, SeekOrigin.Begin);

            // Read the stream into a lightweight HTML doc. We use from LightWeightHTMLDocument.FromIHTMLDocument2
            // instead of LightWeightHTMLDocument.FromStream because from stream improperly shoves a saveFrom declaration
            // above the docType (bug 289357)
            IHTMLDocument2          doc  = HTMLDocumentHelper.StreamToHTMLDoc(pageStream, TargetUrl, false);
            LightWeightHTMLDocument ldoc = LightWeightHTMLDocument.FromIHTMLDocument2(doc, TargetUrl, true);

            // download references
            FileBasedSiteStorage       siteStorage = new FileBasedSiteStorage(DestinationPath, "index.htm");
            PageToDownload             page        = new PageToDownload(ldoc, TargetUrl, siteStorage.RootFile);
            PageAndReferenceDownloader downloader  = new PageAndReferenceDownloader(new PageToDownload[] { page }, siteStorage);

            downloader.Download(new TimeoutProgressHost(timeoutMs));

            // return path to captured page
            return(Path.Combine(DestinationPath, siteStorage.RootFile));
        }
Exemple #19
0
        /// <summary>
        /// Checks whether the element's content, converted to plain text and normalized,
        /// contains <paramref name="normalizedText"/>.
        /// </summary>
        /// <param name="normalizedText">Text to look for; expected to have gone through HTMLToPlainText and NormalizeText already.</param>
        /// <param name="e">Element to inspect.</param>
        /// <returns>True when the text is found; false when it is not or when the element has no inner text.</returns>
        private static bool ContainsNormalizedText(string normalizedText, IHTMLElement e)
        {
            if (e.innerText == null)
            {
                return false;
            }

            // Run the element's HTML through the same two steps as normalizedText so that
            // like is compared with like.
            string plainText = HTMLDocumentHelper.HTMLToPlainText(e.innerHTML);
            string normalizedElementText = NormalizeText(plainText);

            return normalizedElementText.IndexOf(normalizedText, StringComparison.CurrentCulture) >= 0;
        }
Exemple #20
0
        /// <summary>
        /// Event handler called when the async web request completes. Parses the response (if any)
        /// into a document, stores its metadata, and then signals completion in every case.
        /// </summary>
        private void WebRequestComplete(object send, EventArgs e)
        {
            Stream responseStream = m_webRequest.ResponseStream;
            if (responseStream != null)
            {
                IHTMLDocument2 parsedDocument;
                using (StreamReader responseReader = new StreamReader(responseStream))
                {
                    string responseHtml = responseReader.ReadToEnd();
                    parsedDocument = HTMLDocumentHelper.StringToHTMLDoc(responseHtml, m_url);
                }

                if (parsedDocument != null)
                {
                    MetaData = new HTMLMetaData(parsedDocument);
                }
            }

            // completion is signalled whether or not metadata could be extracted
            FireMetaDataComplete();
        }
 /// <summary>
 /// Detects the background color of the post body on the page at <paramref name="uri"/>, where
 /// the post body is the element containing BlogEditingTemplate.POST_BODY_MARKER.
 /// The page has to be loaded in a browser (not just a DOM) because page elements are only
 /// laid out relative to each other while they are being displayed in a browser.
 /// </summary>
 /// <param name="uri">Address of the page to load.</param>
 /// <param name="defaultColor">
 /// Handed to the browser invoker, and returned by the callback when the marker is not found
 /// in exactly one element.
 /// </param>
 /// <returns>The result of the browser operation.</returns>
 public static Color? DetectColor(string uri, Color? defaultColor)
 {
     return BrowserOperationInvoker.InvokeAfterDocumentComplete(uri, "BackgroundColorDetector", 700, 700, defaultColor,
         delegate(ExplorerBrowserControl browser)
         {
             IHTMLDocument2 loadedDocument = browser.Document as IHTMLDocument2;
             IHTMLElement[] markerElements = HTMLDocumentHelper.FindElementsContainingText(loadedDocument, BlogEditingTemplate.POST_BODY_MARKER);

             // only an unambiguous single match is used
             if (markerElements.Length != 1)
             {
                 return defaultColor;
             }

             IHTMLElement bodyElement = markerElements[0];

             // give short post bodies a height of 300 before reading the color
             if (bodyElement.offsetHeight < 300)
             {
                 bodyElement.style.height = 300;
             }

             return HTMLColorHelper.GetBackgroundColor(bodyElement, true, null, Color.White);
         });
 }
        /// <summary>
        /// Opens a stream over the resource identified by <paramref name="url"/>.
        /// File URLs are opened from disk, downloadable URLs are downloaded, and anything
        /// else is treated as a relative URL that is resolved against a base URL first.
        /// </summary>
        /// <param name="url">The URL of the resource (file, downloadable, or relative).</param>
        /// <param name="pageUrl">
        /// Fallback base URL used when the element's document does not supply one.
        /// </param>
        /// <param name="element">The element whose owning document is consulted for a base URL.</param>
        /// <returns>A stream over the resource, or null if it could not be located.</returns>
        private static Stream GetStreamForUrl(string url, string pageUrl, IHTMLElement element)
        {
            // Case 1: local file.
            if (UrlHelper.IsFileUrl(url))
            {
                string localPath = new Uri(url).LocalPath;
                if (File.Exists(localPath))
                {
                    return File.OpenRead(localPath);
                }

                // A missing file is only logged in automation mode; otherwise it raises a trace failure.
                if (ApplicationDiagnostics.AutomationMode)
                {
                    Trace.WriteLine("File " + url + " not found");
                }
                else
                {
                    Trace.Fail("File " + url + " not found");
                }
                return null;
            }

            // Case 2: the URL can be downloaded as-is.
            if (UrlHelper.IsUrlDownloadable(url))
            {
                return HttpRequestHelper.SafeDownloadFile(url);
            }

            // Case 3: resolve against a base URL. Preference order: the document's base URL,
            // then the supplied page URL, then the document's own URL.
            string baseUrl = HTMLDocumentHelper.GetBaseUrlFromDocument((IHTMLDocument2)element.document);
            if (baseUrl == null)
            {
                baseUrl = pageUrl;
            }
            if (baseUrl == null)
            {
                baseUrl = ((IHTMLDocument2)element.document).url;
            }
            if (baseUrl == null)
            {
                return null;
            }

            url = UrlHelper.EscapeRelativeURL(baseUrl, url);
            if (!UrlHelper.IsUrlDownloadable(url))
            {
                return null;
            }

            return HttpRequestHelper.SafeDownloadFile(url);
        }
Exemple #23
0
        /// <summary>
        /// Loads the most recent post, derives plain-text versions of its title and body,
        /// verifies that both are distinctive enough to locate the post regions, and then
        /// downloads the blog homepage.
        /// </summary>
        /// <param name="progress">Progress host passed on to the homepage download.</param>
        /// <exception cref="Exception">No recent posts are available.</exception>
        /// <exception cref="ArgumentException">
        /// The normalized title or body text is too short, or one is contained in the other.
        /// </exception>
        public override void PrepareRegions(IProgressHost progress)
        {
            BlogPost[] recentPosts = _blogClient.GetRecentPosts(_blogAccount.BlogId, 1, false, DateTime.UtcNow);

            bool nothingToAnalyze = recentPosts == null || recentPosts.Length == 0;
            recentPostCount = nothingToAnalyze ? 0 : recentPosts.Length;
            if (nothingToAnalyze)
            {
                throw new Exception("No recent posts available");
            }

            mostRecentPost = recentPosts[0];
            _titleText = HTMLDocumentHelper.HTMLToPlainText(mostRecentPost.Title);
            _bodyText = HTMLDocumentHelper.HTMLToPlainText(mostRecentPost.MainContents);

            string titleKey = NormalizeText(_titleText);
            string bodyKey = NormalizeText(_bodyText);

            // The normalized text must be distinctive enough to identify the post regions.
            // Title: at least 4 characters.
            if (titleKey.Length < 4)
            {
                throw new ArgumentException("Title text is not unique enough to use for style detection");
            }

            // Body: at least 8 characters and at least one space.
            bool bodyHasSpace = bodyKey.IndexOf(' ') != -1;
            if (bodyKey.Length < 8 || !bodyHasSpace)
            {
                throw new ArgumentException("Content text is not unique enough to use for style detection");
            }

            // The title must not occur inside the body...
            if (bodyKey.IndexOf(titleKey, StringComparison.CurrentCulture) > -1)
            {
                throw new ArgumentException("Title text is not unique enough to use for style detection");
            }

            // ...and the body must not occur inside the title.
            if (titleKey.IndexOf(bodyKey, StringComparison.CurrentCulture) > -1)
            {
                throw new ArgumentException("Content text is not unique enough to use for style detection");
            }

            blogHomepageContents = DownloadBlogPage(_blogHomepageUrl, progress);
        }
Exemple #24
0
        /// <summary>
        /// Prepends the special headers (a 'saved from' line and the doctype declaration)
        /// to the supplied HTML when they are available.
        /// </summary>
        /// <param name="html">The HTML to which the headers are added.</param>
        /// <returns>The HTML with any applicable headers inserted at the start.</returns>
        protected virtual string InsertSpecialHeaders(string html)
        {
            // For local file urls, preserve the 'saved from' so that the page or snippet stays in the correct sandbox.
            // For web based captures, ignore the existing savedFrom and use the current url as the saved from.
            HTMLDocumentHelper.SpecialHeaders specialHeaders = HTMLDocumentHelper.GetSpecialHeaders(html, Url);

            // A 'saved from' line is only added when the HTML does not already carry one.
            if (specialHeaders.SavedFrom == null)
            {
                if (_metaData != null && _metaData.SavedFrom != null)
                {
                    html = html.Insert(0, _metaData.SavedFrom + "\r\n");
                }
                else if (!String.IsNullOrEmpty(Url) && !UrlHelper.IsFileUrl(Url))
                {
                    html = html.Insert(0, UrlHelper.GetSavedFromString(Url) + "\r\n");
                }
            }

            // Ensure any doctype declaration comes first.
            bool hasDocType = _metaData != null && _metaData.DocType != null;
            if (hasDocType)
            {
                html = html.Insert(0, _metaData.DocType);
            }

            return html;
        }
Exemple #25
0
        /// <summary>
        /// Parses the downloaded post page and locates the elements that contain the
        /// temporary post title and body markers.
        /// </summary>
        /// <param name="stream">Stream containing the HTML of the page.</param>
        /// <param name="postSourceUrl">URL passed along when the document is created from the stream.</param>
        /// <param name="progress">Progress host that receives status updates.</param>
        /// <returns>The document together with its title regions and body region.</returns>
        /// <exception cref="Exception">No element containing the title marker was found.</exception>
        private BlogPostRegions ParseBlogPostIntoTemplate(Stream stream, string postSourceUrl, IProgressHost progress)
        {
            progress.UpdateProgress(Res.Get(StringId.ProgressCreatingEditingTemplate));

            // Parse the document that will become the blog template.
            IHTMLDocument2 htmlDocument = HTMLDocumentHelper.GetHTMLDocumentFromStream(stream, postSourceUrl);
            IHTMLDocument3 htmlDocument3 = (IHTMLDocument3)htmlDocument;

            IHTMLElement[] titleMatches = HTMLDocumentHelper.FindElementsContainingText(htmlDocument, TEMPORARY_POST_TITLE_GUID);
            IHTMLElement bodyMatch = HTMLDocumentHelper.FindElementContainingText(htmlDocument, TEMPORARY_POST_BODY_GUID);

            bool bodyIsPlantedParagraph = bodyMatch != null && bodyMatch.tagName == "P";
            if (bodyIsPlantedParagraph)
            {
                // The body element is the <p> we planted; swap it for a DIV, which is the
                // safest element to act as the parent of all post content.
                IHTMLElement replacementDiv = htmlDocument.createElement("div");
                IHTMLDOMNode parentNode = bodyMatch.parentElement as IHTMLDOMNode;
                parentNode.replaceChild(replacementDiv as IHTMLDOMNode, bodyMatch as IHTMLDOMNode);
                bodyMatch = replacementDiv;
            }

            // Without at least one title element the test post cannot be used.
            if (titleMatches.Length <= 0)
            {
                throw new Exception("unable to access test post.");
            }

            // Every element that contains the title text is reported as a title region.
            BlogPostRegions result = new BlogPostRegions();
            result.Document = (IHTMLDocument)htmlDocument3;
            result.TitleRegions = titleMatches;
            result.BodyRegion = bodyMatch;

            progress.UpdateProgress(100, 100);
            return result;
        }
        /// <summary>
        /// Extracts the body of a response as text, based on its content type.
        /// </summary>
        /// <param name="resp">The response whose body is read.</param>
        /// <returns>
        /// The decoded body for "text/plain"; the thinned, plain-text, whitespace-compressed
        /// body for "text/html"; an empty string for any other or missing content type.
        /// </returns>
        private static string GetBodyText(HttpWebResponse resp)
        {
            // No content type means there is nothing we know how to decode.
            if (resp.ContentType == null || resp.ContentType.Length <= 0)
            {
                return "";
            }

            IDictionary contentTypeData = MimeHelper.ParseContentType(resp.ContentType, true);
            string mainType = (string)contentTypeData[""];

            if (mainType == "text/plain")
            {
                return DecodeBody(resp);
            }

            if (mainType == "text/html")
            {
                // Thin the markup, strip it down to plain text, then collapse whitespace runs.
                string thinnedHtml = LightWeightHTMLThinner2.Thin(DecodeBody(resp), true);
                string plainText = HTMLDocumentHelper.HTMLToPlainText(thinnedHtml);
                return StringHelper.CompressExcessWhitespace(plainText);
            }

            return "";
        }
Exemple #27
0
        /// <summary>
        /// Runs the blog service detection cascade against the weblog homepage.
        /// Detection strategies are attempted in order (generic Atom link, Blogger,
        /// RSD, homepage/URL analysis) and each later strategy only runs if the earlier
        /// ones did not succeed. Credential prompting is suppressed for the duration.
        /// </summary>
        /// <param name="progressHost">Progress host that receives status updates.</param>
        /// <returns>This instance.</returns>
        /// <remarks>
        /// No exception escapes this method: cancellation exceptions are absorbed,
        /// BlogAccountDetectorException is traced, and any other exception is traced and
        /// reported through ReportError.
        /// </remarks>
        protected override object DetectBlogService(IProgressHost progressHost)
        {
            using (BlogClientUIContextSilentMode uiContextScope = new BlogClientUIContextSilentMode()) //suppress prompting for credentials
            {
                try
                {
                    // get the weblog homepage and rsd service description if available
                    IHTMLDocument2 weblogDOM = GetWeblogHomepageDOM(progressHost);

                    // while we have the DOM available, scan for a writer manifest url
                    if (_manifestDownloadInfo == null)
                    {
                        string manifestUrl = WriterEditingManifest.DiscoverUrl(_homepageUrl, weblogDOM);
                        if (manifestUrl != String.Empty)
                        {
                            _manifestDownloadInfo = new WriterEditingManifestDownloadInfo(manifestUrl);
                        }
                    }

                    // html is null when the homepage DOM could not be obtained
                    string html = weblogDOM != null?HTMLDocumentHelper.HTMLDocToString(weblogDOM) : null;

                    bool detectionSucceeded = false;

                    // first attempt: generic Atom link detection
                    // NOTE(review): html may be null here, whereas the final fallback below
                    // guards against a null html - confirm the callee tolerates null.
                    if (!detectionSucceeded)
                    {
                        detectionSucceeded = AttemptGenericAtomLinkDetection(_homepageUrl, html, !ApplicationDiagnostics.PreferAtom);
                    }

                    // second attempt: Blogger detection, only for blogs flagged as Google Blogger
                    if (!detectionSucceeded && _blogSettings.IsGoogleBloggerBlog)
                    {
                        detectionSucceeded = AttemptBloggerDetection(_homepageUrl, html);
                    }

                    // third attempt: RSD based detection, falling back to homepage/url analysis
                    if (!detectionSucceeded)
                    {
                        RsdServiceDescription rsdServiceDescription = GetRsdServiceDescription(progressHost, weblogDOM);

                        // if there was no rsd service description or we fail to auto-configure from the
                        // rsd description then move on to other auto-detection techniques
                        if (!(detectionSucceeded = AttemptRsdBasedDetection(progressHost, rsdServiceDescription)))
                        {
                            // try detection by analyzing the homepage url and contents
                            UpdateProgress(progressHost, 75, Res.Get(StringId.ProgressAnalyzingHomepage));
                            if (weblogDOM != null)
                            {
                                detectionSucceeded = AttemptHomepageBasedDetection(_homepageUrl, html);
                            }
                            else
                            {
                                detectionSucceeded = AttemptUrlBasedDetection(_homepageUrl);
                            }

                            // if we successfully detected then see if we can narrow down
                            // to a specific weblog
                            if (detectionSucceeded)
                            {
                                if (!BlogProviderParameters.UrlContainsParameters(_postApiUrl))
                                {
                                    // we detected the provider, now see if we can detect the weblog id
                                    // (or at least the list of the user's weblogs)
                                    UpdateProgress(progressHost, 80, Res.Get(StringId.ProgressAnalyzingWeblogList));
                                    AttemptUserBlogDetection();
                                }
                            }
                        }
                    }

                    // last resort: retry generic Atom link detection with the final argument
                    // forced to false; the result of this call is not recorded
                    if (!detectionSucceeded && html != null)
                    {
                        AttemptGenericAtomLinkDetection(_homepageUrl, html, false);
                    }

                    // finished
                    UpdateProgress(progressHost, 100, String.Empty);
                }
                catch (OperationCancelledException)
                {
                    // WasCancelled == true
                }
                catch (BlogClientOperationCancelledException)
                {
                    Cancel();
                    // WasCancelled == true
                }
                catch (BlogAccountDetectorException ex)
                {
                    // in automation mode only log the failure; otherwise raise a trace failure
                    if (ApplicationDiagnostics.AutomationMode)
                    {
                        Trace.WriteLine(ex.ToString());
                    }
                    else
                    {
                        Trace.Fail(ex.ToString());
                    }
                    // ErrorOccurred == true
                }
                catch (Exception ex)
                {
                    // ErrorOccurred == true
                    Trace.Fail(ex.Message, ex.ToString());
                    ReportError(MessageId.WeblogDetectionUnexpectedError, ex.Message);
                }

                return(this);
            }
        }
Exemple #28
0
        /// <summary>
        /// Creates a data object for a document by serializing the document to an HTML
        /// string and storing it under DataFormats.Html.
        /// </summary>
        /// <param name="document">The document to expose; its url is passed to GetHTMLFormatString.</param>
        public HTMLDataObject(IHTMLDocument2 document)
        {
            // Serialize the document first, then wrap it in the HTML format string.
            string formattedHtml = GetHTMLFormatString(HTMLDocumentHelper.HTMLDocToString(document), document.url);

            IDataObject = new DataObject(DataFormats.Html, formattedHtml);
        }
        /// <summary>
        /// Creates a set of BlogTemplateFiles using a specific region locator strategy.
        /// Regions are prepared once up front and always cleaned up afterwards; one
        /// editing template is then generated and downloaded per requested template type.
        /// </summary>
        /// <param name="progress">Progress host that the progress ticks created here report into.</param>
        /// <param name="regionLocatorStrategy">Strategy used to prepare, locate and clean up the post regions.</param>
        /// <param name="templateStrategies">
        /// Template strategies, indexed in parallel with <paramref name="templateTypes"/>; the entry
        /// for the current type is stored in the templateStrategy member before parsing.
        /// </param>
        /// <param name="templateTypes">The template types to generate, one template file per entry.</param>
        /// <param name="targetUrl">
        /// The URL to analyze. If a post can be located, but not the body, this is used
        /// to navigate into the post and fetch its content directly.
        /// </param>
        /// <returns>
        /// The template files that were successfully created, or null if none could be created.
        /// </returns>
        private BlogEditingTemplateFile[] GetBlogTemplateFiles(IProgressHost progress, BlogPostRegionLocatorStrategy regionLocatorStrategy, BlogEditingTemplateStrategy[] templateStrategies, BlogEditingTemplateType[] templateTypes, string targetUrl)
        {
            BlogEditingTemplateFile[] blogTemplateFiles = null;
            try
            {
                regionLocatorStrategy.PrepareRegions(new ProgressTick(progress, 25, 100));

                ArrayList    templateFiles = new ArrayList();
                ProgressTick tick          = new ProgressTick(progress, 50, 100);
                for (int i = 0; i < templateTypes.Length; i++)
                {
                    ProgressTick parseTick = new ProgressTick(tick, 1, templateTypes.Length);
                    try
                    {
                        CheckCancelRequested(parseTick);
                        templateStrategy = templateStrategies[i];

                        // Clear _nextTryPostUrl flag
                        _nextTryPostUrl = null;

                        // Parse the blog post HTML into an editing template.
                        // Note: we can't use MarkupServices to parse the document from a non-UI thread,
                        // so we have to execute the parsing portion of the template download operation on the UI thread.
                        string editingTemplate = ParseWebpageIntoEditingTemplate_OnUIThread(_parentControl, regionLocatorStrategy, new ProgressTick(parseTick, 1, 5), targetUrl);

                        // If there's no editing template, there should be a URL to try next
                        // (the condition is logically equivalent to: editingTemplate != null || _nextTryPostUrl != null)
                        Debug.Assert(editingTemplate != null || (editingTemplate == null && _nextTryPostUrl != null));

                        // If the homepage has just been analysed and the _nextTryPostUrl flag is set
                        if (targetUrl == _blogHomepageUrl && _nextTryPostUrl != null && regionLocatorStrategy.CanRefetchPage)
                        {
                            // Try fetching the URL that has been specified, and reparse
                            progress.UpdateProgress(Res.Get(StringId.ProgressDownloadingWeblogEditingStyleDeep));
                            // Fetch the post page
                            regionLocatorStrategy.FetchTemporaryPostPage(SilentProgressHost.Instance, _nextTryPostUrl);
                            // Parse out the template
                            editingTemplate = ParseWebpageIntoEditingTemplate_OnUIThread(_parentControl, regionLocatorStrategy, new ProgressTick(parseTick, 1, 5), _nextTryPostUrl);
                        }

                        // check for cancel
                        CheckCancelRequested(parseTick);

                        string baseUrl = HTMLDocumentHelper.GetBaseUrl(editingTemplate, _blogHomepageUrl);

                        // Download the template stylesheets and embedded resources (this lets the editing template load faster..and works offline!)
                        string templateFile = DownloadTemplateFiles(editingTemplate, baseUrl, new ProgressTick(parseTick, 4, 5));
                        templateFiles.Add(new BlogEditingTemplateFile(templateTypes[i], templateFile));
                    }
                    catch (BlogClientAbortGettingTemplateException)
                    {
                        // An abort stops processing of all remaining template types: log and rethrow.
                        Trace.WriteLine(String.Format(CultureInfo.CurrentCulture, "Failed to download template {0}.  Aborting getting further templates", templateTypes[i].ToString()));
                        throw;
                    }
                    catch (Exception e)
                    {
                        // Any other failure only skips this template type; the loop continues with the next one.
                        Trace.WriteLine(String.Format(CultureInfo.CurrentCulture, "Failed to download template {0}: {1}", templateTypes[i].ToString(), e.ToString()));
                    }
                }
                // Only produce an array when at least one template succeeded; otherwise the result stays null.
                if (templateFiles.Count > 0)
                {
                    blogTemplateFiles = (BlogEditingTemplateFile[])templateFiles.ToArray(typeof(BlogEditingTemplateFile));
                }
            }
            finally
            {
                // Cleanup runs even when preparation or template generation threw.
                regionLocatorStrategy.CleanupRegions(new ProgressTick(progress, 25, 100));
            }
            return(blogTemplateFiles);
        }
Exemple #30
0
 /// <summary>
 /// Attaches the map controller to a document by passing _jsMapController to
 /// HTMLDocumentHelper.InjectObjectIntoScriptingEnvironment under the name "jsMapController".
 /// </summary>
 /// <param name="document">The map document that receives the controller object.</param>
 public void AttachToMapDocument(IHTMLDocument2 document)
 {
     HTMLDocumentHelper.InjectObjectIntoScriptingEnvironment(document, "jsMapController", _jsMapController);
 }