/// <summary>
/// Creates a lightweight document from an MSHTML document without supplying a name.
/// </summary>
/// <param name="htmlDocument">The source document; may be null.</param>
/// <param name="url">The url forwarded to the five-argument overload.</param>
/// <param name="escapePaths">Forwarded unchanged to the five-argument overload.</param>
/// <param name="escapeEmptyString">Forwarded unchanged to the five-argument overload.</param>
/// <returns>Null when <paramref name="htmlDocument"/> is null; otherwise the result of the five-argument overload.</returns>
public static LightWeightHTMLDocument FromIHTMLDocument2(IHTMLDocument2 htmlDocument, string url, bool escapePaths, bool escapeEmptyString)
        {
            // The third argument of the five-argument overload is the name; this overload has none to offer.
            return htmlDocument == null
                ? null
                : LightWeightHTMLDocument.FromIHTMLDocument2(htmlDocument, url, null, escapePaths, escapeEmptyString);
        }
        /// <summary>
        /// Creates a lightweight document from an MSHTML document, passing <c>true</c> for the
        /// boolean option of the three-argument overload.
        /// </summary>
        /// <param name="htmlDocument">The source document; may be null.</param>
        /// <param name="url">The url forwarded to the three-argument overload.</param>
        /// <returns>Null when <paramref name="htmlDocument"/> is null; otherwise the converted document.</returns>
        public static LightWeightHTMLDocument FromIHTMLDocument2(IHTMLDocument2 htmlDocument, string url)
        {
            if (htmlDocument != null)
            {
                // NOTE(review): the boolean is presumably escapePaths - confirm against the overload's declaration.
                return LightWeightHTMLDocument.FromIHTMLDocument2(htmlDocument, url, true);
            }

            return null;
        }
        /// <summary>
        /// Builds a lightweight document for every sub-frame embedded in the given HTML document.
        /// </summary>
        /// <param name="htmlDocument">
        /// The document whose embedded objects are enumerated. It is cast to IOleContainer,
        /// so it must support that interface.
        /// </param>
        /// <returns>
        /// One LightWeightHTMLDocument per embedded browser that could be converted; an empty
        /// array when there are none.
        /// </returns>
        public static LightWeightHTMLDocument[] GetLightWeightDocumentForFrames(IHTMLDocument2 htmlDocument)
        {
            ArrayList frameLightWeightDocuments = new ArrayList();

            // Get the IOleContainer for the html document (this requires that
            // the document is the root document in the browser)
            IOleContainer oleContainer = (IOleContainer)htmlDocument;
            IEnumUnknown enumUnknown;

            // Enumerate the controls in the browser
            oleContainer.EnumObjects(OLECONTF.EMBEDDINGS, out enumUnknown);

            // Iterate through the controls, one at a time, until the enumerator stops returning S_OK
            object unknown;

            while (HRESULT.S_OK == enumUnknown.Next(1, out unknown, IntPtr.Zero))
            {
                // Only subframes should cast to IWebBrowser2
                IWebBrowser2 webBrowser = unknown as IWebBrowser2;
                if (webBrowser == null)
                {
                    continue;
                }

                // A subframe is expected to expose its base frame implementation as well, but the
                // cast is not guaranteed to succeed, so the result is checked before use below.
                IHTMLFrameBase frameBase = unknown as IHTMLFrameBase;

                try
                {
                    IHTMLDocument2 frameDocument = webBrowser.Document as IHTMLDocument2;

                    if (frameDocument != null)
                    {
                        string frameUrl = frameDocument.url;

                        // Fall back to a null name instead of dereferencing a failed cast.
                        string frameName = (frameBase != null) ? frameBase.name : null;

                        LightWeightHTMLDocument document = LightWeightHTMLDocument.FromIHTMLDocument2(frameDocument, frameUrl, frameName);
                        if (document != null)
                        {
                            frameLightWeightDocuments.Add(document);
                        }
                    }
                }
                catch (InvalidCastException)
                {
                    // The frame's document could not be accessed; substitute an empty page
                    // that still records the frame's location.
                    string html = "<HTML></HTML>";
                    LightWeightHTMLDocument document = LightWeightHTMLDocument.FromString(html, webBrowser.LocationURL, webBrowser.LocationURL, true);
                    if (document != null)
                    {
                        frameLightWeightDocuments.Add(document);
                    }
                }
            }

            return (LightWeightHTMLDocument[])frameLightWeightDocuments.ToArray(typeof(LightWeightHTMLDocument));
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Requests TargetUrl, downloads the page together with its references into
        /// DestinationPath, and returns the path of the captured root file.
        /// </summary>
        /// <param name="timeoutMs">
        /// Timeout in milliseconds, passed both to the page request and to the progress host
        /// used for the reference download.
        /// </param>
        /// <returns>DestinationPath combined with the site storage's root file.</returns>
        /// <exception cref="OperationCancelledException">
        /// Thrown when OnHeadersReceived or OnContentReceived clears the continue flag.
        /// </exception>
        public string Capture(int timeoutMs)
        {
            // flag indicating whether we should continue with the capture
            bool continueCapture = true;

            // buffer that receives the full page body
            MemoryStream pageStream = new MemoryStream();

            // request the page; the response is disposed on every exit path (including
            // cancellation) so the underlying connection is always released
            using (HttpWebResponse response = RequestPage(TargetUrl, timeoutMs))
            {
                OnHeadersReceived(response.Headers, ref continueCapture);
                if (!continueCapture)
                {
                    throw new OperationCancelledException();
                }

                // transfer it to a stream
                using (Stream responseStream = response.GetResponseStream())
                {
                    StreamHelper.Transfer(responseStream, pageStream);
                }
            }
            pageStream.Seek(0, SeekOrigin.Begin);

            // allow filter on content. The reader is deliberately not disposed: disposing it
            // would close pageStream, which is read again below.
            OnContentReceived(new StreamReader(pageStream).ReadToEnd(), ref continueCapture);
            if (!continueCapture)
            {
                throw new OperationCancelledException();
            }
            pageStream.Seek(0, SeekOrigin.Begin);

            // Read the stream into a lightweight HTML doc. We use LightWeightHTMLDocument.FromIHTMLDocument2
            // instead of LightWeightHTMLDocument.FromStream because FromStream improperly shoves a saveFrom
            // declaration above the docType (bug 289357)
            IHTMLDocument2 doc = HTMLDocumentHelper.StreamToHTMLDoc(pageStream, TargetUrl, false);
            LightWeightHTMLDocument ldoc = LightWeightHTMLDocument.FromIHTMLDocument2(doc, TargetUrl, true);

            // download references
            FileBasedSiteStorage siteStorage = new FileBasedSiteStorage(DestinationPath, "index.htm");
            PageToDownload page = new PageToDownload(ldoc, TargetUrl, siteStorage.RootFile);
            PageAndReferenceDownloader downloader = new PageAndReferenceDownloader(new PageToDownload[] { page }, siteStorage);

            downloader.Download(new TimeoutProgressHost(timeoutMs));

            // return path to captured page
            return Path.Combine(DestinationPath, siteStorage.RootFile);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Downloads the HTML document at <paramref name="url"/> and wraps it in a PageToDownload
        /// that carries one reference per style resource of the document.
        /// </summary>
        /// <param name="url">The url to download; also passed to the PageToDownload constructor.</param>
        /// <param name="parent">Passed through to the PageToDownload constructor.</param>
        /// <param name="progress">Progress host handed to the document downloader.</param>
        /// <returns>The page created for the downloaded document.</returns>
        private PageToDownload DownloadUrl(string url, PageToDownload parent, IProgressHost progress)
        {
            PageToDownload page;
            LightWeightHTMLDocument document;

            using (HTMLDocumentDownloader downloader = new HTMLDocumentDownloader(_parentControl, url, null, _context.CookieString, _context.TimeoutMS, true))
            {
                // Fetch the current page
                downloader.DownloadHTMLDocument(progress);

                document = LightWeightHTMLDocument.FromIHTMLDocument2(downloader.HtmlDocument, downloader.Url);
                page = new PageToDownload(document, url, null, parent);

                // The downloader's url may differ from the requested one after a redirect,
                // so it replaces the page's absolute url
                page.AbsoluteUrl = downloader.Url;
            }

            // NOTE(review): FromIHTMLDocument2 returns null for a null HtmlDocument, in which case
            // the loop below throws - confirm whether the downloader can yield a null document.
            foreach (HTMLDocumentHelper.ResourceUrlInfo styleResource in document.StyleResourcesUrls)
            {
                ReferenceToDownload styleReference = new ReferenceToDownload(styleResource.ResourceUrl, page, styleResource.ResourceAbsoluteUrl);
                page.AddReference(styleReference);
            }

            return page;
        }
 /// <summary>
 /// Creates a lightweight document with an explicit name, passing <c>true</c> as the final
 /// argument of the five-argument overload.
 /// </summary>
 /// <param name="htmlDocument">The source document, forwarded unchanged.</param>
 /// <param name="url">The url, forwarded unchanged.</param>
 /// <param name="name">The name, forwarded unchanged.</param>
 /// <param name="escapePaths">Forwarded unchanged.</param>
 /// <returns>Whatever the five-argument overload returns.</returns>
 private static LightWeightHTMLDocument FromIHTMLDocument2(IHTMLDocument2 htmlDocument, string url, string name, bool escapePaths)
 {
     // The last argument of the five-argument overload is escapeEmptyString.
     const bool escapeEmptyString = true;

     LightWeightHTMLDocument result = LightWeightHTMLDocument.FromIHTMLDocument2(htmlDocument, url, name, escapePaths, escapeEmptyString);
     return result;
 }
 /// <summary>
 /// Creates a lightweight document with an explicit name, passing <c>true</c> for
 /// escapePaths to the four-argument overload.
 /// </summary>
 /// <param name="htmlDocument">The source document, forwarded unchanged.</param>
 /// <param name="url">The url, forwarded unchanged.</param>
 /// <param name="name">The name, forwarded unchanged.</param>
 /// <returns>Whatever the four-argument overload returns.</returns>
 private static LightWeightHTMLDocument FromIHTMLDocument2(IHTMLDocument2 htmlDocument, string url, string name)
 {
     const bool escapePaths = true;

     LightWeightHTMLDocument result = LightWeightHTMLDocument.FromIHTMLDocument2(htmlDocument, url, name, escapePaths);
     return result;
 }