/// <summary>
/// Creates a lightweight document from an MSHTML document, forwarding to the
/// overload that also takes a name and passing null for that name.
/// </summary>
/// <param name="htmlDocument">The source document; may be null.</param>
/// <param name="url">The url forwarded unchanged to the named overload.</param>
/// <param name="escapePaths">Forwarded unchanged to the named overload.</param>
/// <param name="escapeEmptyString">Forwarded unchanged to the named overload.</param>
/// <returns>Null when <paramref name="htmlDocument"/> is null; otherwise whatever the named overload returns.</returns>
public static LightWeightHTMLDocument FromIHTMLDocument2(IHTMLDocument2 htmlDocument, string url, bool escapePaths, bool escapeEmptyString)
{
    // No source document means there is nothing to convert.
    return htmlDocument != null
        ? LightWeightHTMLDocument.FromIHTMLDocument2(htmlDocument, url, null, escapePaths, escapeEmptyString)
        : null;
}
/// <summary>
/// Creates a lightweight document from an MSHTML document by forwarding to the
/// three-argument overload with a third argument of true.
/// </summary>
/// <param name="htmlDocument">The source document; may be null.</param>
/// <param name="url">The url forwarded unchanged to the three-argument overload.</param>
/// <returns>Null when <paramref name="htmlDocument"/> is null; otherwise whatever the three-argument overload returns.</returns>
public static LightWeightHTMLDocument FromIHTMLDocument2(IHTMLDocument2 htmlDocument, string url)
{
    // No source document means there is nothing to convert.
    // NOTE(review): the 'true' below is presumably the escapePaths flag - confirm against the overload.
    return htmlDocument != null
        ? LightWeightHTMLDocument.FromIHTMLDocument2(htmlDocument, url, true)
        : null;
}
/// <summary>
/// Builds a lightweight document for every embedded browser (sub-frame) hosted
/// by the given HTML document.
/// </summary>
/// <param name="htmlDocument">
/// The document whose embedded objects are enumerated. It is cast to IOleContainer,
/// which requires that the document is the root document in the browser.
/// </param>
/// <returns>
/// An array with one lightweight document per embedded browser that produced a
/// non-null document; empty when there are none.
/// </returns>
public static LightWeightHTMLDocument[] GetLightWeightDocumentForFrames(IHTMLDocument2 htmlDocument)
{
    ArrayList frameLightWeightDocuments = new ArrayList();

    // Get the IOleContainer for the html document (this requires that
    // the document is the root document in the browser)
    IOleContainer oleContainer = (IOleContainer)htmlDocument;

    // Enumerate the controls in the browser
    IEnumUnknown enumUnknown;
    oleContainer.EnumObjects(OLECONTF.EMBEDDINGS, out enumUnknown);

    // Iterate through the controls, one at a time, until the enumerator stops returning S_OK
    object unknown;
    while (HRESULT.S_OK == enumUnknown.Next(1, out unknown, IntPtr.Zero))
    {
        // Only subframes should cast to IWebBrowser2
        IWebBrowser2 webBrowser = unknown as IWebBrowser2;
        if (webBrowser == null)
        {
            continue;
        }

        // A subframe normally also exposes the base frame implementation, but the
        // 'as' cast yields null when it does not, so the name lookup below must
        // tolerate a missing frame base instead of throwing NullReferenceException.
        IHTMLFrameBase frameBase = unknown as IHTMLFrameBase;

        try
        {
            IHTMLDocument2 frameDocument = webBrowser.Document as IHTMLDocument2;
            if (frameDocument != null)
            {
                LightWeightHTMLDocument document = LightWeightHTMLDocument.FromIHTMLDocument2(
                    frameDocument,
                    frameDocument.url,
                    frameBase != null ? frameBase.name : null);
                if (document != null)
                {
                    frameLightWeightDocuments.Add(document);
                }
            }
        }
        catch (InvalidCastException)
        {
            // The frame's document could not be accessed through the expected
            // interfaces; record an empty placeholder document at the frame's location.
            string html = "<HTML></HTML>";
            LightWeightHTMLDocument document = LightWeightHTMLDocument.FromString(html, webBrowser.LocationURL, webBrowser.LocationURL, true);
            if (document != null)
            {
                frameLightWeightDocuments.Add(document);
            }
        }
    }

    return (LightWeightHTMLDocument[])frameLightWeightDocuments.ToArray(typeof(LightWeightHTMLDocument));
}
/// <summary>
/// Requests the target url, lets the header and content callbacks veto the
/// capture, then downloads the page and its references into the destination path.
/// </summary>
/// <param name="timeoutMs">Timeout, in milliseconds, passed to the page request and to the reference download.</param>
/// <returns>The path to the captured page (destination path combined with the site storage root file).</returns>
/// <exception cref="OperationCancelledException">
/// Thrown when either the headers callback or the content callback clears the continue flag.
/// </exception>
public string Capture(int timeoutMs)
{
    // flag indicating whether we should continue with the capture
    bool continueCapture = true;

    MemoryStream pageStream = new MemoryStream();

    // request the page; the response is disposed on every exit path so that the
    // underlying connection is released even when the capture is cancelled (or a
    // callback throws) before the response stream has been opened
    using (HttpWebResponse response = RequestPage(TargetUrl, timeoutMs))
    {
        OnHeadersReceived(response.Headers, ref continueCapture);
        if (!continueCapture)
        {
            throw new OperationCancelledException();
        }

        // transfer it to a stream
        using (Stream responseStream = response.GetResponseStream())
        {
            StreamHelper.Transfer(responseStream, pageStream);
        }
    }
    pageStream.Seek(0, SeekOrigin.Begin);

    // allow filter on content
    // (the reader is intentionally not disposed: disposing it would close
    // pageStream, which is rewound and read again below)
    OnContentReceived(new StreamReader(pageStream).ReadToEnd(), ref continueCapture);
    if (!continueCapture)
    {
        throw new OperationCancelledException();
    }
    pageStream.Seek(0, SeekOrigin.Begin);

    // Read the stream into a lightweight HTML doc. We use LightWeightHTMLDocument.FromIHTMLDocument2
    // instead of LightWeightHTMLDocument.FromStream because FromStream improperly shoves a saveFrom
    // declaration above the docType (bug 289357)
    IHTMLDocument2 doc = HTMLDocumentHelper.StreamToHTMLDoc(pageStream, TargetUrl, false);
    LightWeightHTMLDocument ldoc = LightWeightHTMLDocument.FromIHTMLDocument2(doc, TargetUrl, true);

    // download references
    FileBasedSiteStorage siteStorage = new FileBasedSiteStorage(DestinationPath, "index.htm");
    PageToDownload page = new PageToDownload(ldoc, TargetUrl, siteStorage.RootFile);
    PageAndReferenceDownloader downloader = new PageAndReferenceDownloader(new PageToDownload[] { page }, siteStorage);
    downloader.Download(new TimeoutProgressHost(timeoutMs));

    // return path to captured page
    return Path.Combine(DestinationPath, siteStorage.RootFile);
}
/// <summary>
/// Downloads the HTML document at the given url, wraps it in a page-to-download
/// entry under the given parent, and registers each of its style resources as a
/// reference of that page.
/// </summary>
/// <param name="url">The url to download.</param>
/// <param name="parent">The parent page passed to the new page entry.</param>
/// <param name="progress">Progress host handed to the document downloader.</param>
/// <returns>The page entry created for the downloaded document.</returns>
private PageToDownload DownloadUrl(string url, PageToDownload parent, IProgressHost progress)
{
    PageToDownload downloadedPage;
    LightWeightHTMLDocument document;

    // Download the current page
    using (HTMLDocumentDownloader htmlDownloader = new HTMLDocumentDownloader(_parentControl, url, null, _context.CookieString, _context.TimeoutMS, true))
    {
        htmlDownloader.DownloadHTMLDocument(progress);

        document = LightWeightHTMLDocument.FromIHTMLDocument2(htmlDownloader.HtmlDocument, htmlDownloader.Url);
        downloadedPage = new PageToDownload(document, url, null, parent);

        // The downloader's url may differ from the requested one if a redirect
        // occurred, so it becomes the page's absolute url.
        downloadedPage.AbsoluteUrl = htmlDownloader.Url;
    }

    // Every style resource of the document becomes a reference owned by the page.
    foreach (HTMLDocumentHelper.ResourceUrlInfo styleResource in document.StyleResourcesUrls)
    {
        ReferenceToDownload styleReference = new ReferenceToDownload(styleResource.ResourceUrl, downloadedPage, styleResource.ResourceAbsoluteUrl);
        downloadedPage.AddReference(styleReference);
    }

    return downloadedPage;
}
/// <summary>
/// Creates a named lightweight document by forwarding to the five-argument
/// overload with a final argument of true.
/// </summary>
/// <param name="htmlDocument">The source document, forwarded unchanged.</param>
/// <param name="url">The url, forwarded unchanged.</param>
/// <param name="name">The name, forwarded unchanged.</param>
/// <param name="escapePaths">Forwarded unchanged.</param>
/// <returns>Whatever the five-argument overload returns.</returns>
private static LightWeightHTMLDocument FromIHTMLDocument2(IHTMLDocument2 htmlDocument, string url, string name, bool escapePaths)
{
    // NOTE(review): the trailing 'true' is presumably the escapeEmptyString flag - confirm against the overload.
    LightWeightHTMLDocument document = LightWeightHTMLDocument.FromIHTMLDocument2(htmlDocument, url, name, escapePaths, true);
    return document;
}
/// <summary>
/// Creates a named lightweight document by forwarding to the four-argument
/// overload with a final argument of true.
/// </summary>
/// <param name="htmlDocument">The source document, forwarded unchanged.</param>
/// <param name="url">The url, forwarded unchanged.</param>
/// <param name="name">The name, forwarded unchanged.</param>
/// <returns>Whatever the four-argument overload returns.</returns>
private static LightWeightHTMLDocument FromIHTMLDocument2(IHTMLDocument2 htmlDocument, string url, string name)
{
    LightWeightHTMLDocument document = LightWeightHTMLDocument.FromIHTMLDocument2(htmlDocument, url, name, true);
    return document;
}