/// <summary>
/// Parses an HTML string into an in-memory MSHTML document.
/// </summary>
/// <param name="html">Markup written into the new document.</param>
/// <returns>The document, after its output stream has been closed.</returns>
private static IHTMLDocument2 HTMLToDom(string html)
{
    object[] markup = { html };

    IHTMLDocument2 parsed = new HTMLDocumentClass();
    parsed.write(markup);
    parsed.close();

    return parsed;
}
/// <summary>
/// Rewrites every image in the markup for deferred loading: the <c>src</c> value is copied
/// to a <c>data-original</c> attribute and <c>src</c> is replaced with the value returned by
/// <c>Common.HtmlHelper.LoadImg200px()</c>.
/// </summary>
/// <param name="html">Markup to rewrite.</param>
/// <returns>The <c>innerHTML</c> of the parsed document's body after the rewrite.</returns>
public static string SetImgLoadingHTML(string html)
{
    IHTMLDocument2 doc = new HTMLDocumentClass();
    doc.write(new object[] { html });
    doc.close();

    foreach (IHTMLImgElement image in doc.images)
    {
        IHTMLElement element = (IHTMLElement)image;

        // Read through getAttribute rather than image.src.
        // NOTE(review): flag 2 presumably yields the value as authored instead of a resolved URL - confirm.
        string src = (string)element.getAttribute("src", 2);

        element.setAttribute("data-original", src, 1);
        element.setAttribute("src", Common.HtmlHelper.LoadImg200px(), 1);
    }

    return doc.body.innerHTML;
}
/// <summary>
/// Copies each child node of an XML head node into an HTML head node.
/// </summary>
/// <param name="hDoc">HTML document passed through to <c>CopyXmlNodeToHtml</c>.</param>
/// <param name="hHead">HTML node that receives the copies.</param>
/// <param name="xHead">XML node whose children are copied, in document order.</param>
public static void MergeXmlHeadToHtml(HTMLDocumentClass hDoc, IHTMLDOMNode hHead, XmlNode xHead)
{
    foreach (XmlNode child in xHead.ChildNodes)
    {
        CopyXmlNodeToHtml(hDoc, hHead, child);
    }
}
/// <summary>
/// Depth-first search through the iframes of <paramref name="document"/> for the frame whose
/// URL hash equals <paramref name="containerId"/>.
/// </summary>
/// <param name="document">Document whose iframes are searched.</param>
/// <param name="containerId">URL hash to look for.</param>
/// <param name="parentIds">
/// On success, replaced with a copy of the incoming list extended by the URL hash of every
/// frame entered on the way to the match. Left untouched on failure.
/// </param>
/// <param name="webBrowser">On success, set to the browser object of the matching frame.</param>
/// <returns><c>true</c> when a matching frame was found; otherwise <c>false</c>.</returns>
private bool GetContainerIdHierarchy(HTMLDocumentClass document, int containerId, ref List<int> parentIds, ref InternetExplorer webBrowser)
{
    foreach (HTMLIFrameClass iframe in document.getElementsByTagName("iframe"))
    {
        // Work on a copy so a failed branch never pollutes the caller's list.
        List<int> path = new List<int>(parentIds);
        InternetExplorer frameBrowser = GetInternetExplorer(iframe.contentWindow);

        if (GetUrlHash(internetExplorerToLocation[frameBrowser]) == containerId)
        {
            webBrowser = frameBrowser;
            parentIds = path;
            return true;
        }

        // Not the target: record this frame as an ancestor and search inside it.
        path.Add(GetUrlHash(internetExplorerToLocation[frameBrowser]));
        if (GetContainerIdHierarchy((HTMLDocumentClass)frameBrowser.Document, containerId, ref path, ref webBrowser))
        {
            parentIds = path;
            return true;
        }
    }

    return false;
}
/// <summary>
/// Reports the client and scroll dimensions of a document.
/// </summary>
/// <remarks>
/// When <paramref name="doc"/> is an <c>HTMLDocumentClass</c>, its <c>documentElement</c> is
/// measured and supplies the client size. Otherwise the document body is measured and the
/// client size is taken from the window rectangle of <paramref name="hwnd"/>.
/// The <c>ref</c> arguments are only written when the measured element could be obtained.
/// </remarks>
/// <param name="hwnd">Window handle used for the window-rectangle fallback.</param>
/// <param name="doc">Document to measure.</param>
/// <param name="clientWidth">Receives the client width.</param>
/// <param name="clientHeight">Receives the client height.</param>
/// <param name="scrollWidth">Receives the scroll width of the measured element.</param>
/// <param name="scrollHeight">Receives the scroll height of the measured element.</param>
/// <returns>The element that was measured, or <c>null</c> when none was available.</returns>
public static mshtml.IHTMLElement2 GetWindowWidthAndHeight(IntPtr hwnd, mshtml.IHTMLDocument2 doc, ref int clientWidth, ref int clientHeight, ref int scrollWidth, ref int scrollHeight)
{
    mshtml.IHTMLElement2 measured;
    HTMLDocumentClass concreteDocument = doc as HTMLDocumentClass;

    if (concreteDocument == null)
    {
        Rect windowBounds = new Rect();
        measured = doc.body as mshtml.IHTMLElement2;
        WindowUtil.GetWindowRect(hwnd, out windowBounds);

        if (measured != null)
        {
            clientWidth = windowBounds.Right - windowBounds.Left;
            clientHeight = windowBounds.Bottom - windowBounds.Top;
        }
    }
    else
    {
        measured = concreteDocument.documentElement as mshtml.IHTMLElement2;

        if (measured != null)
        {
            clientWidth = measured.clientWidth;
            clientHeight = measured.clientHeight;
        }
    }

    if (measured == null)
    {
        return null;
    }

    scrollWidth = measured.scrollWidth;
    scrollHeight = measured.scrollHeight;
    return measured;
}
/// <summary>
/// Brings the Internet Explorer window to the front, navigates it to <paramref name="url"/>
/// and blocks until the page reports it has finished loading.
/// </summary>
/// <param name="url">Address to navigate to.</param>
/// <exception cref="TimeoutException">
/// The page did not finish loading within the polling budget (more than 2000 polls).
/// </exception>
public void OpenInternetExplorer(string url)
{
    // Toggling TOPMOST then NOTTOPMOST raises the window without leaving it pinned on top.
    IntPtr windowHandle = new IntPtr(IE.HWND);
    Win32.SetWindowPos(windowHandle, (IntPtr)Win32.hWndInsertAfter.HWND_TOPMOST, 0, 0, 0, 0, Win32.TOPMOST_FLAGS);
    Win32.SetWindowPos(windowHandle, (IntPtr)Win32.hWndInsertAfter.HWND_NOTTOPMOST, 0, 0, 0, 0, Win32.TOPMOST_FLAGS);

    IE.Navigate(url);

    int loopCount = 0;
    while (IE.ReadyState != tagREADYSTATE.READYSTATE_COMPLETE)
    {
        bool documentAvailable;
        try
        {
            dom = (HTMLDocumentClass)IE.Document;
            documentAvailable = dom != null;
        }
        catch (Exception)
        {
            // The document can be unavailable or of another type while the browser is navigating.
            documentAvailable = false;
        }

        if (documentAvailable && "complete".Equals(dom.readyState))
        {
            break;
        }

        // Failed attempts count toward the budget as well; previously they bypassed the
        // counter, so a document that never became available looped forever.
        if (loopCount > 2000)
        {
            throw new TimeoutException("Get " + url + " timeout!");
        }

        Thread.Sleep(documentAvailable ? 500 : 1000);
        loopCount++;
    }
}
/// <summary>
/// Runs the next step of the automated sequence each time the browser control finishes
/// loading a document. The current step is tracked in <c>Task</c>.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Document-complete event data (not used).</param>
private void axWebBrowser1_DocumentComplete(object sender, AxSHDocVw.DWebBrowserEvents2_DocumentCompleteEvent e)
{
    switch (Task)
    {
        case 1:
        {
            // Use the document the control already holds; creating a new HTMLDocumentClass
            // first only allocated a COM object that was discarded straight away.
            HTMLDocument myDoc = (HTMLDocument)axWebBrowser1.Document;

            // The search box in the Google page source:
            //   <INPUT maxLength="256" size="55" name="q">
            HTMLInputElement otxtSearchBox = (HTMLInputElement)myDoc.all.item("q", 0);
            otxtSearchBox.value = "intel corp";

            // The "I'm Feeling Lucky" button in the Google page source:
            //   <INPUT type=submit value="I'm Feeling Lucky" name=btnI>
            HTMLInputElement btnSearch = (HTMLInputElement)myDoc.all.item("btnI", 0);
            btnSearch.click();

            Task++;
            break;
        }

        case 2:
            // continuation of automated tasks...
            break;
    }
}
/// <summary>
/// Injects a script into the current document's head that installs a <c>window.onerror</c>
/// handler returning <c>true</c>, intended to suppress script-error popups.
/// Known limitation: some script errors still appear.
/// </summary>
private void InjectDisableScript()
{
    string disableScriptError = @"function noError() {return true;} window.onerror = noError;";

    HTMLDocumentClass doc = webBrowser.Document as HTMLDocumentClass;
    HTMLDocument doc2 = webBrowser.Document as HTMLDocument;
    if (doc == null || doc2 == null)
    {
        // No document loaded yet, or not an MSHTML document: nothing to inject into.
        return;
    }

    IHTMLScriptElement scriptErrorSuppressed = (IHTMLScriptElement)doc2.createElement("SCRIPT");
    scriptErrorSuppressed.type = "text/javascript";
    scriptErrorSuppressed.text = disableScriptError;

    IHTMLElementCollection nodes = doc.getElementsByTagName("head");
    foreach (IHTMLElement elem in nodes)
    {
        try
        {
            HTMLHeadElementClass head = (HTMLHeadElementClass)elem;
            head.appendChild((IHTMLDOMNode)scriptErrorSuppressed);
        }
        catch (Exception ex)
        {
            // Injection is best-effort; record the failure instead of dropping it silently.
            System.Diagnostics.Debug.WriteLine("InjectDisableScript: could not append script to head: " + ex.Message);
        }
    }
}
/// <summary>
/// Obtains the HTML document hosted by a window by sending it the registered
/// <c>WM_HTML_GETOBJECT</c> message and converting the reply with <c>ObjectFromLresult</c>.
/// </summary>
/// <param name="internetExplorerServerHandle">Handle of the window that hosts the document.</param>
/// <returns>
/// The document, or <c>null</c> when the message could not be registered, the window returned
/// no object reference, or the conversion reported a non-zero result.
/// </returns>
private static HTMLDocumentClass GetHtmlDocumentClassFromInternetExplorerServerHandle(IntPtr internetExplorerServerHandle)
{
    int getObjectMessage = WinApi.User32.RegisterWindowMessage("WM_HTML_GETOBJECT");
    if (getObjectMessage == 0)
    {
        return null;
    }

    int objectReference;
    WinApi.User32.SendMessageTimeout(internetExplorerServerHandle, getObjectMessage, 0, 0, NativeMethods.SMTO_ABORTIFHUNG, kHtmlGetObjectTimeout, out objectReference);
    if (objectReference == 0)
    {
        return null;
    }

    Guid documentInterfaceId = typeof(IHTMLDocument2).GUID;
    HTMLDocumentClass resolved = null;
    int status = NativeMethods.ObjectFromLresult(objectReference, ref documentInterfaceId, 0, ref resolved);

    return status == 0 ? resolved : null;
}
/// <summary>
/// Runs the next step of the automated sequence each time the browser control finishes
/// loading a document. The current step is tracked in <c>Task</c>.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Document-complete event data (not used).</param>
private void axWebBrowser1_DocumentComplete(object sender, AxSHDocVw.DWebBrowserEvents2_DocumentCompleteEvent e)
{
    switch (Task)
    {
        case 1:
        {
            // Use the document the control already holds; creating a new HTMLDocumentClass
            // first only allocated a COM object that was discarded straight away.
            HTMLDocument myDoc = (HTMLDocument)axWebBrowser1.Document;

            // The search box in the Google page source:
            //   <INPUT maxLength="256" size="55" name="q">
            HTMLInputElement otxtSearchBox = (HTMLInputElement)myDoc.all.item("q", 0);
            otxtSearchBox.value = "intel corp";

            // The "I'm Feeling Lucky" button in the Google page source:
            //   <INPUT type=submit value="I'm Feeling Lucky" name=btnI>
            HTMLInputElement btnSearch = (HTMLInputElement)myDoc.all.item("btnI", 0);
            btnSearch.click();

            Task++;
            break;
        }

        case 2:
            // continuation of automated tasks...
            break;
    }
}
/// <summary>
/// (Marked as unused by the original author.) Converts HTML that uses relative link and
/// image paths into HTML that uses absolute URLs.
/// </summary>
/// <param name="html">Markup to convert.</param>
/// <param name="relativeLocation">Base URI against which relative references are resolved.</param>
/// <returns>The <c>innerHTML</c> of the parsed document's body after the rewrite.</returns>
public string ConvertToAbsoluteUrls(string html, Uri relativeLocation)
{
    IHTMLDocument2 doc = new HTMLDocumentClass();
    doc.write(new object[] { html });
    doc.close();

    // Enumerate as IHTMLElement: the links collection is documented to contain <area>
    // elements as well as anchors, and casting those to IHTMLAnchorElement would throw.
    foreach (IHTMLElement element in doc.links)
    {
        string href = element.getAttribute("href", 2) as string;
        Uri addr;

        // TryCreate skips a malformed href instead of aborting the whole conversion.
        if (href == null || !Uri.TryCreate(relativeLocation, href, out addr))
        {
            continue;
        }

        IHTMLAnchorElement anchor = element as IHTMLAnchorElement;
        if (anchor != null)
        {
            anchor.href = addr.AbsoluteUri;
        }
        else
        {
            element.setAttribute("href", addr.AbsoluteUri, 0);
        }
    }

    foreach (IHTMLImgElement image in doc.images)
    {
        IHTMLElement element = (IHTMLElement)image;
        string src = element.getAttribute("src", 2) as string;
        Uri addr;

        if (src != null && Uri.TryCreate(relativeLocation, src, out addr))
        {
            image.src = addr.AbsoluteUri;
        }
    }

    return doc.body.innerHTML;
}
/// <summary>
/// Obtains an Internet Explorer instance for <paramref name="url"/> from
/// <c>InternetExplorerFactory</c>, records its window handle and blocks until the page
/// reports it has finished loading.
/// </summary>
/// <param name="url">Address to open.</param>
/// <exception cref="TimeoutException">
/// The page did not finish loading within the polling budget (more than 2000 polls).
/// </exception>
public IEOperateCore(string url)
{
    IE = InternetExplorerFactory.GetInternetExplorer(url);
    HWND = new IntPtr(IE.HWND);

    int loopCount = 0;
    while (IE.ReadyState != tagREADYSTATE.READYSTATE_COMPLETE)
    {
        bool documentAvailable;
        try
        {
            dom = (HTMLDocumentClass)IE.Document;
            documentAvailable = dom != null;
        }
        catch (Exception)
        {
            // The document can be unavailable or of another type while the browser is navigating.
            documentAvailable = false;
        }

        if (documentAvailable && "complete".Equals(dom.readyState))
        {
            break;
        }

        // Failed attempts count toward the budget as well; previously they bypassed the
        // counter, so a document that never became available looped forever.
        if (loopCount > 2000)
        {
            throw new TimeoutException("open " + url + " timeout!");
        }

        Thread.Sleep(documentAvailable ? 500 : 1000);
        loopCount++;
    }
}
/// <summary>
/// Parses an HTML string into a new in-memory MSHTML document.
/// </summary>
/// <param name="html">Markup written into the document.</param>
/// <returns>The populated document.</returns>
private HTMLDocument GetDoc(string html)
{
    HTMLDocument dx = new HTMLDocumentClass();
    IHTMLDocument2 doc2 = (IHTMLDocument2)dx;

    doc2.write(new object[] { html });
    // Close the output stream so the parser finishes the document before callers query it.
    doc2.close();

    return dx;
}
/// <summary>
/// Closes the held document, if there is one, and drops the reference to it.
/// Calling this again afterwards does nothing.
/// </summary>
public void Close()
{
    if (_doc == null)
    {
        return;
    }

    _doc.IHTMLDocument2_close();
    _doc = null;
}
/// <summary>
/// Appends <paramref name="script"/> to every element returned by looking up the
/// <c>Head</c> tag name in <paramref name="htmlDocument"/>.
/// </summary>
/// <param name="htmlDocument">Document whose head element(s) receive the script.</param>
/// <param name="script">Script element to append.</param>
private static void AddScriptToHead(HTMLDocumentClass htmlDocument, IHTMLScriptElement script)
{
    IHTMLElementCollection headElements = htmlDocument.getElementsByTagName(Head);

    foreach (IHTMLElement candidate in headElements)
    {
        HTMLHeadElementClass headElement = (HTMLHeadElementClass)candidate;
        IHTMLDOMNode scriptNode = (IHTMLDOMNode)script;
        headElement.appendChild(scriptNode);
    }
}
/// <summary>
/// Blocks, polling every 100 ms, until the document's markup differs from
/// <paramref name="originalHtmlDocumentBodyInnerHtml"/> or the configured crawl-request
/// timeout (in minutes) has elapsed.
/// </summary>
/// <param name="originalHtmlDocumentBodyInnerHtml">Markup snapshot to compare against.</param>
/// <param name="htmlDocument">Document being watched for changes.</param>
private void RenderDecodedHtml(string originalHtmlDocumentBodyInnerHtml, HTMLDocumentClass htmlDocument)
{
    // A Stopwatch is monotonic, so the timeout is unaffected by clock adjustments or DST.
    System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();

    // NOTE(review): the parameter name says "body innerHTML" but the comparison uses
    // documentElement.outerHTML - confirm callers pass a matching snapshot.
    while (originalHtmlDocumentBodyInnerHtml == htmlDocument.documentElement.outerHTML
        && elapsed.Elapsed.TotalMinutes < _applicationSettings.CrawlRequestTimeoutInMinutes)
    {
        Thread.Sleep(100);
    }
}
/// <summary>
/// Extracts the text of an HTML fragment by parsing it with MSHTML and reading the body's
/// <c>innerText</c>.
/// </summary>
/// <param name="html">Markup to convert.</param>
/// <returns>The <c>innerText</c> of the parsed body.</returns>
private static string InnerHtmlText(string html)
{
    HTMLDocumentClass doc = new HTMLDocumentClass();
    // NOTE(review): design mode presumably keeps scripts in the markup from running - confirm.
    doc.designMode = "on";

    doc.IHTMLDocument2_write(html);
    // Close the output stream so the parser finishes the document before it is read.
    doc.IHTMLDocument2_close();

    return doc.body.innerText;
}
/// <summary>
/// Rebuilds <c>Phones</c> from an XML payload: the content of the first <c>html</c> element
/// (optionally wrapped in CDATA) is parsed as HTML, and every <c>div</c> whose class is
/// <c>gc-forwarding-number-ani goog-inline-block</c> contributes one phone number.
/// </summary>
/// <param name="xml">XML payload containing an <c>html</c> element.</param>
/// <remarks>Exceptions are written to the trace output and not rethrown.</remarks>
public void UpdatePhone(string xml)
{
    const string CDataStart = "<![CDATA[";
    const string CDataEnd = "]]>";

    try
    {
        Phones.Clear();

        XmlDocument doc = new XmlDocument();
        doc.LoadXml(xml);

        XmlNodeList xnl = doc.GetElementsByTagName("html");
        if (xnl.Count == 0)
        {
            Trace.WriteLine("Couldn't find HTML node in Phone XML");
            return;
        }

        xml = xnl[0].InnerXml.Trim();
        if (xml.StartsWith(CDataStart, StringComparison.Ordinal))
        {
            xml = xml.Substring(CDataStart.Length);

            // Strip the terminator only when it is really there; blindly dropping three
            // characters would truncate content or throw on a short string.
            if (xml.EndsWith(CDataEnd, StringComparison.Ordinal))
            {
                xml = xml.Substring(0, xml.Length - CDataEnd.Length);
            }

            xml = xml.Trim();
        }
        else
        {
            Trace.WriteLine("No CDATA!");
        }

        object[] oPageText = { xml };
        HTMLDocument dx = new HTMLDocumentClass();
        IHTMLDocument2 doc2 = (IHTMLDocument2)dx;
        doc2.write(oPageText);
        // Close the output stream so the parser finishes the document before it is queried.
        doc2.close();

        IHTMLElementCollection ihec = dx.getElementsByTagName("div");
        foreach (IHTMLElement e in ihec)
        {
            if (e.className == "gc-forwarding-number-ani goog-inline-block")
            {
                Phone p = new Phone();
                p.Number = e.innerText;
                Phones.Add(p);
                Trace.WriteLine("Found Phone: " + p.Number);
            }
        }
    }
    catch (Exception ex)
    {
        Trace.Write("XError loading Phone: " + ex);
    }
}
/// <summary>
/// Parses an HTML string into an in-memory MSHTML document.
/// </summary>
/// <param name="html">Markup written into the document.</param>
/// <returns>The document, after its output stream has been closed.</returns>
public IHTMLDocument2 ConverToTable(string html)
{
    IHTMLDocument2 doc = new HTMLDocumentClass();
    doc.write(new object[] { html });
    doc.close();

    // The title and body text used to be read into locals that were never used;
    // those reads were removed.
    return doc;
}
/// <summary>
/// Records the <c>src</c> attribute of every iframe in the document on the renderer message,
/// as an <c>iframe_src</c> key paired with the attribute value.
/// </summary>
/// <param name="rendererMessage">
/// Message that receives the key/value pairs. Nothing is recorded when it, or either of its
/// property lists, is <c>null</c>.
/// </param>
/// <param name="htmlDocumentClass">Document whose iframes are inspected.</param>
public override void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass)
{
    // These checks do not depend on the element, so they are made once up front
    // rather than once per iframe.
    if (rendererMessage == null || rendererMessage.PropertiesKeys == null || rendererMessage.PropertiesValues == null)
    {
        return;
    }

    foreach (IHTMLElement htmlElement in htmlDocumentClass.getElementsByTagName("iframe"))
    {
        // Keys and values are parallel lists: always add to both together.
        rendererMessage.PropertiesKeys.Add("iframe_src");
        rendererMessage.PropertiesValues.Add(htmlElement.getAttribute("src"));
    }
}
/// <summary>
/// Normalizes an e-mail body into a complete HTML body element by parsing it with MSHTML
/// and returning the body's <c>outerHTML</c>.
/// </summary>
/// <param name="email">Message whose <c>Body</c> is parsed.</param>
/// <returns>The <c>outerHTML</c> of the parsed body element.</returns>
private static string GetCompleteMailBodyHtml(EmailMessage email)
{
    string body = email.Body;

    HTMLDocumentClass doc = new HTMLDocumentClass();
    // NOTE(review): design mode presumably keeps scripts in the mail from running - confirm.
    doc.designMode = "on";

    doc.IHTMLDocument2_write(body);
    // Close the output stream so the parser finishes the document before it is read.
    doc.IHTMLDocument2_close();

    return doc.body.outerHTML;
}
/// <summary>
/// Parses HTML with MSHTML and returns the plain text of its body. The text is also
/// echoed to the console.
/// </summary>
/// <param name="htmlContent">Markup to convert.</param>
/// <returns>The <c>innerText</c> of the parsed body.</returns>
public static string RetrieveInnerTextContent(string htmlContent)
{
    HTMLDocument parsed = new HTMLDocumentClass();
    IHTMLDocument2 writer = (IHTMLDocument2)parsed;

    writer.write(new object[] { htmlContent });
    writer.close();

    // All plain text in the body.
    string plainText = writer.body.innerText;
    Console.WriteLine(plainText);
    return plainText;
}
/// <summary>
/// For each page that the browser loads, we look for one that fulfills all the following criteria:
/// - Has a meta element named "Application-State" (a ";"-separated list of "key=val" pairs)
/// - The application state contains "service=IDP", meaning the response comes from the identity provider (IDP)
/// - The application state contains "authenticated=true", meaning the authentication went well
/// - The application state contains "state=token", meaning the response contains the SAML token response
/// If all of the above hold, the SAML token response is carried in an attribute named
/// SSO_SAML2_TOKEN on the BODY tag.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="args">Navigation data; only the URI is used, for logging.</param>
private void OnLoadCompleted(object sender, NavigationEventArgs args)
{
    Console.WriteLine("OnLoadCompleted: {0}", args.Uri);
    try
    {
        HTMLDocumentClass dom = (HTMLDocumentClass)(_browser.Document);

        // First, look for the application-state meta element.
        IHTMLElementCollection applicationStateElementCollection = dom.getElementsByName("Application-State");

        // Without an application state this is not the right page.
        if (applicationStateElementCollection.length < 1)
        {
            return;
        }

        // Application state looks like this:
        // <meta name="Application-State" content="service=IDP;federated=False;env=Test;state=Ok;authenticated=True;">
        string applicationState = ((HTMLMetaElement)applicationStateElementCollection.item(0)).content;

        // Split at the ";" separator.
        string[] applicationStateElements = applicationState.Split(';');

        // Look for "service=idp". Without it we cannot tell whether authentication failed
        // or simply has not been reached yet.
        if (!applicationStateElements.Any(s => s.Equals("service=idp", StringComparison.OrdinalIgnoreCase)))
        {
            // Not the IDP responding, probably a page earlier in the login flow - just skip it.
            return;
        }

        // This is the IDP response page. Success requires authenticated=true AND state=token.
        bool tokenAnnounced =
            applicationStateElements.Any(s => s.Equals("authenticated=true", StringComparison.OrdinalIgnoreCase))
            && applicationStateElements.Any(s => s.Equals("state=token", StringComparison.OrdinalIgnoreCase));

        string token = null;
        if (tokenAnnounced)
        {
            // On the final page the SAML response is an attribute on the body element.
            HTMLBody bodyElement = ((HTMLBody)dom.getElementsByTagName("body").item(0));
            object attribute = bodyElement.getAttribute("SSO_SAML2_TOKEN");
            token = attribute == null ? null : attribute.ToString();
        }

        // A missing attribute (null, or a value that stringifies to empty) must not be
        // reported as a successful login with an empty token.
        if (!string.IsNullOrEmpty(token))
        {
            SAMLResponse = token;
            DialogResult = true;
        }
        else
        {
            DialogResult = false;
        }

        Close();
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine(ex);
        Close();
    }
}
/// <summary>
/// Fetches a page with <c>GetWebpage</c> and loads its markup into an MSHTML document so
/// it can be parsed.
/// </summary>
/// <param name="sUrl">Address passed to <c>GetWebpage</c>.</param>
/// <returns>The document, after its output stream has been closed.</returns>
public static IHTMLDocument2 GetWebpageDocument(string sUrl)
{
    StringBuilder pageSource = GetWebpage(sUrl);

    IHTMLDocument2 parsed = new HTMLDocumentClass();
    object[] markup = { pageSource.ToString() };
    parsed.write(markup);
    parsed.close();

    return parsed;
}
/// <summary>
/// Closes the held document (if any), asks <c>InternetExplorerFactory</c> to close the
/// browser when one is held, and resets the stored window handle.
/// </summary>
public void CloseInternetExplorer()
{
    if (dom != null)
    {
        dom.close();
        dom = null;
    }

    if (IE == null)
    {
        return;
    }

    InternetExplorerFactory.CloseInternetExplorer();
    HWND = new IntPtr(0);
}
/// <summary>
/// Starts the Skype login: enters the user name, simulates Enter, optionally forces the
/// login window to stay minimized, then waits for the embedded login web document and
/// hands over to <c>WebBrowserLogin</c>.
/// </summary>
/// <param name="skypeLoginWindowData">Handles of the login window and its user-name edit box.</param>
/// <exception cref="LoginException">
/// The login web document could not be obtained, or the thread was aborted while waiting.
/// </exception>
private void Login(GetSkypeLoginWindowDataResult skypeLoginWindowData)
{
    // Set user name.
    WinApi.User32.SendMessage(skypeLoginWindowData.LoginEditPtr, WinApi.MessageType.WM_SETTEXT, IntPtr.Zero, _username);

    // Simulate an Enter key press to initiate login.
    WinApi.User32.PostMessage(skypeLoginWindowData.LoginEditPtr, WinApi.MessageType.WM_KEYDOWN, new IntPtr((int)WinApi.VirtualKeyCode.VK_RETURN), IntPtr.Zero);

    // Force-minimize Skype if requested. For some reason Skype restores itself after login begins.
    if (_minimized)
    {
        TimedOutOperation(1000, 1, () =>
        {
            if (WinApi.User32.IsIconic(skypeLoginWindowData.LoginWindowPtr))
            {
                return false;
            }

            WinApi.User32.ShowWindow(skypeLoginWindowData.LoginWindowPtr, WinApi.User32.ShowWindowCommands.SW_FORCEMINIMIZE);
            return true;
        });
    }

    _canThrowSkypeExitException = true;
    try
    {
        bool documentFound = TimedOutOperation(kHtmlGetObjectTimeout, kOperationRetryDelay, () =>
        {
            IntPtr serverHandle = GetInternetExplorerServerHandle(skypeLoginWindowData.LoginWindowPtr);
            if (serverHandle == IntPtr.Zero)
            {
                return false;
            }

            _loginBrowserHtmlDocument = GetHtmlDocumentClassFromInternetExplorerServerHandle(serverHandle);
            return _loginBrowserHtmlDocument != null;
        });

        if (!documentFound)
        {
            throw new LoginException("Unable to get the Skype login web document");
        }

        WebBrowserLogin();
    }
    catch (ThreadAbortException e)
    {
        throw new LoginException("Skype process has died unexpectedly", e);
    }
    finally
    {
        _canThrowSkypeExitException = false;
    }
}
/// <summary>
/// Converts an HTML string read from an HTML file or a database into an
/// <see cref="mshtml.IHTMLDocument2">HTML document</see>.
/// </summary>
/// <param name="htmlString">The HTML string to parse.</param>
/// <returns>
/// The parsed <see cref="mshtml.IHTMLDocument2">HTML document</see>, or <c>null</c> when
/// <paramref name="htmlString"/> is <c>null</c> or empty.
/// </returns>
public static mshtml.IHTMLDocument2 GetHtmlDocument(string htmlString)
{
    // A null argument is treated like an empty one instead of throwing NullReferenceException.
    if (string.IsNullOrEmpty(htmlString))
    {
        return null;
    }

    // Read the HTML into a document to enable parsing.
    IHTMLDocument2 doc = new HTMLDocumentClass();
    doc.write(new object[] { htmlString });
    doc.close();
    return doc;
}
/// <summary>
/// Recreates an XML node as an HTML element under <paramref name="hNode"/>: an element named
/// after the XML node is created and appended, its text or children are copied, and finally
/// its attributes are copied.
/// </summary>
/// <param name="hDoc">Document used to create the new element.</param>
/// <param name="hNode">HTML node that becomes the parent of the new element.</param>
/// <param name="xNode">XML node to copy.</param>
public static void CopyXmlNodeToHtml(HTMLDocumentClass hDoc, IHTMLDOMNode hNode, XmlNode xNode)
{
    IHTMLElement created = hDoc.createElement(xNode.Name);
    IHTMLDOMNode createdNode = created as IHTMLDOMNode;
    hNode.appendChild(createdNode);

    switch (xNode.NodeType)
    {
        case XmlNodeType.CDATA:
            created.innerText = xNode.Value;
            break;

        case XmlNodeType.Text:
            created.innerText = xNode.InnerText;
            break;

        default:
            foreach (XmlNode child in xNode.ChildNodes)
            {
                if (child.NodeType != XmlNodeType.Text)
                {
                    CopyXmlNodeToHtml(hDoc, createdNode, child);
                    continue;
                }

                try
                {
                    created.innerText = child.InnerText;
                }
                catch
                {
                    // Best-effort: a failed text assignment is ignored.
                    // NOTE(review): presumably some elements reject innerText - confirm.
                }
            }
            break;
    }

    if (xNode.Attributes == null)
    {
        return;
    }

    foreach (XmlAttribute attribute in xNode.Attributes)
    {
        created.setAttribute(attribute.Name, attribute.Value, 0);
    }
}
/// <summary>
/// Loads the current content into an MSHTML document, lets <paramref name="interop"/>
/// manipulate it, then stores the resulting markup both in memory and in a new
/// "*-transformed.html" file inside the working directory.
/// </summary>
/// <param name="interop">Callback that manipulates the loaded document.</param>
/// <returns>This instance, with <c>content</c> and <c>absolutePath</c> updated.</returns>
public DocumentModel Transform(Action<HTMLDocumentClass> interop)
{
    HTMLDocumentClass myDocument = new HTMLDocumentClass();
    IHTMLDocument2 doc2 = myDocument;
    doc2.write(new object[] { this.content });

    interop(myDocument);
    this.content = myDocument.documentElement.outerHTML;

    // ":" in a custom format is the culture's time separator, which is normally a colon and
    // therefore not allowed in Windows file names. Use "-" and the invariant culture so the
    // generated name is always valid.
    string timestamp = DateTime.Now.ToString("hh-mm-ss-fffff", System.Globalization.CultureInfo.InvariantCulture);
    string fileName = string.Format("{0}-transformed.html", timestamp);

    this.absolutePath = Path.Combine(this.workingDirectory, fileName);
    File.WriteAllText(this.absolutePath, this.content, Encoding.UTF8);
    return this;
}
/// <summary>
/// Loads <paramref name="url"/> into a new MSHTML document, pumps the message loop until
/// the document reports "complete", and stores it wrapped in an <c>IEDocument</c>.
/// </summary>
/// <param name="url">Address to load.</param>
public void NavigateTo(Uri url)
{
    var hostDocument = new HTMLDocumentClass();
    ((IPersistStreamInit)hostDocument).InitNew();

    var loadedDocument = hostDocument.createDocumentFromUrl(url.AbsoluteUri, "null");

    // Pumping messages is essential here: without DoEvents() the application hangs
    // in the "loading" state.
    while (loadedDocument.readyState != "complete")
    {
        Application.DoEvents();
    }

    _ieDocument = new IEDocument(loadedDocument);
}
/// <summary>
/// Collects link targets from every element of the document onto the renderer message as
/// <c>element_href</c> key/value pairs, skipping values that are already recorded. Both the
/// element's own <c>href</c> attribute and any attribute whose name contains "href" inside
/// the element's inner HTML are considered.
/// </summary>
/// <param name="rendererMessage">
/// Message that receives the pairs. Nothing is recorded when it, or either of its property
/// lists, is <c>null</c>.
/// </param>
/// <param name="htmlDocumentClass">Document whose elements are inspected.</param>
public override void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass)
{
    foreach (IHTMLElement htmlElement in htmlDocumentClass.all)
    {
        if (rendererMessage == null || rendererMessage.PropertiesKeys == null || rendererMessage.PropertiesValues == null)
        {
            continue;
        }

        // The element's own href attribute, when it is a non-empty string.
        object href = htmlElement.getAttribute("href");
        if (!string.IsNullOrEmpty(href as string) && !rendererMessage.PropertiesValues.Contains(href))
        {
            rendererMessage.PropertiesKeys.Add("element_href");
            rendererMessage.PropertiesValues.Add(href);
        }

        // href-like attributes anywhere inside the element's markup.
        string innerMarkup = htmlElement.innerHTML;
        if (innerMarkup == null || !innerMarkup.ToLowerInvariant().Contains("href"))
        {
            continue;
        }

        HtmlDocument fragment = new HtmlDocument();
        fragment.LoadHtml(innerMarkup);
        if (fragment.DocumentNode.Attributes == null)
        {
            continue;
        }

        foreach (HtmlAgilityPack.HtmlNode node in fragment.DocumentNode.Descendants())
        {
            if (node.Attributes == null)
            {
                continue;
            }

            foreach (HtmlAttribute attribute in node.Attributes)
            {
                if (!attribute.Name.ToLowerInvariant().Contains("href"))
                {
                    continue;
                }

                if (rendererMessage.PropertiesValues.Contains(attribute.Value))
                {
                    continue;
                }

                rendererMessage.PropertiesKeys.Add("element_href");
                rendererMessage.PropertiesValues.Add(attribute.Value);
            }
        }
    }
}
/// <summary>
/// Rebuilds the search index: downloads every topic page from 1 to <c>GetMaxID()</c>,
/// extracts its title and body text with MSHTML, and replaces the matching document in the
/// Lucene index.
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event data (not used).</param>
protected void searchButton_Click(object sender, EventArgs e)
{
    // Location of the index store.
    string indexPath = "C:/index";
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isUpdate = IndexReader.IndexExists(directory);
        if (isUpdate && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        IndexWriter write = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.UTF8;
                int maxID = GetMaxID();

                for (int i = 1; i <= maxID; i++)
                {
                    string url = "http://localhost:8080/showtopic-" + i + ".aspx";
                    string html = wc.DownloadString(url);

                    HTMLDocumentClass doc = new HTMLDocumentClass();
                    doc.designMode = "on";
                    doc.IHTMLDocument2_write(html);
                    doc.close();

                    // A page without a title or body text would otherwise hand null to
                    // Field and abort the whole run.
                    string title = doc.title ?? string.Empty;
                    string body = (doc.body != null ? doc.body.innerText : null) ?? string.Empty;

                    // Delete-then-add keeps exactly one document per topic number.
                    write.DeleteDocuments(new Term("number", i.ToString()));

                    Document document = new Document();
                    document.Add(new Field("number", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    document.Add(new Field("title", title, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    document.Add(new Field("body", body, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                    write.AddDocument(document);

                    logger.Debug("索引" + i.ToString() + "完毕");
                }
            }
        }
        finally
        {
            // Always release the writer (and its lock), even when a download fails.
            write.Close();
        }
    }
    finally
    {
        directory.Close();
    }

    logger.Debug("全部索引完毕");
}
/// <summary>
/// Creates a new instance of the HtmlHelpFile class.
/// </summary>
/// <param name="f">The path to the file.</param>
/// <exception cref="ArgumentException">
/// The file could not be opened or parsed; the original failure is the inner exception.
/// </exception>
public HtmlHelpFile(FileInfo f)
{
    try
    {
        Debug.Assert(f != null);
        Debug.Assert(f.Exists);

        m_file = f;
        m_doc = GetHtmlDocument(f);
        m_dataIsland = GetXmlDataIsland();
        m_TopicType = GetTopicType();
    }
    catch (Exception e)
    {
        // f itself may be the null that caused the failure; do not dereference it blindly
        // while building the error message.
        string fileName = f != null ? f.Name : "(null)";
        throw new ArgumentException(string.Format("Could not open {0}", fileName), "f", e);
    }
}
/// <summary>
/// Rewrites the <c>src</c> of every image that points into a "_files/" folder so that it
/// refers to the secure site, and returns the document rebuilt as
/// <c>&lt;html&gt;</c> + first head element + body + <c>&lt;/html&gt;</c>.
/// </summary>
/// <param name="htmlToParse">Markup to fix.</param>
/// <returns>The rewritten markup, or <paramref name="htmlToParse"/> unchanged when an error occurs.</returns>
string FixHTML(string htmlToParse)
{
    try
    {
        // Parse the incoming markup into a throw-away document.
        IHTMLDocument2 htmlDocument = new HTMLDocumentClass();
        htmlDocument.write(htmlToParse);
        htmlDocument.close();

        IHTMLElementCollection allElements = (IHTMLElementCollection)htmlDocument.body.all;
        IHTMLElementCollection allImages = (IHTMLElementCollection)allElements.tags("img");
        IHTMLElementCollection allhead = (IHTMLElementCollection)htmlDocument.all.tags("head");

        string MyDoc = "<html>";
        foreach (IHTMLElement HeadElement in allhead)
        {
            // Only the first head element is kept.
            MyDoc += HeadElement.outerHTML;
            break;
        }

        foreach (IHTMLElement element in allImages)
        {
            // An <img> without a src used to throw and abort the whole rewrite; skip it instead.
            string src = element.getAttribute("src", 0) as string;
            if (src == null)
            {
                continue;
            }

            int Pos = src.IndexOf("_files/", StringComparison.Ordinal);
            if (Pos == -1)
            {
                continue;
            }

            // Every host-specific branch appended the same authority, so they are collapsed.
            // NOTE(review): Pos + 1 drops only the leading underscore ("files/...") - confirm intended.
            string URL = GetSecurePDF() + Request.Url.Authority + GetWebDirectory() + src.Substring(Pos + 1);
            element.setAttribute("src", URL, 0);
        }

        // Append the (now rewritten) body and close the document.
        MyDoc += htmlDocument.body.outerHTML;
        MyDoc += "</html>";
        return MyDoc;
    }
    catch (Exception e)
    {
        logger.Error("Exception in HTML fix: " + e.Message);
    }

    return htmlToParse;
}
/// <summary>
/// Opens the HTML document at the specified location.
/// </summary>
/// <param name="f">The HTML file.</param>
/// <returns>The parsed HTML document.</returns>
/// <exception cref="Exception">
/// The document's body was still null 10 seconds after loading started. Any error raised
/// while loading is traced and rethrown.
/// </exception>
private HTMLDocumentClass GetHtmlDocument(FileInfo f)
{
    HTMLDocumentClass doc = null;
    try
    {
        doc = new HTMLDocumentClass();
        UCOMIPersistFile persistFile = (UCOMIPersistFile)doc;
        persistFile.Load(f.FullName, 0);

        // As a precaution to ensure that the HTML is fully parsed, spin here (for a maximum
        // of 10 seconds) until the body property is non-null.
        int start = Environment.TickCount;
        while (doc.body == null)
        {
            if (Environment.TickCount - start > 10000)
            {
                string message = string.Format("The document {0} timed out while loading", f.Name);
                Trace.WriteLine(message);
                throw new Exception(message);
            }
        }
    }
    catch (Exception e)
    {
        Trace.WriteLine(string.Format("An error occured opening file {0}, {1}", f.Name, e.Message));

        // Rethrow with "throw;" so the original stack trace is preserved.
        throw;
    }

    return doc;
}
/// <summary>
/// Loads <paramref name="url"/> into an MSHTML document and pumps the message loop until it
/// reports "complete" or <c>_timeout</c> seconds have passed.
/// </summary>
/// <param name="url">Address to load.</param>
/// <returns>The loaded document. It may still be loading if the timeout was reached.</returns>
public IHTMLDocument2 GetDocument(string url)
{
    HTMLDocumentClass host = new HTMLDocumentClass();
    IHTMLDocument2 hostAsDocument2 = (IHTMLDocument2)host;
    IHTMLDocument4 hostAsDocument4 = (IHTMLDocument4)host;

    // Key ingredient: some HTML has to be put into the DOM before it is used,
    // even though the DOM itself is not accessed.
    hostAsDocument2.write("<html></html>");
    hostAsDocument2.close();

    IHTMLDocument2 loaded = hostAsDocument4.createDocumentFromUrl(url, "null");
    loaded.parentWindow.onerror = this;

    // Wait for loading, giving up once the deadline has passed.
    DateTime deadline = DateTime.Now.AddSeconds(_timeout);
    while (loaded.readyState != "complete")
    {
        System.Windows.Forms.Application.DoEvents();

        if (DateTime.Now > deadline)
        {
            break;
        }
    }

    return loaded;
}
/// <summary>
/// Loads the document at <paramref name="uri"/> through a dummy MSHTML document and waits
/// up to 30 seconds (300 polls of 100 ms) for it to report "complete".
/// </summary>
/// <param name="uri">Address of the document to open.</param>
/// <returns>The loaded document. It may still be loading if the wait ran out.</returns>
private static IHTMLDocument2 OpenHtml(string uri)
{
    // This is a dummy document used to open the real document we're after.
    HTMLDocumentClass doc = new HTMLDocumentClass();

    // The explicit interface references are needed because the dispatch-interface
    // methods seem to fail.
    IHTMLDocument2 iDoc2a = doc;
    IHTMLDocument4 iDoc4 = doc;

    // Some HTML has to be put into the dummy document first.
    iDoc2a.writeln("<html></html>");
    iDoc2a.close();

    // Call through the IHTMLDocument4 reference obtained above; it was previously
    // acquired but never used, and the call went through the class instead.
    IHTMLDocument2 htmlDoc = iDoc4.createDocumentFromUrl(uri, "null");

    for (uint i = 0; i < 300 && htmlDoc.readyState != "complete"; i++)
    {
        Thread.Sleep(100);
    }

    return htmlDoc;
}
/// <summary>
/// Rewrites the <c>src</c> of every image that points into a "_files/" folder so that it
/// refers to the secure site (optionally substituting the form-specific image name), then
/// returns the body markup after a few fixed text replacements.
/// </summary>
/// <param name="htmlToParse">Markup to fix.</param>
/// <returns>The rewritten body markup, or an empty string when an error occurs.</returns>
string FixHTML(string htmlToParse)
{
    try
    {
        // Parse the incoming markup into a throw-away document.
        IHTMLDocument2 htmlDocument = new HTMLDocumentClass();
        htmlDocument.write(htmlToParse);
        htmlDocument.close();

        IHTMLElementCollection allElements = (IHTMLElementCollection)htmlDocument.body.all;
        IHTMLElementCollection allImages = (IHTMLElementCollection)allElements.tags("img");

        // TODO: the style-sheet clean-up ("a:link," / "span.MsoHyperlink") was never wired to
        // anything - its result was computed and discarded - so that loop has been removed.

        foreach (IHTMLElement element in allImages)
        {
            // An <img> without a src used to throw and abort the whole rewrite; skip it instead.
            string src = element.getAttribute("src", 0) as string;
            if (src == null)
            {
                continue;
            }

            int Pos = src.IndexOf("_files/", StringComparison.Ordinal);
            if (Pos == -1)
            {
                continue;
            }

            // Every host-specific branch appended the same authority, so they are collapsed.
            string URL = GetSecurePDF() + Request.Url.Authority;

            string ImgName = GetImageName(m_FormName);
            if (ImgName.Length > 0)
            {
                // The upper-cased src makes the placeholder name match regardless of case.
                src = src.ToUpper().Replace("IMAGE001.JPG", ImgName);
            }

            // NOTE(review): Pos + 1 drops only the leading underscore ("files/...") - confirm intended.
            URL += GetWebDirectory() + src.Substring(Pos + 1);
            element.setAttribute("src", URL, 0);
        }

        // Return the body content with the fixed replacements applied.
        string MyoutString = htmlDocument.body.innerHTML;
        MyoutString = MyoutString.Replace("Download to calendar", "");
        MyoutString = MyoutString.Replace("https", "http");

        if (MyoutString.IndexOf("image002.jpg") != -1)
        {
            // Order matters: image001 must be renamed before image002 takes over its name.
            MyoutString = MyoutString.Replace("image001.jpg", "ErnstYoung.JPG");
            MyoutString = MyoutString.Replace("image002.jpg", "image001.jpg");
        }

        return MyoutString;
    }
    catch (Exception e)
    {
        logger.Error("Exception in HTML fix: " + e.Message);
    }

    return "";
}
/// <summary>
/// Rebuilds the Solr index for all themes: clears the previously generated XML files and
/// the existing index, then writes one Solr "add" document per resource (id, course name,
/// plain-text content), posts it to Solr and finally commits.
/// </summary>
/// <remarks>
/// When Solr cannot be reached, the Tomcat start script is launched and the current phase
/// is abandoned. Other errors are swallowed, as before.
/// </remarks>
public void Index()
{
    // Theme directories live under Assets; only names that parse as an integer are used.
    string searchPath = Path.Combine(HttpRuntime.AppDomainAppPath, "Assets");
    List<int> ids = new List<int>();
    foreach (string dir in Directory.GetDirectories(searchPath, "*"))
    {
        int directoryId;
        if (int.TryParse(new DirectoryInfo(dir).Name, out directoryId))
        {
            ids.Add(directoryId);
        }
    }

    var stages = ServerModel.DB.Load<TblResources>("CourseRef", ids);
    string xmlindex = Path.Combine(HttpRuntime.AppDomainAppPath, "tomcat-solr\\apache-solr-1.4.0\\Iudico\\");

    try
    {
        // Delete the XML files created by the previous run.
        foreach (string filePath in Directory.GetFiles(xmlindex, "*.xml"))
        {
            File.Delete(filePath);
        }

        // Delete the existing Solr index, then commit.
        HttpWebRequest request = WebRequest.Create("http://localhost:8080/apache-solr-1.4.0/update?stream.body=%3Cdelete%3E%3Cquery%3Ename:*%3C/query%3E%3C/delete%3E") as HttpWebRequest;
        using (HttpWebResponse response = request.GetResponse() as HttpWebResponse)
        {
        }

        HttpWebRequest requestCommit = WebRequest.Create("http://localhost:8080/apache-solr-1.4.0/update?stream.body=%3Ccommit/%3E") as HttpWebRequest;
        using (HttpWebResponse response = requestCommit.GetResponse() as HttpWebResponse)
        {
        }
    }
    catch (Exception ex)
    {
        StartTomcatIfSolrUnreachable(ex);
    }

    // Create the new index.
    int i = 0;
    try
    {
        foreach (TblResources res in stages)
        {
            i++;
            TimeSpan now = DateTime.Now.TimeOfDay;
            string filename = "XML" + i.ToString() + now.Hours.ToString() + now.Minutes.ToString() + now.Seconds.ToString() + ".xml";
            string filePathXml = xmlindex + filename;

            // Write the XML with the id, name and content of the theme.
            using (XmlTextWriter writer = new XmlTextWriter(filePathXml, null))
            {
                writer.WriteStartElement("add");
                writer.WriteStartElement("doc");

                WriteSolrField(writer, "id", res.CourseRef.ToString());

                var stages2 = ServerModel.DB.Load<TblCourses>(res.CourseRef);
                WriteSolrField(writer, "name", stages2.Name);

                string filePath = Path.Combine(CourseManager.GetCoursePath(res.CourseRef), res.Href.ToString());
                string s;
                using (FileStream file = new FileStream(filePath, FileMode.OpenOrCreate, FileAccess.Read))
                using (StreamReader sr = new StreamReader(file))
                {
                    s = sr.ReadToEnd();
                }

                // Let MSHTML strip the markup so only the text is indexed.
                IHTMLDocument2 doc = new HTMLDocumentClass();
                doc.write(new object[] { s });
                doc.close();
                WriteSolrField(writer, "content", doc.body.innerText);

                writer.WriteEndElement(); // doc
                writer.WriteEndElement(); // add
                writer.Flush();
            }

            // Index the XML by POSTing it to Solr.
            Encoding xmlEncoding = Encoding.UTF8;
            string ss;
            using (FileStream fileXML = new FileStream(filePathXml, FileMode.OpenOrCreate, FileAccess.Read))
            using (StreamReader sr1 = new StreamReader(fileXML))
            {
                ss = sr1.ReadToEnd();
            }

            HttpWebRequest requestIndex = WebRequest.Create("http://localhost:8080/apache-solr-1.4.0/update") as HttpWebRequest;
            requestIndex.Method = "POST";
            requestIndex.ContentType = "text/xml; charset=utf-8";
            requestIndex.ProtocolVersion = HttpVersion.Version10;
            requestIndex.KeepAlive = false;

            byte[] data = xmlEncoding.GetBytes(ss);
            // Content-Length is a byte count. Using the character count broke every
            // document containing non-ASCII text.
            requestIndex.ContentLength = data.Length;

            using (var postParams = requestIndex.GetRequestStream())
            {
                postParams.Write(data, 0, data.Length);
            }

            // Read the response only after the request stream has been closed.
            using (var response = requestIndex.GetResponse())
            using (var rStream = response.GetResponseStream())
            {
                ReadFully(rStream);
            }
        }

        HttpWebRequest requestCommit1 = WebRequest.Create("http://localhost:8080/apache-solr-1.4.0/update?stream.body=%3Ccommit/%3E") as HttpWebRequest;
        using (HttpWebResponse response = requestCommit1.GetResponse() as HttpWebResponse)
        {
        }
    }
    catch (Exception ex)
    {
        StartTomcatIfSolrUnreachable(ex);
    }
}

// Writes one <field name="...">value</field> element of a Solr "add" document.
private static void WriteSolrField(XmlTextWriter writer, string fieldName, string value)
{
    writer.WriteStartElement("field");
    writer.WriteStartAttribute("name");
    writer.WriteString(fieldName);
    writer.WriteEndAttribute();
    writer.WriteString(value);
    writer.WriteEndElement();
}

// Launches the Tomcat start script when the failure means Solr could not be reached.
private static void StartTomcatIfSolrUnreachable(Exception ex)
{
    // Check the status code as well as the message: the message text is localized.
    WebException webException = ex as WebException;
    bool unreachable =
        (webException != null && webException.Status == WebExceptionStatus.ConnectFailure)
        || ex.Message == "Unable to connect to the remote server";

    if (!unreachable)
    {
        return;
    }

    System.Diagnostics.Process procTomcat = new System.Diagnostics.Process();
    procTomcat.EnableRaisingEvents = false;
    procTomcat.StartInfo.FileName = Path.Combine(HttpRuntime.AppDomainAppPath, "tomcat-solr\\tomcatStart.bat");
    procTomcat.Start();
}
/// <summary>
/// Loads the page behind every row of the supplied parameter's table into an MSHTML
/// document and passes the resulting body markup to <c>GatherPriceV2</c>.
/// </summary>
/// <param name="x">
/// Expected to be a <c>SpiderParameter</c>; when it is anything else (or null) the method does nothing.
/// </param>
/// <exception cref="TimeoutException">
/// A document did not report the "complete" ready state within the load timeout.
/// </exception>
private void DoSomethingWithParameter(object x)
{
    // Maximum time, in milliseconds, a single document may take to load.
    const int LoadTimeoutMs = 1000000;
    // Pause between readyState polls so the wait loop does not spin a CPU core.
    const int PollIntervalMs = 50;

    var parameter = x as SpiderParameter;
    if (parameter == null)
    {
        return;
    }

    foreach (DataRow dr in parameter.DT.Rows)
    {
        // Write an empty string into the host document and close it before asking it
        // to create a document from a URL.
        HTMLDocumentClass hc = new HTMLDocumentClass();
        IHTMLDocument2 doc2 = hc;
        doc2.write("");
        doc2.close();
        IHTMLDocument4 doc4 = hc;

        // rowId is not used further; the conversion is kept so that a malformed RowId
        // still fails here exactly as it did before.
        var rowId = Convert.ToInt32(dr["RowId"]);
        var url = Convert.ToString(dr["Url"]);
        var productId = Convert.ToString(dr["ProductId"]);

        IHTMLDocument2 doc = doc4.createDocumentFromUrl(url, "null");

        int start = Environment.TickCount;
        while (doc.readyState != "complete")
        {
            if (Environment.TickCount - start > LoadTimeoutMs)
            {
                throw new TimeoutException("The document timed out while loading");
            }

            System.Threading.Thread.Sleep(PollIntervalMs);
        }

        GatherPriceV2(productId, url, doc.body.outerHTML);

        doc.clear();
        doc.close();
    }
}
/// <summary>
/// Manual check: creates a document from http://www.baidu.com via MSHTML, maps the URL
/// to its security zone, and pumps window messages until the document is "complete".
/// </summary>
/// <remarks>
/// Background reading kept from the original notes:
/// http://social.msdn.microsoft.com/Forums/zh-CN/ieextensiondevelopment/thread/d2ce2000-580d-452a-950f-e29fcd11a35f
/// https://code.google.com/p/csexwb2/
/// </remarks>
public void Test()
{
    const string TargetUrl = "http://www.baidu.com";

    // Host document: an empty string is written and the document closed before it is
    // used to create the document for the target URL.
    HTMLDocumentClass host = new HTMLDocumentClass();
    IHTMLDocument2 hostWriter = host;
    hostWriter.write("");
    hostWriter.close();
    IHTMLDocument4 factory = host;

    IHTMLDocument2 loaded = factory.createDocumentFromUrl(TargetUrl, "null");

    MapUrlToZone(new Uri(TargetUrl));

    int startedAt = Environment.TickCount;
    while (true)
    {
        if (loaded.readyState == "complete")
        {
            break;
        }

        // Keep the message loop running while the document loads.
        Application.DoEvents();

        if (Environment.TickCount - startedAt > 1000000)
        {
            throw new Exception("The document timed out while loading");
        }
    }

    loaded.clear();
    loaded.close();
}
/// <summary>
/// Returns the inner HTML of the body of the given HTML text, optionally running the
/// text through <c>TidyParser</c> first.
/// </summary>
/// <param name="htmlDocument">Full HTML text to extract the body from.</param>
/// <param name="outputXhtml">Passed to the <c>TidyParser</c> constructor when tidy is enabled.</param>
/// <returns>The body markup with every "about:blank" and "about:" occurrence removed.</returns>
private string GetBodyContent(string htmlDocument, bool outputXhtml)
{
    // Tidy is applied only when the application settings ask for it.
    string cleanedHtml = AppSettings.UseTidyOverOutput
        ? new TidyParser(UI, outputXhtml).ParseString(htmlDocument)
        : htmlDocument;

    // Let MSHTML parse the text so the body can be read back from the DOM.
    IHTMLDocument2 parsedDocument = (IHTMLDocument2)new HTMLDocumentClass();
    parsedDocument.write(new object[] { cleanedHtml });

    // NOTE(review): the "about:" prefixes are presumably added by MSHTML to relative
    // links of a document that has no location - confirm.
    string bodyMarkup = parsedDocument.body.innerHTML;
    string withoutBlank = bodyMarkup.Replace("about:blank", "");
    return withoutBlank.Replace("about:", "");
}
/// <summary>
/// Parses the current content into an MSHTML document, lets the caller manipulate it,
/// then stores the resulting markup back into the model and into the temp file.
/// </summary>
/// <param name="interop">Callback that receives the parsed document and may modify it.</param>
/// <returns>This instance, so calls can be chained.</returns>
public DocumentModel Transform(Action<HTMLDocumentClass> interop)
{
    HTMLDocumentClass dom = new HTMLDocumentClass();
    object[] markup = { this.content };
    ((IHTMLDocument2)dom).write(markup);

    interop(dom);

    // Capture the document element's markup after the callback has run.
    this.content = dom.documentElement.outerHTML;

    string outputPath = Path.Combine(this.workingDirectory, this.tempFileName);
    File.WriteAllText(outputPath, this.content);

    return this;
}
/// <summary>
/// Builds an MSHTML document from a string of HTML.
/// </summary>
/// <param name="html">HTML text to write into the new document.</param>
/// <returns>The document after the text has been written and the document closed.</returns>
private HTMLDocumentClass getDocumentFromHTML(string html)
{
    HTMLDocumentClass parsed = new HTMLDocumentClass();

    // write/close are reached through the IHTMLDocument2 view of the same object.
    IHTMLDocument2 writer = parsed;
    writer.write(new object[] { html });
    writer.close();

    return parsed;
}
/// <summary>
/// Writes the given HTML text into a fresh MSHTML document and returns that document.
/// </summary>
/// <param name="html">HTML text to load.</param>
/// <returns>The populated, closed document.</returns>
private HTMLDocumentClass _getDocumentFromHTML(string html)
{
    // An earlier revision rewrote "<SCRIPT" to "<SC" before parsing; that step is disabled.
    HTMLDocumentClass result = new HTMLDocumentClass();
    IHTMLDocument2 documentWriter = (IHTMLDocument2)result;

    object[] pageText = new object[] { html };
    documentWriter.write(pageText);
    documentWriter.close();

    return result;
}
/// <summary>
/// Downloads the HTML at <paramref name="url"/> and loads it into an MSHTML document.
/// </summary>
/// <param name="url">Address passed to <c>DownloadString</c>.</param>
/// <returns>
/// The loaded document, or <c>null</c> when downloading or loading throws.
/// </returns>
public static HTMLDocument LoadDocument(string url)
{
    try
    {
        string htmlContent = DownloadString(url);

        // Load HTML with injected scripts
        object[] oPageText = { htmlContent };
        HTMLDocument doc = new HTMLDocumentClass();
        IHTMLDocument2 doc2 = (IHTMLDocument2)doc;
        doc2.write(oPageText);

        // Wait until MSHTML exposes a body element.
        // NOTE(review): this wait has no upper bound; if the body never appears the
        // call never returns. Consider adding a timeout.
        while (doc2.body == null)
        {
            Thread.Sleep(5000);
        }

        return doc;
    }
    catch (Exception)
    {
        // Deliberate best-effort: any failure is reported to the caller as null.
        // (Logging was disabled in the original: logger.Error(e).)
        return null;
    }
}
/// <summary>
/// Opens the source files.
/// If the first source file is a Word document, it is converted to HTML first;
/// the resulting HTML file is then loaded and copied into <c>iDoc</c>.
/// </summary>
private void OpenSourceFiles()
{
    MSWord msWord = null;
    try
    {
        string archivoFinal = (string)Project.SourceFiles[0];
        esWord = MSWord.ItIsWordDocument(archivoFinal);
        dirHtml = null;

        // If it is a Word document, convert it to filtered HTML
        if (esWord)
        {
            msWord = new MSWord();
            archivoFinal = ConvertWordSourceFiles(msWord);
            // Be sure we have closed word, to avoid overlapping between the html read
            // and the reading from chmprocessor:
            msWord.Dispose();
            msWord = null;
        }
        else
        {
            // There is a single source HTML file.
            MainSourceFile = (string)Project.SourceFiles[0];
        }

        if (CancellRequested())
            return;

        if (AppSettings.UseTidyOverInput)
            new TidyParser(UI).Parse(archivoFinal);

        if (CancellRequested())
            return;

        // Prepare loading:
        HTMLDocumentClass docClass = new HTMLDocumentClass();
        IPersistStreamInit ips = (IPersistStreamInit)docClass;
        ips.InitNew();

        // Create a timer, to be sure that the HTML file load will not hang (it sometimes happens)
        timerTimeout = new System.Windows.Forms.Timer();
        timerTimeout.Tick += new System.EventHandler(this.timer_Tick);
        timerTimeout.Interval = 60 * 1000;     // 1 minute
        timerTimeout.Enabled = true;

        IHTMLDocument2 docLoader = null;
        try
        {
            // Load the file:
            docLoader = (mshtml.IHTMLDocument2)docClass.createDocumentFromUrl(archivoFinal, null);
            System.Windows.Forms.Application.DoEvents();
            System.Threading.Thread.Sleep(1000);

            String currentStatus = docLoader.readyState;
            log("Reading file " + archivoFinal + ". Status: " + currentStatus, 2);

            // Poll until the document is complete or the timeout timer has been switched off.
            while (currentStatus != "complete" && timerTimeout.Enabled)
            {
                System.Windows.Forms.Application.DoEvents();
                System.Threading.Thread.Sleep(500);

                String newStatus = docLoader.readyState;
                if (newStatus != currentStatus)
                {
                    log("Status: " + newStatus, 2);
                    if (currentStatus == "interactive" && newStatus == "uninitialized")
                    {
                        // Known loader glitch: the document fell back from "interactive"
                        // to "uninitialized". Try to reload the file:
                        log("Warning. Something wrong happens loading the file. Trying to reopen " + archivoFinal, 2);
                        docClass = new HTMLDocumentClass();
                        ips = (IPersistStreamInit)docClass;
                        ips.InitNew();
                        docLoader = (mshtml.IHTMLDocument2)docClass.createDocumentFromUrl(archivoFinal, null);
                        newStatus = docLoader.readyState;
                        log("Status: " + newStatus, 2);
                    }
                    currentStatus = newStatus;
                }
            }

            if (!timerTimeout.Enabled)
                log("Warning: time to load file expired.", 1);
        }
        finally
        {
            // Stop the timeout timer even when loading throws, so it is never left running.
            timerTimeout.Enabled = false;
        }

        // Get a copy of the document:
        HTMLDocumentClass newDocClass = new HTMLDocumentClass();
        iDoc = (IHTMLDocument2)newDocClass;
        object[] txtHtml = { ((IHTMLDocument3)docLoader).documentElement.outerHTML };
        iDoc.writeln(txtHtml);

        try
        {
            // Needed, otherwise some characters will not be displayed well.
            iDoc.charset = docLoader.charset;
        }
        catch (Exception ex)
        {
            log("Warning: Cannot set the charset \"" + docLoader.charset + "\" to the html document. Reason:" + ex.Message, 1);
            log(ex);
        }
    }
    finally
    {
        if (msWord != null)
        {
            msWord.Dispose();
            msWord = null;
        }
    }
}
/// <summary>
/// Creates an MSHTML document, substitutes the script blocks found in
/// <paramref name="data"/> with the supplied scripts, and writes the result into the document.
/// </summary>
/// <param name="scripts">
/// Replacement scripts; passed through <c>CommentPopups</c> and then paired by position
/// with the matches of the "RemoveScripts" expression.
/// </param>
/// <param name="data">HTML text to load.</param>
/// <returns>The document after the rewritten text has been written and the document closed.</returns>
public IHTMLDocument2 GetDocumentWithData(HtmlScriptCollection scripts, string data)
{
    HTMLDocumentClass oDoc = new HTMLDocumentClass();
    IHTMLDocument2 target = (IHTMLDocument2)oDoc;

    // This is the key ingredient - some HTML has to be put in the DOM before using it,
    // even though the DOM is not being accessed yet.
    target.write("<html></html>");
    target.close();
    target.parentWindow.onerror = this;

    Regex scriptFinder = (Regex)_htmlParser.GetRegExpParserScripts["RemoveScripts"];
    MatchCollection found = scriptFinder.Matches(data);
    StringBuilder dataBuffer = new StringBuilder(data);
    scripts = CommentPopups(scripts);

    // Replace each matched script block with the script at the same position.
    int position = 0;
    foreach (Match match in found)
    {
        HtmlScript scriptTag = scripts[position];

        StringBuilder replacement = new StringBuilder("<script");
        if (scriptTag.Language.Length != 0)
        {
            replacement.AppendFormat(" language=\"{0}\"", scriptTag.Language);
        }
        replacement.Append(">").Append(scriptTag.Text).Append("</script>");

        dataBuffer.Replace(match.Value, replacement.ToString());
        position++;
    }

    // Write the rewritten text.
    target.write(dataBuffer.ToString());
    target.close();

    return target;
}