/// <summary>
/// Parses an HTML string into an in-memory MSHTML document.
/// </summary>
/// <param name="html">Markup to write into the new document.</param>
/// <returns>The document after the markup has been written and the write stream closed.</returns>
private static IHTMLDocument2 HTMLToDom(string html)
        {
            object[] markup = { html };
            IHTMLDocument2 parsed = new HTMLDocumentClass();

            parsed.write(markup);
            parsed.close();
            return parsed;
        }
Beispiel #2
0
        /// <summary>
        /// Rewrites every image in the given HTML so that its original <c>src</c> value is moved
        /// to a <c>data-original</c> attribute and <c>src</c> is replaced by the value returned
        /// from <c>Common.HtmlHelper.LoadImg200px()</c>.
        /// </summary>
        /// <param name="html">Markup to process.</param>
        /// <returns>The inner HTML of the document body after the rewrite.</returns>
        public static string SetImgLoadingHTML(string html)
        {
            IHTMLDocument2 doc = new HTMLDocumentClass();

            doc.write(new object[] { html });
            doc.close();

            foreach (IHTMLImgElement image in doc.images)
            {
                IHTMLElement element = (IHTMLElement)image;

                // Keep the real source in data-original before overwriting src.
                string src = (string)element.getAttribute("src", 2);
                element.setAttribute("data-original", src, 1);
                element.setAttribute("src", Common.HtmlHelper.LoadImg200px(), 1);
            }
            return(doc.body.innerHTML);
        }
 /// <summary>
 /// Copies each child node of <paramref name="xHead"/> into <paramref name="hHead"/>
 /// by calling <c>CopyXmlNodeToHtml</c> once per child, in document order.
 /// </summary>
 /// <param name="hDoc">HTML document passed through to <c>CopyXmlNodeToHtml</c>.</param>
 /// <param name="hHead">HTML node passed as the copy target.</param>
 /// <param name="xHead">XML node whose children are copied.</param>
 public static void MergeXmlHeadToHtml(HTMLDocumentClass hDoc, IHTMLDOMNode hHead, XmlNode xHead)
 {
     foreach (XmlNode child in xHead.ChildNodes)
     {
         CopyXmlNodeToHtml(hDoc, hHead, child);
     }
 }
Beispiel #4
0
        /// <summary>
        /// Depth-first search through the iframes of <paramref name="document"/> for the frame whose
        /// location hash (via <c>GetUrlHash</c>) equals <paramref name="containerId"/>.
        /// </summary>
        /// <param name="document">Document whose iframe elements are inspected.</param>
        /// <param name="containerId">Hash value being searched for.</param>
        /// <param name="parentIds">
        /// On success, replaced with a copy of the incoming list extended by the hashes of the
        /// frames traversed on the way to the match. Left untouched on failure.
        /// </param>
        /// <param name="webBrowser">On success, set to the matching frame's browser instance.</param>
        /// <returns><c>true</c> when a matching frame was found; otherwise <c>false</c>.</returns>
        private bool GetContainerIdHierarchy(HTMLDocumentClass document, int containerId, ref List <int> parentIds, ref InternetExplorer webBrowser)
        {
            foreach (HTMLIFrameClass iframe in document.getElementsByTagName("iframe"))
            {
                // Work on a copy so a failed branch does not pollute the caller's list.
                List <int>       candidatePath = new List <int>(parentIds);
                InternetExplorer frameBrowser  = GetInternetExplorer(iframe.contentWindow);

                if (GetUrlHash(internetExplorerToLocation[frameBrowser]) == containerId)
                {
                    webBrowser = frameBrowser;
                    parentIds  = candidatePath;
                    return true;
                }

                // Not this frame: record it as an ancestor and search its own iframes.
                candidatePath.Add(GetUrlHash(internetExplorerToLocation[frameBrowser]));
                if (GetContainerIdHierarchy((HTMLDocumentClass)frameBrowser.Document, containerId, ref candidatePath, ref webBrowser))
                {
                    parentIds = candidatePath;
                    return true;
                }
            }

            return false;
        }
Beispiel #5
0
        /// <summary>
        /// Reads the client and scroll dimensions for a document.
        /// When <paramref name="doc"/> is an <c>HTMLDocumentClass</c>, the sizes come from its
        /// <c>documentElement</c>; otherwise the body element is used and the client size is taken
        /// from the window rectangle of <paramref name="hwnd"/>.
        /// </summary>
        /// <param name="hwnd">Window handle queried through <c>WindowUtil.GetWindowRect</c> in the fallback path.</param>
        /// <param name="doc">Document to measure.</param>
        /// <param name="clientWidth">Receives the client width when an element was resolved.</param>
        /// <param name="clientHeight">Receives the client height when an element was resolved.</param>
        /// <param name="scrollWidth">Receives the element's scroll width when an element was resolved.</param>
        /// <param name="scrollHeight">Receives the element's scroll height when an element was resolved.</param>
        /// <returns>The element used for measuring, or <c>null</c> when none could be resolved (the ref values are then left unchanged).</returns>
        public static mshtml.IHTMLElement2 GetWindowWidthAndHeight(IntPtr hwnd, mshtml.IHTMLDocument2 doc, ref int clientWidth, ref int clientHeight, ref int scrollWidth, ref int scrollHeight)
        {
            mshtml.IHTMLElement2 measured;
            HTMLDocumentClass    concreteDoc = doc as HTMLDocumentClass;

            if (concreteDoc == null)
            {
                // Fallback: measure the body and take the client size from the native window.
                Rect windowRect = new Rect();
                measured = doc.body as mshtml.IHTMLElement2;
                WindowUtil.GetWindowRect(hwnd, out windowRect);
                if (measured != null)
                {
                    clientWidth  = windowRect.Right - windowRect.Left;
                    clientHeight = windowRect.Bottom - windowRect.Top;
                }
            }
            else
            {
                measured = concreteDoc.documentElement as mshtml.IHTMLElement2;
                if (measured != null)
                {
                    clientWidth  = measured.clientWidth;
                    clientHeight = measured.clientHeight;
                }
            }

            if (measured == null)
            {
                return null;
            }

            scrollWidth  = measured.scrollWidth;
            scrollHeight = measured.scrollHeight;
            return measured;
        }
Beispiel #6
0
        /// <summary>
        /// Navigates the wrapped Internet Explorer instance to <paramref name="url"/> and blocks
        /// until either the browser or its document reports the "complete" ready state.
        /// </summary>
        /// <param name="url">Address to navigate to.</param>
        /// <exception cref="TimeoutException">
        /// Thrown when the page has not completed after the maximum number of polling attempts.
        /// Attempts where the document could not be obtained count toward that limit.
        /// </exception>
        public void OpenInternetExplorer(string url)
        {
            const int maxPolls = 2000;

            // Set the window topmost and then not-topmost again
            // (presumably to raise it without pinning it on top — confirm).
            Win32.SetWindowPos(new IntPtr(IE.HWND), (IntPtr)Win32.hWndInsertAfter.HWND_TOPMOST, 0, 0, 0, 0, Win32.TOPMOST_FLAGS);

            Win32.SetWindowPos(new IntPtr(IE.HWND), (IntPtr)Win32.hWndInsertAfter.HWND_NOTTOPMOST, 0, 0, 0, 0, Win32.TOPMOST_FLAGS);

            IE.Navigate(url);
            int loopCount = 0;

            while (IE.ReadyState != tagREADYSTATE.READYSTATE_COMPLETE)
            {
                // Check the budget first so that the failure path below is bounded as well.
                if (loopCount > maxPolls)
                {
                    throw new TimeoutException("Get " + url + " timeout!");
                }
                loopCount++;

                try
                {
                    dom = (HTMLDocumentClass)IE.Document;
                }
                catch (Exception)
                {
                    // The document is not available (yet); back off and retry.
                    Thread.Sleep(1000);
                    continue;
                }

                // The document or its ready state may still be null while the page is loading.
                if (dom != null && "complete".Equals(dom.readyState))
                {
                    break;
                }
                Thread.Sleep(500);
            }
        }
Beispiel #7
0
        /// <summary>
        /// DocumentComplete handler driving a small step-by-step automation, selected by <c>Task</c>.
        /// Step 1 fills the input named "q", clicks the input named "btnI" and advances <c>Task</c>.
        /// If either input is missing from the loaded document, the step is left pending.
        /// </summary>
        private void axWebBrowser1_DocumentComplete(object sender, AxSHDocVw.DWebBrowserEvents2_DocumentCompleteEvent e)
        {
            switch (Task)
            {
            case 1:
            {
                // Use the browser's document directly; no throw-away HTMLDocumentClass is needed.
                HTMLDocument myDoc = (HTMLDocument)axWebBrowser1.Document;

                // a quick look at the google html source reveals:
                // <INPUT maxLength="256" size="55" name="q">
                //
                HTMLInputElement otxtSearchBox = (HTMLInputElement)myDoc.all.item("q", 0);

                // google html source for the I'm Feeling Lucky Button:
                // <INPUT type=submit value="I'm Feeling Lucky" name=btnI>
                //
                HTMLInputElement btnSearch = (HTMLInputElement)myDoc.all.item("btnI", 0);

                // Not the expected page: stay on this step and wait for the next event.
                if (otxtSearchBox == null || btnSearch == null)
                {
                    break;
                }

                otxtSearchBox.value = "intel corp";
                btnSearch.click();

                Task++;
                break;
            }

            case 2:

                // continuation of automated tasks...
                break;
            }
        }
Beispiel #8
0
        /// <summary>
        /// Injects a script element into each HEAD element of the current document. The script
        /// installs a <c>window.onerror</c> handler that returns true, with the intent of
        /// suppressing script-error popups.
        /// Known limitation (noted by the original author): some script errors still appear.
        /// Does nothing when the browser has no HTML document.
        /// </summary>
        private void InjectDisableScript()
        {
            string disableScriptError = @"function noError() {return true;} window.onerror = noError;";

            // Go through the interface only: it offers both createElement and
            // getElementsByTagName, so a second cast to HTMLDocumentClass is not needed.
            HTMLDocument doc = webBrowser.Document as HTMLDocument;

            if (doc == null)
            {
                return;
            }

            IHTMLScriptElement scriptErrorSuppressed = (IHTMLScriptElement)doc.createElement("SCRIPT");

            scriptErrorSuppressed.type = "text/javascript";
            scriptErrorSuppressed.text = disableScriptError;

            IHTMLElementCollection nodes = doc.getElementsByTagName("head");

            foreach (IHTMLElement elem in nodes)
            {
                try
                {
                    IHTMLDOMNode head = (IHTMLDOMNode)elem;
                    head.appendChild((IHTMLDOMNode)scriptErrorSuppressed);
                }
                catch (Exception)
                {
                    // Best effort: failing to inject the suppressor must not break the caller.
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Obtains the HTML document behind a window handle by sending the registered
        /// "WM_HTML_GETOBJECT" message and converting the result with <c>ObjectFromLresult</c>.
        /// </summary>
        /// <param name="internetExplorerServerHandle">Handle of the window the message is sent to.</param>
        /// <returns>
        /// The document, or <c>null</c> when the message could not be registered, the window
        /// returned 0, or <c>ObjectFromLresult</c> reported a non-zero result.
        /// </returns>
        private static HTMLDocumentClass GetHtmlDocumentClassFromInternetExplorerServerHandle(IntPtr internetExplorerServerHandle)
        {
            int getObjectMessage = WinApi.User32.RegisterWindowMessage("WM_HTML_GETOBJECT");
            if (getObjectMessage == 0)
            {
                return null;
            }

            int messageResult;
            WinApi.User32.SendMessageTimeout(internetExplorerServerHandle, getObjectMessage, 0, 0, NativeMethods.SMTO_ABORTIFHUNG, kHtmlGetObjectTimeout, out messageResult);
            if (messageResult == 0)
            {
                return null;
            }

            HTMLDocumentClass resolved    = null;
            Guid              interfaceId = typeof(IHTMLDocument2).GUID;
            int               status      = NativeMethods.ObjectFromLresult(messageResult, ref interfaceId, 0, ref resolved);

            return status != 0 ? null : resolved;
        }
Beispiel #10
0
        /// <summary>
        /// DocumentComplete handler driving a small step-by-step automation, selected by <c>Task</c>.
        /// Step 1 fills the input named "q", clicks the input named "btnI" and advances <c>Task</c>.
        /// If either input is missing from the loaded document, the step is left pending.
        /// </summary>
        private void axWebBrowser1_DocumentComplete(object sender, AxSHDocVw.DWebBrowserEvents2_DocumentCompleteEvent e)
        {
            switch(Task)
            {
                case 1:
                {
                    // Use the browser's document directly; no throw-away HTMLDocumentClass is needed.
                    HTMLDocument myDoc = (HTMLDocument) axWebBrowser1.Document;

                    // a quick look at the google html source reveals:
                    // <INPUT maxLength="256" size="55" name="q">
                    //
                    HTMLInputElement otxtSearchBox = (HTMLInputElement) myDoc.all.item("q", 0);

                    // google html source for the I'm Feeling Lucky Button:
                    // <INPUT type=submit value="I'm Feeling Lucky" name=btnI>
                    //
                    HTMLInputElement btnSearch = (HTMLInputElement) myDoc.all.item("btnI", 0);

                    // Not the expected page: stay on this step and wait for the next event.
                    if (otxtSearchBox == null || btnSearch == null)
                    {
                        break;
                    }

                    otxtSearchBox.value = "intel corp";
                    btnSearch.click();

                    Task++;
                    break;
                }

                case 2:

                    // continuation of automated tasks...
                    break;
            }
        }
Beispiel #11
0
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        // Unused. Converts HTML whose links and images use relative paths into HTML with absolute URLs.
        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        /// <summary>
        /// Rewrites the <c>href</c> of every entry in the document's links collection and the
        /// <c>src</c> of every image so that they are absolute, resolved against
        /// <paramref name="relativeLocation"/>. Values that cannot be resolved to a URI are left unchanged.
        /// </summary>
        /// <param name="html">Markup to process.</param>
        /// <param name="relativeLocation">Base URI that relative values are resolved against.</param>
        /// <returns>The inner HTML of the document body after the rewrite.</returns>
        public string ConvertToAbsoluteUrls(string html, Uri relativeLocation)
        {
            IHTMLDocument2 doc = new HTMLDocumentClass();

            doc.write(new object[] { html });
            doc.close();

            // The links collection is not limited to anchors (it also holds AREA elements),
            // so enumerate as IHTMLElement instead of casting every entry to IHTMLAnchorElement.
            foreach (IHTMLElement element in doc.links)
            {
                Uri    addr;
                string href = element.getAttribute("href", 2) as string;

                if (href == null || !Uri.TryCreate(relativeLocation, href, out addr))
                {
                    continue;
                }

                IHTMLAnchorElement anchor = element as IHTMLAnchorElement;
                if (anchor != null)
                {
                    anchor.href = addr.AbsoluteUri;
                }
                else
                {
                    element.setAttribute("href", addr.AbsoluteUri, 0);
                }
            }

            foreach (IHTMLImgElement image in doc.images)
            {
                IHTMLElement element = (IHTMLElement)image;
                Uri          addr;
                string       src     = element.getAttribute("src", 2) as string;

                // TryCreate keeps one malformed src from aborting the whole conversion.
                if (src != null && Uri.TryCreate(relativeLocation, src, out addr))
                {
                    image.src = addr.AbsoluteUri;
                }
            }

            string ret = doc.body.innerHTML;

            return(ret);
        }
Beispiel #12
0
        /// <summary>
        /// Obtains an Internet Explorer instance for <paramref name="url"/> from
        /// <c>InternetExplorerFactory</c>, stores its window handle, and blocks until either the
        /// browser or its document reports the "complete" ready state.
        /// </summary>
        /// <param name="url">Address passed to the factory.</param>
        /// <exception cref="TimeoutException">
        /// Thrown when the page has not completed after the maximum number of polling attempts.
        /// Attempts where the document could not be obtained count toward that limit.
        /// </exception>
        public IEOperateCore(string url)
        {
            const int maxPolls = 2000;

            IE = InternetExplorerFactory.GetInternetExplorer(url);

            HWND = new IntPtr(IE.HWND);
            int loopCount = 0;

            while (IE.ReadyState != tagREADYSTATE.READYSTATE_COMPLETE)
            {
                // Check the budget first so that the failure path below is bounded as well.
                if (loopCount > maxPolls)
                {
                    throw new TimeoutException("open " + url + " timeout!");
                }
                loopCount++;

                try
                {
                    dom = (HTMLDocumentClass)IE.Document;
                }
                catch (Exception)
                {
                    // The document is not available (yet); back off and retry.
                    Thread.Sleep(1000);
                    continue;
                }

                // The document or its ready state may still be null while the page is loading.
                if (dom != null && "complete".Equals(dom.readyState))
                {
                    break;
                }

                Thread.Sleep(500);
            }
        }
Beispiel #13
0
        /// <summary>
        /// Creates a new MSHTML document and writes <paramref name="html"/> into it.
        /// The write stream is not closed by this method.
        /// </summary>
        /// <param name="html">Markup to write.</param>
        /// <returns>The document that received the markup.</returns>
        private HTMLDocument GetDoc(string html)
        {
            HTMLDocument document = new HTMLDocumentClass();

            ((IHTMLDocument2)document).write(new object[] { html });
            return document;
        }
Beispiel #14
0
 /// <summary>
 /// Closes the held document (if any) and drops the reference to it.
 /// Calling this when no document is held does nothing.
 /// </summary>
 public void Close()
 {
     if (_doc == null)
     {
         return;
     }

     _doc.IHTMLDocument2_close();
     _doc = null;
 }
Beispiel #15
0
 /// <summary>
 /// Appends <paramref name="script"/> to every element returned by
 /// <c>getElementsByTagName(Head)</c> on the given document.
 /// </summary>
 /// <param name="htmlDocument">Document whose head element(s) receive the script.</param>
 /// <param name="script">Script element to append.</param>
 private static void AddScriptToHead(HTMLDocumentClass htmlDocument, IHTMLScriptElement script)
 {
     foreach (IHTMLElement candidate in htmlDocument.getElementsByTagName(Head))
     {
         HTMLHeadElementClass headElement = (HTMLHeadElementClass)candidate;
         IHTMLDOMNode         scriptNode  = (IHTMLDOMNode)script;

         headElement.appendChild(scriptNode);
     }
 }
Beispiel #16
0
        /// <summary>
        /// Blocks, polling every 100 ms, until the document element's outer HTML differs from
        /// <paramref name="originalHtmlDocumentBodyInnerHtml"/> or the configured crawl request
        /// timeout (in minutes) has elapsed.
        /// </summary>
        /// <param name="originalHtmlDocumentBodyInnerHtml">
        /// Snapshot the current markup is compared with.
        /// NOTE(review): the name says "body inner HTML" but the comparison is against documentElement.outerHTML — confirm what callers pass.
        /// </param>
        /// <param name="htmlDocument">Document being watched.</param>
        private void RenderDecodedHtml(string originalHtmlDocumentBodyInnerHtml, HTMLDocumentClass htmlDocument)
        {
            DateTime waitStarted = DateTime.Now;

            for (;;)
            {
                if (originalHtmlDocumentBodyInnerHtml != htmlDocument.documentElement.outerHTML)
                {
                    break;
                }

                TimeSpan waited = DateTime.Now.Subtract(waitStarted).Duration();
                if (!(waited.TotalMinutes < _applicationSettings.CrawlRequestTimeoutInMinutes))
                {
                    break;
                }

                Thread.Sleep(100);
            }
        }
        /// <summary>
        /// Returns the body's inner text of <paramref name="html"/> after writing it into a
        /// scratch MSHTML document that has design mode switched on.
        /// </summary>
        /// <param name="html">Markup to convert.</param>
        /// <returns>The <c>innerText</c> of the resulting document body.</returns>
        private static string InnerHtmlText(string html)
        {
            HTMLDocumentClass scratch = new HTMLDocumentClass { designMode = "on" };

            scratch.IHTMLDocument2_write(html);

            string plainText = scratch.body.innerText;
            return plainText;
        }
Beispiel #18
0
        /// <summary>
        /// Rebuilds the <c>Phones</c> collection from an XML payload.
        /// The first "html" element's inner XML is taken (with a surrounding CDATA wrapper removed
        /// when present), parsed as HTML, and every DIV whose class is exactly
        /// "gc-forwarding-number-ani goog-inline-block" contributes one phone entry from its inner text.
        /// Any exception is caught and written to the trace output.
        /// </summary>
        /// <param name="xml">XML payload expected to contain an "html" element.</param>
        public void UpdatePhone(string xml)
        {
            try
            {
                Phones.Clear();

                XmlDocument envelope = new XmlDocument();
                envelope.LoadXml(xml);

                XmlNodeList htmlNodes = envelope.GetElementsByTagName("html");
                if (htmlNodes.Count <= 0)
                {
                    Trace.WriteLine("Couldn't find HTML node in Phone XML");
                    return;
                }

                string markup = htmlNodes[0].InnerXml.Trim();

                if (!markup.StartsWith("<![CDATA["))
                {
                    Trace.WriteLine("No CDATA!");
                }
                else
                {
                    // Remove the opening marker, then the last three characters (the closing marker).
                    markup = markup.Substring("<![CDATA[".Length);
                    markup = markup.Substring(0, markup.Length - 3);
                    markup = markup.Trim();
                }

                object[]     pageText = { markup };
                HTMLDocument page     = new HTMLDocumentClass();
                ((IHTMLDocument2)page).write(pageText);

                foreach (IHTMLElement div in page.getElementsByTagName("div"))
                {
                    if (div.className != "gc-forwarding-number-ani goog-inline-block")
                    {
                        continue;
                    }

                    Phone found = new Phone();
                    found.Number = div.innerText;
                    Phones.Add(found);
                    Trace.WriteLine("Found Phone: " + found.Number);
                }
            }
            catch (Exception ex)
            {
                Trace.Write("XError loading Phone: " + ex);
            }
        }
Beispiel #19
0
        /// <summary>
        /// Appends <paramref name="script"/> to every element returned by
        /// <c>getElementsByTagName(Head)</c> on the given document.
        /// </summary>
        /// <param name="htmlDocument">Document whose head element(s) receive the script.</param>
        /// <param name="script">Script element to append.</param>
        private static void AddScriptToHead(HTMLDocumentClass htmlDocument, IHTMLScriptElement script)
        {
            IHTMLElementCollection heads = htmlDocument.getElementsByTagName(Head);

            foreach (IHTMLElement current in heads)
            {
                HTMLHeadElementClass target = (HTMLHeadElementClass)current;
                target.appendChild((IHTMLDOMNode)script);
            }
        }
Beispiel #20
0
        /// <summary>
        /// Parses <paramref name="html"/> into a new MSHTML document and returns it.
        /// The document's title and body text are read once but not used.
        /// </summary>
        /// <param name="html">Markup to load.</param>
        /// <returns>The parsed document.</returns>
        public IHTMLDocument2 ConverToTable(string html)
        {
            object[]       markup = { html };
            IHTMLDocument2 parsed = new HTMLDocumentClass();

            parsed.write(markup);
            parsed.close();

            // These reads are kept from the original; their values are never consumed.
            string pageTitle = parsed.title;
            string pageText  = parsed.body.innerText;

            return parsed;
        }
Beispiel #21
0
 /// <summary>
 /// For every iframe element in the document, adds the key "iframe_src" and the element's
 /// <c>src</c> attribute value to the message's parallel key/value collections.
 /// Nothing is recorded when the message or either of its collections is null.
 /// </summary>
 /// <param name="rendererMessage">Message whose property collections receive the entries.</param>
 /// <param name="htmlDocumentClass">Document searched for iframe elements.</param>
 public override void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass)
 {
     foreach (IHTMLElement iframe in htmlDocumentClass.getElementsByTagName("iframe"))
     {
         if (rendererMessage == null || rendererMessage.PropertiesKeys == null || rendererMessage.PropertiesValues == null)
         {
             continue;
         }

         rendererMessage.PropertiesKeys.Add("iframe_src");
         rendererMessage.PropertiesValues.Add(iframe.getAttribute("src"));
     }
 }
        /// <summary>
        /// Writes the message body into a scratch MSHTML document (design mode on) and returns
        /// the outer HTML of the resulting body element.
        /// </summary>
        /// <param name="email">Message whose <c>Body</c> is converted.</param>
        /// <returns>The <c>outerHTML</c> of the scratch document's body.</returns>
        private static string GetCompleteMailBodyHtml(EmailMessage email)
        {
            // Assign to a string first so the body is passed to write() as text.
            string markup = email.Body;

            HTMLDocumentClass scratch = new HTMLDocumentClass { designMode = "on" };
            scratch.IHTMLDocument2_write(markup);

            return scratch.body.outerHTML;
        }
        /// <summary>
        /// Parses <paramref name="htmlContent"/> with MSHTML, writes the body's plain text to the
        /// console, and returns that text.
        /// </summary>
        /// <param name="htmlContent">Markup to convert.</param>
        /// <returns>The <c>innerText</c> of the parsed document body.</returns>
        public static string RetrieveInnerTextContent(string htmlContent)
        {
            object[]       markup = { htmlContent };
            IHTMLDocument2 parsed = (IHTMLDocument2)new HTMLDocumentClass();

            parsed.write(markup);
            parsed.close();

            // All plain text in the body; echoed to the console as well as returned.
            string plainText = parsed.body.innerText;
            Console.WriteLine(plainText);
            return plainText;
        }
Beispiel #24
0
        /// <summary>
        /// Inspects each page the browser finishes loading and reacts to the one that meets all of these criteria:
        ///  - It has a meta element named "Application-State" whose content is a ";"-separated list of "key=val" pairs.
        ///  - That content contains "service=IDP", meaning the response comes from the identity provider (IDP).
        ///  - That content contains "authenticated=true", meaning authentication succeeded.
        ///  - That content contains "state=token", meaning the response carries the SAML token.
        /// When all hold, the SAML token response is read from the BODY attribute named SSO_SAML2_TOKEN.
        /// Pages without the meta element, or not from the IDP, are ignored. An IDP page lacking the
        /// last two entries sets a negative dialog result. The window is closed after any IDP page
        /// and after any exception.
        /// </summary>
        private void OnLoadCompleted(object sender, NavigationEventArgs args)
        {
            Console.WriteLine("OnLoadCompleted: {0}", args.Uri);
            try
            {
                HTMLDocumentClass page = (HTMLDocumentClass)(_browser.Document);

                // No application-state meta element means this is not a page of interest.
                IHTMLElementCollection stateMetas = page.getElementsByName("Application-State");
                if (stateMetas.length < 1)
                {
                    return;
                }

                // Example: <meta name="Application-State" content="service=IDP;federated=False;env=Test;state=Ok;authenticated=True;">
                string   rawState = ((HTMLMetaElement)stateMetas.item(0)).content;
                string[] entries  = rawState.Split(';');

                // Only the IDP's own response tells us whether authentication finished;
                // anything else is an earlier page in the login flow and is skipped.
                if (!entries.Contains("service=idp", StringComparer.OrdinalIgnoreCase))
                {
                    return;
                }

                bool tokenIssued = entries.Contains("authenticated=true", StringComparer.OrdinalIgnoreCase) &&
                                   entries.Contains("state=token", StringComparer.OrdinalIgnoreCase);

                if (!tokenIssued)
                {
                    DialogResult = false;
                }
                else
                {
                    // On the final page the SAML response sits in a BODY attribute.
                    HTMLBody body  = ((HTMLBody)page.getElementsByTagName("body").item(0));
                    object   token = body.getAttribute("SSO_SAML2_TOKEN");

                    SAMLResponse = token.ToString();
                    DialogResult = true;
                }

                Close();
            }
            catch (Exception ex)
            {
                Console.Error.WriteLine(ex);
                Close();
            }
        }
Beispiel #25
0
        /// <summary>
        /// Fetches a page through <c>GetWebpage</c> and loads the returned markup into an MSHTML
        /// document so that it can be parsed.
        /// </summary>
        /// <param name="sUrl">Address passed to <c>GetWebpage</c>.</param>
        /// <returns>The document after the markup has been written and the write stream closed.</returns>
        public static IHTMLDocument2 GetWebpageDocument(string sUrl)
        {
            string pageSource = GetWebpage(sUrl).ToString();

            IHTMLDocument2 parsed = new HTMLDocumentClass();
            parsed.write(new object[] { pageSource });
            parsed.close();

            return parsed;
        }
Beispiel #26
0
 /// <summary>
 /// Closes the held document (if any) and, when a browser instance is held, asks
 /// <c>InternetExplorerFactory</c> to close Internet Explorer and resets the stored window handle to zero.
 /// </summary>
 public void CloseInternetExplorer()
 {
     if (dom != null)
     {
         dom.close();
         dom = null;
     }

     if (IE == null)
     {
         return;
     }

     InternetExplorerFactory.CloseInternetExplorer();
     HWND = IntPtr.Zero;
 }
Beispiel #27
0
        /// <summary>
        /// Drives the Skype login window: sets the user name in the login edit control, posts an
        /// Enter key-down to it, optionally forces the window minimized, then waits for the embedded
        /// browser document to become available and continues with <c>WebBrowserLogin</c>.
        /// </summary>
        /// <param name="skypeLoginWindowData">Handles of the login window and its edit control.</param>
        /// <exception cref="LoginException">
        /// Thrown when the login web document cannot be obtained in time, or when a
        /// <see cref="ThreadAbortException"/> is observed while waiting or logging in.
        /// </exception>
        private void Login(GetSkypeLoginWindowDataResult skypeLoginWindowData)
        {
            // Set user name
            WinApi.User32.SendMessage(skypeLoginWindowData.LoginEditPtr, WinApi.MessageType.WM_SETTEXT, IntPtr.Zero, _username);

            // Simulate Enter press to initiate login
            WinApi.User32.PostMessage(skypeLoginWindowData.LoginEditPtr, WinApi.MessageType.WM_KEYDOWN, new IntPtr((int)WinApi.VirtualKeyCode.VK_RETURN), IntPtr.Zero);

            // Skype restores its window after login begins (per the original author),
            // so minimize it again when a minimized start was requested.
            if (_minimized)
            {
                TimedOutOperation(1000, 1, () =>
                {
                    bool alreadyMinimized = WinApi.User32.IsIconic(skypeLoginWindowData.LoginWindowPtr);
                    if (alreadyMinimized)
                    {
                        return false;
                    }

                    WinApi.User32.ShowWindow(skypeLoginWindowData.LoginWindowPtr, WinApi.User32.ShowWindowCommands.SW_FORCEMINIMIZE);
                    return true;
                });
            }

            _canThrowSkypeExitException = true;
            try
            {
                bool documentAcquired = TimedOutOperation(kHtmlGetObjectTimeout, kOperationRetryDelay, () =>
                {
                    IntPtr serverHandle = GetInternetExplorerServerHandle(skypeLoginWindowData.LoginWindowPtr);
                    if (serverHandle == IntPtr.Zero)
                    {
                        return false;
                    }

                    _loginBrowserHtmlDocument = GetHtmlDocumentClassFromInternetExplorerServerHandle(serverHandle);
                    return _loginBrowserHtmlDocument != null;
                });

                if (!documentAcquired)
                {
                    throw new LoginException("Unable to get the Skype login web document");
                }

                WebBrowserLogin();
            }
            catch (ThreadAbortException e)
            {
                throw new LoginException("Skype process has died unexpectedly", e);
            }
            finally
            {
                _canThrowSkypeExitException = false;
            }
        }
Beispiel #28
0
        /// <summary>
        /// Converts an HTML string (for example read from an HTML file or a database) into an
        /// <see cref="mshtml.IHTMLDocument2">HTML document</see>.
        /// </summary>
        /// <param name="htmlString">The HTML markup to parse; may be null or empty.</param>
        /// <returns>
        /// The parsed <see cref="mshtml.IHTMLDocument2">HTML document</see>, or <c>null</c> when
        /// <paramref name="htmlString"/> is null or empty.
        /// </returns>
        public static mshtml.IHTMLDocument2 GetHtmlDocument(string htmlString)
        {
            // Treat null like the empty string instead of failing on htmlString.Length.
            if (string.IsNullOrEmpty(htmlString))
            {
                return(null);
            }

            //reads the html into an html document to enable parsing
            IHTMLDocument2 doc = new HTMLDocumentClass();
            doc.write(new object[] { htmlString });
            doc.close();

            return(doc);
        }
        /// <summary>
        /// Creates an HTML element named after <paramref name="xNode"/>, appends it to
        /// <paramref name="hNode"/>, and fills it from the XML node: CDATA and text nodes set the
        /// element's inner text; for any other node type, text children set the inner text and
        /// all remaining children are copied recursively. XML attributes are copied last.
        /// </summary>
        /// <param name="hDoc">Document used to create the new element.</param>
        /// <param name="hNode">HTML node the new element is appended to.</param>
        /// <param name="xNode">XML node being copied.</param>
        public static void CopyXmlNodeToHtml(HTMLDocumentClass hDoc, IHTMLDOMNode hNode, XmlNode xNode)
        {
            IHTMLElement created     = hDoc.createElement(xNode.Name);
            IHTMLDOMNode createdNode = created as IHTMLDOMNode;

            hNode.appendChild(createdNode);

            switch (xNode.NodeType)
            {
                case XmlNodeType.CDATA:
                    created.innerText = xNode.Value;
                    break;

                case XmlNodeType.Text:
                    created.innerText = xNode.InnerText;
                    break;

                default:
                    foreach (XmlNode child in xNode.ChildNodes)
                    {
                        if (child.NodeType != XmlNodeType.Text)
                        {
                            CopyXmlNodeToHtml(hDoc, createdNode, child);
                            continue;
                        }

                        try
                        {
                            created.innerText = child.InnerText;
                        }
                        catch
                        {
                            // Failures to set the text are deliberately ignored (kept from the original).
                        }
                    }
                    break;
            }

            XmlAttributeCollection attributes = xNode.Attributes;
            if (attributes != null)
            {
                foreach (XmlAttribute attribute in attributes)
                {
                    created.setAttribute(attribute.Name, attribute.Value, 0);
                }
            }
        }
Beispiel #30
0
        /// <summary>
        /// Loads the current content into an MSHTML document, lets <paramref name="interop"/>
        /// manipulate it, stores the resulting outer HTML back as the content, and writes it
        /// (UTF-8) to a new time-stamped "*-transformed.html" file in the working directory.
        /// </summary>
        /// <param name="interop">Callback that receives the loaded document for modification.</param>
        /// <returns>This instance, with <c>content</c> and <c>absolutePath</c> updated.</returns>
        public DocumentModel Transform(Action <HTMLDocumentClass> interop)
        {
            HTMLDocumentClass myDocument = new HTMLDocumentClass();
            IHTMLDocument2    doc2       = myDocument;

            doc2.write(new object[] { this.content });
            interop(myDocument);
            this.content = myDocument.documentElement.outerHTML;

            // ':' is not allowed in Windows file names, so use '-' between the time parts.
            // 24-hour "HH" avoids AM/PM collisions; the invariant culture keeps the format fixed.
            string timestamp = DateTime.Now.ToString("HH-mm-ss-fffff", System.Globalization.CultureInfo.InvariantCulture);
            string fileName  = string.Format("{0}-transformed.html", timestamp);

            this.absolutePath = Path.Combine(this.workingDirectory, fileName);
            File.WriteAllText(this.absolutePath, this.content, Encoding.UTF8);
            return(this);
        }
Beispiel #31
0
        /// <summary>
        /// Loads <paramref name="url"/> into a new MSHTML document via <c>createDocumentFromUrl</c>,
        /// pumps the message loop until the document reports "complete", and wraps the result in
        /// an <c>IEDocument</c> stored in <c>_ieDocument</c>.
        /// </summary>
        /// <param name="url">Address to load.</param>
        public void NavigateTo(Uri url)
        {
            var seedDocument = new HTMLDocumentClass();
            ((IPersistStreamInit)seedDocument).InitNew();

            var loadedDocument = seedDocument.createDocumentFromUrl(url.AbsoluteUri, "null");

            for (;;)
            {
                if (loadedDocument.readyState == "complete")
                {
                    break;
                }

                // Per the original author: without pumping messages the document stays in "loading".
                Application.DoEvents();
            }

            _ieDocument = new IEDocument(loadedDocument);
        }
Beispiel #32
0
        /// <summary>
        /// Loads <paramref name="url"/> into a new MSHTML document via <c>createDocumentFromUrl</c>,
        /// pumps the message loop until the document reports "complete", and wraps the result in
        /// an <c>IEDocument</c> stored in <c>_ieDocument</c>.
        /// </summary>
        /// <param name="url">Address to load.</param>
        public void NavigateTo(Uri url)
        {
            var blankDocument = new HTMLDocumentClass();
            var initializer   = (IPersistStreamInit)blankDocument;
            initializer.InitNew();

            string target   = url.AbsoluteUri;
            var    document = blankDocument.createDocumentFromUrl(target, "null");

            // Per the original author: without DoEvents() the document stays in "loading".
            while (!string.Equals(document.readyState, "complete"))
            {
                Application.DoEvents();
            }

            _ieDocument = new IEDocument(document);
        }
Beispiel #33
0
        /// <summary>
        /// Collects href values from every element of the document into the message's parallel
        /// key/value collections under the key "element_href", skipping values already present.
        /// Values come from the element's own <c>href</c> attribute and, when the element's inner
        /// HTML mentions "href", from any attribute whose name contains "href" on the nodes
        /// obtained by parsing that inner HTML.
        /// Nothing is recorded when the message or either of its collections is null.
        /// </summary>
        /// <param name="rendererMessage">Message whose property collections receive the entries.</param>
        /// <param name="htmlDocumentClass">Document whose elements are scanned.</param>
        public override void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass)
        {
            foreach (IHTMLElement element in htmlDocumentClass.all)
            {
                if (rendererMessage == null || rendererMessage.PropertiesKeys == null || rendererMessage.PropertiesValues == null)
                {
                    continue;
                }

                // 1) The element's own href attribute (only non-empty string values count).
                object href = element.getAttribute("href");
                if (!string.IsNullOrEmpty(href as string) && !rendererMessage.PropertiesValues.Contains(href))
                {
                    rendererMessage.PropertiesKeys.Add("element_href");
                    rendererMessage.PropertiesValues.Add(href);
                }

                // 2) href-like attributes found inside the element's markup.
                string innerMarkup = element.innerHTML;
                if (innerMarkup == null || !innerMarkup.ToLowerInvariant().Contains("href"))
                {
                    continue;
                }

                HtmlDocument fragment = new HtmlDocument();
                fragment.LoadHtml(innerMarkup);

                if (fragment.DocumentNode.Attributes == null)
                {
                    continue;
                }

                foreach (HtmlAgilityPack.HtmlNode node in fragment.DocumentNode.Descendants())
                {
                    if (node.Attributes == null)
                    {
                        continue;
                    }

                    foreach (HtmlAttribute attribute in node.Attributes)
                    {
                        if (!attribute.Name.ToLowerInvariant().Contains("href"))
                        {
                            continue;
                        }

                        if (rendererMessage.PropertiesValues.Contains(attribute.Value))
                        {
                            continue;
                        }

                        rendererMessage.PropertiesKeys.Add("element_href");
                        rendererMessage.PropertiesValues.Add(attribute.Value);
                    }
                }
            }
        }
Beispiel #34
0
        /// <summary>
        /// Click handler that (re)builds the search index: for every topic id from 1 to
        /// <c>GetMaxID()</c> it downloads the topic page, extracts title and body text with MSHTML,
        /// and replaces the index document for that id.
        /// The index writer, the directory and the web client are released even when a download
        /// or parse step throws.
        /// </summary>
        protected void searchButton_Click(object sender, EventArgs e)
        {
            // Location of the index store.
            string      indexPath = "C:/index";
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());

            try
            {
                bool isUpdate = IndexReader.IndexExists(directory);

                // Clear an existing lock on the index before opening a writer.
                if (isUpdate && IndexWriter.IsLocked(directory))
                {
                    IndexWriter.Unlock(directory);
                }

                IndexWriter write = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, IndexWriter.MaxFieldLength.UNLIMITED);

                try
                {
                    using (WebClient wc = new WebClient())
                    {
                        wc.Encoding = Encoding.UTF8;
                        int maxID = GetMaxID();

                        for (int i = 1; i <= maxID; i++)
                        {
                            string            url  = "http://localhost:8080/showtopic-" + i + ".aspx";
                            string            html = wc.DownloadString(url);
                            HTMLDocumentClass doc  = new HTMLDocumentClass();

                            doc.designMode = "on";
                            doc.IHTMLDocument2_write(html);
                            doc.close();

                            // Fall back to empty strings so a page without title or body
                            // does not pass null into the Field constructor.
                            IHTMLElement bodyElement = doc.body;
                            string       title       = doc.title ?? string.Empty;
                            string       body        = bodyElement != null ? (bodyElement.innerText ?? string.Empty) : string.Empty;

                            // Replace any previously indexed document for this id.
                            write.DeleteDocuments(new Term("number", i.ToString()));

                            Document document = new Document();
                            document.Add(new Field("number", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                            document.Add(new Field("title", title, Field.Store.YES, Field.Index.NOT_ANALYZED));
                            document.Add(new Field("body", body, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                            write.AddDocument(document);
                            logger.Debug("索引" + i.ToString() + "完毕");
                        }
                    }
                }
                finally
                {
                    // Always close the writer; otherwise the index lock stays held after a failure.
                    write.Close();
                }
            }
            finally
            {
                directory.Close();
            }

            logger.Debug("全部索引完毕");
        }
Beispiel #35
0
		/// <summary>
		/// Creates a new instance of the HtmlHelpFile class
		/// </summary>
		/// <param name="f">The path to the file</param>
		/// <exception cref="ArgumentNullException"><paramref name="f"/> is null.</exception>
		/// <exception cref="ArgumentException">
		/// The file could not be opened or parsed; the original failure is the inner exception.
		/// </exception>
		public HtmlHelpFile( FileInfo f )
		{
			// Checked outside the try block: the catch handler below reads f.Name, so a
			// null argument would otherwise fail inside the handler with a
			// NullReferenceException instead of a meaningful argument error.
			if ( f == null )
			{
				throw new ArgumentNullException( "f" );
			}

			try 
			{
				Debug.Assert( f.Exists );

				m_file = f;
				m_doc = GetHtmlDocument( f );
				m_dataIsland = GetXmlDataIsland();
				m_TopicType = GetTopicType();
			}
			catch ( Exception e )
			{
				throw new ArgumentException( string.Format( "Could not open {0}", f.Name ), "f", e );
			}
		}
Beispiel #36
0
        /// <summary>
        /// Parses the given markup, rewrites the src of every image whose path contains
        /// "_files/" into a URL built from GetSecurePDF(), the request authority and
        /// GetWebDirectory(), and returns the result as html + first head + body.
        /// </summary>
        /// <param name="htmlToParse">Markup to fix up.</param>
        /// <returns>
        /// The rewritten markup, or <paramref name="htmlToParse"/> unchanged when anything
        /// fails (the failure is logged).
        /// </returns>
        string FixHTML(string htmlToParse)
        {
            try
            {
                // Let MSHTML parse the markup so the images can be edited through the DOM.
                IHTMLDocument2 htmlDocument = new HTMLDocumentClass();

                htmlDocument.write(htmlToParse);
                htmlDocument.close();

                IHTMLElementCollection allElements = (IHTMLElementCollection)htmlDocument.body.all;
                IHTMLElementCollection allImages = (IHTMLElementCollection)allElements.tags("img");
                IHTMLElementCollection allhead = (IHTMLElementCollection)htmlDocument.all.tags("head");
                string MyDoc = "<html>";

                // Only the first head element is copied into the output.
                foreach (IHTMLElement HeadElement in allhead)
                {
                    MyDoc += HeadElement.outerHTML;
                    break;
                }

                foreach (IHTMLElement element in allImages)
                {
                    // An image without a usable src must not abort the whole rewrite;
                    // "as" also copes with a non-string value coming back from COM.
                    string src = element.getAttribute("src", 0) as string;
                    if (src == null)
                    {
                        continue;
                    }

                    int Pos = src.IndexOf("_files/");
                    if (Pos == -1)
                    {
                        continue;
                    }

                    // The request authority is appended for every host (the former
                    // per-domain branches were all identical).
                    string URL = GetSecurePDF() + Request.Url.Authority;

                    // NOTE(review): Pos + 1 skips only the leading underscore, so the path
                    // continues at "files/..." - confirm this is intended.
                    URL += GetWebDirectory() + src.Substring(Pos + 1);
                    element.setAttribute("src", URL, 0);
                }

                // Append the (modified) body and close the document.
                MyDoc += htmlDocument.body.outerHTML;

                MyDoc += "</html>";
                return MyDoc;
            }
            catch (Exception e)
            {
                logger.Error("Exception in HTML fix: " + e.Message);
            }
            return htmlToParse;
        }
Beispiel #37
0
		/// <summary>
		/// Opens the HTML document at the specified location
		/// </summary>
		/// <param name="f">The Html file</param>
		/// <returns>Pointer to the parsed Html document</returns>
		/// <exception cref="TimeoutException">
		/// The document body was still null 10 seconds after the load was started.
		/// </exception>
		private HTMLDocumentClass GetHtmlDocument( FileInfo f )
		{
			HTMLDocumentClass doc = null;
			
			try
			{
				doc = new HTMLDocumentClass();

				// Load the file through the document's IPersistFile implementation.
				UCOMIPersistFile persistFile = (UCOMIPersistFile)doc;
				persistFile.Load( f.FullName, 0 );

				int start = Environment.TickCount;

				while( doc.body == null ) 
				{ 
					// as precaution to ensure that the html is fully parsed
					// we spin here (for a maximum of 10 seconds) until the 
					// body property is non-null
					if ( Environment.TickCount - start > 10000 )
					{
						string message = string.Format( "The document {0} timed out while loading", f.Name );
						Trace.WriteLine( message );
						throw new TimeoutException( message );
					}
				}
			}
			catch( Exception e )
			{
				Trace.WriteLine( string.Format( "An error occured opening file {0}, {1}", f.Name, e.Message ) );

				// Rethrow without resetting the stack trace ("throw e;" would discard it).
				throw;
			}

			return doc;
		}
        /// <summary>
        /// Loads the document at <paramref name="url"/> through a throw-away host
        /// document and pumps the message loop until it reports "complete" or
        /// <c>_timeout</c> seconds have passed.
        /// </summary>
        /// <param name="url">Address of the document to load.</param>
        /// <returns>
        /// The loaded document. It is returned even when the wait ended on the
        /// timeout, in which case it may not be fully loaded.
        /// </returns>
        public IHTMLDocument2 GetDocument(string url)
        {
            // Host document that only exists to call createDocumentFromUrl on.
            HTMLDocumentClass hostDocument = new HTMLDocumentClass();
            IHTMLDocument2 hostAsDoc2 = (IHTMLDocument2)hostDocument;
            IHTMLDocument4 hostAsDoc4 = (IHTMLDocument4)hostDocument;

            // The host needs some markup written into it before it can be used,
            // even though its own DOM is never read.
            hostAsDoc2.write("<html></html>");
            hostAsDoc2.close();

            IHTMLDocument2 loaded = hostAsDoc4.createDocumentFromUrl(url, "null");
            loaded.parentWindow.onerror = this;

            // Pump messages until loading finishes or the deadline passes.
            DateTime deadline = DateTime.Now.AddSeconds(_timeout);
            bool expired = false;
            while (!expired && loaded.readyState != "complete")
            {
                System.Windows.Forms.Application.DoEvents();
                expired = DateTime.Now > deadline;
            }

            return loaded;
        }
Beispiel #39
0
		/// <summary>
		/// Opens the document at <paramref name="uri"/> via a dummy host document and
		/// polls its ready state every 100 ms, at most 300 times.
		/// </summary>
		/// <param name="uri">Location of the document to open.</param>
		/// <returns>The opened document, whether or not it reached "complete".</returns>
		private static IHTMLDocument2 OpenHtml( string uri )
		{
			// Dummy document whose only job is to open the real one.
			HTMLDocumentClass host = new HTMLDocumentClass();

			// Interface views of the host; the original author notes that the
			// dispatch-interface methods seem to fail without them.
			IHTMLDocument2 hostDoc2 = host;
			IHTMLDocument4 hostDoc4 = host;

			// The dummy document needs some markup before it can be used.
			hostDoc2.writeln("<html></html>");
			hostDoc2.close();

			IHTMLDocument2 loaded = host.createDocumentFromUrl( uri, "null" );

			uint polls = 0;
			while (polls < 300 && loaded.readyState != "complete")
			{
				Thread.Sleep(100);
				polls++;
			}

			return loaded;
		}
        /// <summary>
        /// Parses the given markup, rewrites the src of every image whose path contains
        /// "_files/" into a URL built from GetSecurePDF(), the request authority and
        /// GetWebDirectory(), and returns the body markup after a few text replacements.
        /// </summary>
        /// <param name="htmlToParse">Markup to fix up.</param>
        /// <returns>
        /// The rewritten inner HTML of the body, or an empty string when anything fails
        /// (the failure is logged).
        /// </returns>
        string FixHTML(string htmlToParse)
        {
            try
            {
                // Let MSHTML parse the markup so the images can be edited through the DOM.
                IHTMLDocument2 htmlDocument = new HTMLDocumentClass();

                htmlDocument.write(htmlToParse);
                htmlDocument.close();

                IHTMLElementCollection allElements = (IHTMLElementCollection)htmlDocument.body.all;
                IHTMLElementCollection allImages = (IHTMLElementCollection)allElements.tags("img");

                foreach (IHTMLElement element in allImages)
                {
                    // An image without a usable src must not abort the whole rewrite;
                    // "as" also copes with a non-string value coming back from COM.
                    string src = element.getAttribute("src", 0) as string;
                    if (src == null)
                    {
                        continue;
                    }

                    int Pos = src.IndexOf("_files/");
                    if (Pos == -1)
                    {
                        continue;
                    }

                    // The request authority is appended for every host (the former
                    // per-domain branches were all identical).
                    string URL = GetSecurePDF() + Request.Url.Authority;

                    // Swap the default image name for the form-specific one, if there is one.
                    // The comparison is done on an upper-cased copy of the path.
                    string ImgName = GetImageName(m_FormName);

                    if (ImgName.Length > 0)
                    {
                        src = src.ToUpper().Replace("IMAGE001.JPG", ImgName);
                    }

                    // NOTE(review): Pos + 1 skips only the leading underscore, so the path
                    // continues at "files/..." - confirm this is intended.
                    URL += GetWebDirectory() + src.Substring(Pos + 1);
                    element.setAttribute("src", URL, 0);
                }

                // Post-process the body markup as plain text.
                // NOTE(review): the "https" replacement touches every occurrence in the
                // markup, including visible text - confirm this is intended.
                string MyoutString = htmlDocument.body.innerHTML;
                MyoutString = MyoutString.Replace("Download to calendar", "");
                MyoutString = MyoutString.Replace("https", "http");

                // When a second image is present, the first is replaced by the fixed
                // ErnstYoung.JPG and the second takes over the image001.jpg name.
                if (MyoutString.IndexOf("image002.jpg") != -1)
                {
                    MyoutString = MyoutString.Replace("image001.jpg", "ErnstYoung.JPG");
                    MyoutString = MyoutString.Replace("image002.jpg", "image001.jpg");
                }

                return MyoutString;
            }
            catch (Exception e)
            {
                logger.Error("Exception in HTML fix: " + e.Message);
            }
            return "";
        }
Beispiel #41
0
    /// <summary>
    /// Rebuilds the Solr index: clears the previously generated XML files and the
    /// existing index, then writes one add-document XML file (id, name, content) per
    /// resource, POSTs it to Solr and finally commits.
    /// </summary>
    /// <remarks>
    /// Resources are loaded through ServerModel.DB.Load with the "CourseRef" key and the
    /// numeric directory names found under the application's Assets folder.
    /// Failures are swallowed; when the failure is a refused connection the Tomcat
    /// start script is launched instead.
    /// </remarks>
    public void Index()
    {
        // Directories under Assets are themes; only those with a numeric name are used.
        string searchPath = Path.Combine(HttpRuntime.AppDomainAppPath, "Assets");
        string[] dirs = Directory.GetDirectories(searchPath, "*");
        List<int> ids = new List<int>();

        foreach (string dir in dirs)
        {
            int directoryId;
            if (int.TryParse(new DirectoryInfo(dir).Name, out directoryId))
            {
                ids.Add(directoryId);
            }
        }

        var stages = ServerModel.DB.Load<TblResources>("CourseRef", ids);
        string xmlindex = Path.Combine(HttpRuntime.AppDomainAppPath, "tomcat-solr\\apache-solr-1.4.0\\Iudico\\");

        try
        {
            // Delete the XML files generated by a previous run.
            foreach (string oldXml in Directory.GetFiles(xmlindex, "*.xml"))
            {
                File.Delete(oldXml);
            }

            // Delete the existing Solr index, then commit the deletion.
            HttpWebRequest request = WebRequest.Create("http://localhost:8080/apache-solr-1.4.0/update?stream.body=%3Cdelete%3E%3Cquery%3Ename:*%3C/query%3E%3C/delete%3E") as HttpWebRequest;
            using (HttpWebResponse response = request.GetResponse() as HttpWebResponse)
            { }

            HttpWebRequest requestCommit = WebRequest.Create("http://localhost:8080/apache-solr-1.4.0/update?stream.body=%3Ccommit/%3E") as HttpWebRequest;
            using (HttpWebResponse response = requestCommit.GetResponse() as HttpWebResponse)
            { }
        }
        catch (Exception ex)
        {
            StartTomcatIfUnreachable(ex);
        }

        // Create the new index.
        int i = 0;

        try
        {
            foreach (TblResources res in stages)
            {
                i++;

                string filename = "XML" + i.ToString() + DateTime.Now.TimeOfDay.Hours.ToString() + DateTime.Now.TimeOfDay.Minutes.ToString() + DateTime.Now.TimeOfDay.Seconds.ToString() + ".xml";
                string filePathXml = xmlindex + filename;

                // Write the XML document holding the id, name and content of the theme.
                // The using block closes the file even when a later step throws.
                using (XmlTextWriter writer = new XmlTextWriter(filePathXml, null))
                {
                    writer.WriteStartElement("add");
                    writer.WriteStartElement("doc");

                    WriteSolrField(writer, "id", res.CourseRef.ToString());

                    var stages2 = ServerModel.DB.Load<TblCourses>(res.CourseRef);
                    WriteSolrField(writer, "name", stages2.Name);

                    // NOTE(review): FileMode.OpenOrCreate creates an empty file when the
                    // resource is missing - kept as in the original, confirm it is wanted.
                    string filePath = Path.Combine(CourseManager.GetCoursePath(res.CourseRef), res.Href.ToString());
                    string s;
                    using (FileStream file = new FileStream(filePath, FileMode.OpenOrCreate, FileAccess.Read))
                    using (StreamReader sr = new StreamReader(file))
                    {
                        s = sr.ReadToEnd();
                    }

                    // Let MSHTML parse the page so only its visible text is indexed.
                    IHTMLDocument2 doc = new HTMLDocumentClass();
                    doc.write(new object[] { s });
                    doc.close();

                    WriteSolrField(writer, "content", doc.body.innerText);

                    writer.WriteEndElement(); // doc
                    writer.WriteEndElement(); // add
                    writer.Flush();
                }

                // Index the XML by POSTing it to Solr.
                Encoding xmlEncoding = Encoding.UTF8;
                string ss = File.ReadAllText(filePathXml);

                HttpWebRequest requestIndex = WebRequest.Create("http://localhost:8080/apache-solr-1.4.0/update") as HttpWebRequest;
                requestIndex.Method = "POST";
                requestIndex.ContentType = "text/xml; charset=utf-8";
                requestIndex.ProtocolVersion = HttpVersion.Version10;
                requestIndex.KeepAlive = false;

                // ContentLength is a byte count; the character count differs as soon as
                // the text contains non-ASCII characters.
                byte[] data = xmlEncoding.GetBytes(ss);
                requestIndex.ContentLength = data.Length;

                // The request stream is closed before the response is requested.
                using (var postParams = requestIndex.GetRequestStream())
                {
                    postParams.Write(data, 0, data.Length);
                }

                using (var response = requestIndex.GetResponse())
                using (var rStream = response.GetResponseStream())
                {
                    // The reply is read to the end and discarded.
                    ReadFully(rStream);
                }
            }

            HttpWebRequest requestCommit1 = WebRequest.Create("http://localhost:8080/apache-solr-1.4.0/update?stream.body=%3Ccommit/%3E") as HttpWebRequest;
            using (HttpWebResponse response = requestCommit1.GetResponse() as HttpWebResponse)
            { }
        }
        catch (Exception ex)
        {
            StartTomcatIfUnreachable(ex);
        }
    }

    /// <summary>
    /// Writes one &lt;field name="..."&gt;value&lt;/field&gt; element of a Solr add document.
    /// </summary>
    private static void WriteSolrField(XmlTextWriter writer, string fieldName, string value)
    {
        writer.WriteStartElement("field");
        writer.WriteAttributeString("name", fieldName);
        writer.WriteString(value);
        writer.WriteEndElement();
    }

    /// <summary>
    /// Launches the Tomcat start script when <paramref name="ex"/> reports that the
    /// server could not be reached; every other failure is ignored, as before.
    /// </summary>
    private static void StartTomcatIfUnreachable(Exception ex)
    {
        // The status check also works on non-English systems, where the message
        // text is localized and the string comparison alone would never match.
        WebException webEx = ex as WebException;
        bool unreachable =
            (webEx != null && webEx.Status == WebExceptionStatus.ConnectFailure) ||
            ex.Message.ToString() == "Unable to connect to the remote server";

        if (!unreachable)
        {
            return;
        }

        using (System.Diagnostics.Process procTomcat = new System.Diagnostics.Process())
        {
            procTomcat.EnableRaisingEvents = false;
            procTomcat.StartInfo.FileName = Path.Combine(HttpRuntime.AppDomainAppPath, "tomcat-solr\\tomcatStart.bat");
            procTomcat.Start();
        }
    }
        /// <summary>
        /// For every row of the parameter's table, loads the row's Url into an MSHTML
        /// document and passes the ProductId, the Url and the body markup to GatherPriceV2.
        /// </summary>
        /// <param name="x">
        /// Expected to be a SpiderParameter; any other value (or null) makes the
        /// method return without doing anything.
        /// </param>
        private void DoSomethingWithParameter(object x)
        {
            var spiderArgs = x as SpiderParameter;
            if (spiderArgs == null)
            {
                return;
            }

            foreach (DataRow row in spiderArgs.DT.Rows)
            {
                // Host document used only to call createDocumentFromUrl.
                HTMLDocumentClass host = new HTMLDocumentClass();
                IHTMLDocument2 hostDoc2 = host;
                hostDoc2.write("");
                hostDoc2.close();
                IHTMLDocument4 hostDoc4 = host;

                var rowId = Convert.ToInt32(row["RowId"]);
                var url = Convert.ToString(row["Url"]);
                var productId = Convert.ToString(row["ProductId"]);

                IHTMLDocument2 page = hostDoc4.createDocumentFromUrl(url, "null");

                // Spin until the page is fully loaded, giving up after 1,000,000 ms.
                int startedAt = Environment.TickCount;
                while (page.readyState != "complete")
                {
                    int elapsed = Environment.TickCount - startedAt;
                    if (elapsed > 1000000)
                    {
                        throw new Exception("The document timed out while loading");
                    }
                }

                string bodyMarkup = page.body.outerHTML;
                GatherPriceV2(productId, url, bodyMarkup);

                page.clear();
                page.close();
            }
        }
        /// <summary>
        /// Loads http://www.baidu.com into an MSHTML document, calls MapUrlToZone for
        /// that address, and pumps the message loop until the document is complete.
        /// </summary>
        /// <remarks>
        /// Background reading left by the original author:
        /// http://social.msdn.microsoft.com/Forums/zh-CN/ieextensiondevelopment/thread/d2ce2000-580d-452a-950f-e29fcd11a35f
        /// https://code.google.com/p/csexwb2/
        /// </remarks>
        public void Test()
        {
            // Host document used only to call createDocumentFromUrl.
            HTMLDocumentClass host = new HTMLDocumentClass();
            IHTMLDocument2 hostDoc2 = host;
            hostDoc2.write("");
            hostDoc2.close();
            IHTMLDocument4 hostDoc4 = host;

            IHTMLDocument2 page = hostDoc4.createDocumentFromUrl("http://www.baidu.com", "null");

            MapUrlToZone(new Uri("http://www.baidu.com"));

            // Pump messages until loading finishes, giving up after 1,000,000 ms.
            int startedAt = Environment.TickCount;
            while (page.readyState != "complete")
            {
                Application.DoEvents();

                int elapsed = Environment.TickCount - startedAt;
                if (elapsed > 1000000)
                {
                    throw new Exception("The document timed out while loading");
                }
            }

            page.clear();
            page.close();
        }
        /// <summary>
        /// Returns the inner HTML of the body of the given markup, with every
        /// "about:blank" and "about:" occurrence stripped from it.
        /// </summary>
        /// <param name="htmlDocument">Markup of the whole document.</param>
        /// <param name="outputXhtml">Passed to the TidyParser constructor when Tidy is enabled.</param>
        /// <returns>The body markup.</returns>
        private string GetBodyContent(string htmlDocument, bool outputXhtml)
        {
            // Run the markup through Tidy first when the setting asks for it.
            string markup = htmlDocument;
            if (AppSettings.UseTidyOverOutput)
            {
                markup = new TidyParser(UI, outputXhtml).ParseString(htmlDocument);
            }

            // Let MSHTML parse the markup so the body can be read back from the DOM.
            IHTMLDocument2 parsed = (IHTMLDocument2)new HTMLDocumentClass();
            parsed.write(new object[] { markup });

            string bodyHtml = parsed.body.innerHTML;
            return bodyHtml.Replace("about:blank", "").Replace("about:", "");
        }
        /// <summary>
        /// Parses the current content into an MSHTML document, lets
        /// <paramref name="interop"/> modify it, then stores the resulting markup back
        /// into the content and writes it to the temp file in the working directory.
        /// </summary>
        /// <param name="interop">Callback that receives the parsed document.</param>
        /// <returns>This instance.</returns>
        public DocumentModel Transform(Action<HTMLDocumentClass> interop)
        {
            HTMLDocumentClass parsed = new HTMLDocumentClass();
            ((IHTMLDocument2)parsed).write(new object[] { this.content });

            interop(parsed);

            // Read the markup back after the callback has run.
            this.content = parsed.documentElement.outerHTML;

            string outputPath = Path.Combine(this.workingDirectory, this.tempFileName);
            File.WriteAllText(outputPath, this.content);

            return this;
        }
Beispiel #46
0
 /// <summary>
 /// Builds an MSHTML document by writing the given markup into a new
 /// HTMLDocumentClass and closing it.
 /// </summary>
 /// <param name="html">Markup to load.</param>
 /// <returns>The document the markup was written into.</returns>
 private HTMLDocumentClass getDocumentFromHTML(string html)
 {
     HTMLDocumentClass parsed = new HTMLDocumentClass();

     // write/close are called through the IHTMLDocument2 view of the same object.
     IHTMLDocument2 writer = (IHTMLDocument2)parsed;
     writer.write(new object[] { html });
     writer.close();

     return parsed;
 }
 /// <summary>
 /// Builds an MSHTML document by writing the given markup into a new
 /// HTMLDocumentClass and closing it.
 /// </summary>
 /// <param name="html">Markup to load; it is written as-is.</param>
 /// <returns>The document the markup was written into.</returns>
 private HTMLDocumentClass _getDocumentFromHTML(string html)
 {
     HTMLDocumentClass parsed = new HTMLDocumentClass();

     // write/close are called through the IHTMLDocument2 view of the same object.
     IHTMLDocument2 writer = (IHTMLDocument2)parsed;
     writer.write(new object[] { html });
     writer.close();

     return parsed;
 }
        /// <summary>
        /// Downloads the page at <paramref name="url"/> with DownloadString and writes
        /// it into a new MSHTML document.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>
        /// The document, or null when the download or parsing failed or the document
        /// body did not become available within 60 seconds.
        /// </returns>
        public static HTMLDocument LoadDocument(string url)
        {
            const int pollIntervalMs = 100;
            const int maxWaitMs = 60000;

            try
            {
                string htmlContent = DownloadString(url);

                // Write the downloaded markup into a fresh document.
                object[] oPageText = { htmlContent };
                HTMLDocument doc = new HTMLDocumentClass();
                IHTMLDocument2 doc2 = (IHTMLDocument2)doc;
                doc2.write(oPageText);

                // Wait for the body to appear, but never forever: a document that
                // never gets a body is reported as a failure (null), like any other.
                int waitedMs = 0;
                while (doc2.body == null)
                {
                    if (waitedMs >= maxWaitMs)
                    {
                        return null;
                    }

                    Thread.Sleep(pollIntervalMs);
                    waitedMs += pollIntervalMs;
                }

                return doc;
            }
            catch (Exception e)
            {
                // Best effort: the caller only sees null, so at least leave a trace.
                System.Diagnostics.Trace.WriteLine("LoadDocument failed for " + url + ": " + e.Message);
            }

            return null;
        }
        /// <summary>
        /// Opens the project's first source file and loads it into <c>iDoc</c>.
        /// If that file is a Word document it is first converted to HTML.
        /// </summary>
        /// <remarks>
        /// Sets <c>esWord</c>, <c>dirHtml</c>, <c>timerTimeout</c> and <c>iDoc</c>, and
        /// <c>MainSourceFile</c> when the source is not a Word document.
        /// Returns early, without loading anything, when CancellRequested() reports true.
        /// </remarks>
        private void OpenSourceFiles()
        {
            MSWord msWord = null;

            try
            {
                // Only the first source file is inspected here.
                string archivoFinal = (string)Project.SourceFiles[0];
                esWord = MSWord.ItIsWordDocument(archivoFinal);
                dirHtml = null;
                // If it is a Word document, convert it to filtered HTML
                if (esWord)
                {
                    msWord = new MSWord();
                    archivoFinal = ConvertWordSourceFiles(msWord);

                    // Be sure we have closed word, to avoid overlapping between the html read
                    // and the reading from chmprocessor:
                    msWord.Dispose();
                    msWord = null;
                }
                else
                    // There is a single source HTML file.
                    MainSourceFile = (string)Project.SourceFiles[0];

                if (CancellRequested())
                    return;

                // Optionally run Tidy over the input file.
                if (AppSettings.UseTidyOverInput)
                    new TidyParser(UI).Parse(archivoFinal);

                if (CancellRequested())
                    return;

                // Prepare loading:
                HTMLDocumentClass docClass = new HTMLDocumentClass();
                IPersistStreamInit ips = (IPersistStreamInit)docClass;
                ips.InitNew();

                // Create a timer so that loading the HTML file cannot hang forever (it sometimes does).
                // The wait loop below runs only while the timer is enabled;
                // timer_Tick is presumably what disables it - confirm.
                timerTimeout = new System.Windows.Forms.Timer();
                timerTimeout.Tick += new System.EventHandler(this.timer_Tick);
                timerTimeout.Interval = 60 * 1000;     // 1 minute
                timerTimeout.Enabled = true;

                // Load the file:
                IHTMLDocument2 docLoader = (mshtml.IHTMLDocument2)docClass.createDocumentFromUrl( archivoFinal , null);
                System.Windows.Forms.Application.DoEvents();
                System.Threading.Thread.Sleep(1000);

                // Poll the ready state every 500 ms, pumping messages, until the
                // document is complete or the timeout timer has been disabled.
                String currentStatus = docLoader.readyState;
                log("Reading file " + archivoFinal + ". Status: " + currentStatus , 2 );
                while (currentStatus != "complete" && timerTimeout.Enabled)
                {
                    System.Windows.Forms.Application.DoEvents();
                    System.Threading.Thread.Sleep(500);
                    String newStatus = docLoader.readyState;
                    if (newStatus != currentStatus)
                    {
                        log("Status: " + newStatus, 2);
                        if (currentStatus == "interactive" && newStatus == "uninitialized")
                        {
                            // Loader glitch: the state fell back from "interactive" to
                            // "uninitialized". Start over with a fresh document and reload the file:
                            log("Warning. Something wrong happens loading the file. Trying to reopen " + archivoFinal , 2);
                            docClass = new HTMLDocumentClass();
                            ips = (IPersistStreamInit)docClass;
                            ips.InitNew();
                            docLoader = (mshtml.IHTMLDocument2)docClass.createDocumentFromUrl(archivoFinal, null);
                            newStatus = docLoader.readyState;
                            log("Status: " + newStatus, 2);
                        }
                        currentStatus = newStatus;
                    }
                }
                // A disabled timer at this point means the wait ended on the timeout.
                if (!timerTimeout.Enabled)
                    log("Warning: time to load file expired.", 1);
                timerTimeout.Enabled = false;

                // Get a copy of the document: write the loaded document's markup into a new one.
                HTMLDocumentClass newDocClass = new HTMLDocumentClass();
                iDoc = (IHTMLDocument2)newDocClass;
                object[] txtHtml = { ((IHTMLDocument3)docLoader).documentElement.outerHTML };
                iDoc.writeln(txtHtml);
                try
                {
                    // Needed, otherwise some characters will not be displayed well.
                    iDoc.charset = docLoader.charset;
                }
                catch (Exception ex)
                {
                    // Failing to copy the charset is not fatal: log it and carry on.
                    log("Warning: Cannot set the charset \"" + docLoader.charset + "\" to the html document. Reason:" + ex.Message, 1);
                    log(ex);
                }
            }
            finally
            {
                // Make sure Word is released if the conversion step threw before disposing it.
                if (msWord != null)
                {
                    msWord.Dispose();
                    msWord = null;
                }
            }
        }
        /// <summary>
        /// Builds an MSHTML document from <paramref name="data"/> after replacing each
        /// script block matched by the "RemoveScripts" expression with a script tag
        /// rebuilt from the corresponding entry of <paramref name="scripts"/>.
        /// </summary>
        /// <param name="scripts">
        /// Scripts matching the script blocks in <paramref name="data"/>, by position;
        /// they are passed through CommentPopups before use.
        /// </param>
        /// <param name="data">Markup of the page.</param>
        /// <returns>The document the rewritten markup was written into.</returns>
        public IHTMLDocument2 GetDocumentWithData(HtmlScriptCollection scripts,string data)
        {
            HTMLDocumentClass hostDocument = new HTMLDocumentClass();

            // Interface views of the same document object.
            IHTMLDocument2 target = (IHTMLDocument2)hostDocument;
            IHTMLDocument4 targetAsDoc4 = (IHTMLDocument4)hostDocument;

            // The document needs some markup written into it before it can be used.
            target.write("<html></html>");
            target.close();

            target.parentWindow.onerror = this;

            Regex scriptPattern = (Regex)_htmlParser.GetRegExpParserScripts["RemoveScripts"];
            MatchCollection scriptMatches = scriptPattern.Matches(data);

            StringBuilder markup = new StringBuilder(data);
            scripts = CommentPopups(scripts);

            // Swap every matched script block for its rebuilt counterpart.
            int index = 0;
            while (index < scriptMatches.Count)
            {
                HtmlScript script = scripts[index];
                Match found = scriptMatches[index];

                StringBuilder rebuilt = new StringBuilder();
                rebuilt.Append("<script");

                // The language attribute is emitted only when the script declares one.
                if (script.Language.Length != 0)
                {
                    rebuilt.AppendFormat(" language=\"{0}\"", script.Language);
                }

                rebuilt.Append(">");
                rebuilt.Append(script.Text);
                rebuilt.Append("</script>");

                // Replaces every occurrence of the matched text in the buffer.
                markup.Replace(found.Value, rebuilt.ToString());

                index++;
            }

            // Write the rewritten markup into the document.
            target.write(markup.ToString());
            target.close();

            return target;
        }