Esempio n. 1
0
        /// <summary>
        /// Scans the document currently held by <c>webBrowser1</c> for the first
        /// <c>div</c> whose class is exactly "rc" and that has a header child whose
        /// first child is an anchor, then records that anchor's URL.
        /// </summary>
        /// <remarks>
        /// On a match two lines are passed to <c>write2urlList</c>: the current query
        /// term followed by the URL, and then the bare URL. Scanning stops at the first
        /// match. Nothing is written when no document is available or no match exists.
        /// </remarks>
        private void collecttheurl()
        {
            // Guard: presumably webBrowser1 is a WinForms WebBrowser, whose Document
            // is null until a page has been loaded.
            if (webBrowser1.Document == null)
            {
                return;
            }

            // "as" yields null when the DOM object does not expose IHTMLDocument2.
            mshtml.IHTMLDocument2 htmlDoc = webBrowser1.Document.DomDocument as mshtml.IHTMLDocument2;
            if (htmlDoc == null)
            {
                return;
            }

            // Materialize the div elements up front so the loop walks a fixed list.
            List<mshtml.IHTMLDivElement> allDiv = htmlDoc.all.OfType<mshtml.IHTMLDivElement>().ToList();

            foreach (IHTMLElement div in allDiv)
            {
                if (div.className != "rc")
                {
                    continue;
                }

                write2log("found a rc div");

                IHTMLDOMNode divNode = (IHTMLDOMNode)div;
                if (!divNode.hasChildNodes())
                {
                    continue;
                }

                IHTMLDOMChildrenCollection children = (IHTMLDOMChildrenCollection)divNode.childNodes;
                foreach (IHTMLDOMNode child in children)
                {
                    // Only header elements are of interest; they are identified by the
                    // runtime wrapper's class name.
                    if (child == null || child.GetType().Name != "HTMLHeaderElementClass")
                    {
                        continue;
                    }

                    if (!child.hasChildNodes())
                    {
                        continue;
                    }

                    // A header's first child may be something other than an anchor
                    // (a text node, for instance). Use "as" so such nodes are skipped
                    // rather than raising InvalidCastException.
                    IHTMLAnchorElement anchor = child.firstChild as IHTMLAnchorElement;
                    if (anchor == null || anchor.GetType().Name != "HTMLAnchorElementClass")
                    {
                        continue;
                    }

                    string href = anchor.href;
                    write2urlList(GoogleQueryConf.queryterms[GoogleQueryConf.queryIndex] + "\t => \t" + href);
                    write2urlList(href);

                    // Only the first matching URL is collected.
                    return;
                }
            }
        }