Example #1
0
 /// <summary>
 /// Records the <c>src</c> attribute of each <c>iframe</c> element in the document on the renderer message.
 /// </summary>
 /// <param name="rendererMessage">
 /// Message whose <c>PropertiesKeys</c> / <c>PropertiesValues</c> lists receive one
 /// <c>"iframe_src"</c> key and the matching value per usable attribute. Nothing is
 /// recorded when the message or either list is null.
 /// </param>
 /// <param name="htmlDocumentClass">Document whose <c>iframe</c> elements are inspected.</param>
 public override void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass)
 {
     // The destination does not change while iterating, so validate it once
     // instead of once per element (and skip the element lookup entirely when
     // there is nowhere to record results).
     if (rendererMessage == null || rendererMessage.PropertiesKeys == null || rendererMessage.PropertiesValues == null)
     {
         return;
     }

     foreach (IHTMLElement htmlElement in htmlDocumentClass.getElementsByTagName("iframe"))
     {
         // getAttribute returns 'object'; only a non-empty string is a usable source.
         // Skipping anything else keeps the key and value lists free of null or
         // non-string entries.
         string src = htmlElement.getAttribute("src") as string;

         if (string.IsNullOrEmpty(src))
         {
             continue;
         }

         // Keys and values are parallel lists: always add to both together.
         rendererMessage.PropertiesKeys.Add("iframe_src");
         rendererMessage.PropertiesValues.Add(src);
     }
 }
Example #2
0
        /// <summary>
        /// Collects href values from every element of the document and records them on the renderer message.
        /// </summary>
        /// <remarks>
        /// Two sources are inspected for each element: the element's own <c>href</c> attribute, and
        /// every attribute whose name contains "href" (case-insensitive) found by parsing the
        /// element's <c>innerHTML</c>. A value is recorded under the key <c>"element_href"</c> only
        /// when it is not already present in <c>PropertiesValues</c>.
        /// </remarks>
        /// <param name="rendererMessage">
        /// Message whose <c>PropertiesKeys</c> / <c>PropertiesValues</c> lists receive the results.
        /// Nothing is recorded when the message or either list is null.
        /// </param>
        /// <param name="htmlDocumentClass">Document whose <c>all</c> collection is enumerated.</param>
        public override void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass)
        {
            // Loop-invariant: check the destination once rather than for every element.
            if (rendererMessage == null || rendererMessage.PropertiesKeys == null || rendererMessage.PropertiesValues == null)
            {
                return;
            }

            foreach (IHTMLElement htmlElement in htmlDocumentClass.all)
            {
                // getAttribute returns 'object'; 'as string' folds the null check and type check together.
                string href = htmlElement.getAttribute("href") as string;

                if (!string.IsNullOrEmpty(href) && !rendererMessage.PropertiesValues.Contains(href))
                {
                    rendererMessage.PropertiesKeys.Add("element_href");
                    rendererMessage.PropertiesValues.Add(href);
                }

                // Read innerHTML a single time; the original read the property up to three
                // times per element and lower-cased a full copy just to search it.
                string innerHtml = htmlElement.innerHTML;

                if (innerHtml == null || innerHtml.IndexOf("href", System.StringComparison.OrdinalIgnoreCase) < 0)
                {
                    continue;
                }

                HtmlDocument htmlDocument = new HtmlDocument();

                htmlDocument.LoadHtml(innerHtml);

                if (htmlDocument.DocumentNode.Attributes == null)
                {
                    continue;
                }

                foreach (HtmlAgilityPack.HtmlNode htmlNode in htmlDocument.DocumentNode.Descendants())
                {
                    if (htmlNode.Attributes == null)
                    {
                        continue;
                    }

                    foreach (HtmlAttribute htmlAttribute in htmlNode.Attributes)
                    {
                        // Substring match on the name, so attributes such as "data-href" are included too.
                        if (htmlAttribute.Name.IndexOf("href", System.StringComparison.OrdinalIgnoreCase) < 0)
                        {
                            continue;
                        }

                        string attributeValue = htmlAttribute.Value;

                        if (!rendererMessage.PropertiesValues.Contains(attributeValue))
                        {
                            rendererMessage.PropertiesKeys.Add("element_href");
                            rendererMessage.PropertiesValues.Add(attributeValue);
                        }
                    }
                }
            }
        }
Example #3
0
        /// <summary>
        /// Records the <c>id</c> attribute of each <c>input</c> element in the document on the renderer message.
        /// </summary>
        /// <param name="rendererMessage">
        /// Message whose <c>PropertiesKeys</c> / <c>PropertiesValues</c> lists receive one
        /// <c>"input_id"</c> key and the matching id per new value. Nothing is recorded when
        /// the message or either list is null.
        /// </param>
        /// <param name="htmlDocumentClass">Document whose <c>input</c> elements are inspected.</param>
        public override void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass)
        {
            // Loop-invariant: check the destination once rather than for every element.
            if (rendererMessage == null || rendererMessage.PropertiesKeys == null || rendererMessage.PropertiesValues == null)
            {
                return;
            }

            foreach (IHTMLElement htmlElement in htmlDocumentClass.getElementsByTagName("input"))
            {
                // getAttribute returns 'object'; only a non-empty string is a usable id.
                string id = htmlElement.getAttribute("id") as string;

                if (string.IsNullOrEmpty(id))
                {
                    continue;
                }

                // NOTE(review): the duplicate check looks at every recorded value, not just
                // those stored under "input_id", so an id equal to a value recorded under
                // another key is skipped - confirm this is intended.
                if (rendererMessage.PropertiesValues.Contains(id))
                {
                    continue;
                }

                rendererMessage.PropertiesKeys.Add("input_id");
                rendererMessage.PropertiesValues.Add(id);
            }
        }
 /// <summary>
 /// Performs this action given a renderer message and an HTML document.
 /// The concrete behavior is defined by each override.
 /// </summary>
 /// <param name="rendererMessage">
 /// The renderer message passed to the action.
 /// NOTE(review): overrides appear to append parallel entries to its PropertiesKeys and
 /// PropertiesValues lists and to tolerate a null message - confirm this is the intended contract.
 /// </param>
 /// <param name="htmlDocumentClass">The HTML document passed to the action.</param>
 public abstract void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass);
Example #5
0
        public Renderer()
        {
            try
            {
                InitializeComponent();

                /**/

                //remove limits from service point manager
                ServicePointManager.MaxServicePoints = 10000;
                ServicePointManager.DefaultConnectionLimit = 10000;
                ServicePointManager.CheckCertificateRevocationList = true;
                ServicePointManager.Expect100Continue = false;
                ServicePointManager.MaxServicePointIdleTime = 1000 * 30;
                ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls;
                ServicePointManager.UseNagleAlgorithm = false;

                //Use if you encounter certificate errors...
                ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback(delegate { return true; });

                /**/

                ApplicationSettings applicationSettings = new ApplicationSettings();

                _arachnodeDAO = new ArachnodeDAO(applicationSettings.ConnectionString);

                _htmlRenderer = new HtmlRenderer(_arachnodeDAO);

                Closed += Renderer_Closed;

                if (_useAxWebBrowser && !DesignMode)
                {
                    object o = axWebBrowser1.GetOcx();

                    IOleObject oleObject = o as IOleObject;

                    oleObject.SetClientSite(this);
                }

                axWebBrowser1.Silent = true;

                if (_useAxWebBrowser)
                {
                    Thread thread = new Thread(() =>
                                                   {
                                                       while (true)
                                                       {
                                                           Thread.Sleep(1000 * 60 * 1);

                                                           if (_stopwatch.Elapsed.TotalMinutes > 1)
                                                           {
                                                               _stopwatch.Reset();
                                                               _stopwatch.Start();

                                                               axWebBrowser1.Stop();

                                                               axWebBrowser1_DocumentComplete(this, null);
                                                           }
                                                       }
                                                   });

                    thread.SetApartmentState(ApartmentState.STA);
                    thread.Start();
                }

                /**/

                //uncomment these to use...
                //_rendererActions.Add(new IFrames());
                //_rendererActions.Add(new Hrefs());
                //_rendererActions.Add(new Inputs());

                _htmlRenderer.DocumentComplete += _htmlParser_DocumentComplete;

                /**/

                if (_debugSingleAbsoluteUri || _debugMultipleAbsoluteUris)
                {
                    return;
                }

                /**/

                #region Default Crawling Thread
                //both should be set to 'false' for default crawling execution...
                if (!_debugSingleAbsoluteUri && !_debugMultipleAbsoluteUris)
                {
                    _stopwatchTotal.Reset();
                    _stopwatchTotal.Start();

                    _thread = new Thread(delegate()
                                             {
                                                 try
                                                 {
                                                     MessageQueue rendererMessageQueue = new MessageQueue(".\\private$\\Renderer_Renderers:" + 0);
                                                     rendererMessageQueue.Formatter = new XmlMessageFormatter(new[] { typeof(RendererMessage) });

                                                     while (rendererMessageQueue.Peek() == null)
                                                     {
                                                         Thread.Sleep(10);
                                                     }

                                                     Message message = rendererMessageQueue.Receive();

                                                     _rendererMessage = (RendererMessage)message.Body;

                                                     /**/

                                                     rendererMessageQueue = new MessageQueue(".\\private$\\Renderer_Renderers:" + _rendererMessage.ThreadNumber);
                                                     rendererMessageQueue.Formatter = new XmlMessageFormatter(new[] { typeof(RendererMessage) });

                                                     _engineMessageQueue = new MessageQueue(".\\private$\\Renderer_Engine:" + _rendererMessage.ThreadNumber);

                                                     /**/

                                                     //remoting code for Marshalling the HTMLDocumentClass...
                                                     BinaryClientFormatterSinkProvider clientProvider = null;
                                                     BinaryServerFormatterSinkProvider serverProvider = new BinaryServerFormatterSinkProvider();
                                                     serverProvider.TypeFilterLevel = TypeFilterLevel.Full;

                                                     Hashtable props = new Hashtable();
                                                     props["name"] = "Renderer" + _rendererMessage.ThreadNumber;
                                                     props["portName"] = "Renderer" + _rendererMessage.ThreadNumber;
                                                     props["authorizedGroup"] = WindowsIdentity.GetCurrent().Name;
                                                     //props["typeFilterLevel"] = TypeFilterLevel.Full;

                                                     IpcChannel channel = new IpcChannel(props, clientProvider, serverProvider);

                                                     ChannelServices.RegisterChannel(channel, false);

                                                     RemotingConfiguration.RegisterWellKnownServiceType(typeof(Renderer), "Renderer" + _rendererMessage.ThreadNumber, WellKnownObjectMode.SingleCall);
                                                     RemotingServices.Marshal(this, "Renderer" + _rendererMessage.ThreadNumber);

                                                     /**/

                                                     tsslStatus.Text = ".\\private$\\Renderer_Engine:" + _rendererMessage.ThreadNumber + " Awaiting CrawlRequests...";

                                                     while (true && !_abortThread)
                                                     {
                                                         try
                                                         {
                                                             message = rendererMessageQueue.Receive();

                                                             _stopwatch.Reset();
                                                             _stopwatch.Start();

                                                             _rendererMessage = (RendererMessage)message.Body;
                                                             _htmlRenderer.CrawlRequestTimeoutInMinutes = _rendererMessage.CrawlRequestTimeoutInMinutes;

                                                             tsslStatus.Text = DateTime.Now.ToLongTimeString() + " .\\private$\\Renderer_Engine:" + _rendererMessage.ThreadNumber + " " + _rendererMessage.AbsoluteUri + " TimeTakenToReceiveMessage: " + _stopwatch.Elapsed.TotalSeconds;

                                                             if (!_rendererMessage.Kill)
                                                             {
                                                                 switch (_rendererMessage.RenderAction)
                                                                 {
                                                                     case RenderAction.Render:
                                                                         if (!string.IsNullOrEmpty(_rendererMessage.ProxyServer))
                                                                         {
                                                                             ConnectionProxy.SetConnectionProxy(_rendererMessage.ProxyServer.TrimEnd('/'));
                                                                         }
                                                                         else
                                                                         {
                                                                             ConnectionProxy.RestoreSystemProxy();
                                                                         }

                                                                         if (!string.IsNullOrEmpty(_rendererMessage.Cookie))
                                                                         {
                                                                             //key1=value1;key2=value2;

                                                                             if (!string.IsNullOrEmpty(_rendererMessage.Cookie))
                                                                             {
                                                                                 string[] cookieSplit = _rendererMessage.Cookie.Split(";".ToCharArray());

                                                                                 foreach (string cookieSplit2 in cookieSplit)
                                                                                 {
                                                                                     string[] cookieSplit3 = cookieSplit2.Split("=".ToCharArray());

                                                                                     if (cookieSplit3.Length >= 2)
                                                                                     {
                                                                                         StringBuilder stringBuilder = new StringBuilder();

                                                                                         for (int i = 1; i < cookieSplit3.Length; i++)
                                                                                         {
                                                                                             stringBuilder.Append(cookieSplit3[i] + "=");
                                                                                         }
                                                                                         string value = stringBuilder.ToString().TrimEnd("=".ToCharArray());

                                                                                         InternetSetCookie(_rendererMessage.AbsoluteUri, cookieSplit3[0], cookieSplit3[1]);
                                                                                     }
                                                                                 }
                                                                             }
                                                                         }

                                                                         if (_useAxWebBrowser)
                                                                         {
                                                                             object userAgent = "User-Agent: " + _rendererMessage.UserAgent;
                                                                             object o1 = null;
                                                                             object o2 = null;
                                                                             object o3 = null;
                                                                             DateTime startTime = DateTime.Now;

                                                                             axWebBrowser1.Navigate(_rendererMessage.AbsoluteUri, ref o1, ref o2, ref o3, ref userAgent);

                                                                             if (_modifyDOM)
                                                                             {
                                                                                 bool wasDOMModified = false;

                                                                                 while (axWebBrowser1.ReadyState != tagREADYSTATE.READYSTATE_COMPLETE && DateTime.Now.Subtract(startTime).Duration().TotalMinutes < _rendererMessage.CrawlRequestTimeoutInMinutes)
                                                                                 {
                                                                                     Thread.Sleep(100);

                                                                                     if (axWebBrowser1.ReadyState == tagREADYSTATE.READYSTATE_INTERACTIVE)
                                                                                     {
                                                                                         if (!wasDOMModified)
                                                                                         {
                                                                                             _htmlRenderer.ModifyDOM((IHTMLDocument2)axWebBrowser1.Document, false);

                                                                                             wasDOMModified = true;
                                                                                         }
                                                                                     }
                                                                                 }
                                                                             }
                                                                         }
                                                                         else
                                                                         {
                                                                             _htmlRenderer.Render(_rendererMessage.AbsoluteUri);
                                                                         }
                                                                         break;
                                                                     case RenderAction.Back:
                                                                         axWebBrowser1.GoBack();
                                                                         break;
                                                                     case RenderAction.Forward:
                                                                         axWebBrowser1.GoForward();
                                                                         break;
                                                                 }

                                                                 try
                                                                 {
                                                                     foreach (Process process in Process.GetProcesses())
                                                                     {
                                                                         if (process.ProcessName.ToLowerInvariant() == "iexplore" ||
                                                                             process.ProcessName.ToLowerInvariant() == "chrome" ||
                                                                             process.ProcessName.ToLowerInvariant() == "vsjitdebugger" ||
                                                                             process.MainWindowTitle.ToLowerInvariant() == "web browser" ||
                                                                             process.MainWindowTitle.ToLowerInvariant() == "renderer" ||
                                                                             process.MainWindowTitle.ToLowerInvariant() == "visual studio just-in-time debugger")
                                                                         {
                                                                             //if (MessageBox.Show("Close? 1", "Arachnode.Renderer", MessageBoxButtons.YesNo) == DialogResult.Yes)
                                                                             //{
                                                                             //    process.Kill();
                                                                             //}
                                                                         }
                                                                     }

                                                                     IntPtr window = WinApis.FindWindowByCaption(IntPtr.Zero, "Web Browser");

                                                                     if (window != IntPtr.Zero)
                                                                     {
                                                                         WinApis.CloseWindow(window);
                                                                     }

                                                                     window = WinApis.FindWindowByCaption(IntPtr.Zero, "Message from webpage");

                                                                     if (window != IntPtr.Zero)
                                                                     {
                                                                         WinApis.CloseWindow(window);
                                                                     }
                                                                 }
                                                                 catch (Exception exception)
                                                                 {
                                                                     //MessageBox.Show(exception.Message);
                                                                     //MessageBox.Show(exception.StackTrace);

                                                                     _arachnodeDAO.InsertException(null, null, exception, false);
                                                                 }
                                                             }
                                                             else
                                                             {
                                                                 //if (MessageBox.Show("Close? 2", "Arachnode.Renderer", MessageBoxButtons.YesNo) == DialogResult.Yes)
                                                                 //{
                                                                 //    Process.GetCurrentProcess().Kill();
                                                                 //}
                                                             }
                                                         }
                                                         catch (Exception exception)
                                                         {
                                                             //MessageBox.Show(exception.Message);
                                                             //MessageBox.Show(exception.StackTrace);

                                                             _arachnodeDAO.InsertException(null, null, exception, false);
                                                         }
                                                     }
                                                 }
                                                 catch (Exception exception)
                                                 {
                                                     //MessageBox.Show(exception.Message);
                                                     //MessageBox.Show(exception.StackTrace);

                                                     _arachnodeDAO.InsertException(null, null, exception, false);
                                                 }
                                             });