/// <summary>
/// Records the <c>src</c> attribute of each <c>iframe</c> element in the document on the renderer message.
/// </summary>
/// <remarks>
/// For every recorded value an "iframe_src" entry is appended to <c>PropertiesKeys</c> and the value itself
/// to <c>PropertiesValues</c>, keeping the two lists aligned by index. Duplicate values are not filtered.
/// </remarks>
/// <param name="rendererMessage">
/// Message that receives the results. Nothing is recorded when it, its <c>PropertiesKeys</c> or its
/// <c>PropertiesValues</c> is null.
/// </param>
/// <param name="htmlDocumentClass">Document to scan. A null document is treated as having no iframes.</param>
public override void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass)
{
    // The guard does not depend on the element being visited, so evaluate it once up front.
    if (htmlDocumentClass == null
        || rendererMessage == null
        || rendererMessage.PropertiesKeys == null
        || rendererMessage.PropertiesValues == null)
    {
        return;
    }

    foreach (IHTMLElement htmlElement in htmlDocumentClass.getElementsByTagName("iframe"))
    {
        object src = htmlElement.getAttribute("src");

        // Skip iframes without a usable src so that a null/empty value is never paired with a key.
        if (string.IsNullOrEmpty(src as string))
        {
            continue;
        }

        rendererMessage.PropertiesKeys.Add("iframe_src");
        rendererMessage.PropertiesValues.Add(src);
    }
}
/// <summary>
/// Records href values found in the document on the renderer message under the "element_href" key.
/// </summary>
/// <remarks>
/// Two sources are inspected for every element in <c>htmlDocumentClass.all</c>:
/// the element's own <c>href</c> attribute, and every attribute whose name contains "href" on the nodes
/// obtained by parsing the element's <c>innerHTML</c>. A value is recorded only when it is a non-empty
/// string that is not already present in <c>PropertiesValues</c>; the check is against all values on the
/// message, regardless of the key they were stored under.
/// </remarks>
/// <param name="rendererMessage">
/// Message that receives the results. Nothing is recorded when it, its <c>PropertiesKeys</c> or its
/// <c>PropertiesValues</c> is null.
/// </param>
/// <param name="htmlDocumentClass">Document to scan. A null document is treated as having no elements.</param>
public override void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass)
{
    // The guard does not depend on the element being visited, so evaluate it once up front.
    if (htmlDocumentClass == null
        || rendererMessage == null
        || rendererMessage.PropertiesKeys == null
        || rendererMessage.PropertiesValues == null)
    {
        return;
    }

    foreach (IHTMLElement htmlElement in htmlDocumentClass.all)
    {
        // 1) The element's own href attribute.
        object href = htmlElement.getAttribute("href");

        if (!string.IsNullOrEmpty(href as string) && !rendererMessage.PropertiesValues.Contains(href))
        {
            rendererMessage.PropertiesKeys.Add("element_href");
            rendererMessage.PropertiesValues.Add(href);
        }

        // 2) href-like attributes inside the element's markup.
        // Read innerHTML once: it is reused for both the cheap pre-check and the parse below.
        string innerHtml = htmlElement.innerHTML;

        // Cheap pre-check so markup without any "href" text is never parsed.
        if (innerHtml == null || innerHtml.IndexOf("href", System.StringComparison.OrdinalIgnoreCase) < 0)
        {
            continue;
        }

        HtmlDocument htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(innerHtml);

        // Kept from the original implementation: only walk the parsed tree when the root exposes an attribute collection.
        if (htmlDocument.DocumentNode.Attributes == null)
        {
            continue;
        }

        foreach (HtmlAgilityPack.HtmlNode htmlNode in htmlDocument.DocumentNode.Descendants())
        {
            if (htmlNode.Attributes == null)
            {
                continue;
            }

            foreach (HtmlAttribute htmlAttribute in htmlNode.Attributes)
            {
                // Substring match on the attribute name, so any name containing "href" qualifies.
                if (htmlAttribute.Name.IndexOf("href", System.StringComparison.OrdinalIgnoreCase) < 0)
                {
                    continue;
                }

                string value = htmlAttribute.Value;

                // Apply the same non-empty rule as the direct attribute path above.
                if (string.IsNullOrEmpty(value) || rendererMessage.PropertiesValues.Contains(value))
                {
                    continue;
                }

                rendererMessage.PropertiesKeys.Add("element_href");
                rendererMessage.PropertiesValues.Add(value);
            }
        }
    }
}
/// <summary>
/// Records the <c>id</c> attribute of each <c>input</c> element in the document on the renderer message.
/// </summary>
/// <remarks>
/// For every recorded id an "input_id" entry is appended to <c>PropertiesKeys</c> and the id itself to
/// <c>PropertiesValues</c>. An id is recorded only when it is a non-empty string that is not already
/// present in <c>PropertiesValues</c>; the check is against all values on the message, regardless of the
/// key they were stored under.
/// </remarks>
/// <param name="rendererMessage">
/// Message that receives the results. Nothing is recorded when it, its <c>PropertiesKeys</c> or its
/// <c>PropertiesValues</c> is null.
/// </param>
/// <param name="htmlDocumentClass">Document to scan. A null document is treated as having no inputs.</param>
public override void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass)
{
    // The guard does not depend on the element being visited, so evaluate it once up front.
    if (htmlDocumentClass == null
        || rendererMessage == null
        || rendererMessage.PropertiesKeys == null
        || rendererMessage.PropertiesValues == null)
    {
        return;
    }

    foreach (IHTMLElement htmlElement in htmlDocumentClass.getElementsByTagName("input"))
    {
        object id = htmlElement.getAttribute("id");

        // Only non-empty string ids are of interest.
        if (string.IsNullOrEmpty(id as string))
        {
            continue;
        }

        // Skip values that are already on the message.
        if (rendererMessage.PropertiesValues.Contains(id))
        {
            continue;
        }

        rendererMessage.PropertiesKeys.Add("input_id");
        rendererMessage.PropertiesValues.Add(id);
    }
}
/// <summary>
/// Performs this renderer action against a rendered document.
/// </summary>
/// <remarks>
/// NOTE(review): implementations appear to inspect <paramref name="htmlDocumentClass"/> and append
/// index-aligned entries to the message's <c>PropertiesKeys</c> / <c>PropertiesValues</c> lists —
/// confirm against the concrete overrides.
/// </remarks>
/// <param name="rendererMessage">The renderer message made available to the action.</param>
/// <param name="htmlDocumentClass">The HTML document made available to the action.</param>
public abstract void PerformAction(RendererMessage rendererMessage, HTMLDocumentClass htmlDocumentClass);
public Renderer() { try { InitializeComponent(); /**/ //remove limits from service point manager ServicePointManager.MaxServicePoints = 10000; ServicePointManager.DefaultConnectionLimit = 10000; ServicePointManager.CheckCertificateRevocationList = true; ServicePointManager.Expect100Continue = false; ServicePointManager.MaxServicePointIdleTime = 1000 * 30; ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls; ServicePointManager.UseNagleAlgorithm = false; //Use if you encounter certificate errors... ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback(delegate { return true; }); /**/ ApplicationSettings applicationSettings = new ApplicationSettings(); _arachnodeDAO = new ArachnodeDAO(applicationSettings.ConnectionString); _htmlRenderer = new HtmlRenderer(_arachnodeDAO); Closed += Renderer_Closed; if (_useAxWebBrowser && !DesignMode) { object o = axWebBrowser1.GetOcx(); IOleObject oleObject = o as IOleObject; oleObject.SetClientSite(this); } axWebBrowser1.Silent = true; if (_useAxWebBrowser) { Thread thread = new Thread(() => { while (true) { Thread.Sleep(1000 * 60 * 1); if (_stopwatch.Elapsed.TotalMinutes > 1) { _stopwatch.Reset(); _stopwatch.Start(); axWebBrowser1.Stop(); axWebBrowser1_DocumentComplete(this, null); } } }); thread.SetApartmentState(ApartmentState.STA); thread.Start(); } /**/ //uncomment these to use... //_rendererActions.Add(new IFrames()); //_rendererActions.Add(new Hrefs()); //_rendererActions.Add(new Inputs()); _htmlRenderer.DocumentComplete += _htmlParser_DocumentComplete; /**/ if (_debugSingleAbsoluteUri || _debugMultipleAbsoluteUris) { return; } /**/ #region Default Crawling Thread //both should be set to 'false' for default crawling execution... 
if (!_debugSingleAbsoluteUri && !_debugMultipleAbsoluteUris) { _stopwatchTotal.Reset(); _stopwatchTotal.Start(); _thread = new Thread(delegate() { try { MessageQueue rendererMessageQueue = new MessageQueue(".\\private$\\Renderer_Renderers:" + 0); rendererMessageQueue.Formatter = new XmlMessageFormatter(new[] { typeof(RendererMessage) }); while (rendererMessageQueue.Peek() == null) { Thread.Sleep(10); } Message message = rendererMessageQueue.Receive(); _rendererMessage = (RendererMessage)message.Body; /**/ rendererMessageQueue = new MessageQueue(".\\private$\\Renderer_Renderers:" + _rendererMessage.ThreadNumber); rendererMessageQueue.Formatter = new XmlMessageFormatter(new[] { typeof(RendererMessage) }); _engineMessageQueue = new MessageQueue(".\\private$\\Renderer_Engine:" + _rendererMessage.ThreadNumber); /**/ //remoting code for Marshalling the HTMLDocumentClass... BinaryClientFormatterSinkProvider clientProvider = null; BinaryServerFormatterSinkProvider serverProvider = new BinaryServerFormatterSinkProvider(); serverProvider.TypeFilterLevel = TypeFilterLevel.Full; Hashtable props = new Hashtable(); props["name"] = "Renderer" + _rendererMessage.ThreadNumber; props["portName"] = "Renderer" + _rendererMessage.ThreadNumber; props["authorizedGroup"] = WindowsIdentity.GetCurrent().Name; //props["typeFilterLevel"] = TypeFilterLevel.Full; IpcChannel channel = new IpcChannel(props, clientProvider, serverProvider); ChannelServices.RegisterChannel(channel, false); RemotingConfiguration.RegisterWellKnownServiceType(typeof(Renderer), "Renderer" + _rendererMessage.ThreadNumber, WellKnownObjectMode.SingleCall); RemotingServices.Marshal(this, "Renderer" + _rendererMessage.ThreadNumber); /**/ tsslStatus.Text = ".\\private$\\Renderer_Engine:" + _rendererMessage.ThreadNumber + " Awaiting CrawlRequests..."; while (true && !_abortThread) { try { message = rendererMessageQueue.Receive(); _stopwatch.Reset(); _stopwatch.Start(); _rendererMessage = (RendererMessage)message.Body; 
_htmlRenderer.CrawlRequestTimeoutInMinutes = _rendererMessage.CrawlRequestTimeoutInMinutes; tsslStatus.Text = DateTime.Now.ToLongTimeString() + " .\\private$\\Renderer_Engine:" + _rendererMessage.ThreadNumber + " " + _rendererMessage.AbsoluteUri + " TimeTakenToReceiveMessage: " + _stopwatch.Elapsed.TotalSeconds; if (!_rendererMessage.Kill) { switch (_rendererMessage.RenderAction) { case RenderAction.Render: if (!string.IsNullOrEmpty(_rendererMessage.ProxyServer)) { ConnectionProxy.SetConnectionProxy(_rendererMessage.ProxyServer.TrimEnd('/')); } else { ConnectionProxy.RestoreSystemProxy(); } if (!string.IsNullOrEmpty(_rendererMessage.Cookie)) { //key1=value1;key2=value2; if (!string.IsNullOrEmpty(_rendererMessage.Cookie)) { string[] cookieSplit = _rendererMessage.Cookie.Split(";".ToCharArray()); foreach (string cookieSplit2 in cookieSplit) { string[] cookieSplit3 = cookieSplit2.Split("=".ToCharArray()); if (cookieSplit3.Length >= 2) { StringBuilder stringBuilder = new StringBuilder(); for (int i = 1; i < cookieSplit3.Length; i++) { stringBuilder.Append(cookieSplit3[i] + "="); } string value = stringBuilder.ToString().TrimEnd("=".ToCharArray()); InternetSetCookie(_rendererMessage.AbsoluteUri, cookieSplit3[0], cookieSplit3[1]); } } } } if (_useAxWebBrowser) { object userAgent = "User-Agent: " + _rendererMessage.UserAgent; object o1 = null; object o2 = null; object o3 = null; DateTime startTime = DateTime.Now; axWebBrowser1.Navigate(_rendererMessage.AbsoluteUri, ref o1, ref o2, ref o3, ref userAgent); if (_modifyDOM) { bool wasDOMModified = false; while (axWebBrowser1.ReadyState != tagREADYSTATE.READYSTATE_COMPLETE && DateTime.Now.Subtract(startTime).Duration().TotalMinutes < _rendererMessage.CrawlRequestTimeoutInMinutes) { Thread.Sleep(100); if (axWebBrowser1.ReadyState == tagREADYSTATE.READYSTATE_INTERACTIVE) { if (!wasDOMModified) { _htmlRenderer.ModifyDOM((IHTMLDocument2)axWebBrowser1.Document, false); wasDOMModified = true; } } } } } else { 
_htmlRenderer.Render(_rendererMessage.AbsoluteUri); } break; case RenderAction.Back: axWebBrowser1.GoBack(); break; case RenderAction.Forward: axWebBrowser1.GoForward(); break; } try { foreach (Process process in Process.GetProcesses()) { if (process.ProcessName.ToLowerInvariant() == "iexplore" || process.ProcessName.ToLowerInvariant() == "chrome" || process.ProcessName.ToLowerInvariant() == "vsjitdebugger" || process.MainWindowTitle.ToLowerInvariant() == "web browser" || process.MainWindowTitle.ToLowerInvariant() == "renderer" || process.MainWindowTitle.ToLowerInvariant() == "visual studio just-in-time debugger") { //if (MessageBox.Show("Close? 1", "Arachnode.Renderer", MessageBoxButtons.YesNo) == DialogResult.Yes) //{ // process.Kill(); //} } } IntPtr window = WinApis.FindWindowByCaption(IntPtr.Zero, "Web Browser"); if (window != IntPtr.Zero) { WinApis.CloseWindow(window); } window = WinApis.FindWindowByCaption(IntPtr.Zero, "Message from webpage"); if (window != IntPtr.Zero) { WinApis.CloseWindow(window); } } catch (Exception exception) { //MessageBox.Show(exception.Message); //MessageBox.Show(exception.StackTrace); _arachnodeDAO.InsertException(null, null, exception, false); } } else { //if (MessageBox.Show("Close? 2", "Arachnode.Renderer", MessageBoxButtons.YesNo) == DialogResult.Yes) //{ // Process.GetCurrentProcess().Kill(); //} } } catch (Exception exception) { //MessageBox.Show(exception.Message); //MessageBox.Show(exception.StackTrace); _arachnodeDAO.InsertException(null, null, exception, false); } } } catch (Exception exception) { //MessageBox.Show(exception.Message); //MessageBox.Show(exception.StackTrace); _arachnodeDAO.InsertException(null, null, exception, false); } });