/**************************************************************************/

/// <summary>
/// Populates the controls grid with one header row, one row of controls per
/// custom filter slot, and a trailing row of empty labels used as spacing.
/// </summary>
/// <param name="NewContainerForm">Form that hosts this control; stored on the instance.</param>
/// <param name="NewCustomFilter">Custom filter set; its size decides how many slot rows are built.</param>
public void ConfigureCustomFilterForm(
  MacroscopeCustomFilterForm NewContainerForm,
  MacroscopeCustomFilters NewCustomFilter
)
{
  this.ContainerForm = NewContainerForm;
  this.CustomFilter = NewCustomFilter;

  int SlotCount = this.CustomFilter.GetSize();
  TableLayoutPanel Grid = this.tableLayoutPanelControlsGrid;
  Padding CellMargin = new Padding(5, 5, 5, 5);

  Grid.ColumnCount = 3;
  Grid.RowCount = SlotCount + 1;

  /** Header Row ------------------------------------------------------- **/

  foreach (string Heading in new string[] { "", "Filter Action", "Search String" })
  {
    Label HeaderCell = new Label
    {
      Text = Heading,
      Dock = DockStyle.Fill,
      Margin = CellMargin,
      TextAlign = ContentAlignment.BottomLeft,
      Height = 20
    };
    Grid.Controls.Add(HeaderCell);
  }

  /** One Row Per Filter Slot ------------------------------------------ **/

  for (int Slot = 0; Slot < SlotCount; Slot++)
  {
    int SlotNumber = Slot + 1;

    Label RowLabel = new Label
    {
      Text = string.Format("Custom Filter {0}", SlotNumber),
      TextAlign = ContentAlignment.MiddleRight,
      Dock = DockStyle.Fill,
      Margin = CellMargin,
      Width = 50
    };

    // The items must be in place before SelectedIndex is assigned.
    ComboBox ActionChooser = new ComboBox
    {
      Name = string.Format("ComboBoxFilter{0}", SlotNumber),
      Items =
      {
        "No action",
        "Must have text",
        "Must not have text",
        "Must have regex",
        "Must not have regex"
      },
      DropDownStyle = ComboBoxStyle.DropDownList,
      SelectedIndex = 0,
      Margin = CellMargin,
      Width = 100
    };

    // Tag carries the zero-based slot index as a string.
    TextBox PatternBox = new TextBox
    {
      Name = string.Format("TextBoxFilter{0}", SlotNumber),
      Dock = DockStyle.Fill,
      Margin = CellMargin,
      Tag = Slot.ToString()
    };
    PatternBox.KeyUp += this.CallbackTextBoxKeyUp;
    PatternBox.TextChanged += this.CallbackTextBoxExpressionTextChanged;

    Grid.Controls.Add(RowLabel);
    Grid.Controls.Add(ActionChooser);
    Grid.Controls.Add(PatternBox);

    this.TextBoxLabels.Add(RowLabel);
    this.StateComboBoxFilters.Add(ActionChooser);
    this.TextBoxFilters.Add(PatternBox);
  }

  /** Empty Last Row For Space Adjustment ------------------------------ **/

  for (int Column = 0; Column < Grid.ColumnCount; Column++)
  {
    Grid.Controls.Add(new Label { Text = "" });
  }
}
/** -------------------------------------------------------------------- **/

/// <summary>
/// Fetches this document's URI as a CSS stylesheet and processes the response:
/// sets the base href to the stylesheet's directory, processes the response
/// headers, reads the body, parses it for outlinks, runs the custom filters and
/// data extractors when enabled for CSS, and takes the title from the last
/// path segment of the document URL.
/// </summary>
/// <remarks>
/// A failed fetch is recorded as a BadRequest status plus a remark, and the
/// exception message is passed to ProcessErrorCondition at the end.
/// </remarks>
private async Task _ProcessCssPage()
{
  MacroscopeHttpTwoClient Fetcher = this.DocCollection.GetJobMaster().GetHttpClient();
  MacroscopeHttpTwoClientResponse CssResponse = null;
  string FetchError = null;

  DebugMsg(string.Format("ProcessCssPage: {0}", ""));

  try
  {
    CssResponse = await Fetcher.Get(
      this.GetUri(),
      this.ConfigureCssPageRequestHeadersCallback,
      this.PostProcessRequestHttpHeadersCallback
    );
  }
  catch (MacroscopeDocumentException ex)
  {
    this.DebugMsg(string.Format("_ProcessCssPage :: MacroscopeDocumentException: {0}", ex.Message));
    FetchError = ex.Message;
    this.SetStatusCode(HttpStatusCode.BadRequest);
    this.AddRemark("_ProcessCssPage", ex.Message);
  }
  catch (Exception ex)
  {
    this.DebugMsg(string.Format("_ProcessCssPage :: Exception: {0}", ex.Message));
    FetchError = ex.Message;
    this.SetStatusCode(HttpStatusCode.BadRequest);
    this.AddRemark("_ProcessCssPage", ex.Message);
  }

  if (CssResponse != null)
  {

    /** Set Base URL --------------------------------------------------- **/

    // The base href is the stylesheet URI with its final path segment removed.
    Uri SheetUri = this.GetUri();
    string SheetDirectory = System.IO.Path.GetDirectoryName(SheetUri.LocalPath);
    UriBuilder BaseBuilder = new UriBuilder(
      scheme: SheetUri.Scheme,
      host: SheetUri.Host,
      port: SheetUri.Port,
      pathValue: SheetDirectory
    );
    Uri BaseUri = new Uri(BaseBuilder.ToString());
    this.SetBaseHref(BaseUri.ToString());

    /** Process The Response Body -------------------------------------- **/

    string Body = "";

    this.ProcessResponseHttpHeaders(Response: CssResponse);

    /** Get Response Body ---------------------------------------------- **/

    // NOTE(review): no checksum is recorded for the body here - confirm whether one is expected.
    try
    {
      DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));
      Body = CssResponse.GetContentAsString();
      this.SetContentLength(Length: Body.Length); // May need to find bytes length
      this.SetWasDownloaded(true);
    }
    catch (Exception ex)
    {
      DebugMsg(string.Format("Exception: {0}", ex.Message));
      this.SetStatusCode(HttpStatusCode.Ambiguous);
      this.SetContentLength(Length: 0);
    }

    bool HasBody = !string.IsNullOrEmpty(Body);

    if (HasBody)
    {
      // Parse failures are logged and remarked; they do not abort the rest of the processing.
      try
      {
        CssLoader Loader = new CssLoader();
        CssStyleSheet ParsedSheet = Loader.ParseSheet(Body, this.GetUri(), this.GetUri());
        this.ProcessCssOutlinks(Stylesheet: ParsedSheet);
      }
      catch (Exception ex)
      {
        this.DebugMsg(string.Format("ProcessHtmlAttributeCssLinks: {0}", ex.Message));
        this.AddRemark("ProcessHtmlAttributeCssLinks", ex.Message);
      }
    }
    else
    {
      DebugMsg(string.Format("ProcessCssPage: ERROR: {0}", this.GetUrl()));
    }

    /** Custom Filters ------------------------------------------------- **/

    if (
      HasBody
      && MacroscopePreferencesManager.GetCustomFiltersEnable()
      && MacroscopePreferencesManager.GetCustomFiltersApplyToCss())
    {
      MacroscopeCustomFilters ActiveFilter = this.DocCollection.GetJobMaster().GetCustomFilter();
      if ((ActiveFilter != null) && ActiveFilter.IsEnabled())
      {
        this.ProcessGenericCustomFiltered(
          CustomFilter: ActiveFilter,
          GenericText: Body
        );
      }
    }

    /** Data Extractors ------------------------------------------------ **/

    if (
      HasBody
      && MacroscopePreferencesManager.GetDataExtractorsEnable()
      && MacroscopePreferencesManager.GetDataExtractorsApplyToCss())
    {
      this.ProcessGenericDataExtractors(GenericText: Body);
    }

    /** Title ---------------------------------------------------------- **/

    // The title is the text after the final '/' of the document URL, if any.
    string TitleFromUrl = null;
    foreach (Match Candidate in Regex.Matches(this.DocUrl, "/([^/]+)$"))
    {
      string Segment = Candidate.Groups[1].Value;
      if (Segment.Length > 0)
      {
        TitleFromUrl = Segment;
        break;
      }
    }

    if (TitleFromUrl == null)
    {
      DebugMsg(string.Format("TITLE: {0}", "MISSING"));
    }
    else
    {
      this.SetTitle(TitleFromUrl, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
      DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
    }

  }

  if (FetchError != null)
  {
    this.ProcessErrorCondition(FetchError);
  }
}
/** -------------------------------------------------------------------- **/

/// <summary>
/// Fetches this document's URI as XML and processes the response: processes
/// the response headers, reads the body, loads it into an XmlDocument, runs
/// the custom filters and data extractors when enabled for XML, processes the
/// document as a sitemap when one is detected, and stores the raw body as the
/// document text.
/// </summary>
/// <remarks>
/// A failed fetch is recorded as a BadRequest status plus a remark, and the
/// exception message is passed to ProcessErrorCondition at the end.
/// XML parse errors are only logged.
/// </remarks>
private async Task _ProcessXmlPage()
{
  XmlDocument XmlDoc = null;
  MacroscopeHttpTwoClient Client = this.DocCollection.GetJobMaster().GetHttpClient();
  MacroscopeHttpTwoClientResponse Response = null;
  string ResponseErrorCondition = null;

  try
  {
    Response = await Client.Get(
      this.GetUri(),
      this.ConfigureXmlPageRequestHeadersCallback,
      this.PostProcessRequestHttpHeadersCallback
    );
  }
  catch (MacroscopeDocumentException ex)
  {
    this.DebugMsg(string.Format("_ProcessXmlPage :: MacroscopeDocumentException: {0}", ex.Message));
    ResponseErrorCondition = ex.Message;
    this.SetStatusCode(HttpStatusCode.BadRequest);
    this.AddRemark("_ProcessXmlPage", ex.Message);
  }
  catch (Exception ex)
  {
    this.DebugMsg(string.Format("_ProcessXmlPage :: Exception: {0}", ex.Message));
    ResponseErrorCondition = ex.Message;
    this.SetStatusCode(HttpStatusCode.BadRequest);
    this.AddRemark("_ProcessXmlPage", ex.Message);
  }

  if (Response != null)
  {
    string RawData = "";

    this.ProcessResponseHttpHeaders(Response: Response);

    /** Get Response Body ---------------------------------------------- **/

    try
    {
      DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));
      RawData = Response.GetContentAsString();
      this.SetContentLength(Length: RawData.Length); // May need to find bytes length
      this.SetWasDownloaded(true);
      this.SetChecksum(RawData);
    }
    catch (Exception ex)
    {
      DebugMsg(string.Format("Exception: {0}", ex.Message));
      this.SetStatusCode(HttpStatusCode.BadRequest);
      RawData = "";
      this.SetContentLength(Length: 0);
    }

    /** Parse XML ------------------------------------------------------ **/

    if (!string.IsNullOrEmpty(RawData))
    {
      XmlDoc = new XmlDocument();

      // The body comes from a remote server, so external DTDs and entities
      // must never be resolved while parsing it (XXE / unwanted fetches).
      // A null resolver is already the default on newer runtimes; setting it
      // explicitly makes the behaviour independent of the target framework.
      XmlDoc.XmlResolver = null;

      try
      {
        XmlDoc.LoadXml(RawData);
      }
      catch (XmlException ex)
      {
        DebugMsg(string.Format("XmlException: {0}", ex.Message));
      }
      catch (Exception ex)
      {
        DebugMsg(string.Format("Exception: {0}", ex.Message));
      }

      DebugMsg(string.Format("XmlDoc: {0}", XmlDoc));
    }
    else
    {
      DebugMsg(string.Format("RawData: {0}", "EMPTY"));
    }

    /** Custom Filters ------------------------------------------------- **/

    if (!string.IsNullOrEmpty(RawData))
    {
      if (
        MacroscopePreferencesManager.GetCustomFiltersEnable()
        && MacroscopePreferencesManager.GetCustomFiltersApplyToXml())
      {
        MacroscopeCustomFilters CustomFilter = this.DocCollection.GetJobMaster().GetCustomFilter();
        if ((CustomFilter != null) && (CustomFilter.IsEnabled()))
        {
          this.ProcessGenericCustomFiltered(
            CustomFilter: CustomFilter,
            GenericText: RawData
          );
        }
      }
    }

    /** Data Extractors ------------------------------------------------ **/

    if (!string.IsNullOrEmpty(RawData))
    {
      if (
        MacroscopePreferencesManager.GetDataExtractorsEnable()
        && MacroscopePreferencesManager.GetDataExtractorsApplyToXml())
      {
        this.ProcessGenericDataExtractors(GenericText: RawData);
      }
    }

    /** Sitemap Detection ---------------------------------------------- **/

    // The DocumentElement check skips documents that did not yield a root element.
    if ((XmlDoc != null) && (XmlDoc.DocumentElement != null))
    {
      if (this.DetectSitemapXmlDocument(XmlDoc))
      {
        DebugMsg(string.Format("ProcessXmlPage: {0} :: {1}", "SITEMAP DETECTED", this.GetUrl()));
        this.SetDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML);
        this.ProcessSitemapXmlOutlinks(XmlDoc);
      }
    }

    /** Document Text -------------------------------------------------- **/

    if (RawData != null)
    {
      this.SetDocumentText(Text: RawData);
    }

  }

  if (ResponseErrorCondition != null)
  {
    this.ProcessErrorCondition(ResponseErrorCondition);
  }
}
/** -------------------------------------------------------------------- **/

/// <summary>
/// Fetches this document's URI as a JavaScript resource and processes the
/// response: processes the response headers, reads the body, records its
/// length and checksum, runs the custom filters and data extractors when
/// enabled for JavaScript, and takes the title from the last path segment of
/// the document URL.
/// </summary>
/// <remarks>
/// A failed fetch is recorded as a BadRequest status plus a remark, and the
/// exception message is passed to ProcessErrorCondition at the end.
/// </remarks>
private async Task _ProcessJavascriptPage()
{
  MacroscopeHttpTwoClient Fetcher = this.DocCollection.GetJobMaster().GetHttpClient();
  MacroscopeHttpTwoClientResponse ScriptResponse = null;
  string FetchError = null;

  try
  {
    ScriptResponse = await Fetcher.Get(
      this.GetUri(),
      this.ConfigureJavascriptPageRequestHeadersCallback,
      this.PostProcessRequestHttpHeadersCallback
    );
  }
  catch (MacroscopeDocumentException ex)
  {
    this.DebugMsg(string.Format("_ProcessJavascriptPage :: MacroscopeDocumentException: {0}", ex.Message));
    FetchError = ex.Message;
    this.SetStatusCode(HttpStatusCode.BadRequest);
    this.AddRemark("_ProcessJavascriptPage", ex.Message);
  }
  catch (Exception ex)
  {
    this.DebugMsg(string.Format("_ProcessJavascriptPage :: Exception: {0}", ex.Message));
    FetchError = ex.Message;
    this.SetStatusCode(HttpStatusCode.BadRequest);
    this.AddRemark("_ProcessJavascriptPage", ex.Message);
  }

  if (ScriptResponse != null)
  {
    string Body = "";

    this.ProcessResponseHttpHeaders(Response: ScriptResponse);

    /** Get Response Body ---------------------------------------------- **/

    // The body is taken as the client decodes it; no charset sniffing is applied here.
    // NOTE(review): SetWasDownloaded(true) is never called in this handler - confirm that is intentional.
    try
    {
      DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));
      Body = ScriptResponse.GetContentAsString();
      this.SetContentLength(Length: Body.Length); // May need to find bytes length
      this.SetChecksum(Body);
    }
    catch (Exception ex)
    {
      DebugMsg(string.Format("Exception: {0}", ex.Message));
      this.SetStatusCode(HttpStatusCode.Ambiguous);
      Body = "";
      this.SetContentLength(Length: 0);
    }

    bool HasBody = !string.IsNullOrEmpty(Body);

    /** Custom Filters ------------------------------------------------- **/

    if (
      HasBody
      && MacroscopePreferencesManager.GetCustomFiltersEnable()
      && MacroscopePreferencesManager.GetCustomFiltersApplyToJavascripts())
    {
      MacroscopeCustomFilters ActiveFilter = this.DocCollection.GetJobMaster().GetCustomFilter();
      if ((ActiveFilter != null) && ActiveFilter.IsEnabled())
      {
        this.ProcessGenericCustomFiltered(
          CustomFilter: ActiveFilter,
          GenericText: Body
        );
      }
    }

    /** Data Extractors ------------------------------------------------ **/

    if (
      HasBody
      && MacroscopePreferencesManager.GetDataExtractorsEnable()
      && MacroscopePreferencesManager.GetDataExtractorsApplyToJavascripts())
    {
      this.ProcessGenericDataExtractors(GenericText: Body);
    }

    /** Title ---------------------------------------------------------- **/

    // The title is the text after the final '/' of the document URL, if any.
    string TitleFromUrl = null;
    foreach (Match Candidate in Regex.Matches(this.DocUrl, "/([^/]+)$"))
    {
      string Segment = Candidate.Groups[1].Value;
      if (Segment.Length > 0)
      {
        TitleFromUrl = Segment;
        break;
      }
    }

    if (TitleFromUrl == null)
    {
      DebugMsg(string.Format("TITLE: {0}", "MISSING"));
    }
    else
    {
      this.SetTitle(TitleFromUrl, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
      DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
    }

  }

  if (FetchError != null)
  {
    this.ProcessErrorCondition(FetchError);
  }
}
/**************************************************************************/

/// <summary>
/// Renders one list view row per URL in <paramref name="UrlList"/> that has a
/// document in <paramref name="DocCollection"/> and to which the custom
/// filters can be applied. Each row shows the URL, status code, status, MIME
/// type and one colour-coded cell per custom filter slot.
/// </summary>
/// <param name="DocCollection">Collection the documents are looked up in; nothing is rendered when it is empty.</param>
/// <param name="UrlList">URLs to render, in display order.</param>
/// <param name="CustomFilter">Filter set supplying the slot count and the pattern for each slot.</param>
/// <exception cref="InvalidOperationException">Thrown when FilterColOffset is still -1.</exception>
private void RenderListView(
  MacroscopeDocumentCollection DocCollection,
  List<string> UrlList,
  MacroscopeCustomFilters CustomFilter
)
{
  if (this.FilterColOffset == -1)
  {
    throw new InvalidOperationException("this.FilterColOffset invalid");
  }

  if (DocCollection.CountDocuments() == 0)
  {
    return;
  }

  int FilterCount = CustomFilter.GetSize();
  bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();
  Dictionary<string, int> FilterColsTable = new Dictionary<string, int>(FilterCount);
  List<ListViewItem> ListViewItems = new List<ListViewItem>();

  MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

  // try/finally makes sure the progress form is closed and disposed even
  // when rendering throws part-way through.
  try
  {
    decimal Count = 0;
    decimal TotalDocs = (decimal)DocCollection.CountDocuments();
    decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

    if (ShowProgress)
    {
      ProgressForm.UpdatePercentages(
        Title: "Preparing Display",
        Message: "Processing document collection for display:",
        MajorPercentage: MajorPercentage,
        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
      );
    }

    /** Map Each Filter Pattern To Its Column Slot ----------------------- **/

    // A pattern that repeats an earlier slot is stored under a placeholder
    // key, so lookups by pattern resolve to the first slot that used it.
    for (int Slot = 0; Slot < FilterCount; Slot++)
    {
      string FilterPattern = CustomFilter.GetPattern(Slot).Key;
      if (FilterColsTable.ContainsKey(FilterPattern))
      {
        FilterColsTable.Add(string.Format("EMPTY{0}", Slot + 1), Slot + 1);
      }
      else
      {
        FilterColsTable.Add(FilterPattern, Slot + 1);
      }
    }

    /** Build Or Update One Row Per Document ----------------------------- **/

    foreach (string Url in UrlList)
    {
      MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

      if (msDoc == null)
      {
        continue;
      }

      string DocUrl = msDoc.GetUrl();
      string PairKey = DocUrl;
      string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
      string Status = msDoc.GetStatusCode().ToString();
      string MimeType = msDoc.GetMimeType();

      if (!CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
      {
        continue;
      }

      // The keyed indexer returns null when no row has this key, so a
      // single lookup replaces ContainsKey followed by the indexer.
      ListViewItem lvItem = this.DisplayListView.Items[PairKey];

      if (lvItem == null)
      {
        lvItem = new ListViewItem(PairKey);
        lvItem.UseItemStyleForSubItems = false;
        lvItem.Name = PairKey;

        // Four fixed columns, then one cell per filter slot.
        for (int Cell = 0; Cell < 4 + FilterCount; Cell++)
        {
          lvItem.SubItems.Add("");
        }

        ListViewItems.Add(lvItem);
      }

      try
      {
        lvItem.SubItems[ColUrl].Text = DocUrl;
        lvItem.SubItems[ColStatusCode].Text = StatusCode;
        lvItem.SubItems[ColStatus].Text = Status;
        lvItem.SubItems[ColMimeType].Text = MimeType;

        for (int Slot = 0; Slot < FilterCount; Slot++)
        {
          string FilterPattern = CustomFilter.GetPattern(Slot: Slot).Key;
          KeyValuePair<string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);
          int ColOffset = this.FilterColOffset + FilterColsTable[FilterPattern];

          if ((Pair.Key == null) || (Pair.Value == MacroscopeConstants.TextPresence.UNDEFINED))
          {
            continue;
          }

          lvItem.SubItems[ColOffset].Text = MacroscopeConstants.TextPresenceLabels[Pair.Value];

          switch (Pair.Value)
          {
            case MacroscopeConstants.TextPresence.CONTAINS_STRING:
            case MacroscopeConstants.TextPresence.NOT_CONTAINS_STRING:
            case MacroscopeConstants.TextPresence.CONTAINS_REGEX:
            case MacroscopeConstants.TextPresence.NOT_CONTAINS_REGEX:
              lvItem.SubItems[ColOffset].ForeColor = Color.Green;
              break;
            case MacroscopeConstants.TextPresence.MUST_CONTAIN_STRING:
            case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_STRING:
            case MacroscopeConstants.TextPresence.MUST_CONTAIN_REGEX:
            case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_REGEX:
              lvItem.SubItems[ColOffset].ForeColor = Color.Red;
              break;
            default:
              lvItem.SubItems[ColOffset].ForeColor = Color.Gray;
              break;
          }
        }
      }
      catch (Exception ex)
      {
        DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.Message));
        DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.StackTrace));
      }

      lvItem.SubItems[ColUrl].ForeColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

      // Colour by the leading digit of the numeric status code:
      // 2xx green, 3xx goldenrod, 4xx/5xx red, anything else blue.
      Color StatusColor;
      switch (StatusCode.Length > 0 ? StatusCode[0] : '\0')
      {
        case '2':
          StatusColor = Color.Green;
          break;
        case '3':
          StatusColor = Color.Goldenrod;
          break;
        case '4':
        case '5':
          StatusColor = Color.Red;
          break;
        default:
          StatusColor = Color.Blue;
          break;
      }
      lvItem.SubItems[ColStatusCode].ForeColor = StatusColor;
      lvItem.SubItems[ColStatus].ForeColor = StatusColor;

      if (ShowProgress)
      {
        Count++;
        MajorPercentage = ((decimal)100 / TotalDocs) * Count;
        ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: MajorPercentage,
          ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
        );
      }
    }

    /** Commit New Rows And Size Columns --------------------------------- **/

    this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

    this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);
    this.DisplayListView.Columns[ColUrl].Width = 300;
    this.DisplayListView.Columns[ColStatusCode].Width = 100;
    this.DisplayListView.Columns[ColStatus].Width = 100;
    this.DisplayListView.Columns[ColMimeType].Width = 100;
  }
  finally
  {
    if (ShowProgress)
    {
      ProgressForm.DoClose();
    }
    ProgressForm.Dispose();
  }
}
/**************************************************************************/

/// <summary>
/// Creates the report and stores the custom filter set on the instance.
/// </summary>
/// <param name="NewCustomFilter">Custom filter set assigned to this report's CustomFilter member.</param>
public MacroscopeCsvCustomFilterReport(
  MacroscopeCustomFilters NewCustomFilter
)
{
  this.CustomFilter = NewCustomFilter;
}
/** -------------------------------------------------------------------- **/

/// <summary>
/// Fetches this document's URI as plain text and processes the response:
/// processes the response headers, reads the body, splits it into lines, runs
/// the custom filters and data extractors when enabled for text, then handles
/// it either as a robots.txt file (outlinks, sitemap detection, size remark)
/// or, for internal documents, as pure text to scan for outlinks.
/// </summary>
/// <remarks>
/// A failed fetch is recorded as a BadRequest status plus a remark, and the
/// exception message is passed to ProcessErrorCondition at the end.
/// </remarks>
private async Task _ProcessTextPage()
{
  List<string> TextLines = new List<string>();
  MacroscopeHttpTwoClient Fetcher = this.DocCollection.GetJobMaster().GetHttpClient();
  MacroscopeHttpTwoClientResponse TextResponse = null;
  string FetchError = null;

  try
  {
    TextResponse = await Fetcher.Get(
      this.GetUri(),
      this.ConfigureTextPageRequestHeadersCallback,
      this.PostProcessRequestHttpHeadersCallback
    );
  }
  catch (MacroscopeDocumentException ex)
  {
    this.DebugMsg(string.Format("_ProcessTextPage :: MacroscopeDocumentException: {0}", ex.Message));
    FetchError = ex.Message;
    this.SetStatusCode(HttpStatusCode.BadRequest);
    this.AddRemark("_ProcessTextPage", ex.Message);
  }
  catch (Exception ex)
  {
    this.DebugMsg(string.Format("_ProcessTextPage :: Exception: {0}", ex.Message));
    FetchError = ex.Message;
    this.SetStatusCode(HttpStatusCode.BadRequest);
    this.AddRemark("_ProcessTextPage", ex.Message);
  }

  if (TextResponse != null)
  {
    string Body = "";

    this.ProcessResponseHttpHeaders(Response: TextResponse);

    /** Get Response Body ---------------------------------------------- **/

    try
    {
      DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));
      Body = TextResponse.GetContentAsString();
      this.SetContentLength(Length: Body.Length); // May need to find bytes length
      this.SetWasDownloaded(true);
      this.SetChecksum(Body);
    }
    catch (Exception ex)
    {
      DebugMsg(string.Format("Exception: {0}", ex.Message));
      this.SetStatusCode(HttpStatusCode.BadRequest);
      Body = "";
      this.SetContentLength(Length: 0);
    }

    bool HasBody = !string.IsNullOrEmpty(Body);

    /** Split Into Lines ----------------------------------------------- **/

    // Runs of CR/LF characters act as a single separator, so blank lines are dropped.
    if (HasBody)
    {
      TextLines = new List<string>(Regex.Split(Body, @"[\r\n]+"));
      DebugMsg(string.Format("TextDoc: {0}", TextLines.Count));
    }
    else
    {
      DebugMsg(string.Format("RawData: {0}", "EMPTY"));
    }

    /** Custom Filters ------------------------------------------------- **/

    if (
      HasBody
      && MacroscopePreferencesManager.GetCustomFiltersEnable()
      && MacroscopePreferencesManager.GetCustomFiltersApplyToText())
    {
      MacroscopeCustomFilters ActiveFilter = this.DocCollection.GetJobMaster().GetCustomFilter();
      if ((ActiveFilter != null) && ActiveFilter.IsEnabled())
      {
        this.ProcessGenericCustomFiltered(
          CustomFilter: ActiveFilter,
          GenericText: Body
        );
      }
    }

    /** Data Extractors ------------------------------------------------ **/

    if (
      HasBody
      && MacroscopePreferencesManager.GetDataExtractorsEnable()
      && MacroscopePreferencesManager.GetDataExtractorsApplyToText())
    {
      this.ProcessGenericDataExtractors(GenericText: Body);
    }

    /** Process Text Document ------------------------------------------ **/

    if (TextLines.Count == 0)
    {
      this.SetDocumentText(Text: "");
    }
    else
    {
      this.SetDocumentText(Text: string.Join(Environment.NewLine, TextLines));

      bool IsRobotsFile = this.GetPath().EndsWith("robots.txt", StringComparison.InvariantCultureIgnoreCase);

      if (IsRobotsFile)
      {
        // The recorded content length is captured before any outlink processing runs.
        long? RecordedSize = this.GetContentLength();
        long? RobotsSizeLimit = 1024 * 512;

        this.ProcessRobotsTextOutlinks(TextDoc: TextLines);

        if (this.DetectSitemapTextDocument(TextDoc: TextLines))
        {
          DebugMsg(string.Format("ProcessTextPage: {0} :: {1}", "SITEMAP DETECTED", this.GetUrl()));
          this.SetDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPTEXT);
          this.ProcessSitemapTextOutlinks(TextDoc: TextLines);
        }

        if (RecordedSize > RobotsSizeLimit)
        {
          this.AddRemark("ROBOTS_TOO_BIG", "Robots.txt is larger than 512KB");
        }
      }
      else if (this.GetIsInternal())
      {
        this.ProcessPureTextOutlinks(TextDoc: TextLines, LinkType: MacroscopeConstants.InOutLinkType.PURETEXT);
      }
    }

  }

  if (FetchError != null)
  {
    this.ProcessErrorCondition(FetchError);
  }
}
/**************************************************************************/

/// <summary>
/// Synchronously requests this document's URL as a CSS stylesheet and
/// processes the response: processes the response headers, reads the body,
/// parses it with ExCSS for outlinks, runs the custom filters and data
/// extractors when enabled for CSS, and takes the title from the last path
/// segment of the document URL.
/// </summary>
/// <remarks>
/// Request failures (UriFormatException, TimeoutException, WebException) are
/// logged and passed to ProcessErrorCondition at the end. The response is
/// always closed, even when processing the body throws.
/// </remarks>
private void ProcessCssPage()
{
  HttpWebRequest req = null;
  HttpWebResponse res = null;
  string ResponseErrorCondition = null;
  bool IsAuthenticating = false;

  DebugMsg(string.Format("ProcessCssPage: {0}", ""));

  try
  {
    req = WebRequest.CreateHttp(this.DocUrl);
    req.Method = "GET";
    req.Timeout = this.Timeout;
    req.KeepAlive = false;
    req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

    this.PrepareRequestHttpHeaders(req: req);

    IsAuthenticating = this.AuthenticateRequest(req);

    MacroscopePreferencesManager.EnableHttpProxy(req);

    res = (HttpWebResponse)req.GetResponse();
  }
  catch (UriFormatException ex)
  {
    DebugMsg(string.Format("ProcessCssPage :: UriFormatException: {0}", ex.Message));
    ResponseErrorCondition = ex.Message;
  }
  catch (TimeoutException ex)
  {
    DebugMsg(string.Format("ProcessCssPage :: TimeoutException: {0}", ex.Message));
    ResponseErrorCondition = ex.Message;
  }
  catch (WebException ex)
  {
    DebugMsg(string.Format("ProcessCssPage :: WebException: {0}", ex.Message));
    DebugMsg(string.Format("ProcessCssPage :: WebException: {0}", ex.Status));
    DebugMsg(string.Format("ProcessCssPage :: WebException: {0}", (int)ex.Status));
    ResponseErrorCondition = ex.Status.ToString();
  }

  if (res != null)
  {
    // try/finally releases the connection even if header processing, the
    // CSS parser, or any later step throws.
    try
    {
      string RawData = "";

      this.ProcessResponseHttpHeaders(req, res);

      if (IsAuthenticating)
      {
        this.VerifyOrPurgeCredential();
      }

      /** Get Response Body -------------------------------------------- **/

      try
      {
        DebugMsg(string.Format("MIME TYPE: {0}", this.MimeType));

        using (Stream ResponseStream = res.GetResponseStream())
        {
          StreamReader ResponseStreamReader;

          // Use the document's declared character encoding when there is one;
          // otherwise fall back to the StreamReader default.
          if (this.GetCharacterEncoding() != null)
          {
            ResponseStreamReader = new StreamReader(ResponseStream, this.GetCharacterEncoding());
          }
          else
          {
            ResponseStreamReader = new StreamReader(ResponseStream);
          }

          using (ResponseStreamReader)
          {
            RawData = ResponseStreamReader.ReadToEnd();
          }
        }

        this.ContentLength = RawData.Length; // May need to find bytes length

        this.SetWasDownloaded(true);
      }
      catch (WebException ex)
      {
        DebugMsg(string.Format("WebException: {0}", ex.Message));

        if (ex.Response != null)
        {
          this.SetStatusCode(((HttpWebResponse)ex.Response).StatusCode);
        }
        else
        {
          this.SetStatusCode((HttpStatusCode)ex.Status);
        }

        RawData = "";
        this.ContentLength = 0;
      }
      catch (Exception ex)
      {
        DebugMsg(string.Format("Exception: {0}", ex.Message));
        this.SetStatusCode(HttpStatusCode.BadRequest);
        // Discard any body so that it agrees with the zero content length.
        RawData = "";
        this.ContentLength = 0;
      }

      /** Parse Stylesheet --------------------------------------------- **/

      if (!string.IsNullOrEmpty(RawData))
      {
        ExCSS.Parser ExCssParser = new ExCSS.Parser();
        ExCSS.StyleSheet ExCssStylesheet = ExCssParser.Parse(RawData);
        this.ProcessCssOutlinks(ExCssStylesheet);
      }
      else
      {
        DebugMsg(string.Format("ProcessCssPage: ERROR: {0}", this.GetUrl()));
      }

      /** Custom Filters ----------------------------------------------- **/

      if (!string.IsNullOrEmpty(RawData))
      {
        if (
          MacroscopePreferencesManager.GetCustomFiltersEnable()
          && MacroscopePreferencesManager.GetCustomFiltersApplyToCss())
        {
          MacroscopeCustomFilters CustomFilter = this.DocCollection.GetJobMaster().GetCustomFilter();
          if ((CustomFilter != null) && (CustomFilter.IsEnabled()))
          {
            this.ProcessGenericCustomFiltered(
              CustomFilter: CustomFilter,
              GenericText: RawData
            );
          }
        }
      }

      /** Data Extractors ---------------------------------------------- **/

      if (!string.IsNullOrEmpty(RawData))
      {
        if (
          MacroscopePreferencesManager.GetDataExtractorsEnable()
          && MacroscopePreferencesManager.GetDataExtractorsApplyToCss())
        {
          this.ProcessGenericDataExtractors(GenericText: RawData);
        }
      }

      /** Title -------------------------------------------------------- **/

      {
        // The title is the text after the final '/' of the document URL, if any.
        MatchCollection reMatches = Regex.Matches(this.DocUrl, "/([^/]+)$");
        string DocumentTitle = null;

        foreach (Match match in reMatches)
        {
          if (match.Groups[1].Value.Length > 0)
          {
            DocumentTitle = match.Groups[1].Value.ToString();
            break;
          }
        }

        if (DocumentTitle != null)
        {
          this.SetTitle(DocumentTitle, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
          DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
        }
        else
        {
          DebugMsg(string.Format("TITLE: {0}", "MISSING"));
        }
      }
    }
    finally
    {
      res.Close();
      res.Dispose();
    }
  }

  if (ResponseErrorCondition != null)
  {
    this.ProcessErrorCondition(ResponseErrorCondition);
  }
}