/// <summary>
/// Configures a single regex slot with a long pattern containing two
/// parenthesised groups and expects the extractor to report two items
/// for the sample sentence.
/// </summary>
public void TestExtractorsLong()
{
    const int ExpectedMatches = 2; // Should match 2 times

    Macroscope ms = new Macroscope();
    MacroscopeDataExtractorRegexes DataExtractor = new MacroscopeDataExtractorRegexes(Size: 1);
    List<string> Texts = new List<string>
    {
        "The quick brown fox jumps over the lazy dog."
    };

    DataExtractor.SetRegex(0, "Long:", "The (quick brown) fox jumps over the (lazy dog)");

    foreach (string SampleText in Texts)
    {
        List<KeyValuePair<string, string>> Extracted = DataExtractor.AnalyzeText(Text: SampleText);

        Assert.IsNotNull(Extracted);

        // Dump every extracted pair to the debug log to ease diagnosis on failure.
        foreach (KeyValuePair<string, string> Item in Extracted)
        {
            ms.DebugMsg(string.Format("ITEM: {0} => \"{1}\"", Item.Key, Item.Value));
        }

        Assert.AreEqual(
            ExpectedMatches,
            Extracted.Count,
            string.Format("Wrong number of matches: {0}", Extracted.Count)
        );
    }
}
/** -------------------------------------------------------------------- **/

/// <summary>
/// Syntax-checks the text of a text box as a regular expression.
/// </summary>
/// <param name="TextBoxObject">Text box whose <c>Text</c> is checked.</param>
/// <param name="ShowErrorDialogue">
/// When true and the expression is invalid, an error dialogue is shown and
/// the text box receives focus.
/// </param>
/// <returns>
/// True when <c>SyntaxCheckRegex</c> accepts the text; false when it rejects
/// the text or when any exception is raised while checking.
/// </returns>
protected override bool ValidateExpression(TextBox TextBoxObject, bool ShowErrorDialogue)
{
    bool ExpressionIsValid = false;

    // NOTE(review): this check only stores false into a flag that is already
    // false, and the syntax check below runs regardless, so the "validation
    // enabled" setting currently has no effect here - confirm intended behaviour.
    if (!this.GetEnableValidation())
    {
        ExpressionIsValid = false;
    }

    try
    {
        ExpressionIsValid = MacroscopeDataExtractorRegexes.SyntaxCheckRegex(
            RegexString: TextBoxObject.Text
        );
    }
    catch (Exception ex)
    {
        // Any failure during the check is logged and treated as "invalid".
        ms.DebugMsg(ex.Message);
        ExpressionIsValid = false;
    }

    if (ExpressionIsValid)
    {
        return(true);
    }

    if (ShowErrorDialogue)
    {
        this.DialogueBoxError(AlertTitle: "Error", AlertMessage: "Invalid Regular Expression.");
        TextBoxObject.Focus();
    }

    return(false);
}
/// <summary>
/// Fills five regex slots with single-word patterns and expects six
/// extracted items in total for the sample sentence.
/// </summary>
public void TestExtractors()
{
    const int ExpectedMatches = 6; // Should match 6 times

    // Slot i is configured with Labels[i] / Patterns[i].
    string[] Labels =
    {
        "Label: The",
        "Label: over",
        "Label: fox",
        "Label: dog",
        "Label: brown"
    };
    string[] Patterns =
    {
        @"\b([tT]he)\b",
        @"\b([oO]ver)\b",
        @"\b([fF]ox)\b",
        @"\b([dD]og)\b",
        @"\b([bB]rown)\b"
    };

    Macroscope ms = new Macroscope();
    MacroscopeDataExtractorRegexes DataExtractor = new MacroscopeDataExtractorRegexes(Size: 5);
    List<string> Texts = new List<string>
    {
        "The quick brown fox jumps over the lazy dog."
    };

    for (int Slot = 0; Slot < Labels.Length; Slot++)
    {
        DataExtractor.SetRegex(Slot, Labels[Slot], Patterns[Slot]);
    }

    foreach (string SampleText in Texts)
    {
        List<KeyValuePair<string, string>> Extracted = DataExtractor.AnalyzeText(Text: SampleText);

        Assert.IsNotNull(Extracted);

        // Dump every extracted pair to the debug log to ease diagnosis on failure.
        foreach (KeyValuePair<string, string> Item in Extracted)
        {
            ms.DebugMsg(string.Format("ITEM: {0} => \"{1}\"", Item.Key, Item.Value));
        }

        Assert.AreEqual(
            ExpectedMatches,
            Extracted.Count,
            string.Format("Wrong number of matches: {0}", Extracted.Count)
        );
    }
}
/**************************************************************************/

/// <summary>
/// Creates the report and stores the three data extractor sets it was given.
/// </summary>
/// <param name="NewDataExtractorCssSelectors">CSS selector extractors, stored as-is.</param>
/// <param name="NewDataExtractorRegexes">Regex extractors, stored as-is.</param>
/// <param name="NewDataExtractorXpaths">XPath extractors, stored as-is.</param>
public MacroscopeExcelDataExtractorReport(
    MacroscopeDataExtractorCssSelectors NewDataExtractorCssSelectors,
    MacroscopeDataExtractorRegexes NewDataExtractorRegexes,
    MacroscopeDataExtractorXpaths NewDataExtractorXpaths
)
{
    // The three assignments are independent of one another.
    this.DataExtractorXpaths = NewDataExtractorXpaths;
    this.DataExtractorRegexes = NewDataExtractorRegexes;
    this.DataExtractorCssSelectors = NewDataExtractorCssSelectors;
}
/// <summary>
/// Writes a data extractor report to a temporary .xlsx file and checks that
/// the file was created.
/// </summary>
/// <remarks>
/// <c>Path.GetTempFileName()</c> creates a placeholder file on disk in
/// addition to returning its name. Both that placeholder and the generated
/// report are removed in a <c>finally</c> block so the test leaves nothing
/// behind, even when the assertion fails.
/// </remarks>
public void TestWriteXslx()
{
    MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(MacroscopeConstants.RunTimeMode.LIVE);
    MacroscopeDataExtractorCssSelectors DataExtractorCssSelectors = new MacroscopeDataExtractorCssSelectors(1);
    MacroscopeDataExtractorRegexes DataExtractorRegexes = new MacroscopeDataExtractorRegexes(1);
    MacroscopeDataExtractorXpaths DataExtractorXpaths = new MacroscopeDataExtractorXpaths(1);

    MacroscopeExcelDataExtractorReport Report = new MacroscopeExcelDataExtractorReport(
        NewDataExtractorCssSelectors: DataExtractorCssSelectors,
        NewDataExtractorRegexes: DataExtractorRegexes,
        NewDataExtractorXpaths: DataExtractorXpaths
    );

    string TempFilename = Path.GetTempFileName();
    string Filename = string.Join(".", TempFilename, "xlsx");

    try
    {
        Report.WriteXslx(JobMaster: JobMaster, OutputFilename: Filename);
        Assert.IsTrue(File.Exists(Filename));
    }
    finally
    {
        // File.Delete is a no-op for a path that does not exist.
        File.Delete(Filename);
        File.Delete(TempFilename);
    }
}
/**************************************************************************/

/// <summary>
/// Builds the form, passes the supplied extractor to the embedded
/// <c>dataExtractorInstance</c>, and subscribes the form's event handlers.
/// </summary>
/// <param name="NewDataExtractor">Regex data extractor to be edited through this form.</param>
public MacroscopeDataExtractorRegexesForm(MacroscopeDataExtractorRegexes NewDataExtractor)
{
    // The InitializeComponent() call is required for Windows Forms designer support.
    InitializeComponent();

    dataExtractorInstance.ConfigureDataExtractorForm(
        NewContainerForm: this,
        NewDataExtractor: NewDataExtractor
    );
    dataExtractorInstance.SetDataExtractor();

    // Each subscription targets a different event, so their relative order is irrelevant.
    buttonCancel.Click += CloseDataExtractorForm;
    buttonClear.Click += ClearDataExtractorForm;
    FormClosing += CallbackFormClosing;
}
/** -------------------------------------------------------------------- **/

/// <summary>
/// Runs the regex extractor over a block of text and records every
/// resulting label/value pair via <c>SetDataExtractedRegexes</c>.
/// </summary>
/// <param name="DataExtractor">Extractor used to analyse the text.</param>
/// <param name="GenericText">Text passed to <c>AnalyzeText</c>.</param>
private void ProcessGenericDataExtractorRegexes(
    MacroscopeDataExtractorRegexes DataExtractor,
    string GenericText
)
{
    foreach (KeyValuePair<string, string> Extracted in DataExtractor.AnalyzeText(Text: GenericText))
    {
        this.SetDataExtractedRegexes(Label: Extracted.Key, Text: Extracted.Value);
    }
}
/** Process Data Extractors ***********************************************/

/// <summary>
/// Applies the job master's regex data extractor to the given text, provided
/// an extractor exists and reports itself as enabled.
/// </summary>
/// <param name="GenericText">Text handed to the regex extractor.</param>
private void ProcessGenericDataExtractors(string GenericText)
{
    MacroscopeJobMaster JobMaster = this.DocCollection.GetJobMaster();
    MacroscopeDataExtractorRegexes RegexExtractor = JobMaster.GetDataExtractorRegexes();

    // Nothing to do without an extractor, or with a disabled one.
    if ((RegexExtractor == null) || (!RegexExtractor.IsEnabled()))
    {
        return;
    }

    this.ProcessGenericDataExtractorRegexes(
        DataExtractor: RegexExtractor,
        GenericText: GenericText
    );
}
/// <summary>
/// Writes one CSV report per output worksheet (CSS selectors, regexes,
/// XPaths) to a temporary file and checks that each file was created.
/// </summary>
/// <remarks>
/// <c>Path.GetTempFileName()</c> creates a placeholder file on disk in
/// addition to returning its name. Both that placeholder and the generated
/// report are removed in a <c>finally</c> block so the test leaves nothing
/// behind, even when an assertion fails. The extension is joined as "csv"
/// (not ".csv") so the resulting name does not contain a doubled dot.
/// </remarks>
public void TestWriteCsv()
{
    MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(MacroscopeConstants.RunTimeMode.LIVE);
    MacroscopeDataExtractorCssSelectors DataExtractorCssSelectors = new MacroscopeDataExtractorCssSelectors(1);
    MacroscopeDataExtractorRegexes DataExtractorRegexes = new MacroscopeDataExtractorRegexes(1);
    MacroscopeDataExtractorXpaths DataExtractorXpaths = new MacroscopeDataExtractorXpaths(1);

    MacroscopeCsvDataExtractorReport ReportFile = new MacroscopeCsvDataExtractorReport(
        NewDataExtractorCssSelectors: DataExtractorCssSelectors,
        NewDataExtractorRegexes: DataExtractorRegexes,
        NewDataExtractorXpaths: DataExtractorXpaths
    );

    List<MacroscopeCsvDataExtractorReport.OutputWorksheet> SelectedOutputWorksheets =
        new List<MacroscopeCsvDataExtractorReport.OutputWorksheet>
        {
            MacroscopeCsvDataExtractorReport.OutputWorksheet.CSS_SELECTORS,
            MacroscopeCsvDataExtractorReport.OutputWorksheet.REGEXES,
            MacroscopeCsvDataExtractorReport.OutputWorksheet.XPATHS
        };

    foreach (MacroscopeCsvDataExtractorReport.OutputWorksheet SelectedOutputWorksheet in SelectedOutputWorksheets)
    {
        string TempFilename = Path.GetTempFileName();
        string Filename = string.Join(".", TempFilename, "csv");

        try
        {
            ReportFile.WriteCsv(
                JobMaster: JobMaster,
                SelectedOutputWorksheet: SelectedOutputWorksheet,
                OutputFilename: Filename
            );
            Assert.IsTrue(File.Exists(Filename));
        }
        finally
        {
            // File.Delete is a no-op for a path that does not exist.
            File.Delete(Filename);
            File.Delete(TempFilename);
        }
    }
}
/**************************************************************************/

/// <summary>
/// Re-renders the list view for the given URLs, marshalling the work through
/// <c>MainForm.Invoke</c> when <c>MainForm.InvokeRequired</c> is true.
/// </summary>
/// <param name="DocCollection">Document collection passed on to <c>RenderListView</c>.</param>
/// <param name="UrlList">URLs passed on to <c>RenderListView</c>.</param>
/// <param name="DataExtractor">Regex data extractor passed on to <c>RenderListView</c>.</param>
/// <remarks>
/// The wait cursor and <c>BeginUpdate()</c> are undone in a <c>finally</c>
/// block, so an exception during rendering no longer leaves the list view
/// with updates suspended or the cursor stuck; the exception still propagates.
/// </remarks>
public void RefreshData(
    MacroscopeDocumentCollection DocCollection,
    List<string> UrlList,
    MacroscopeDataExtractorRegexes DataExtractor
)
{
    // Single definition of the refresh work, shared by both dispatch paths.
    MethodInvoker RefreshAction = delegate
    {
        Cursor.Current = Cursors.WaitCursor;
        this.DisplayListView.BeginUpdate();

        try
        {
            this.RenderListView(
                DocCollection: DocCollection,
                UrlList: UrlList,
                DataExtractor: DataExtractor
            );
            this.RenderUrlCount();
        }
        finally
        {
            this.DisplayListView.EndUpdate();
            Cursor.Current = Cursors.Default;
        }
    };

    if (this.MainForm.InvokeRequired)
    {
        this.MainForm.Invoke(RefreshAction);
    }
    else
    {
        RefreshAction();
    }
}
/**************************************************************************/

/// <summary>
/// Populates the display list view with one row per extracted
/// (regex label, value) pair of every listed document that the data
/// extractor can be applied to.
/// </summary>
/// <param name="DocCollection">Collection the documents are looked up in; nothing is rendered when it is empty.</param>
/// <param name="UrlList">URLs of the documents to render, in order.</param>
/// <param name="DataExtractor">Extractor used to decide whether a document is rendered.</param>
/// <remarks>
/// The progress form is closed and disposed in a <c>finally</c> block so it
/// does not linger if rendering throws. The progress counter advances for
/// every URL, including documents that are skipped.
/// </remarks>
private void RenderListView(
    MacroscopeDocumentCollection DocCollection,
    List<string> UrlList,
    MacroscopeDataExtractorRegexes DataExtractor
)
{
    if (DocCollection.CountDocuments() == 0)
    {
        return;
    }

    List<ListViewItem> ListViewItems = new List<ListViewItem>();
    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)DocCollection.CountDocuments(); // non-zero: guarded above
        decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.UpdatePercentages(
                Title: "Preparing Display",
                Message: "Processing document collection for display:",
                MajorPercentage: MajorPercentage,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        foreach (string Url in UrlList)
        {
            MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

            // Defensive: skip a URL for which the lookup yields no document.
            if ((msDoc != null) && DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
            {
                this.CollectListViewItemsForDocument(msDoc: msDoc, ListViewItems: ListViewItems);
            }

            // Advance progress for every URL, whether or not it produced rows.
            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                Count++;
                MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                );
            }
        }

        // New rows are added in one batch; rows that already existed were updated in place.
        this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
        this.DeduplicateListView(DuplicatedListView: this.DisplayListView);

        this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

        // Fixed widths override the auto-sized result for these columns.
        this.DisplayListView.Columns[ColUrl].Width = 300;
        this.DisplayListView.Columns[ColStatusCode].Width = 100;
        this.DisplayListView.Columns[ColStatus].Width = 100;
        this.DisplayListView.Columns[ColMimeType].Width = 100;
    }
    finally
    {
        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.DoClose();
        }

        ProgressForm.Dispose();
    }
}

/// <summary>
/// Creates or updates one list view row per non-empty extracted
/// (regex label, value) pair of a single document. Rows not yet present in
/// the display list view are appended to <paramref name="ListViewItems"/>;
/// rows already present are updated in place.
/// </summary>
/// <param name="msDoc">Document whose extracted regex data is rendered.</param>
/// <param name="ListViewItems">Receives newly created rows.</param>
private void CollectListViewItemsForDocument(
    MacroscopeDocument msDoc,
    List<ListViewItem> ListViewItems
)
{
    string DocUrl = msDoc.GetUrl();
    string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
    string Status = msDoc.GetStatusCode().ToString();
    string MimeType = msDoc.GetMimeType();

    // Colours depend only on the document, so work them out once per document.
    Color UrlColour = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
    Color StatusColour;

    if (Regex.IsMatch(StatusCode, "^[2]"))
    {
        StatusColour = Color.Green;
    }
    else if (Regex.IsMatch(StatusCode, "^[3]"))
    {
        StatusColour = Color.Goldenrod;
    }
    else if (Regex.IsMatch(StatusCode, "^[45]"))
    {
        StatusColour = Color.Red;
    }
    else
    {
        StatusColour = Color.Blue;
    }

    foreach (KeyValuePair<string, string> DataExtractedPair in msDoc.IterateDataExtractedRegexes())
    {
        string RegexLabel = DataExtractedPair.Key;
        string ExtractedValue = DataExtractedPair.Value;

        if (string.IsNullOrEmpty(RegexLabel) || string.IsNullOrEmpty(ExtractedValue))
        {
            continue;
        }

        // Row key built from the URL, the label and the extracted value.
        string PairKey = string.Join(
            ":",
            UrlToDigest(DocUrl),
            UrlToDigest(Macroscope.GetStringDigest(Text: RegexLabel)),
            UrlToDigest(Macroscope.GetStringDigest(Text: ExtractedValue))
        );

        ListViewItem lvItem;

        if (this.DisplayListView.Items.ContainsKey(PairKey))
        {
            lvItem = this.DisplayListView.Items[PairKey];
        }
        else
        {
            lvItem = new ListViewItem(PairKey);
            lvItem.UseItemStyleForSubItems = false;
            lvItem.Name = PairKey;

            for (int i = 0; i < 6; i++)
            {
                lvItem.SubItems.Add("");
            }

            ListViewItems.Add(lvItem);
        }

        if (lvItem == null)
        {
            // Log and skip instead of dereferencing a missing row below.
            DebugMsg(string.Format("MacroscopeDisplayDataExtractorRegexes MISSING: {0}", PairKey));
            continue;
        }

        try
        {
            lvItem.SubItems[ColUrl].Text = DocUrl;
            lvItem.SubItems[ColStatusCode].Text = StatusCode;
            lvItem.SubItems[ColStatus].Text = Status;
            lvItem.SubItems[ColMimeType].Text = MimeType;
            lvItem.SubItems[ColRegexLabel].Text = RegexLabel;
            lvItem.SubItems[ColExtractedValue].Text = ExtractedValue;
        }
        catch (Exception ex)
        {
            DebugMsg(string.Format("MacroscopeDisplayDataExtractorRegexes: {0}", ex.Message));
            DebugMsg(string.Format("MacroscopeDisplayDataExtractorRegexes: {0}", ex.StackTrace));
        }

        lvItem.SubItems[ColUrl].ForeColor = UrlColour;
        lvItem.SubItems[ColStatusCode].ForeColor = StatusColour;
        lvItem.SubItems[ColStatus].ForeColor = StatusColour;
    }
}
/**************************************************************************/

/// <summary>
/// Stores the container form and extractor, then fills the controls grid
/// with a header row, one editing row per extractor slot, and a trailing
/// empty row.
/// </summary>
/// <param name="NewContainerForm">Form hosting this instance; stored in <c>ContainerForm</c>.</param>
/// <param name="NewDataExtractor">Extractor whose <c>GetSize()</c> determines the number of slot rows.</param>
public void ConfigureDataExtractorForm(
    MacroscopeDataExtractorRegexesForm NewContainerForm,
    MacroscopeDataExtractorRegexes NewDataExtractor
)
{
    this.ContainerForm = NewContainerForm;
    this.DataExtractor = NewDataExtractor;

    int SlotCount = this.DataExtractor.GetSize();
    Padding CellMargin = new Padding(5, 5, 5, 5);
    string[] HeaderTexts =
    {
        "",
        "Active/Inactive",
        "Extractor Label",
        "Regular Expression Pattern"
    };

    TableLayoutPanel Grid = this.tableLayoutPanelControlsGrid;
    Grid.Dock = DockStyle.Fill;
    Grid.ColumnCount = 4;
    Grid.RowCount = SlotCount + 1;

    // Header row: one label per column. Controls are added in reading order.
    foreach (string HeaderText in HeaderTexts)
    {
        Grid.Controls.Add(
            new Label
            {
                Text = HeaderText,
                TextAlign = ContentAlignment.BottomLeft,
                Dock = DockStyle.Fill,
                Margin = CellMargin
            }
        );
    }

    // One row per slot: caption, state selector, label box, expression box.
    for (int Slot = 0; Slot < SlotCount; Slot++)
    {
        int SlotNumber = Slot + 1; // names and captions are 1-based
        string SlotTag = Slot.ToString(); // tags carry the 0-based slot index

        Label SlotCaption = new Label
        {
            Text = string.Format("Regex {0}", SlotNumber),
            TextAlign = ContentAlignment.MiddleRight,
            Dock = DockStyle.Fill,
            Margin = CellMargin
        };

        ComboBox SlotState = new ComboBox
        {
            Name = string.Format("StateComboBox{0}", SlotNumber),
            Items = { "Inactive", "Active" },
            DropDownStyle = ComboBoxStyle.DropDownList,
            SelectedIndex = 0,
            Margin = CellMargin,
            Width = 100
        };

        TextBox SlotLabelBox = new TextBox
        {
            Name = string.Format("TextBoxLabel{0}", SlotNumber),
            Dock = DockStyle.Fill,
            Margin = CellMargin,
            Tag = SlotTag
        };
        SlotLabelBox.KeyUp += this.CallbackTextBoxKeyUp;
        SlotLabelBox.TextChanged += this.CallbackTextBoxLabelTextChanged;

        TextBox SlotExpressionBox = new TextBox
        {
            Name = string.Format("TextBoxExpression{0}", SlotNumber),
            Dock = DockStyle.Fill,
            Margin = CellMargin,
            Tag = SlotTag
        };
        SlotExpressionBox.KeyUp += this.CallbackTextBoxKeyUp;
        SlotExpressionBox.TextChanged += this.CallbackTextBoxExpressionTextChanged;

        Grid.Controls.Add(SlotCaption);
        Grid.Controls.Add(SlotState);
        Grid.Controls.Add(SlotLabelBox);
        Grid.Controls.Add(SlotExpressionBox);

        // Keep references so the controls can be reached by slot later.
        this.TextBoxLabels.Add(SlotLabelBox);
        this.StateComboBoxes.Add(SlotState);
        this.TextBoxExpressions.Add(SlotExpressionBox);
    }

    // Add empty last row for space adjustment
    for (int Column = 0; Column < Grid.ColumnCount; Column++)
    {
        Grid.Controls.Add(new Label { Text = "" });
    }

    // NOTE(review): AutoScroll is switched off and on again around the padding
    // change; presumably this makes the scroll area pick up the right-hand
    // padding - confirm.
    Grid.AutoScroll = false;
    Grid.Padding = new Padding(0, 0, 15, 0);
    Grid.AutoScroll = true;
}
/** -------------------------------------------------------------------- **/

/// <summary>
/// Replaces the regex data extractor held by this instance.
/// </summary>
/// <param name="NewDataExtractor">Extractor to store; no validation is performed.</param>
public void SetDataExtractorRegexes(
    MacroscopeDataExtractorRegexes NewDataExtractor
)
{
    this.DataExtractorRegexes = NewDataExtractor;
}