/** -------------------------------------------------------------------- **/

/// <summary>
/// Checks whether the text of the given text box passes the XPath syntax check.
/// </summary>
/// <param name="TextBoxObject">Text box whose Text is syntax-checked.</param>
/// <param name="ShowErrorDialogue">
/// When true and the expression is not valid, an error dialogue is shown and
/// the text box is given focus.
/// </param>
/// <returns>True only when the syntax check reports the expression as valid.</returns>
protected override bool ValidateExpression(TextBox TextBoxObject, bool ShowErrorDialogue)
{
    bool ExpressionOk = false;

    // NOTE(review): the result of GetEnableValidation() does not currently
    // change the outcome; the syntax check below always runs. Confirm the
    // intended behaviour when validation is disabled.
    if (!this.GetEnableValidation())
    {
        ExpressionOk = false;
    }

    try
    {
        ExpressionOk = MacroscopeDataExtractorXpaths.SyntaxCheckXpath(XpathString: TextBoxObject.Text);
    }
    catch (Exception ex)
    {
        // Any failure during the check is logged and treated as "not valid".
        ms.DebugMsg(ex.Message);
    }

    if (ExpressionOk || !ShowErrorDialogue)
    {
        return ExpressionOk;
    }

    this.DialogueBoxError(AlertTitle: "Error", AlertMessage: "Invalid XPath Expression.");
    TextBoxObject.Focus();

    return ExpressionOk;
}
/**************************************************************************/

/// <summary>
/// Creates the report and stores the three data extractor instances it is given.
/// </summary>
/// <param name="NewDataExtractorCssSelectors">CSS selector data extractor to store.</param>
/// <param name="NewDataExtractorRegexes">Regex data extractor to store.</param>
/// <param name="NewDataExtractorXpaths">XPath data extractor to store.</param>
public MacroscopeExcelDataExtractorReport(
    MacroscopeDataExtractorCssSelectors NewDataExtractorCssSelectors,
    MacroscopeDataExtractorRegexes NewDataExtractorRegexes,
    MacroscopeDataExtractorXpaths NewDataExtractorXpaths
)
{
    DataExtractorCssSelectors = NewDataExtractorCssSelectors;
    DataExtractorRegexes = NewDataExtractorRegexes;
    DataExtractorXpaths = NewDataExtractorXpaths;
}
/// <summary>
/// Runs a single active "//title" XPath extractor in OUTERHTML mode over
/// every document in HtmlDocs and checks the first extracted value against
/// the expected title markup.
/// </summary>
public void TestTitlesOuterHtml()
{
    // Expected first extracted value, keyed by the same key used in HtmlDocs.
    Dictionary<string, string> AssetDic = new Dictionary<string, string>()
    {
        { "HtmlDoc001", "<title>HtmlDoc001</title>" },
        { "HtmlDoc002", "<title>HtmlDoc002</title>" },
        { "HtmlDoc003", "<title>HtmlDoc003</title>" },
        { "HtmlDoc004", "<title>HtmlDoc004</title>" },
        { "HtmlDoc005", "<title>HtmlDoc005</title>" }
    };

    MacroscopeDataExtractorXpaths DataExtractor = new MacroscopeDataExtractorXpaths(Size: 1);

    DataExtractor.SetXpath(
        Slot: 0,
        XpathLabel: "TestTitlesOuterHtml",
        XpathString: "//title",
        ExtractorType: MacroscopeConstants.DataExtractorType.OUTERHTML
    );

    DataExtractor.SetActiveInactive(
        Slot: 0,
        State: MacroscopeConstants.ActiveInactive.ACTIVE
    );

    foreach (string HtmlDocKey in this.HtmlDocs.Keys)
    {
        string Html = this.HtmlDocs[HtmlDocKey];
        List<KeyValuePair<string, string>> ResultList = DataExtractor.AnalyzeHtml(Html: Html);

        // Assert non-emptiness BEFORE indexing: otherwise an empty result
        // surfaces as an index exception rather than a test failure.
        Assert.IsNotEmpty(ResultList, "WHOOPS!");

        DebugMsg(string.Format("HtmlDocKey: {0} :: Value: {1}", HtmlDocKey, ResultList[0].Value));

        Assert.AreEqual(AssetDic[HtmlDocKey], ResultList[0].Value);
    }
}
/// <summary>
/// Writes a data extractor Excel report to a temporary path and checks that
/// the output file exists afterwards.
/// </summary>
public void TestWriteXslx()
{
    MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(MacroscopeConstants.RunTimeMode.LIVE);
    MacroscopeDataExtractorCssSelectors DataExtractorCssSelectors = new MacroscopeDataExtractorCssSelectors(1);
    MacroscopeDataExtractorRegexes DataExtractorRegexes = new MacroscopeDataExtractorRegexes(1);
    MacroscopeDataExtractorXpaths DataExtractorXpaths = new MacroscopeDataExtractorXpaths(1);

    MacroscopeExcelDataExtractorReport Report = new MacroscopeExcelDataExtractorReport(
        NewDataExtractorCssSelectors: DataExtractorCssSelectors,
        NewDataExtractorRegexes: DataExtractorRegexes,
        NewDataExtractorXpaths: DataExtractorXpaths
    );

    // Path.GetTempFileName() creates a zero-byte file on disk; remember its
    // path so it can be removed along with the report written next to it.
    string TempFilename = Path.GetTempFileName();
    string Filename = string.Join(".", TempFilename, "xlsx");

    try
    {
        Report.WriteXslx(JobMaster: JobMaster, OutputFilename: Filename);
        Assert.IsTrue(File.Exists(Filename));
    }
    finally
    {
        // Clean up even when the writer throws or the assertion fails.
        File.Delete(Filename);
        File.Delete(TempFilename);
    }
}
/**************************************************************************/

/// <summary>
/// Builds the XPath data extractor form: initialises the designer components,
/// configures the embedded extractor control with the given data extractor,
/// and wires up the form-closing, clear and cancel handlers.
/// </summary>
/// <param name="NewDataExtractor">Data extractor passed to the embedded control.</param>
public MacroscopeDataExtractorXpathsForm(MacroscopeDataExtractorXpaths NewDataExtractor)
{
    // The InitializeComponent() call is required for Windows Forms designer support.
    InitializeComponent();

    dataExtractorInstance.ConfigureDataExtractorForm(
        NewContainerForm: this,
        NewDataExtractor: NewDataExtractor
    );
    dataExtractorInstance.SetDataExtractor();

    FormClosing += CallbackFormClosing;
    buttonClear.Click += ClearDataExtractorForm;
    buttonCancel.Click += CloseDataExtractorForm;
}
/// <summary>
/// Writes one CSV data extractor report per output worksheet kind
/// (CSS selectors, regexes, XPaths) to a temporary path and checks that
/// each output file exists afterwards.
/// </summary>
public void TestWriteCsv()
{
    MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(MacroscopeConstants.RunTimeMode.LIVE);
    MacroscopeDataExtractorCssSelectors DataExtractorCssSelectors = new MacroscopeDataExtractorCssSelectors(1);
    MacroscopeDataExtractorRegexes DataExtractorRegexes = new MacroscopeDataExtractorRegexes(1);
    MacroscopeDataExtractorXpaths DataExtractorXpaths = new MacroscopeDataExtractorXpaths(1);

    MacroscopeCsvDataExtractorReport ReportFile = new MacroscopeCsvDataExtractorReport(
        NewDataExtractorCssSelectors: DataExtractorCssSelectors,
        NewDataExtractorRegexes: DataExtractorRegexes,
        NewDataExtractorXpaths: DataExtractorXpaths
    );

    List<MacroscopeCsvDataExtractorReport.OutputWorksheet> SelectedOutputWorksheets =
        new List<MacroscopeCsvDataExtractorReport.OutputWorksheet>()
        {
            MacroscopeCsvDataExtractorReport.OutputWorksheet.CSS_SELECTORS,
            MacroscopeCsvDataExtractorReport.OutputWorksheet.REGEXES,
            MacroscopeCsvDataExtractorReport.OutputWorksheet.XPATHS
        };

    foreach (MacroscopeCsvDataExtractorReport.OutputWorksheet SelectedOutputWorksheet in SelectedOutputWorksheets)
    {
        // Path.GetTempFileName() creates a zero-byte file on disk; remember
        // its path so it can be removed along with the report.
        string TempFilename = Path.GetTempFileName();

        // Join with "csv" (no leading dot) so the name does not end in "..csv".
        string Filename = string.Join(".", TempFilename, "csv");

        try
        {
            ReportFile.WriteCsv(
                JobMaster: JobMaster,
                SelectedOutputWorksheet: SelectedOutputWorksheet,
                OutputFilename: Filename
            );
            Assert.IsTrue(File.Exists(Filename));
        }
        finally
        {
            // Clean up even when the writer throws or the assertion fails.
            File.Delete(Filename);
            File.Delete(TempFilename);
        }
    }
}
/**************************************************************************/

/// <summary>
/// Re-renders the list view and the URL count for the given documents,
/// marshalling the work through MainForm.Invoke when MainForm reports that
/// invocation is required.
/// </summary>
/// <param name="DocCollection">Document collection passed on to RenderListView.</param>
/// <param name="UrlList">URLs passed on to RenderListView.</param>
/// <param name="DataExtractor">XPath data extractor passed on to RenderListView.</param>
public void RefreshData(
    MacroscopeDocumentCollection DocCollection,
    List<string> UrlList,
    MacroscopeDataExtractorXpaths DataExtractor
)
{
    // Single definition of the refresh sequence, shared by both code paths.
    MethodInvoker RefreshAction = delegate
    {
        Cursor.Current = Cursors.WaitCursor;
        this.DisplayListView.BeginUpdate();

        try
        {
            this.RenderListView(
                DocCollection: DocCollection,
                UrlList: UrlList,
                DataExtractor: DataExtractor
            );
            this.RenderUrlCount();
        }
        finally
        {
            // Always end the update and restore the cursor, so a rendering
            // failure cannot leave the list view suspended.
            this.DisplayListView.EndUpdate();
            Cursor.Current = Cursors.Default;
        }
    };

    if (this.MainForm.InvokeRequired)
    {
        this.MainForm.Invoke(RefreshAction);
    }
    else
    {
        RefreshAction();
    }
}
/// <summary>
/// Runs a single active "//title" XPath extractor in INNERTEXT mode over
/// every document in HtmlDocs and checks the first extracted value against
/// the expected title text.
/// </summary>
public void TestTitlesInnerText()
{
    // Expected first extracted value, keyed by the same key used in HtmlDocs.
    Dictionary<string, string> AssetDic = new Dictionary<string, string>()
    {
        { "SEOMacroscope.src.MacroscopeDataExtractor.t.HtmlDocs.HtmlDoc001.html", "HtmlDoc001" },
        { "SEOMacroscope.src.MacroscopeDataExtractor.t.HtmlDocs.HtmlDoc002.html", "HtmlDoc002" },
        { "SEOMacroscope.src.MacroscopeDataExtractor.t.HtmlDocs.HtmlDoc003.html", "HtmlDoc003" },
        { "SEOMacroscope.src.MacroscopeDataExtractor.t.HtmlDocs.HtmlDoc004.html", "HtmlDoc004" },
        { "SEOMacroscope.src.MacroscopeDataExtractor.t.HtmlDocs.HtmlDoc005.html", "HtmlDoc005" }
    };

    MacroscopeDataExtractorXpaths DataExtractor = new MacroscopeDataExtractorXpaths(Size: 1);

    DataExtractor.SetXpath(
        Slot: 0,
        XpathLabel: "TestTitlesInnerText",
        XpathString: "//title",
        ExtractorType: MacroscopeConstants.DataExtractorType.INNERTEXT
    );

    DataExtractor.SetActiveInactive(
        Slot: 0,
        State: MacroscopeConstants.ActiveInactive.ACTIVE
    );

    foreach (string HtmlDocKey in this.HtmlDocs.Keys)
    {
        string Html = this.HtmlDocs[HtmlDocKey];
        List<KeyValuePair<string, string>> ResultList = DataExtractor.AnalyzeHtml(Html: Html);

        // Assert non-emptiness BEFORE indexing: otherwise an empty result
        // surfaces as an index exception rather than a test failure.
        Assert.IsNotEmpty(ResultList, "WHOOPS!");

        DebugMsg(string.Format("HtmlDocKey: {0} :: Value: {1}", HtmlDocKey, ResultList[0].Value));

        Assert.AreEqual(AssetDic[HtmlDocKey], ResultList[0].Value);
    }
}
/**************************************************************************/

/// <summary>
/// Stores the container form and data extractor, then populates the controls
/// grid: a header row, one row of input controls per extractor slot, and a
/// trailing row of empty labels.
/// </summary>
/// <param name="NewContainerForm">Form that hosts this control.</param>
/// <param name="NewDataExtractor">Data extractor whose size determines the number of slot rows.</param>
public void ConfigureDataExtractorForm(
    MacroscopeDataExtractorXpathsForm NewContainerForm,
    MacroscopeDataExtractorXpaths NewDataExtractor
)
{
    this.ContainerForm = NewContainerForm;
    this.DataExtractor = NewDataExtractor;

    int SlotCount = this.DataExtractor.GetSize();
    Padding CellMargin = new Padding(5, 5, 5, 5);

    TableLayoutPanel Grid = this.tableLayoutPanelControlsGrid;
    Grid.Dock = DockStyle.Fill;
    Grid.ColumnCount = 5;
    Grid.RowCount = SlotCount + 1;

    // Header row: one label per column.
    string[] HeaderTexts =
    {
        "",
        "Active/Inactive",
        "Extractor Label",
        "XPath Expression",
        "Extract To"
    };

    foreach (string HeaderText in HeaderTexts)
    {
        Label HeaderLabel = new Label
        {
            Text = HeaderText,
            TextAlign = ContentAlignment.BottomLeft,
            Dock = DockStyle.Fill,
            Margin = CellMargin
        };
        Grid.Controls.Add(HeaderLabel);
    }

    // One row of controls per extractor slot; names are 1-based, tags 0-based.
    for (int Slot = 0; Slot < SlotCount; Slot++)
    {
        int Ordinal = Slot + 1;
        string SlotTag = Slot.ToString();

        Label RowLabel = new Label
        {
            Text = string.Format("XPath {0}", Ordinal),
            TextAlign = ContentAlignment.MiddleRight,
            Dock = DockStyle.Fill,
            Margin = CellMargin
        };

        ComboBox StateChooser = new ComboBox();
        StateChooser.Name = string.Format("StateComboBox{0}", Ordinal);
        StateChooser.Items.AddRange(new object[] { "Inactive", "Active" });
        StateChooser.DropDownStyle = ComboBoxStyle.DropDownList;
        StateChooser.SelectedIndex = 0;
        StateChooser.Margin = CellMargin;
        StateChooser.Width = 100;

        TextBox LabelBox = new TextBox
        {
            Name = string.Format("TextBoxLabel{0}", Ordinal),
            Dock = DockStyle.Fill,
            Margin = CellMargin,
            Tag = SlotTag
        };
        LabelBox.KeyUp += this.CallbackTextBoxKeyUp;
        LabelBox.TextChanged += this.CallbackTextBoxLabelTextChanged;

        TextBox ExpressionBox = new TextBox
        {
            Name = string.Format("TextBoxExpression{0}", Ordinal),
            Dock = DockStyle.Fill,
            Margin = CellMargin,
            Tag = SlotTag
        };
        ExpressionBox.KeyUp += this.CallbackTextBoxKeyUp;
        ExpressionBox.TextChanged += this.CallbackTextBoxExpressionTextChanged;

        ComboBox ExtractToChooser = new ComboBox();
        ExtractToChooser.Name = string.Format("ExtractToComboBox{0}", Ordinal);
        ExtractToChooser.Items.AddRange(
            new object[]
            {
                "Extract HTML Element",
                "Extract Inner HTML Elements",
                "Extract Text"
            }
        );
        ExtractToChooser.DropDownStyle = ComboBoxStyle.DropDownList;
        ExtractToChooser.SelectedIndex = 0;
        ExtractToChooser.Margin = CellMargin;
        ExtractToChooser.Width = 160;

        // Controls are added in column order for this row.
        Grid.Controls.Add(RowLabel);
        Grid.Controls.Add(StateChooser);
        Grid.Controls.Add(LabelBox);
        Grid.Controls.Add(ExpressionBox);
        Grid.Controls.Add(ExtractToChooser);

        this.TextBoxLabels.Add(LabelBox);
        this.StateComboBoxes.Add(StateChooser);
        this.TextBoxExpressions.Add(ExpressionBox);
        this.ExtractToComboBoxes.Add(ExtractToChooser);
    }

    // Add empty last row for space adjustment
    for (int Column = 0; Column < Grid.ColumnCount; Column++)
    {
        Grid.Controls.Add(new Label { Text = "" });
    }

    // AutoScroll is switched off and back on around the padding change.
    Grid.AutoScroll = false;
    Grid.Padding = new Padding(0, 0, 15, 0);
    Grid.AutoScroll = true;
}
/** -------------------------------------------------------------------- **/

/// <summary>
/// Replaces the stored XPath data extractor with the given instance.
/// </summary>
/// <param name="NewDataExtractor">XPath data extractor to store.</param>
public void SetDataExtractorXpaths(MacroscopeDataExtractorXpaths NewDataExtractor)
{
    DataExtractorXpaths = NewDataExtractor;
}
/**************************************************************************/

/// <summary>
/// Adds or updates one list view row per XPath-extracted (label, value) pair
/// for each URL in <paramref name="UrlList"/>, colours the URL and status
/// cells, and optionally reports progress through a progress form.
/// </summary>
/// <param name="DocCollection">Collection the documents are looked up in by URL.</param>
/// <param name="UrlList">URLs of the documents to render.</param>
/// <param name="DataExtractor">
/// Extractor used to decide whether extracted data applies to a document.
/// </param>
private void RenderListView(
    MacroscopeDocumentCollection DocCollection,
    List<string> UrlList,
    MacroscopeDataExtractorXpaths DataExtractor
)
{
    if (DocCollection.CountDocuments() == 0)
    {
        return;
    }

    List<ListViewItem> ListViewItems = new List<ListViewItem>();

    // Read the preference once so the progress form is opened, updated and
    // closed consistently within a single render pass.
    bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)DocCollection.CountDocuments();
        decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

        if (ShowProgress)
        {
            ProgressForm.UpdatePercentages(
                Title: "Preparing Display",
                Message: "Processing document collection for display:",
                MajorPercentage: MajorPercentage,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        foreach (string Url in UrlList)
        {
            MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

            // Guard against a URL with no document in the collection.
            if (msDoc == null)
            {
                DebugMsg(string.Format("MacroscopeDisplayDataExtractorXpaths MISSING DOCUMENT: {0}", Url));
                continue;
            }

            if (!DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
            {
                continue;
            }

            string DocUrl = msDoc.GetUrl();
            string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
            string Status = msDoc.GetStatusCode().ToString();
            string MimeType = msDoc.GetMimeType();

            // Colours depend only on the document, so work them out once per
            // document rather than once per extracted pair.
            Color UrlColour = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
            Color StatusColour;

            if (Regex.IsMatch(StatusCode, "^[2]"))
            {
                StatusColour = Color.Green;
            }
            else if (Regex.IsMatch(StatusCode, "^[3]"))
            {
                StatusColour = Color.Goldenrod;
            }
            else if (Regex.IsMatch(StatusCode, "^[45]"))
            {
                StatusColour = Color.Red;
            }
            else
            {
                StatusColour = Color.Blue;
            }

            foreach (KeyValuePair<string, string> DataExtractedPair in msDoc.IterateDataExtractedXpaths())
            {
                string XpathLabel = DataExtractedPair.Key;
                string ExtractedValue = DataExtractedPair.Value;

                if (string.IsNullOrEmpty(XpathLabel) || string.IsNullOrEmpty(ExtractedValue))
                {
                    continue;
                }

                // Row key built from the document URL, the label and the value.
                string PairKey = string.Join(
                    ":",
                    UrlToDigest(DocUrl),
                    UrlToDigest(Macroscope.GetStringDigest(Text: XpathLabel)),
                    UrlToDigest(Macroscope.GetStringDigest(Text: ExtractedValue))
                );

                ListViewItem lvItem;

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    lvItem = this.DisplayListView.Items[PairKey];
                }
                else
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    // Six empty sub-items are appended for the column cells.
                    for (int i = 0; i < 6; i++)
                    {
                        lvItem.SubItems.Add("");
                    }

                    ListViewItems.Add(lvItem);
                }

                if (lvItem == null)
                {
                    // Nothing to fill in or colour; skip instead of dereferencing null.
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorXpaths MISSING: {0}", PairKey));
                    continue;
                }

                try
                {
                    lvItem.SubItems[ColUrl].Text = DocUrl;
                    lvItem.SubItems[ColStatusCode].Text = StatusCode;
                    lvItem.SubItems[ColStatus].Text = Status;
                    lvItem.SubItems[ColMimeType].Text = MimeType;
                    lvItem.SubItems[ColXpathLabel].Text = XpathLabel;
                    lvItem.SubItems[ColExtractedValue].Text = ExtractedValue;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorXpaths: {0}", ex.Message));
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorXpaths: {0}", ex.StackTrace));
                }

                lvItem.SubItems[ColUrl].ForeColor = UrlColour;
                lvItem.SubItems[ColStatusCode].ForeColor = StatusColour;
                lvItem.SubItems[ColStatus].ForeColor = StatusColour;
            }

            if (ShowProgress)
            {
                Count++;
                MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                );
            }
        }

        // Newly created rows are added in one batch after processing.
        this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

        this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

        this.DisplayListView.Columns[ColUrl].Width = 300;
        this.DisplayListView.Columns[ColStatusCode].Width = 100;
        this.DisplayListView.Columns[ColStatus].Width = 100;
        this.DisplayListView.Columns[ColMimeType].Width = 100;
    }
    finally
    {
        // Close and dispose the progress form even when rendering throws.
        if (ShowProgress)
        {
            ProgressForm.DoClose();
        }

        ProgressForm.Dispose();
    }
}