Example #1
0
        // Registers a single pattern containing two capture groups and checks
        // that AnalyzeText() returns exactly two items for the sample sentence.
        public void TestExtractorsLong()
        {
            Macroscope ms = new Macroscope();
            MacroscopeDataExtractorRegexes DataExtractor = new MacroscopeDataExtractorRegexes(Size: 1);
            List <string> Texts = new List <string> { "The quick brown fox jumps over the lazy dog." };

            DataExtractor.SetRegex(0, "Long:", "The (quick brown) fox jumps over the (lazy dog)");

            foreach (string SampleText in Texts)
            {
                List <KeyValuePair <string, string> > Extracted = DataExtractor.AnalyzeText(Text: SampleText);

                Assert.IsNotNull(Extracted);

                // Dump every extracted pair to the debug log to ease diagnosis.
                foreach (KeyValuePair <string, string> Item in Extracted)
                {
                    string LogLine = string.Format("ITEM: {0} => \"{1}\"", Item.Key, Item.Value);

                    ms.DebugMsg(LogLine);
                }

                // One item is expected per capture group.
                int    ActualCount    = Extracted.Count;
                string FailureMessage = string.Format("Wrong number of matches: {0}", ActualCount);

                Assert.AreEqual(2, ActualCount, FailureMessage);
            }
        }
        /** -------------------------------------------------------------------- **/

        // Checks the text of TextBoxObject with
        // MacroscopeDataExtractorRegexes.SyntaxCheckRegex() and returns the
        // result. Any exception raised during the check is logged and treated
        // as "invalid". When the text is invalid and ShowErrorDialogue is set,
        // an error dialogue is shown and the text box receives focus.
        protected override bool ValidateExpression(TextBox TextBoxObject, bool ShowErrorDialogue)
        {
            bool IsValid = false;

            // NOTE(review): the validation switch is queried but its value does
            // not influence the outcome; the check below always runs. Confirm
            // whether disabled validation was meant to short-circuit.
            this.GetEnableValidation();

            try
            {
                IsValid = MacroscopeDataExtractorRegexes.SyntaxCheckRegex(RegexString: TextBoxObject.Text);
            }
            catch (Exception ex)
            {
                ms.DebugMsg(ex.Message);

                IsValid = false;
            }

            if (IsValid)
            {
                return(true);
            }

            if (ShowErrorDialogue)
            {
                this.DialogueBoxError(AlertTitle: "Error", AlertMessage: "Invalid Regular Expression.");
                TextBoxObject.Focus();
            }

            return(false);
        }
Example #3
0
        // Registers five word patterns and checks that AnalyzeText() returns
        // six items for the sample sentence.
        public void TestExtractors()
        {
            Macroscope ms = new Macroscope();
            MacroscopeDataExtractorRegexes DataExtractor = new MacroscopeDataExtractorRegexes(Size: 5);
            List <string> Texts = new List <string> { "The quick brown fox jumps over the lazy dog." };

            // Label/pattern pairs, registered in slot order.
            string[] Labels =
            {
                "Label: The",
                "Label: over",
                "Label: fox",
                "Label: dog",
                "Label: brown"
            };
            string[] Patterns =
            {
                @"\b([tT]he)\b",
                @"\b([oO]ver)\b",
                @"\b([fF]ox)\b",
                @"\b([dD]og)\b",
                @"\b([bB]rown)\b"
            };

            for (int Slot = 0; Slot < Labels.Length; Slot++)
            {
                DataExtractor.SetRegex(Slot, Labels[Slot], Patterns[Slot]);
            }

            foreach (string SampleText in Texts)
            {
                List <KeyValuePair <string, string> > Extracted = DataExtractor.AnalyzeText(Text: SampleText);

                Assert.IsNotNull(Extracted);

                // Dump every extracted pair to the debug log to ease diagnosis.
                foreach (KeyValuePair <string, string> Item in Extracted)
                {
                    string LogLine = string.Format("ITEM: {0} => \"{1}\"", Item.Key, Item.Value);

                    ms.DebugMsg(LogLine);
                }

                int    ActualCount    = Extracted.Count;
                string FailureMessage = string.Format("Wrong number of matches: {0}", ActualCount);

                Assert.AreEqual(6, ActualCount, FailureMessage);
            }
        }
Example #4
0
        /**************************************************************************/

        // Creates the report and stores the three data extractor instances
        // (CSS selectors, regexes, XPaths) that were passed in.
        public MacroscopeExcelDataExtractorReport(
            MacroscopeDataExtractorCssSelectors NewDataExtractorCssSelectors,
            MacroscopeDataExtractorRegexes NewDataExtractorRegexes,
            MacroscopeDataExtractorXpaths NewDataExtractorXpaths
            )
        {
            DataExtractorCssSelectors = NewDataExtractorCssSelectors;
            DataExtractorRegexes = NewDataExtractorRegexes;
            DataExtractorXpaths = NewDataExtractorXpaths;
        }
        // Writes an Excel data extractor report to a temporary location and
        // checks that the output file exists afterwards.
        public void TestWriteXslx()
        {
            MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(MacroscopeConstants.RunTimeMode.LIVE);
            MacroscopeDataExtractorCssSelectors DataExtractorCssSelectors = new MacroscopeDataExtractorCssSelectors(1);
            MacroscopeDataExtractorRegexes      DataExtractorRegexes      = new MacroscopeDataExtractorRegexes(1);
            MacroscopeDataExtractorXpaths       DataExtractorXpaths       = new MacroscopeDataExtractorXpaths(1);
            MacroscopeExcelDataExtractorReport  Report = new MacroscopeExcelDataExtractorReport(NewDataExtractorCssSelectors: DataExtractorCssSelectors, NewDataExtractorRegexes: DataExtractorRegexes, NewDataExtractorXpaths: DataExtractorXpaths);

            // Path.GetTempFileName() creates a zero-byte placeholder file on
            // disk; remember its name so it can be removed as well.
            string TempFilename = Path.GetTempFileName();
            string Filename     = string.Join(".", TempFilename, "xlsx");

            try
            {
                Report.WriteXslx(JobMaster: JobMaster, OutputFilename: Filename);
                Assert.IsTrue(File.Exists(Filename));
            }
            finally
            {
                // Clean up even when the write or the assertion fails.
                // File.Delete() does not throw when the file is absent.
                File.Delete(Filename);
                File.Delete(TempFilename);
            }
        }
Example #6
0
        /**************************************************************************/

        // Builds the form: initialises the designer components, hands this
        // form and the supplied data extractor to the embedded extractor
        // control, and wires up the closing/clear/cancel handlers.
        public MacroscopeDataExtractorRegexesForm(MacroscopeDataExtractorRegexes NewDataExtractor)
        {
            // Required for Windows Forms designer support.
            InitializeComponent();

            dataExtractorInstance.ConfigureDataExtractorForm(this, NewDataExtractor);
            dataExtractorInstance.SetDataExtractor();

            FormClosing += CallbackFormClosing;
            buttonClear.Click += ClearDataExtractorForm;
            buttonCancel.Click += CloseDataExtractorForm;
        }
        /** -------------------------------------------------------------------- **/

        // Runs the regex data extractor over GenericText and records every
        // returned label/value pair via SetDataExtractedRegexes().
        private void ProcessGenericDataExtractorRegexes(
            MacroscopeDataExtractorRegexes DataExtractor,
            string GenericText
            )
        {
            List <KeyValuePair <string, string> > Extracted = DataExtractor.AnalyzeText(Text: GenericText);

            foreach (KeyValuePair <string, string> Item in Extracted)
            {
                string ItemLabel = Item.Key;
                string ItemText  = Item.Value;

                this.SetDataExtractedRegexes(Label: ItemLabel, Text: ItemText);
            }
        }
        /** Process Data Extractors ***********************************************/

        // Fetches the regex data extractor from the job master of this
        // document's collection and, when it exists and is enabled, applies
        // it to GenericText.
        private void ProcessGenericDataExtractors(
            string GenericText
            )
        {
            MacroscopeJobMaster            JobMaster      = this.DocCollection.GetJobMaster();
            MacroscopeDataExtractorRegexes RegexExtractor = JobMaster.GetDataExtractorRegexes();

            // Nothing to do without a configured, enabled extractor.
            if ((RegexExtractor == null) || (!RegexExtractor.IsEnabled()))
            {
                return;
            }

            this.ProcessGenericDataExtractorRegexes(DataExtractor: RegexExtractor, GenericText: GenericText);
        }
        // Writes one CSV data extractor report per worksheet type
        // (CSS selectors, regexes, XPaths) to a temporary location and checks
        // that each output file exists afterwards.
        public void TestWriteCsv()
        {
            MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(MacroscopeConstants.RunTimeMode.LIVE);
            MacroscopeDataExtractorCssSelectors DataExtractorCssSelectors = new MacroscopeDataExtractorCssSelectors(1);
            MacroscopeDataExtractorRegexes      DataExtractorRegexes      = new MacroscopeDataExtractorRegexes(1);
            MacroscopeDataExtractorXpaths       DataExtractorXpaths       = new MacroscopeDataExtractorXpaths(1);
            MacroscopeCsvDataExtractorReport    ReportFile = new MacroscopeCsvDataExtractorReport(NewDataExtractorCssSelectors: DataExtractorCssSelectors, NewDataExtractorRegexes: DataExtractorRegexes, NewDataExtractorXpaths: DataExtractorXpaths);
            List <MacroscopeCsvDataExtractorReport.OutputWorksheet> SelectedOutputWorksheets = new List <MacroscopeCsvDataExtractorReport.OutputWorksheet>();

            SelectedOutputWorksheets.Add(MacroscopeCsvDataExtractorReport.OutputWorksheet.CSS_SELECTORS);
            SelectedOutputWorksheets.Add(MacroscopeCsvDataExtractorReport.OutputWorksheet.REGEXES);
            SelectedOutputWorksheets.Add(MacroscopeCsvDataExtractorReport.OutputWorksheet.XPATHS);
            foreach (MacroscopeCsvDataExtractorReport.OutputWorksheet SelectedOutputWorksheet in SelectedOutputWorksheets)
            {
                // Path.GetTempFileName() creates a zero-byte placeholder file
                // on disk; remember its name so it can be removed as well.
                string TempFilename = Path.GetTempFileName();

                // Join with a bare "csv" so the name ends in ".csv", not "..csv".
                string Filename = string.Join(".", TempFilename, "csv");

                try
                {
                    ReportFile.WriteCsv(JobMaster: JobMaster, SelectedOutputWorksheet: SelectedOutputWorksheet, OutputFilename: Filename);
                    Assert.IsTrue(File.Exists(Filename));
                }
                finally
                {
                    // Clean up even when the write or the assertion fails.
                    // File.Delete() does not throw when the file is absent.
                    File.Delete(Filename);
                    File.Delete(TempFilename);
                }
            }
        }
Example #10
0
        /**************************************************************************/

        // Re-renders the list view for the given URLs. The work is marshalled
        // through MainForm.Invoke() when MainForm.InvokeRequired is true and
        // is run directly otherwise. While rendering, the wait cursor is shown
        // and list view updates are suspended.
        public void RefreshData(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeDataExtractorRegexes DataExtractor

            )
        {
            // Single definition of the refresh work, shared by both paths.
            MethodInvoker Refresh = delegate
            {
                Cursor.Current = Cursors.WaitCursor;
                this.DisplayListView.BeginUpdate();

                try
                {
                    this.RenderListView(
                        DocCollection: DocCollection,
                        UrlList: UrlList,
                        DataExtractor: DataExtractor
                        );
                    this.RenderUrlCount();
                }
                finally
                {
                    // Always re-enable drawing and restore the cursor, even if
                    // rendering throws; otherwise the list view stays frozen.
                    this.DisplayListView.EndUpdate();
                    Cursor.Current = Cursors.Default;
                }
            };

            if (this.MainForm.InvokeRequired)
            {
                this.MainForm.Invoke(Refresh);
            }
            else
            {
                Refresh();
            }
        }
Example #11
0
        /**************************************************************************/

        // Builds or updates one list view row per (URL, regex label, extracted
        // value) triple for the documents named in UrlList, then adds the new
        // rows to DisplayListView, de-duplicates it and sizes its columns.
        // Returns immediately when DocCollection holds no documents.
        // A progress form is updated while processing when the
        // "show progress dialogues" preference is on.
        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeDataExtractorRegexes DataExtractor
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            // Read the preference once so that showing and closing the
            // progress form are always decided consistently.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            List <ListViewItem> ListViewItems = new List <ListViewItem>();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count           = 0;
                // NOTE(review): the total is the collection size although the loop
                // iterates UrlList; confirm the two are expected to match.
                decimal TotalDocs       = (decimal)DocCollection.CountDocuments();
                decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (string Url in UrlList)
                {
                    MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                    // Skip URLs without a document and documents the extractor
                    // does not apply to, but still count them as processed below.
                    if ((msDoc != null) && DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                    {
                        this.CollectDocumentListViewItems(msDoc: msDoc, ListViewItems: ListViewItems);
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                this.DeduplicateListView(DuplicatedListView: this.DisplayListView);

                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

                this.DisplayListView.Columns[ColUrl].Width        = 300;
                this.DisplayListView.Columns[ColStatusCode].Width = 100;
                this.DisplayListView.Columns[ColStatus].Width     = 100;
                this.DisplayListView.Columns[ColMimeType].Width   = 100;
            }
            finally
            {
                // Close and release the progress form even when rendering fails.
                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }

        // Fills in one row per non-empty extracted regex pair of msDoc. Rows
        // already present in DisplayListView are updated in place; new rows
        // are appended to ListViewItems for the caller to add.
        private void CollectDocumentListViewItems(
            MacroscopeDocument msDoc,
            List <ListViewItem> ListViewItems
            )
        {
            string DocUrl     = msDoc.GetUrl();
            string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
            string Status     = msDoc.GetStatusCode().ToString();
            string MimeType   = msDoc.GetMimeType();

            // Colours depend on the document only, so work them out once
            // instead of once per extracted pair.
            Color UrlColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
            Color StatusColor;

            if (Regex.IsMatch(StatusCode, "^[2]"))
            {
                StatusColor = Color.Green;
            }
            else if (Regex.IsMatch(StatusCode, "^[3]"))
            {
                StatusColor = Color.Goldenrod;
            }
            else if (Regex.IsMatch(StatusCode, "^[45]"))
            {
                StatusColor = Color.Red;
            }
            else
            {
                StatusColor = Color.Blue;
            }

            foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedRegexes())
            {
                ListViewItem lvItem         = null;
                string       RegexLabel     = DataExtractedPair.Key;
                string       ExtractedValue = DataExtractedPair.Value;
                string       PairKey        = null;

                if (
                    string.IsNullOrEmpty(RegexLabel) ||
                    string.IsNullOrEmpty(ExtractedValue))
                {
                    continue;
                }

                // Row key combines the URL, label and value digests.
                PairKey = string.Join(
                    ":",
                    UrlToDigest(DocUrl),
                    UrlToDigest(Macroscope.GetStringDigest(Text: RegexLabel)),
                    UrlToDigest(Macroscope.GetStringDigest(Text: ExtractedValue))
                    );

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    lvItem = this.DisplayListView.Items[PairKey];
                }
                else
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    for (int i = 0; i < 6; i++)
                    {
                        lvItem.SubItems.Add("");
                    }

                    ListViewItems.Add(lvItem);
                }

                try
                {
                    lvItem.SubItems[ColUrl].Text            = DocUrl;
                    lvItem.SubItems[ColStatusCode].Text     = StatusCode;
                    lvItem.SubItems[ColStatus].Text         = Status;
                    lvItem.SubItems[ColMimeType].Text       = MimeType;
                    lvItem.SubItems[ColRegexLabel].Text     = RegexLabel;
                    lvItem.SubItems[ColExtractedValue].Text = ExtractedValue;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorRegexes: {0}", ex.Message));
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorRegexes: {0}", ex.StackTrace));
                }

                lvItem.SubItems[ColUrl].ForeColor        = UrlColor;
                lvItem.SubItems[ColStatusCode].ForeColor = StatusColor;
                lvItem.SubItems[ColStatus].ForeColor     = StatusColor;
            }
        }
        /**************************************************************************/

        // Stores the container form and data extractor, then populates the
        // controls grid: a header row of four column labels, one row of
        // controls (slot label, state combo box, label text box, expression
        // text box) per extractor slot, and a trailing row of empty labels.
        public void ConfigureDataExtractorForm(
            MacroscopeDataExtractorRegexesForm NewContainerForm,
            MacroscopeDataExtractorRegexes NewDataExtractor
            )
        {
            this.ContainerForm = NewContainerForm;
            this.DataExtractor = NewDataExtractor;

            int SlotCount = this.DataExtractor.GetSize();
            TableLayoutPanel Grid = this.tableLayoutPanelControlsGrid;

            Grid.Dock        = DockStyle.Fill;
            Grid.ColumnCount = 4;
            Grid.RowCount    = SlotCount + 1;

            // Header row.
            List <string> Headings = new List <string> (4)
            {
                "",
                "Active/Inactive",
                "Extractor Label",
                "Regular Expression Pattern"
            };

            foreach (string Heading in Headings)
            {
                Label HeadingLabel = new Label
                {
                    Text      = Heading,
                    TextAlign = ContentAlignment.BottomLeft,
                    Dock      = DockStyle.Fill,
                    Margin    = new Padding(5, 5, 5, 5)
                };

                Grid.Controls.Add(HeadingLabel);
            }

            // One row of controls per extractor slot; names are 1-based,
            // tags carry the 0-based slot index.
            for (int Slot = 0; Slot < SlotCount; Slot++)
            {
                int    Ordinal = Slot + 1;
                string SlotTag = Slot.ToString();

                Label SlotLabel = new Label
                {
                    Text      = string.Format("Regex {0}", Ordinal),
                    TextAlign = ContentAlignment.MiddleRight,
                    Dock      = DockStyle.Fill,
                    Margin    = new Padding(5, 5, 5, 5)
                };

                ComboBox StateBox = new ComboBox();

                StateBox.Name = string.Format("StateComboBox{0}", Ordinal);
                StateBox.Items.Add("Inactive");
                StateBox.Items.Add("Active");
                StateBox.DropDownStyle = ComboBoxStyle.DropDownList;
                StateBox.SelectedIndex = 0;
                StateBox.Margin        = new Padding(5, 5, 5, 5);
                StateBox.Width         = 100;

                TextBox LabelBox = new TextBox
                {
                    Name   = string.Format("TextBoxLabel{0}", Ordinal),
                    Dock   = DockStyle.Fill,
                    Margin = new Padding(5, 5, 5, 5),
                    Tag    = SlotTag
                };

                LabelBox.KeyUp       += this.CallbackTextBoxKeyUp;
                LabelBox.TextChanged += this.CallbackTextBoxLabelTextChanged;

                TextBox ExpressionBox = new TextBox
                {
                    Name   = string.Format("TextBoxExpression{0}", Ordinal),
                    Dock   = DockStyle.Fill,
                    Margin = new Padding(5, 5, 5, 5),
                    Tag    = SlotTag
                };

                ExpressionBox.KeyUp       += this.CallbackTextBoxKeyUp;
                ExpressionBox.TextChanged += this.CallbackTextBoxExpressionTextChanged;

                Grid.Controls.Add(SlotLabel);
                Grid.Controls.Add(StateBox);
                Grid.Controls.Add(LabelBox);
                Grid.Controls.Add(ExpressionBox);

                this.TextBoxLabels.Add(LabelBox);
                this.StateComboBoxes.Add(StateBox);
                this.TextBoxExpressions.Add(ExpressionBox);
            }

            // Add empty last row for space adjustment
            for (int Column = 0; Column < Grid.ColumnCount; Column++)
            {
                Grid.Controls.Add(new Label { Text = "" });
            }

            // AutoScroll is switched off while the right-hand padding is set,
            // then switched back on.
            Grid.AutoScroll = false;
            Grid.Padding    = new Padding(0, 0, 15, 0);
            Grid.AutoScroll = true;
        }
        /** -------------------------------------------------------------------- **/

        // Replaces the stored regex data extractor with NewDataExtractor.
        public void SetDataExtractorRegexes(MacroscopeDataExtractorRegexes NewDataExtractor)
        {
            DataExtractorRegexes = NewDataExtractor;
        }