/** -------------------------------------------------------------------- **/

        /// <summary>
        /// Checks whether the text held by a text box passes the XPath syntax check.
        /// </summary>
        /// <param name="TextBoxObject">Text box whose <c>Text</c> is checked.</param>
        /// <param name="ShowErrorDialogue">
        /// When true and the expression is not valid, an error dialogue is shown and
        /// focus is returned to <paramref name="TextBoxObject"/>.
        /// </param>
        /// <returns>
        /// True when the syntax check succeeds; false when it fails or throws.
        /// </returns>
        protected override bool ValidateExpression(TextBox TextBoxObject, bool ShowErrorDialogue)
        {
            bool IsValid = false;

            // NOTE(review): this branch does not change the outcome, because IsValid is
            // already false and the syntax check below runs regardless. Confirm whether
            // an early return was intended when validation is disabled.
            if (!this.GetEnableValidation())
            {
                IsValid = false;
            }

            try
            {
                IsValid = MacroscopeDataExtractorXpaths.SyntaxCheckXpath(XpathString: TextBoxObject.Text);
            }
            catch (Exception ex)
            {
                // A failure while checking is logged and treated as "not valid".
                ms.DebugMsg(ex.Message);
            }

            if (IsValid || !ShowErrorDialogue)
            {
                return IsValid;
            }

            this.DialogueBoxError(AlertTitle: "Error", AlertMessage: "Invalid XPath Expression.");
            TextBoxObject.Focus();

            return IsValid;
        }
// Example no. 2
// 0
        /**************************************************************************/

        /// <summary>
        /// Creates the report and stores the three data extractor instances it is given.
        /// </summary>
        /// <param name="NewDataExtractorCssSelectors">CSS selector extractor to store.</param>
        /// <param name="NewDataExtractorRegexes">Regex extractor to store.</param>
        /// <param name="NewDataExtractorXpaths">XPath extractor to store.</param>
        public MacroscopeExcelDataExtractorReport(
            MacroscopeDataExtractorCssSelectors NewDataExtractorCssSelectors,
            MacroscopeDataExtractorRegexes NewDataExtractorRegexes,
            MacroscopeDataExtractorXpaths NewDataExtractorXpaths
            )
        {
            this.DataExtractorCssSelectors = NewDataExtractorCssSelectors;
            this.DataExtractorRegexes      = NewDataExtractorRegexes;
            this.DataExtractorXpaths       = NewDataExtractorXpaths;
        }
        /// <summary>
        /// Runs a single active "//title" XPath extractor in OUTERHTML mode over every
        /// document in <c>this.HtmlDocs</c> and compares the first extracted value with
        /// the expected title element for that document key.
        /// </summary>
        public void TestTitlesOuterHtml()
        {
            Dictionary <string, string> AssetDic = new Dictionary <string, string> ()
            {
                {
                    "HtmlDoc001",
                    "<title>HtmlDoc001</title>"
                },
                {
                    "HtmlDoc002",
                    "<title>HtmlDoc002</title>"
                },
                {
                    "HtmlDoc003",
                    "<title>HtmlDoc003</title>"
                },
                {
                    "HtmlDoc004",
                    "<title>HtmlDoc004</title>"
                },
                {
                    "HtmlDoc005",
                    "<title>HtmlDoc005</title>"
                }
            };

            MacroscopeDataExtractorXpaths DataExtractor = new MacroscopeDataExtractorXpaths(Size: 1);

            DataExtractor.SetXpath(
                Slot: 0,
                XpathLabel: "TestTitlesOuterHtml",
                XpathString: "//title",
                ExtractorType: MacroscopeConstants.DataExtractorType.OUTERHTML
                );

            DataExtractor.SetActiveInactive(
                Slot: 0,
                State: MacroscopeConstants.ActiveInactive.ACTIVE
                );

            foreach (string HtmlDocKey in this.HtmlDocs.Keys)
            {
                string Html = this.HtmlDocs[HtmlDocKey];

                List <KeyValuePair <string,  string> > ResultList = DataExtractor.AnalyzeHtml(Html: Html);

                // Assert before indexing: an empty result must fail as an assertion,
                // not as an out-of-range exception from ResultList[0].
                Assert.IsNotEmpty(ResultList, "WHOOPS!");

                DebugMsg(string.Format("HtmlDocKey: {0} :: Value: {1}", HtmlDocKey, ResultList[0].Value));

                Assert.AreEqual(AssetDic[HtmlDocKey], ResultList[0].Value);
            }
        }
        /// <summary>
        /// Writes an Excel data extractor report to a temporary path and asserts that
        /// the output file exists afterwards.
        /// </summary>
        public void TestWriteXslx()
        {
            MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(MacroscopeConstants.RunTimeMode.LIVE);
            MacroscopeDataExtractorCssSelectors DataExtractorCssSelectors = new MacroscopeDataExtractorCssSelectors(1);
            MacroscopeDataExtractorRegexes      DataExtractorRegexes      = new MacroscopeDataExtractorRegexes(1);
            MacroscopeDataExtractorXpaths       DataExtractorXpaths       = new MacroscopeDataExtractorXpaths(1);
            MacroscopeExcelDataExtractorReport  Report = new MacroscopeExcelDataExtractorReport(NewDataExtractorCssSelectors: DataExtractorCssSelectors, NewDataExtractorRegexes: DataExtractorRegexes, NewDataExtractorXpaths: DataExtractorXpaths);

            // Path.GetTempFileName() creates a zero-byte file on disk; keep its name so
            // it can be removed as well as the report written next to it.
            string TempFilename = Path.GetTempFileName();
            string Filename     = string.Join(".", TempFilename, "xlsx");

            try
            {
                Report.WriteXslx(JobMaster: JobMaster, OutputFilename: Filename);
                Assert.IsTrue(File.Exists(Filename));
            }
            finally
            {
                // Clean up even when the write or the assertion fails.
                File.Delete(Filename);
                File.Delete(TempFilename);
            }
        }
// Example no. 5
// 0
        /**************************************************************************/

        /// <summary>
        /// Builds the form, hands the given data extractor to the embedded
        /// <c>dataExtractorInstance</c> control, and wires up the form and button events.
        /// </summary>
        /// <param name="NewDataExtractor">XPath data extractor passed to the embedded control.</param>
        public MacroscopeDataExtractorXpathsForm(MacroscopeDataExtractorXpaths NewDataExtractor)
        {
            InitializeComponent(); // The InitializeComponent() call is required for Windows Forms designer support.

            // Configure the embedded control with this form as its container.
            this.dataExtractorInstance.ConfigureDataExtractorForm(
                NewContainerForm: this,
                NewDataExtractor: NewDataExtractor
                );

            this.dataExtractorInstance.SetDataExtractor();

            // Event wiring for form close and the Clear / Cancel buttons.
            this.FormClosing        += this.CallbackFormClosing;
            this.buttonClear.Click  += this.ClearDataExtractorForm;
            this.buttonCancel.Click += this.CloseDataExtractorForm;
        }
        /// <summary>
        /// Writes a CSV data extractor report for each output worksheet kind
        /// (CSS selectors, regexes, XPaths) to a temporary path and asserts that each
        /// output file exists afterwards.
        /// </summary>
        public void TestWriteCsv()
        {
            MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(MacroscopeConstants.RunTimeMode.LIVE);
            MacroscopeDataExtractorCssSelectors DataExtractorCssSelectors = new MacroscopeDataExtractorCssSelectors(1);
            MacroscopeDataExtractorRegexes      DataExtractorRegexes      = new MacroscopeDataExtractorRegexes(1);
            MacroscopeDataExtractorXpaths       DataExtractorXpaths       = new MacroscopeDataExtractorXpaths(1);
            MacroscopeCsvDataExtractorReport    ReportFile = new MacroscopeCsvDataExtractorReport(NewDataExtractorCssSelectors: DataExtractorCssSelectors, NewDataExtractorRegexes: DataExtractorRegexes, NewDataExtractorXpaths: DataExtractorXpaths);

            List <MacroscopeCsvDataExtractorReport.OutputWorksheet> SelectedOutputWorksheets = new List <MacroscopeCsvDataExtractorReport.OutputWorksheet>()
            {
                MacroscopeCsvDataExtractorReport.OutputWorksheet.CSS_SELECTORS,
                MacroscopeCsvDataExtractorReport.OutputWorksheet.REGEXES,
                MacroscopeCsvDataExtractorReport.OutputWorksheet.XPATHS
            };

            foreach (MacroscopeCsvDataExtractorReport.OutputWorksheet SelectedOutputWorksheet in SelectedOutputWorksheets)
            {
                // Path.GetTempFileName() creates a zero-byte file on disk; keep its name
                // so it can be removed as well as the report written next to it.
                string TempFilename = Path.GetTempFileName();

                // The separator supplies the dot, so the extension is "csv" (not ".csv",
                // which produced a "..csv" suffix).
                string Filename = string.Join(".", TempFilename, "csv");

                try
                {
                    ReportFile.WriteCsv(JobMaster: JobMaster, SelectedOutputWorksheet: SelectedOutputWorksheet, OutputFilename: Filename);
                    Assert.IsTrue(File.Exists(Filename));
                }
                finally
                {
                    // Clean up even when the write or the assertion fails.
                    File.Delete(Filename);
                    File.Delete(TempFilename);
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Re-renders the list view and the URL count for the given documents, running
        /// the work through <c>MainForm.Invoke</c> when <c>MainForm.InvokeRequired</c>
        /// is true and directly otherwise.
        /// </summary>
        /// <param name="DocCollection">Document collection passed to <c>RenderListView</c>.</param>
        /// <param name="UrlList">URLs passed to <c>RenderListView</c>.</param>
        /// <param name="DataExtractor">XPath data extractor passed to <c>RenderListView</c>.</param>
        public void RefreshData(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeDataExtractorXpaths DataExtractor
            )
        {
            // Single definition of the refresh work, shared by both call paths.
            MethodInvoker Refresh = delegate
            {
                Cursor.Current = Cursors.WaitCursor;
                this.DisplayListView.BeginUpdate();

                try
                {
                    this.RenderListView(
                        DocCollection: DocCollection,
                        UrlList: UrlList,
                        DataExtractor: DataExtractor
                        );
                    this.RenderUrlCount();
                }
                finally
                {
                    // Always pair BeginUpdate with EndUpdate and restore the cursor,
                    // even if rendering throws.
                    this.DisplayListView.EndUpdate();
                    Cursor.Current = Cursors.Default;
                }
            };

            if (this.MainForm.InvokeRequired)
            {
                this.MainForm.Invoke(Refresh);
            }
            else
            {
                Refresh();
            }
        }
// Example no. 8
// 0
        /// <summary>
        /// Runs a single active "//title" XPath extractor in INNERTEXT mode over every
        /// document in <c>this.HtmlDocs</c> and compares the first extracted value with
        /// the expected title text for that document key.
        /// </summary>
        public void TestTitlesInnerText()
        {
            Dictionary <string, string> AssetDic = new Dictionary <string, string>();

            AssetDic.Add("SEOMacroscope.src.MacroscopeDataExtractor.t.HtmlDocs.HtmlDoc001.html", "HtmlDoc001");
            AssetDic.Add("SEOMacroscope.src.MacroscopeDataExtractor.t.HtmlDocs.HtmlDoc002.html", "HtmlDoc002");
            AssetDic.Add("SEOMacroscope.src.MacroscopeDataExtractor.t.HtmlDocs.HtmlDoc003.html", "HtmlDoc003");
            AssetDic.Add("SEOMacroscope.src.MacroscopeDataExtractor.t.HtmlDocs.HtmlDoc004.html", "HtmlDoc004");
            AssetDic.Add("SEOMacroscope.src.MacroscopeDataExtractor.t.HtmlDocs.HtmlDoc005.html", "HtmlDoc005");

            MacroscopeDataExtractorXpaths DataExtractor = new MacroscopeDataExtractorXpaths(Size: 1);

            DataExtractor.SetXpath(
                Slot: 0,
                XpathLabel: "TestTitlesInnerText",
                XpathString: "//title",
                ExtractorType: MacroscopeConstants.DataExtractorType.INNERTEXT
                );

            DataExtractor.SetActiveInactive(
                Slot: 0,
                State: MacroscopeConstants.ActiveInactive.ACTIVE
                );

            foreach (string HtmlDocKey in this.HtmlDocs.Keys)
            {
                string Html = this.HtmlDocs[HtmlDocKey];

                List <KeyValuePair <string, string> > ResultList = DataExtractor.AnalyzeHtml(Html: Html);

                // Assert before indexing: an empty result must fail as an assertion,
                // not as an out-of-range exception from ResultList[0].
                Assert.IsNotEmpty(ResultList, "WHOOPS!");

                DebugMsg(string.Format("HtmlDocKey: {0} :: Value: {1}", HtmlDocKey, ResultList[0].Value));

                Assert.AreEqual(AssetDic[HtmlDocKey], ResultList[0].Value);
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Stores the container form and data extractor, then fills the controls grid
        /// with a header row, one row of input controls per extractor slot, and a
        /// trailing row of empty labels.
        /// </summary>
        /// <param name="NewContainerForm">Form that hosts this control.</param>
        /// <param name="NewDataExtractor">Data extractor whose size sets the number of slot rows.</param>
        public void ConfigureDataExtractorForm(
            MacroscopeDataExtractorXpathsForm NewContainerForm,
            MacroscopeDataExtractorXpaths NewDataExtractor
            )
        {
            this.ContainerForm = NewContainerForm;
            this.DataExtractor = NewDataExtractor;

            int SlotCount = this.DataExtractor.GetSize();
            TableLayoutPanel Grid = this.tableLayoutPanelControlsGrid;
            Padding CellMargin = new Padding(5, 5, 5, 5);

            Grid.Dock = DockStyle.Fill;
            Grid.ColumnCount = 5;
            Grid.RowCount = SlotCount + 1;

            // Header row: one label per column.
            string[] HeaderTexts =
            {
                "",
                "Active/Inactive",
                "Extractor Label",
                "XPath Expression",
                "Extract To"
            };

            foreach (string HeaderText in HeaderTexts)
            {
                Label HeaderLabel = new Label
                {
                    Text = HeaderText,
                    TextAlign = ContentAlignment.BottomLeft,
                    Dock = DockStyle.Fill,
                    Margin = CellMargin
                };

                Grid.Controls.Add(HeaderLabel);
            }

            // One row of controls per extractor slot; names are numbered from 1,
            // tags carry the zero-based slot index as a string.
            for (int Slot = 0; Slot < SlotCount; Slot++)
            {
                int SlotNumber = Slot + 1;

                Label RowLabel = new Label
                {
                    Text = string.Format("XPath {0}", SlotNumber),
                    TextAlign = ContentAlignment.MiddleRight,
                    Dock = DockStyle.Fill,
                    Margin = CellMargin
                };

                ComboBox StateBox = new ComboBox();
                StateBox.Name = string.Format("StateComboBox{0}", SlotNumber);
                StateBox.Items.Add("Inactive");
                StateBox.Items.Add("Active");
                StateBox.DropDownStyle = ComboBoxStyle.DropDownList;
                StateBox.SelectedIndex = 0;
                StateBox.Margin = CellMargin;
                StateBox.Width = 100;

                TextBox LabelBox = new TextBox
                {
                    Name = string.Format("TextBoxLabel{0}", SlotNumber),
                    Dock = DockStyle.Fill,
                    Margin = CellMargin,
                    Tag = Slot.ToString()
                };
                LabelBox.KeyUp += this.CallbackTextBoxKeyUp;
                LabelBox.TextChanged += this.CallbackTextBoxLabelTextChanged;

                TextBox ExpressionBox = new TextBox
                {
                    Name = string.Format("TextBoxExpression{0}", SlotNumber),
                    Dock = DockStyle.Fill,
                    Margin = CellMargin,
                    Tag = Slot.ToString()
                };
                ExpressionBox.KeyUp += this.CallbackTextBoxKeyUp;
                ExpressionBox.TextChanged += this.CallbackTextBoxExpressionTextChanged;

                ComboBox ExtractToBox = new ComboBox();
                ExtractToBox.Name = string.Format("ExtractToComboBox{0}", SlotNumber);
                ExtractToBox.Items.Add("Extract HTML Element");
                ExtractToBox.Items.Add("Extract Inner HTML Elements");
                ExtractToBox.Items.Add("Extract Text");
                ExtractToBox.DropDownStyle = ComboBoxStyle.DropDownList;
                ExtractToBox.SelectedIndex = 0;
                ExtractToBox.Margin = CellMargin;
                ExtractToBox.Width = 160;

                // Controls are added in column order.
                Grid.Controls.Add(RowLabel);
                Grid.Controls.Add(StateBox);
                Grid.Controls.Add(LabelBox);
                Grid.Controls.Add(ExpressionBox);
                Grid.Controls.Add(ExtractToBox);

                this.TextBoxLabels.Add(LabelBox);
                this.StateComboBoxes.Add(StateBox);
                this.TextBoxExpressions.Add(ExpressionBox);
                this.ExtractToComboBoxes.Add(ExtractToBox);
            }

            // Add empty last row for space adjustment
            for (int Column = 0; Column < Grid.ColumnCount; Column++)
            {
                Label SpacerLabel = new Label();
                SpacerLabel.Text = "";
                Grid.Controls.Add(SpacerLabel);
            }

            // AutoScroll is switched off, the right-hand padding applied, then
            // AutoScroll switched back on.
            Grid.AutoScroll = false;
            Grid.Padding = new Padding(0, 0, 15, 0);
            Grid.AutoScroll = true;
        }
// Example no. 10
// 0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Replaces the stored XPath data extractor with the given instance.
        /// </summary>
        /// <param name="NewDataExtractor">XPath data extractor to store.</param>
        public void SetDataExtractorXpaths(MacroscopeDataExtractorXpaths NewDataExtractor)
        {
            this.DataExtractorXpaths = NewDataExtractor;
        }
        /**************************************************************************/

        /// <summary>
        /// Adds or updates one list view row per (URL, XPath label, extracted value)
        /// triple found on the documents named in <paramref name="UrlList"/>, colours
        /// the URL and status cells, and resizes the columns.
        /// </summary>
        /// <remarks>
        /// Does nothing when the collection reports zero documents. URLs for which the
        /// collection returns no document, and documents the extractor says it cannot
        /// be applied to, are skipped. The progress form is closed (when progress
        /// dialogues are enabled) and disposed even if rendering throws.
        /// </remarks>
        /// <param name="DocCollection">Collection the documents are looked up in by URL.</param>
        /// <param name="UrlList">URLs of the documents to render.</param>
        /// <param name="DataExtractor">Extractor used to decide whether a document is rendered.</param>
        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeDataExtractorXpaths DataExtractor
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem> ();

            bool    ShowProgress    = MacroscopePreferencesManager.GetShowProgressDialogues();
            decimal Count           = 0;
            decimal TotalDocs       = ( decimal )DocCollection.CountDocuments();
            decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (string Url in UrlList)
                {
                    MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                    // Guard against a URL that has no document in the collection.
                    if (msDoc == null)
                    {
                        DebugMsg(string.Format("MacroscopeDisplayDataExtractorXpaths MISSING: {0}", Url));
                        continue;
                    }

                    if (!DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                    {
                        continue;
                    }

                    string DocUrl     = msDoc.GetUrl();
                    string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                    string Status     = msDoc.GetStatusCode().ToString();
                    string MimeType   = msDoc.GetMimeType();

                    // Colours depend only on the document, so work them out once per
                    // document rather than once per extracted row.
                    Color UrlColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
                    Color StatusColor;

                    if (Regex.IsMatch(StatusCode, "^[2]"))
                    {
                        StatusColor = Color.Green;
                    }
                    else if (Regex.IsMatch(StatusCode, "^[3]"))
                    {
                        StatusColor = Color.Goldenrod;
                    }
                    else if (Regex.IsMatch(StatusCode, "^[45]"))
                    {
                        StatusColor = Color.Red;
                    }
                    else
                    {
                        StatusColor = Color.Blue;
                    }

                    foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedXpaths())
                    {
                        ListViewItem lvItem         = null;
                        string       XpathLabel     = DataExtractedPair.Key;
                        string       ExtractedValue = DataExtractedPair.Value;

                        if (
                            string.IsNullOrEmpty(XpathLabel) ||
                            string.IsNullOrEmpty(ExtractedValue))
                        {
                            continue;
                        }

                        // Row key: URL digest plus digests of the label and the value.
                        string PairKey = string.Join(
                            ":",
                            UrlToDigest(DocUrl),
                            UrlToDigest(Macroscope.GetStringDigest(Text: XpathLabel)),
                            UrlToDigest(Macroscope.GetStringDigest(Text: ExtractedValue))
                            );

                        if (this.DisplayListView.Items.ContainsKey(PairKey))
                        {
                            lvItem = this.DisplayListView.Items[PairKey];
                        }
                        else
                        {
                            lvItem = new ListViewItem(PairKey);
                            lvItem.UseItemStyleForSubItems = false;
                            lvItem.Name = PairKey;

                            for (int i = 0; i < 6; i++)
                            {
                                lvItem.SubItems.Add("");
                            }

                            ListViewItems.Add(lvItem);
                        }

                        // Skip the row instead of dereferencing a missing item below.
                        if (lvItem == null)
                        {
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorXpaths MISSING: {0}", PairKey));
                            continue;
                        }

                        try
                        {
                            lvItem.SubItems[ColUrl].Text            = DocUrl;
                            lvItem.SubItems[ColStatusCode].Text     = StatusCode;
                            lvItem.SubItems[ColStatus].Text         = Status;
                            lvItem.SubItems[ColMimeType].Text       = MimeType;
                            lvItem.SubItems[ColXpathLabel].Text     = XpathLabel;
                            lvItem.SubItems[ColExtractedValue].Text = ExtractedValue;
                        }
                        catch (Exception ex)
                        {
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorXpaths: {0}", ex.Message));
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorXpaths: {0}", ex.StackTrace));
                        }

                        lvItem.SubItems[ColUrl].ForeColor        = UrlColor;
                        lvItem.SubItems[ColStatusCode].ForeColor = StatusColor;
                        lvItem.SubItems[ColStatus].ForeColor     = StatusColor;
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

                // Fixed widths override the auto-sized result for these columns.
                this.DisplayListView.Columns[ColUrl].Width        = 300;
                this.DisplayListView.Columns[ColStatusCode].Width = 100;
                this.DisplayListView.Columns[ColStatus].Width     = 100;
                this.DisplayListView.Columns[ColMimeType].Width   = 100;
            }
            finally
            {
                // Release the progress form on every exit path, including exceptions.
                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }