コード例 #1
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes the search-result row for a single document.
        /// </summary>
        /// <remarks>
        /// Rows are keyed by the URL text. If <c>DisplayListView</c> already holds a row
        /// with that key, its first four cells are overwritten in place; otherwise a new
        /// row is built and appended to <paramref name="ListViewItems"/>. Exceptions raised
        /// while touching a row are reported through <c>DebugMsg</c> and not rethrown.
        /// </remarks>
        /// <param name="ListViewItems">Receives rows created by this call.</param>
        /// <param name="DocCollection">Not used by this implementation.</param>
        /// <param name="msDoc">Document supplying the title, description and keywords.</param>
        /// <param name="Url">URL shown in the first column; also used as the row key.</param>
        protected override void RenderListView(
            List<ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            string titleText       = msDoc.GetTitle();
            string descriptionText = msDoc.GetDescription();
            string keywordsText    = msDoc.GetKeywords();

            // Single-value join: the row key is effectively the URL text itself.
            string rowKey = string.Join("", Url);

            bool rowAlreadyShown = this.DisplayListView.Items.ContainsKey(rowKey);

            if (!rowAlreadyShown)
            {
                // No row yet: build one and hand it back to the caller via ListViewItems.
                try
                {
                    ListViewItem freshRow = new ListViewItem(rowKey);
                    freshRow.UseItemStyleForSubItems = false;
                    freshRow.Name = rowKey;

                    freshRow.SubItems[0].Text = Url;
                    freshRow.SubItems.Add(titleText);
                    freshRow.SubItems.Add(descriptionText);
                    freshRow.SubItems.Add(keywordsText);

                    ListViewItems.Add(freshRow);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplaySearchCollection 2: {0}", ex.Message));
                }

                return;
            }

            // Row exists: update its cells in place.
            try
            {
                ListViewItem shownRow = this.DisplayListView.Items[rowKey];
                shownRow.SubItems[0].Text = Url;
                shownRow.SubItems[1].Text = titleText;
                shownRow.SubItems[2].Text = descriptionText;
                shownRow.SubItems[3].Text = keywordsText;
            }
            catch (Exception ex)
            {
                DebugMsg(string.Format("MacroscopeDisplaySearchCollection 1: {0}", ex.Message));
            }
        }
コード例 #2
0
        /**************************************************************************/

        /// <summary>
        /// Adds every distinct term found in a document's text to the inverted index.
        /// </summary>
        /// <remarks>
        /// The title, description, keywords and cleaned body text are split on single
        /// spaces and empty fragments are discarded. Each distinct term (lower-cased unless
        /// case-sensitive indexing is enabled) is mapped in <c>InvertedIndex</c> to the
        /// documents that contain it, keyed by document URL. A document already registered
        /// for a term is left untouched.
        /// </remarks>
        /// <param name="msDoc">The document whose text is indexed.</param>
        private void ProcessText(MacroscopeDocument msDoc)
        {
            bool CaseSensitive = MacroscopePreferencesManager.GetCaseSensitiveTextIndexing();

            string[] TextBlocks =
            {
                msDoc.GetTitle(),
                msDoc.GetDescription(),
                msDoc.GetKeywords(),
                msDoc.GetDocumentTextCleaned()
            };

            DebugMsg(string.Format("ProcessText: TextBlocks.Count: {0}", TextBlocks.Length));

            // Terms keeps first-seen order; SeenTerms gives O(1) duplicate detection so
            // that large documents are not de-duplicated with a quadratic list scan.
            List<string>    Terms     = new List<string>(256);
            HashSet<string> SeenTerms = new HashSet<string>();

            foreach (string TextBlock in TextBlocks)
            {
                // NOTE(review): the getters may never return null; the guard only
                // prevents Split from throwing if one of them does.
                if (TextBlock == null)
                {
                    continue;
                }

                foreach (string Fragment in TextBlock.Split(' '))
                {
                    if ((Fragment.Length > 0) && SeenTerms.Add(Fragment))
                    {
                        Terms.Add(Fragment);
                    }
                }
            }

            DebugMsg(string.Format("ProcessText: Words :: {0}", Terms.Count));

            string DocumentUrl = msDoc.GetUrl();

            foreach (string RawTerm in Terms)
            {
                // Case folding happens after de-duplication, so two spellings of one
                // word may both reach this point; the second simply finds the document
                // already registered.
                string Term = CaseSensitive ? RawTerm : RawTerm.ToLower();

                DebugMsg(string.Format("ProcessText: Term :: {0}", Term));

                Dictionary<string, MacroscopeDocument> DocumentReference;

                if (!this.InvertedIndex.TryGetValue(Term, out DocumentReference))
                {
                    DocumentReference = new Dictionary<string, MacroscopeDocument>();
                    this.InvertedIndex.Add(Term, DocumentReference);
                }

                if (!DocumentReference.ContainsKey(DocumentUrl))
                {
                    DocumentReference.Add(DocumentUrl, msDoc);
                }
            }
        }
コード例 #3
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes the description-analysis row for one document and colours
        /// its cells.
        /// </summary>
        /// <remarks>
        /// External documents, redirects, and documents that are neither HTML nor PDF are
        /// skipped. The row key combines the digests of the URL and of the description.
        /// An existing row in <c>DisplayListView</c> is updated in place; otherwise a new
        /// row is appended to <paramref name="ListViewItems"/>. Exceptions raised while
        /// filling the row are reported through <c>DebugMsg</c> and not rethrown.
        ///
        /// Colouring for internal documents: the length-dependent cells are red when the
        /// description length is outside the configured minimum/maximum and green
        /// otherwise; both language cells are red when the declared and detected languages
        /// differ. The language-mismatch highlight takes precedence over the length
        /// colouring on the detected-language cell. Non-internal documents are grey.
        /// </remarks>
        /// <param name="ListViewItems">Receives rows created by this call.</param>
        /// <param name="DocCollection">Used to count how often the description occurs.</param>
        /// <param name="msDoc">Document being rendered.</param>
        /// <param name="Url">URL shown in the URL column; also part of the row key.</param>
        protected override void RenderListView(
            List<ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
            {
                return;
            }

            switch (msDoc.GetDocumentType())
            {
            case MacroscopeConstants.DocumentType.HTML:
            case MacroscopeConstants.DocumentType.PDF:
                break;

            default:
                return;
            }

            ListViewItem lvItem            = null;
            int          Occurrences       = 0;
            string       PageLanguage      = msDoc.GetIsoLanguageCode();
            string       DetectedLanguage  = msDoc.GetDescriptionLanguage();
            string       Description       = msDoc.GetDescription();
            int          DescriptionLength = msDoc.GetDescriptionLength();

            // The key is built from the real description, before it may be replaced by
            // the "MISSING" placeholder below.
            string PairKey = string.Join(":", UrlToDigest(Url), UrlToDigest(Description));

            if (string.IsNullOrEmpty(PageLanguage))
            {
                PageLanguage = "";
            }

            if (string.IsNullOrEmpty(DetectedLanguage))
            {
                DetectedLanguage = "";
            }

            if (DescriptionLength > 0)
            {
                Occurrences = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
            }
            else
            {
                Description = "MISSING";
            }

            if (this.DisplayListView.Items.ContainsKey(PairKey))
            {
                try
                {
                    lvItem = this.DisplayListView.Items[PairKey];
                    lvItem.SubItems[ColUrl].Text              = Url;
                    lvItem.SubItems[ColPageLanguage].Text     = PageLanguage;
                    lvItem.SubItems[ColDetectedLanguage].Text = DetectedLanguage;
                    lvItem.SubItems[ColOccurences].Text       = Occurrences.ToString();
                    lvItem.SubItems[ColDescriptionText].Text  = Description;
                    lvItem.SubItems[ColLength].Text           = DescriptionLength.ToString();
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayDescriptions 1: {0}", ex.Message));
                }
            }
            else
            {
                try
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    lvItem.SubItems[ColUrl].Text = Url;
                    lvItem.SubItems.Add(PageLanguage);
                    lvItem.SubItems.Add(DetectedLanguage);
                    lvItem.SubItems.Add(Occurrences.ToString());
                    lvItem.SubItems.Add(Description);
                    lvItem.SubItems.Add(DescriptionLength.ToString());

                    ListViewItems.Add(lvItem);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayDescriptions 2: {0}", ex.Message));
                }
            }

            if (lvItem == null)
            {
                return;
            }

            lvItem.ForeColor = Color.Blue;

            if (!msDoc.GetIsInternal())
            {
                lvItem.SubItems[ColUrl].ForeColor              = Color.Gray;
                lvItem.SubItems[ColPageLanguage].ForeColor     = Color.Gray;
                lvItem.SubItems[ColDetectedLanguage].ForeColor = Color.Gray;
                lvItem.SubItems[ColOccurences].ForeColor       = Color.Gray;
                lvItem.SubItems[ColDescriptionText].ForeColor  = Color.Gray;
                lvItem.SubItems[ColLength].ForeColor           = Color.Gray;
                return;
            }

            // Description length ------------------------------------------------//

            bool LengthOutOfRange =
                (DescriptionLength < MacroscopePreferencesManager.GetDescriptionMinLen())
                || (DescriptionLength > MacroscopePreferencesManager.GetDescriptionMaxLen());

            Color LengthColor = LengthOutOfRange ? Color.Red : Color.Green;

            // The URL cell only turns red for a bad length; otherwise it stays green.
            lvItem.SubItems[ColUrl].ForeColor             = LengthOutOfRange ? Color.Red : Color.Green;
            lvItem.SubItems[ColOccurences].ForeColor      = LengthColor;
            lvItem.SubItems[ColDescriptionText].ForeColor = LengthColor;
            lvItem.SubItems[ColLength].ForeColor          = LengthColor;

            // Description language ----------------------------------------------//

            // Applied after the length colouring so that a language mismatch is never
            // painted over: previously the detected-language cell was reset to green
            // whenever the length was acceptable, hiding the mismatch.
            bool LanguagesDiffer = (DetectedLanguage != PageLanguage);

            lvItem.SubItems[ColPageLanguage].ForeColor     = LanguagesDiffer ? Color.Red : Color.Green;
            lvItem.SubItems[ColDetectedLanguage].ForeColor = LanguagesDiffer ? Color.Red : LengthColor;
        }
コード例 #4
0
        /** Render One ************************************************************/

        /// <summary>
        /// Adds or refreshes the structure-overview row for one document and colours
        /// its cells.
        /// </summary>
        /// <remarks>
        /// Runs under <c>DisplayListViewLock</c>. Cell values are first gathered into a
        /// dictionary keyed by column key, then written to the column that
        /// <c>DisplayListView.Columns.IndexOfKey</c> reports for each key. Keys with no
        /// matching column are reported through <c>DebugMsg</c> and skipped. Null values
        /// are rendered as empty text.
        ///
        /// Colouring: cells default to green for internal documents and grey otherwise.
        /// The status-code and status cells follow the HTTP status (2xx green, 3xx
        /// goldenrod, 4xx/5xx red, 410 purple, anything else blue). The robots cell is
        /// red for "disallowed" and green otherwise; the redirect cell is red for "true"
        /// and grey otherwise.
        /// </remarks>
        /// <param name="ListViewItems">Receives rows created by this call.</param>
        /// <param name="DocCollection">Not used by this implementation.</param>
        /// <param name="msDoc">Document being rendered.</param>
        /// <param name="Url">URL whose digest is the row key.</param>
        protected override void RenderListView(
            List<ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            lock (this.DisplayListViewLock)
            {
                Dictionary<string, string> StructureItems = new Dictionary<string, string>();

                ListViewItem lvItem = null;

                string TitleLanguage       = msDoc.GetTitleLanguage();
                string DescriptionLanguage = msDoc.GetDescriptionLanguage();
                string BodyTextLanguage    = msDoc.GetDocumentTextLanguage();
                int    StatusCode          = (int)msDoc.GetStatusCode();
                string PairKey             = UrlToDigest(Url).ToString();

                // BEGIN: Columns ----------------------------------------------------//

                // Values may be null here; they are normalised to "" when written to
                // the row below.

                StructureItems.Add(MacroscopeConstants.Url, msDoc.GetUrl());

                StructureItems.Add(MacroscopeConstants.StatusCode, StatusCode.ToString());
                StructureItems.Add(MacroscopeConstants.Status, msDoc.GetStatusCode().ToString());
                StructureItems.Add(MacroscopeConstants.IsRedirect, msDoc.GetIsRedirect().ToString());

                StructureItems.Add(MacroscopeConstants.RobotsRule, msDoc.GetAllowedByRobotsAsString());

                StructureItems.Add(MacroscopeConstants.Duration, msDoc.GetDurationInSecondsFormatted());

                StructureItems.Add(MacroscopeConstants.ContentType, msDoc.GetMimeType());

                StructureItems.Add(MacroscopeConstants.Charset, msDoc.GetCharacterSet());
                StructureItems.Add(MacroscopeConstants.Locale, msDoc.GetLocale());
                StructureItems.Add(MacroscopeConstants.Language, msDoc.GetIsoLanguageCode());

                StructureItems.Add(MacroscopeConstants.DateCrawled, msDoc.GetCrawledDate());

                StructureItems.Add(MacroscopeConstants.DateServer, msDoc.GetDateServer());
                StructureItems.Add(MacroscopeConstants.DateModified, msDoc.GetDateModified());
                StructureItems.Add(MacroscopeConstants.DateExpires, msDoc.GetDateExpires());

                StructureItems.Add(MacroscopeConstants.Canonical, msDoc.GetCanonical());

                StructureItems.Add(MacroscopeConstants.PageDepth, msDoc.GetDepth().ToString());

                StructureItems.Add(MacroscopeConstants.Inlinks, msDoc.CountInlinks().ToString());
                StructureItems.Add(MacroscopeConstants.Outlinks, msDoc.CountOutlinks().ToString());

                StructureItems.Add(MacroscopeConstants.HyperlinksIn, msDoc.CountHyperlinksIn().ToString());
                StructureItems.Add(MacroscopeConstants.HyperlinksOut, msDoc.CountHyperlinksOut().ToString());

                // TODO: The hyperlink ratio columns are disabled because computing them
                // per row is too slow:

                /*
                 * {
                 * List<decimal> HyperlinkRatio = DocCollection.GetDocumentHyperlinksRatio( Url: Url );
                 * StructureItems.Add( MacroscopeConstants.HyperlinksInRatio, string.Format( "{0:0.00}%", HyperlinkRatio[ 0 ] ) );
                 * StructureItems.Add( MacroscopeConstants.HyperlinksOutRatio, string.Format( "{0:0.00}%", HyperlinkRatio[ 1 ] ) );
                 * }
                 */

                StructureItems.Add(MacroscopeConstants.Author, msDoc.GetAuthor());

                StructureItems.Add(MacroscopeConstants.Title, msDoc.GetTitle());
                StructureItems.Add(MacroscopeConstants.TitleLen, msDoc.GetTitleLength().ToString());
                StructureItems.Add(MacroscopeConstants.TitleLang, TitleLanguage);

                StructureItems.Add(MacroscopeConstants.Description, msDoc.GetDescription());
                StructureItems.Add(MacroscopeConstants.DescriptionLen, msDoc.GetDescriptionLength().ToString());
                StructureItems.Add(MacroscopeConstants.DescriptionLang, DescriptionLanguage);

                StructureItems.Add(MacroscopeConstants.Keywords, msDoc.GetKeywords());
                StructureItems.Add(MacroscopeConstants.KeywordsLen, msDoc.GetKeywordsLength().ToString());
                StructureItems.Add(MacroscopeConstants.KeywordsCount, msDoc.GetKeywordsCount().ToString());

                StructureItems.Add(MacroscopeConstants.BodyTextLang, BodyTextLanguage);

                for (ushort HeadingLevel = 1; HeadingLevel <= MaxHeadingsDisplayed; HeadingLevel++)
                {
                    // Only the first heading of each level is shown.
                    List<string> HeadingList = msDoc.GetHeadings(HeadingLevel: HeadingLevel);
                    string       HeadingText = (HeadingList.Count > 0) ? HeadingList[0] : "";

                    StructureItems.Add(string.Format(MacroscopeConstants.Hn, HeadingLevel), HeadingText);
                }

                StructureItems.Add(MacroscopeConstants.ErrorCondition, msDoc.GetErrorCondition());

                // END: Columns ------------------------------------------------------//

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    lvItem = this.DisplayListView.Items[PairKey];
                }
                else
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    for (int i = 0; i < this.DisplayListView.Columns.Count; i++)
                    {
                        lvItem.SubItems.Add("");
                    }

                    ListViewItems.Add(lvItem);
                }

                // lvItem is always assigned by one of the two branches above.
                lvItem.ForeColor = Color.Blue;

                Color DefaultColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

                Color StatusColor;

                if (StatusCode == 410)
                {
                    StatusColor = Color.Purple;
                }
                else if ((StatusCode >= 200) && (StatusCode <= 299))
                {
                    StatusColor = Color.Green;
                }
                else if ((StatusCode >= 300) && (StatusCode <= 399))
                {
                    StatusColor = Color.Goldenrod;
                }
                else if ((StatusCode >= 400) && (StatusCode <= 599))
                {
                    StatusColor = Color.Red;
                }
                else
                {
                    StatusColor = Color.Blue;
                }

                foreach (KeyValuePair<string, string> Item in StructureItems)
                {
                    string ItemsKey = Item.Key;
                    int    ColIndex = this.DisplayListView.Columns.IndexOfKey(ItemsKey);

                    // IndexOfKey reports -1 for an unknown key; indexing SubItems with
                    // it would throw and abort rendering of the whole row.
                    if ((ColIndex < 0) || (ColIndex >= lvItem.SubItems.Count))
                    {
                        DebugMsg(string.Format("MacroscopeDisplayStructure: no column for \"{0}\"", ItemsKey));
                        continue;
                    }

                    string Text = string.IsNullOrEmpty(Item.Value) ? "" : Item.Value;
                    Color  CellColor = DefaultColor;

                    // Both status cells are coloured from the status code when their own
                    // key is processed. Previously the status cell was coloured during
                    // the status-code iteration and then reset to the default colour
                    // when the loop reached the status key itself.
                    if ((ItemsKey == MacroscopeConstants.StatusCode) || (ItemsKey == MacroscopeConstants.Status))
                    {
                        CellColor = StatusColor;
                    }
                    else if (ItemsKey == MacroscopeConstants.RobotsRule)
                    {
                        CellColor = string.Equals(Text, "disallowed", System.StringComparison.OrdinalIgnoreCase)
                            ? Color.Red
                            : Color.Green;
                    }
                    else if (ItemsKey == MacroscopeConstants.IsRedirect)
                    {
                        CellColor = string.Equals(Text, "true", System.StringComparison.OrdinalIgnoreCase)
                            ? Color.Red
                            : Color.Gray;
                    }

                    lvItem.SubItems[ColIndex].Text      = Text;
                    lvItem.SubItems[ColIndex].ForeColor = CellColor;
                }
            }
        }
コード例 #5
0
        /**************************************************************************/

        /// <summary>
        /// Reports, for each keyword in the document's keywords metatag, whether it
        /// occurs in the title, the description and the body text.
        /// </summary>
        /// <remarks>
        /// All text is lower-cased before matching. The metatag is split on commas and
        /// each entry is passed through <c>MacroscopeStringTools.CleanWhiteSpace</c>;
        /// entries that end up empty are ignored. Each remaining keyword is turned into a
        /// pattern by <c>GetPatternForLanguage</c> and matched as a regular expression.
        /// If anything throws while processing a keyword, the message is sent to
        /// <c>DebugMsg</c> and a <c>MALFORMED_KEYWORDS_METATAG</c> entry is recorded for
        /// that keyword.
        /// </remarks>
        /// <param name="msDoc">Document to analyse.</param>
        /// <returns>
        /// Keyword/status pairs in keyword order (title, description, body per keyword),
        /// or a single <c>KEYWORDS_METATAG_EMPTY</c> entry with an empty key when the
        /// metatag yields no keywords.
        /// </returns>
        public List<KeyValuePair<string, KEYWORD_STATUS>> AnalyzeKeywordPresence(MacroscopeDocument msDoc)
        {
            string metatagLowered     = msDoc.GetKeywords().ToLower();
            string titleLowered       = msDoc.GetTitle().ToLower();
            string descriptionLowered = msDoc.GetDescription().ToLower();
            string bodyLowered        = msDoc.GetDocumentTextCleaned().ToLower();

            List<string> candidates = new List<string>();
            List<KeyValuePair<string, KEYWORD_STATUS>> findings = new List<KeyValuePair<string, KEYWORD_STATUS>>();

            foreach (string rawEntry in metatagLowered.Split(','))
            {
                string tidied = MacroscopeStringTools.CleanWhiteSpace(rawEntry);

                if (tidied.Length > 0)
                {
                    candidates.Add(tidied);
                }
            }

            // Nothing usable in the metatag: report that and stop.
            if (candidates.Count == 0)
            {
                findings.Add(new KeyValuePair<string, KEYWORD_STATUS>("", KEYWORD_STATUS.KEYWORDS_METATAG_EMPTY));
                return(findings);
            }

            foreach (string candidate in candidates)
            {
                try
                {
                    string pattern = this.GetPatternForLanguage(msDoc: msDoc, Keyword: candidate);

                    KEYWORD_STATUS inTitle = Regex.IsMatch(titleLowered, pattern)
                        ? KEYWORD_STATUS.PRESENT_IN_TITLE
                        : KEYWORD_STATUS.MISSING_IN_TITLE;
                    findings.Add(new KeyValuePair<string, KEYWORD_STATUS>(candidate, inTitle));

                    KEYWORD_STATUS inDescription = Regex.IsMatch(descriptionLowered, pattern)
                        ? KEYWORD_STATUS.PRESENT_IN_DESCRIPTION
                        : KEYWORD_STATUS.MISSING_IN_DESCRIPTION;
                    findings.Add(new KeyValuePair<string, KEYWORD_STATUS>(candidate, inDescription));

                    KEYWORD_STATUS inBody = Regex.IsMatch(bodyLowered, pattern)
                        ? KEYWORD_STATUS.PRESENT_IN_BODY
                        : KEYWORD_STATUS.MISSING_IN_BODY;
                    findings.Add(new KeyValuePair<string, KEYWORD_STATUS>(candidate, inBody));
                }
                catch (Exception ex)
                {
                    this.DebugMsg(ex.Message);
                    findings.Add(new KeyValuePair<string, KEYWORD_STATUS>(candidate, KEYWORD_STATUS.MALFORMED_KEYWORDS_METATAG));
                }
            }

            return(findings);
        }
コード例 #6
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet listing the description of every internal, non-redirect HTML
        /// or PDF document, with colour-coded cells, and turns the filled range into a
        /// table.
        /// </summary>
        /// <remarks>
        /// Columns: URL, declared page language, language detected for the description,
        /// number of occurrences of the description in the collection, the description
        /// text, and its length. Language cells are red when the two languages differ;
        /// the occurrences cell is orange when the description occurs more than once; the
        /// description cell shows "MISSING" in red when the length is zero or less; the
        /// length cell is red when outside the configured minimum/maximum.
        /// </remarks>
        /// <param name="JobMaster">Supplies the document collection to report on.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetPageDescriptions(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            string[] ColumnHeadings =
            {
                "URL",
                "Page Language",
                "Detected Language",
                "Occurrences",
                "Description",
                "Description Length"
            };

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = ColumnHeadings.Length;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            for (iCol = 1; iCol <= iColMax; iCol++)
            {
                ws.Cell(iRow, iCol).Value = ColumnHeadings[iCol - 1];
            }

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                if (msDoc.GetIsExternal())
                {
                    continue;
                }

                if (msDoc.GetIsRedirect())
                {
                    continue;
                }

                if (!(msDoc.GetIsHtml() || msDoc.GetIsPdf()))
                {
                    continue;
                }

                iCol = 1;

                string Description       = msDoc.GetDescription();
                string PageLanguage      = msDoc.GetIsoLanguageCode();
                // This sheet reports on descriptions, so the detected language must be
                // the description's language (it previously read the title's language).
                string DetectedLanguage  = msDoc.GetDescriptionLanguage();
                int    Occurrences       = 0;
                int    DescriptionLength = msDoc.GetDescriptionLength();

                if (DescriptionLength > 0)
                {
                    Occurrences = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
                }

                // URL ---------------------------------------------------------------//

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                if (msDoc.GetIsInternal())
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                // Page language / detected language ---------------------------------//

                bool LanguagesDiffer = (PageLanguage != DetectedLanguage);

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(PageLanguage));

                if (LanguagesDiffer)
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(DetectedLanguage));

                if (LanguagesDiffer)
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }

                iCol++;

                // Occurrences -------------------------------------------------------//

                this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);

                if (Occurrences > 1)
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Orange);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }

                iCol++;

                // Description text --------------------------------------------------//

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Description));

                if (DescriptionLength <= 0)
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    ws.Cell(iRow, iCol).Value = "MISSING";
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }

                iCol++;

                // Description length ------------------------------------------------//

                this.InsertAndFormatContentCell(ws, iRow, iCol, DescriptionLength);

                if ((DescriptionLength < MacroscopePreferencesManager.GetDescriptionMinLen())
                    || (DescriptionLength > MacroscopePreferencesManager.GetDescriptionMaxLen()))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }

                iRow++;
            }

            // Header row plus every data row written above.
            ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
        }
コード例 #7
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> to the workbook and fills it
        /// with a bold header row followed by one row per key returned by the job's document
        /// collection. The populated range is finally turned into a table.
        /// </summary>
        /// <param name="JobMaster">Job whose document collection supplies the rows.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name used for the new worksheet.</param>
        private void BuildWorksheetOverview(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            const int headerRow = 1;

            // Column titles, in left-to-right order; the per-document cells below follow the same order.
            string[] columnTitles =
            {
                "URL",
                "Status Code",
                "Status",
                "Redirect",
                "Duration",
                "Crawled Date",
                "Server Date",
                "Modified Date",
                "Expires Date",
                "Content-Type",
                "Locale",
                "Language",
                "Canonical",
                "Page Depth",
                "Links In",
                "Links Out",
                "Hyperlinks In",
                "Hyperlinks Out",
                "Title",
                "Title Length",
                "Description",
                "Description Length",
                "Error Condition"
            };

            var sheet = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

            int lastColumn = columnTitles.Length;

            // Write every header title first ...
            for (int column = 1; column <= lastColumn; column++)
            {
                sheet.Cell(headerRow, column).Value = columnTitles[column - 1];
            }

            // ... then embolden the header cells.
            for (int column = 1; column <= lastColumn; column++)
            {
                sheet.Cell(headerRow, column).Style.Font.SetBold();
            }

            int row = headerRow + 1;

            foreach (string documentKey in documents.DocumentKeys())
            {
                MacroscopeDocument doc = documents.GetDocument(documentKey);

                // Each call writes into the current column and then advances to the next one.
                int column = 1;

                this.InsertAndFormatUrlCell(sheet, row, column++, doc);
                this.InsertAndFormatStatusCodeCell(sheet, row, column++, doc);

                // The "Status" column is filled from the string form of GetStatusCode().
                this.InsertAndFormatContentCell(sheet, row, column++, this.FormatIfMissing(doc.GetStatusCode().ToString()));
                this.InsertAndFormatRedirectCell(sheet, row, column++, doc);
                this.InsertAndFormatContentCell(sheet, row, column++, doc.GetDurationInSecondsFormatted());

                // Dates: crawled, server, modified, expires.
                this.InsertAndFormatDateCell(sheet, row, column++, doc.GetCrawledDate());
                this.InsertAndFormatDateCell(sheet, row, column++, doc.GetDateServer());
                this.InsertAndFormatDateCell(sheet, row, column++, doc.GetDateModified());
                this.InsertAndFormatDateCell(sheet, row, column++, doc.GetDateExpires());

                // Content type, locale, language and canonical.
                this.InsertAndFormatContentCell(sheet, row, column++, this.FormatIfMissing(doc.GetMimeType()));
                this.InsertAndFormatContentCell(sheet, row, column++, this.FormatIfMissing(doc.GetLocale()));
                this.InsertAndFormatContentCell(sheet, row, column++, this.FormatIfMissing(doc.GetIsoLanguageCode()));
                this.InsertAndFormatContentCell(sheet, row, column++, this.FormatIfMissing(doc.GetCanonical()));

                // Depth and link counts.
                this.InsertAndFormatContentCell(sheet, row, column++, doc.GetDepth().ToString());
                this.InsertAndFormatContentCell(sheet, row, column++, doc.CountInlinks());
                this.InsertAndFormatContentCell(sheet, row, column++, doc.CountOutlinks());
                this.InsertAndFormatContentCell(sheet, row, column++, doc.CountHyperlinksIn());
                this.InsertAndFormatContentCell(sheet, row, column++, doc.CountHyperlinksOut());

                // Title and description, each followed by its length.
                this.InsertAndFormatContentCell(sheet, row, column++, this.FormatIfMissing(doc.GetTitle()));
                this.InsertAndFormatContentCell(sheet, row, column++, doc.GetTitleLength().ToString());
                this.InsertAndFormatContentCell(sheet, row, column++, this.FormatIfMissing(doc.GetDescription()));
                this.InsertAndFormatContentCell(sheet, row, column++, doc.GetDescriptionLength());

                // Last column: no advance needed afterwards.
                this.InsertAndFormatContentCell(sheet, row, column, this.FormatIfMissing(doc.GetErrorCondition()));

                row++;
            }

            // "row" now points one past the last written row, hence row - 1.
            sheet.Range(headerRow, 1, row - 1, lastColumn).CreateTable();
        }
コード例 #8
0
        /**************************************************************************/

        /// <summary>
        /// Writes the overview report through the given CSV writer: a header record followed by
        /// one record per key returned by the job's document collection.
        /// </summary>
        /// <param name="JobMaster">Job whose document collection supplies the records.</param>
        /// <param name="ws">CSV writer that receives the fields and record terminators.</param>
        private void BuildWorksheetOverview(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

            // Header titles, in the same order as the fields written for each document below.
            string[] columnTitles =
            {
                "URL",
                "Status Code",
                "Status",
                "Redirect",
                "Duration",
                "Crawled Date",
                "Server Date",
                "Modified Date",
                "Expires Date",
                "Content-Type",
                "Locale",
                "Language",
                "Canonical",
                "Page Depth",
                "Links In",
                "Links Out",
                "Hyperlinks In",
                "Hyperlinks Out",
                "Title",
                "Title Length",
                "Description",
                "Description Length",
                "Error Condition"
            };

            foreach (string columnTitle in columnTitles)
            {
                ws.WriteField(columnTitle);
            }

            ws.NextRecord();

            foreach (string documentKey in documents.DocumentKeys())
            {
                MacroscopeDocument doc = documents.GetDocument(documentKey);

                // URL, status code, status text and redirect.
                this.InsertAndFormatUrlCell(ws, doc);
                this.InsertAndFormatStatusCodeCell(ws, doc);
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetStatusCode().ToString()));
                this.InsertAndFormatRedirectCell(ws, doc);

                // Duration and the four dates (crawled, server, modified, expires).
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetDurationInSecondsFormatted()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetCrawledDate()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetDateServer()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetDateModified()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetDateExpires()));

                // Content type, locale, language and canonical.
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetMimeType()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetLocale()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetIsoLanguageCode()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetCanonical()));

                // Depth and link counts, written as strings.
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetDepth().ToString()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.CountInlinks().ToString()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.CountOutlinks().ToString()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.CountHyperlinksIn().ToString()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.CountHyperlinksOut().ToString()));

                // Title and description, each followed by its length.
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetTitle()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetTitleLength().ToString()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetDescription()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetDescriptionLength().ToString()));

                // Error condition closes the record.
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(doc.GetErrorCondition()));

                ws.NextRecord();
            }
        }
コード例 #9
0
        /**************************************************************************/

        /// <summary>
        /// Writes the page-descriptions report through the given CSV writer: a header record,
        /// then one record for every document that is neither external nor a redirect and that
        /// is reported as HTML or PDF.
        /// </summary>
        /// <param name="JobMaster">Job whose document collection supplies the records.</param>
        /// <param name="ws">CSV writer that receives the fields and record terminators.</param>
        private void BuildWorksheetPageDescriptions(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

            string[] columnTitles =
            {
                "URL",
                "Page Language",
                "Detected Language",
                "Occurrences",
                "Description",
                "Description Length"
            };

            foreach (string columnTitle in columnTitles)
            {
                ws.WriteField(columnTitle);
            }

            ws.NextRecord();

            foreach (string documentKey in documents.DocumentKeys())
            {
                MacroscopeDocument doc = documents.GetDocument(documentKey);

                // External documents and redirects are never reported.
                if (doc.GetIsExternal())
                {
                    continue;
                }

                if (doc.GetIsRedirect())
                {
                    continue;
                }

                // Only HTML and PDF documents are reported; the PDF check runs only when the HTML check fails.
                if (!(doc.GetIsHtml() || doc.GetIsPdf()))
                {
                    continue;
                }

                string descriptionText = doc.GetDescription();
                string declaredLanguage = doc.GetIsoLanguageCode();

                // NOTE(review): the "Detected Language" column is taken from GetTitleLanguage() even
                // though this report is about descriptions - confirm that this is intended.
                string detectedLanguage = doc.GetTitleLanguage();

                int descriptionLength = doc.GetDescriptionLength();

                // The occurrence count is only looked up when the description length is positive.
                int occurrenceCount = (descriptionLength > 0)
                    ? documents.GetStatsDescriptionCount(msDoc: doc)
                    : 0;

                this.InsertAndFormatUrlCell(ws, doc);
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(declaredLanguage));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(detectedLanguage));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(occurrenceCount.ToString()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(descriptionText));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(descriptionLength.ToString()));

                ws.NextRecord();
            }
        }