/**************************************************************************/

    /// <summary>
    /// Renders a single document as a four-column row: URL, title,
    /// description and keywords.
    /// </summary>
    /// <remarks>
    /// When <c>DisplayListView</c> already holds a row keyed by the URL, that
    /// row's cells are overwritten in place. Otherwise a new row is built and
    /// appended to <paramref name="ListViewItems"/>. Exceptions raised while
    /// filling a row are reported through <c>DebugMsg</c> and not rethrown.
    /// </remarks>
    /// <param name="ListViewItems">Receives rows that do not exist in the list view yet.</param>
    /// <param name="DocCollection">Not used by this implementation.</param>
    /// <param name="msDoc">Document supplying the title, description and keywords.</param>
    /// <param name="Url">URL shown in the first column; also used as the row key.</param>
    protected override void RenderListView(
        List<ListViewItem> ListViewItems,
        MacroscopeDocumentCollection DocCollection,
        MacroscopeDocument msDoc,
        string Url
    )
    {
        string DocTitle = msDoc.GetTitle();
        string DocDescription = msDoc.GetDescription();
        string DocKeywords = msDoc.GetKeywords();
        string RowKey = string.Join("", Url);

        if (!this.DisplayListView.Items.ContainsKey(RowKey))
        {
            // No row for this key yet: build one and queue it for insertion.
            try
            {
                ListViewItem NewRow = new ListViewItem(RowKey);
                NewRow.UseItemStyleForSubItems = false;
                NewRow.Name = RowKey;
                NewRow.SubItems[0].Text = Url;

                foreach (string CellText in new string[] { DocTitle, DocDescription, DocKeywords })
                {
                    NewRow.SubItems.Add(CellText);
                }

                ListViewItems.Add(NewRow);
            }
            catch (Exception ex)
            {
                DebugMsg(string.Format("MacroscopeDisplaySearchCollection 2: {0}", ex.Message));
            }

            return;
        }

        // A row already exists: refresh its four cells.
        try
        {
            ListViewItem ExistingRow = this.DisplayListView.Items[RowKey];
            string[] CellTexts = new string[] { Url, DocTitle, DocDescription, DocKeywords };

            for (int Column = 0; Column < CellTexts.Length; Column++)
            {
                ExistingRow.SubItems[Column].Text = CellTexts[Column];
            }
        }
        catch (Exception ex)
        {
            DebugMsg(string.Format("MacroscopeDisplaySearchCollection 1: {0}", ex.Message));
        }
    }
/**************************************************************************/

    /// <summary>
    /// Splits the document's title, description, keywords and cleaned body
    /// text into space-separated terms and records the document under each
    /// distinct term in <c>InvertedIndex</c>.
    /// </summary>
    /// <remarks>
    /// Terms are lower-cased before indexing unless case-sensitive text
    /// indexing is enabled in the preferences. Text blocks that are null or
    /// empty are skipped instead of raising a <c>NullReferenceException</c>.
    /// </remarks>
    /// <param name="msDoc">Document whose text is added to the index.</param>
    private void ProcessText(MacroscopeDocument msDoc)
    {
        List<string> TextBlocks = new List<string>(16);
        List<string> Terms = new List<string>(256);
        // Membership set so de-duplication is O(1) per word; Terms keeps first-seen order.
        HashSet<string> SeenTerms = new HashSet<string>();
        bool CaseSensitive = MacroscopePreferencesManager.GetCaseSensitiveTextIndexing();

        TextBlocks.Add(msDoc.GetTitle());
        TextBlocks.Add(msDoc.GetDescription());
        TextBlocks.Add(msDoc.GetKeywords());
        TextBlocks.Add(msDoc.GetDocumentTextCleaned());

        DebugMsg(string.Format("ProcessText: TextBlocks.Count: {0}", TextBlocks.Count));

        foreach (string TextBlock in TextBlocks)
        {
            // A document may lack any of these fields; there is nothing to index then.
            if (string.IsNullOrEmpty(TextBlock))
            {
                continue;
            }

            foreach (string Word in TextBlock.Split(' '))
            {
                // Consecutive spaces yield empty chunks; HashSet.Add returns false for repeats.
                if ((Word.Length > 0) && SeenTerms.Add(Word))
                {
                    Terms.Add(Word);
                }
            }
        }

        DebugMsg(string.Format("ProcessText: Words :: {0}", Terms.Count));

        string DocumentUrl = msDoc.GetUrl();

        for (int i = 0; i < Terms.Count; i++)
        {
            Dictionary<string, MacroscopeDocument> DocumentReference;
            string Term = Terms[i];

            if (!CaseSensitive)
            {
                // NOTE(review): ToLower() is culture-sensitive; kept so the keys stay
                // compatible with whatever code queries the index - confirm before changing.
                Term = Term.ToLower();
            }

            DebugMsg(string.Format("ProcessText: Term :: {0}", Term));

            if (this.InvertedIndex.ContainsKey(Term))
            {
                DocumentReference = this.InvertedIndex[Term];
            }
            else
            {
                DocumentReference = new Dictionary<string, MacroscopeDocument>();
                this.InvertedIndex.Add(Term, DocumentReference);
            }

            if (!DocumentReference.ContainsKey(DocumentUrl))
            {
                DocumentReference.Add(DocumentUrl, msDoc);
            }
        }
    }
/**************************************************************************/

    /// <summary>
    /// Renders the description row for one document: URL, declared page
    /// language, detected description language, occurrence count,
    /// description text and description length.
    /// </summary>
    /// <remarks>
    /// External documents, redirects and anything that is neither HTML nor
    /// PDF are ignored. An existing row (matched by a key built from the URL
    /// and description digests) is updated in place; otherwise a new row is
    /// appended to <paramref name="ListViewItems"/>. Cells are then coloured
    /// according to whether the document is internal, whether the two
    /// languages agree, and whether the description length lies within the
    /// configured minimum and maximum.
    /// </remarks>
    /// <param name="ListViewItems">Receives rows that do not exist in the list view yet.</param>
    /// <param name="DocCollection">Collection queried for the description occurrence count.</param>
    /// <param name="msDoc">Document being rendered.</param>
    /// <param name="Url">URL displayed for the document.</param>
    protected override void RenderListView(
        List<ListViewItem> ListViewItems,
        MacroscopeDocumentCollection DocCollection,
        MacroscopeDocument msDoc,
        string Url
    )
    {
        if (msDoc.GetIsExternal())
        {
            return;
        }

        if (msDoc.GetIsRedirect())
        {
            return;
        }

        var DocType = msDoc.GetDocumentType();
        bool IsRenderable =
            (DocType == MacroscopeConstants.DocumentType.HTML)
            || (DocType == MacroscopeConstants.DocumentType.PDF);

        if (!IsRenderable)
        {
            return;
        }

        ListViewItem Row = null;
        int Hits = 0;
        string PageLang = msDoc.GetIsoLanguageCode();
        string DetectedLang = msDoc.GetDescriptionLanguage();
        string DescriptionText = msDoc.GetDescription();
        int DescriptionLen = msDoc.GetDescriptionLength();
        string RowKey = string.Join(":", UrlToDigest(Url), UrlToDigest(DescriptionText));

        // Missing languages are shown as blank cells.
        if (string.IsNullOrEmpty(PageLang))
        {
            PageLang = "";
        }

        if (string.IsNullOrEmpty(DetectedLang))
        {
            DetectedLang = "";
        }

        if (DescriptionLen <= 0)
        {
            DescriptionText = "MISSING";
        }
        else
        {
            Hits = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
        }

        if (!this.DisplayListView.Items.ContainsKey(RowKey))
        {
            try
            {
                Row = new ListViewItem(RowKey);
                Row.UseItemStyleForSubItems = false;
                Row.Name = RowKey;
                Row.SubItems[ColUrl].Text = Url;
                Row.SubItems.Add(PageLang);
                Row.SubItems.Add(DetectedLang);
                Row.SubItems.Add(Hits.ToString());
                Row.SubItems.Add(DescriptionText);
                Row.SubItems.Add(DescriptionLen.ToString());
                ListViewItems.Add(Row);
            }
            catch (Exception ex)
            {
                DebugMsg(string.Format("MacroscopeDisplayDescriptions 2: {0}", ex.Message));
            }
        }
        else
        {
            try
            {
                Row = this.DisplayListView.Items[RowKey];
                Row.SubItems[ColUrl].Text = Url;
                Row.SubItems[ColPageLanguage].Text = PageLang;
                Row.SubItems[ColDetectedLanguage].Text = DetectedLang;
                Row.SubItems[ColOccurences].Text = Hits.ToString();
                Row.SubItems[ColDescriptionText].Text = DescriptionText;
                Row.SubItems[ColLength].Text = DescriptionLen.ToString();
            }
            catch (Exception ex)
            {
                DebugMsg(string.Format("MacroscopeDisplayDescriptions 1: {0}", ex.Message));
            }
        }

        if (Row == null)
        {
            return;
        }

        Row.ForeColor = Color.Blue;

        if (!msDoc.GetIsInternal())
        {
            // Non-internal documents are greyed out across the whole row.
            Row.SubItems[ColUrl].ForeColor = Color.Gray;
            Row.SubItems[ColPageLanguage].ForeColor = Color.Gray;
            Row.SubItems[ColDetectedLanguage].ForeColor = Color.Gray;
            Row.SubItems[ColOccurences].ForeColor = Color.Gray;
            Row.SubItems[ColDescriptionText].ForeColor = Color.Gray;
            Row.SubItems[ColLength].ForeColor = Color.Gray;
            return;
        }

        // URL and language columns: red when declared and detected languages differ.
        Color LanguageColour = (DetectedLang != PageLang) ? Color.Red : Color.Green;

        Row.SubItems[ColUrl].ForeColor = Color.Green;
        Row.SubItems[ColPageLanguage].ForeColor = LanguageColour;
        Row.SubItems[ColDetectedLanguage].ForeColor = LanguageColour;

        // Length check: the detected-language cell is recoloured here as well,
        // so its final colour reflects the length verdict.
        bool LengthOutOfRange =
            (DescriptionLen < MacroscopePreferencesManager.GetDescriptionMinLen())
            || (DescriptionLen > MacroscopePreferencesManager.GetDescriptionMaxLen());
        Color LengthColour = LengthOutOfRange ? Color.Red : Color.Green;

        if (LengthOutOfRange)
        {
            Row.SubItems[ColUrl].ForeColor = Color.Red;
        }

        Row.SubItems[ColOccurences].ForeColor = LengthColour;
        Row.SubItems[ColDetectedLanguage].ForeColor = LengthColour;
        Row.SubItems[ColDescriptionText].ForeColor = LengthColour;
        Row.SubItems[ColLength].ForeColor = LengthColour;
    }
/** Render One ************************************************************/

    /// <summary>
    /// Renders the structure overview row for one document, writing each
    /// value into the list view column whose key matches the value's
    /// <c>MacroscopeConstants</c> name.
    /// </summary>
    /// <remarks>
    /// The whole operation runs under <c>DisplayListViewLock</c>. An existing
    /// row (keyed by the URL digest) is reused; otherwise a new row with one
    /// blank sub-item per column is appended to
    /// <paramref name="ListViewItems"/>.
    /// <para>
    /// Values whose column is not present in the list view are skipped rather
    /// than indexing sub-item -1. The status colour is applied to the
    /// status-code and status columns after all other cells have been
    /// coloured, so it does not depend on dictionary enumeration order and is
    /// not overwritten by the generic internal/external colour.
    /// </para>
    /// </remarks>
    /// <param name="ListViewItems">Receives rows that do not exist in the list view yet.</param>
    /// <param name="DocCollection">Not used by this implementation.</param>
    /// <param name="msDoc">Document being rendered.</param>
    /// <param name="Url">URL used to derive the row key.</param>
    protected override void RenderListView(
        List<ListViewItem> ListViewItems,
        MacroscopeDocumentCollection DocCollection,
        MacroscopeDocument msDoc,
        string Url
    )
    {
        lock (this.DisplayListViewLock)
        {
            Dictionary<string, string> StructureItems = new Dictionary<string, string>();
            ListViewItem lvItem = null;

            string TitleLanguage = msDoc.GetTitleLanguage() ?? "";
            string DescriptionLanguage = msDoc.GetDescriptionLanguage() ?? "";
            string BodyTextLanguage = msDoc.GetDocumentTextLanguage() ?? "";
            int StatusCode = (int)msDoc.GetStatusCode();
            string PairKey = UrlToDigest(Url).ToString();

            // BEGIN: Columns ----------------------------------------------------//

            StructureItems.Add(MacroscopeConstants.Url, msDoc.GetUrl());
            StructureItems.Add(MacroscopeConstants.StatusCode, StatusCode.ToString());
            StructureItems.Add(MacroscopeConstants.Status, msDoc.GetStatusCode().ToString());
            StructureItems.Add(MacroscopeConstants.IsRedirect, msDoc.GetIsRedirect().ToString());
            StructureItems.Add(MacroscopeConstants.RobotsRule, msDoc.GetAllowedByRobotsAsString());
            StructureItems.Add(MacroscopeConstants.Duration, msDoc.GetDurationInSecondsFormatted());
            StructureItems.Add(MacroscopeConstants.ContentType, msDoc.GetMimeType());
            StructureItems.Add(MacroscopeConstants.Charset, msDoc.GetCharacterSet() ?? "");
            StructureItems.Add(MacroscopeConstants.Locale, msDoc.GetLocale() ?? "");
            StructureItems.Add(MacroscopeConstants.Language, msDoc.GetIsoLanguageCode() ?? "");

            StructureItems.Add(MacroscopeConstants.DateCrawled, msDoc.GetCrawledDate());
            StructureItems.Add(MacroscopeConstants.DateServer, msDoc.GetDateServer());
            StructureItems.Add(MacroscopeConstants.DateModified, msDoc.GetDateModified());
            StructureItems.Add(MacroscopeConstants.DateExpires, msDoc.GetDateExpires());
            StructureItems.Add(MacroscopeConstants.Canonical, msDoc.GetCanonical());
            StructureItems.Add(MacroscopeConstants.PageDepth, msDoc.GetDepth().ToString());
            StructureItems.Add(MacroscopeConstants.Inlinks, msDoc.CountInlinks().ToString());
            StructureItems.Add(MacroscopeConstants.Outlinks, msDoc.CountOutlinks().ToString());
            StructureItems.Add(MacroscopeConstants.HyperlinksIn, msDoc.CountHyperlinksIn().ToString());
            StructureItems.Add(MacroscopeConstants.HyperlinksOut, msDoc.CountHyperlinksOut().ToString());

            // TODO: The HyperlinksInRatio / HyperlinksOutRatio columns are disabled because
            // DocCollection.GetDocumentHyperlinksRatio( Url: Url ) is too slow to call per row.

            StructureItems.Add(MacroscopeConstants.Author, msDoc.GetAuthor());
            StructureItems.Add(MacroscopeConstants.Title, msDoc.GetTitle());
            StructureItems.Add(MacroscopeConstants.TitleLen, msDoc.GetTitleLength().ToString());
            StructureItems.Add(MacroscopeConstants.TitleLang, TitleLanguage);
            StructureItems.Add(MacroscopeConstants.Description, msDoc.GetDescription());
            StructureItems.Add(MacroscopeConstants.DescriptionLen, msDoc.GetDescriptionLength().ToString());
            StructureItems.Add(MacroscopeConstants.DescriptionLang, DescriptionLanguage);
            StructureItems.Add(MacroscopeConstants.Keywords, msDoc.GetKeywords());
            StructureItems.Add(MacroscopeConstants.KeywordsLen, msDoc.GetKeywordsLength().ToString());
            StructureItems.Add(MacroscopeConstants.KeywordsCount, msDoc.GetKeywordsCount().ToString());
            StructureItems.Add(MacroscopeConstants.BodyTextLang, BodyTextLanguage);

            // Only the first heading of each level is displayed.
            for (ushort HeadingLevel = 1; HeadingLevel <= MaxHeadingsDisplayed; HeadingLevel++)
            {
                List<string> HeadingList = msDoc.GetHeadings(HeadingLevel: HeadingLevel);
                string HeadingText = "";

                if ((HeadingList != null) && (HeadingList.Count > 0))
                {
                    HeadingText = HeadingList[0];
                }

                StructureItems.Add(string.Format(MacroscopeConstants.Hn, HeadingLevel), HeadingText);
            }

            StructureItems.Add(MacroscopeConstants.ErrorCondition, msDoc.GetErrorCondition());

            // END: Columns ------------------------------------------------------//

            if (this.DisplayListView.Items.ContainsKey(PairKey))
            {
                lvItem = this.DisplayListView.Items[PairKey];
            }
            else
            {
                lvItem = new ListViewItem(PairKey);
                lvItem.UseItemStyleForSubItems = false;
                lvItem.Name = PairKey;

                for (int i = 0; i < this.DisplayListView.Columns.Count; i++)
                {
                    lvItem.SubItems.Add("");
                }

                ListViewItems.Add(lvItem);
            }

            if (lvItem == null)
            {
                DebugMsg(string.Format("MacroscopeDisplayStructure: {0}", "lvItem is NULL"));
                return;
            }

            lvItem.ForeColor = Color.Blue;

            Color DefaultColour = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

            foreach (KeyValuePair<string, string> Entry in StructureItems)
            {
                int ColIndex = this.DisplayListView.Columns.IndexOfKey(Entry.Key);

                // IndexOfKey yields -1 for a column that is not in the list view;
                // skip the value instead of indexing outside the sub-item range.
                if ((ColIndex < 0) || (ColIndex >= lvItem.SubItems.Count))
                {
                    continue;
                }

                string Text = Entry.Value ?? "";
                ListViewItem.ListViewSubItem Cell = lvItem.SubItems[ColIndex];

                Cell.Text = Text;
                Cell.ForeColor = DefaultColour;

                // Ordinal, case-insensitive comparison: independent of the current
                // culture's casing rules.
                if (Entry.Key == MacroscopeConstants.RobotsRule)
                {
                    Cell.ForeColor = string.Equals(Text, "disallowed", System.StringComparison.OrdinalIgnoreCase)
                        ? Color.Red
                        : Color.Green;
                }
                else if (Entry.Key == MacroscopeConstants.IsRedirect)
                {
                    Cell.ForeColor = string.Equals(Text, "true", System.StringComparison.OrdinalIgnoreCase)
                        ? Color.Red
                        : Color.Gray;
                }
            }

            // Status colouring is applied last so that the generic per-cell colour
            // assigned in the loop above cannot overwrite it.
            Color StatusColour;

            if (StatusCode == 410)
            {
                StatusColour = Color.Purple;
            }
            else if ((StatusCode >= 200) && (StatusCode <= 299))
            {
                StatusColour = Color.Green;
            }
            else if ((StatusCode >= 300) && (StatusCode <= 399))
            {
                StatusColour = Color.Goldenrod;
            }
            else if ((StatusCode >= 400) && (StatusCode <= 599))
            {
                StatusColour = Color.Red;
            }
            else
            {
                StatusColour = Color.Blue;
            }

            int StatusCodeColIndex = this.DisplayListView.Columns.IndexOfKey(MacroscopeConstants.StatusCode);
            int StatusColIndex = this.DisplayListView.Columns.IndexOfKey(MacroscopeConstants.Status);

            if ((StatusCodeColIndex >= 0) && (StatusCodeColIndex < lvItem.SubItems.Count))
            {
                lvItem.SubItems[StatusCodeColIndex].ForeColor = StatusColour;
            }

            if ((StatusColIndex >= 0) && (StatusColIndex < lvItem.SubItems.Count))
            {
                lvItem.SubItems[StatusColIndex].ForeColor = StatusColour;
            }
        }
    }
/**************************************************************************/

    /// <summary>
    /// Reports, for every keyword in the document's keywords metatag, whether
    /// it occurs in the title, the description and the cleaned body text.
    /// </summary>
    /// <remarks>
    /// All text is lower-cased before matching. Each keyword is turned into a
    /// regular expression by <c>GetPatternForLanguage</c>. A keyword whose
    /// pattern cannot be built or evaluated is logged through
    /// <c>DebugMsg</c> and yields a single
    /// <c>MALFORMED_KEYWORDS_METATAG</c> entry. A missing (null) title,
    /// description, body or keywords value is treated as empty text.
    /// </remarks>
    /// <param name="msDoc">Document to analyse.</param>
    /// <returns>
    /// Three entries per keyword (title, description, body) in metatag order,
    /// or a single <c>KEYWORDS_METATAG_EMPTY</c> entry when the metatag holds
    /// no usable keyword.
    /// </returns>
    public List<KeyValuePair<string, KEYWORD_STATUS>> AnalyzeKeywordPresence(MacroscopeDocument msDoc)
    {
        string Keywords = (msDoc.GetKeywords() ?? "").ToLower();
        string TitleText = (msDoc.GetTitle() ?? "").ToLower();
        string DescriptionText = (msDoc.GetDescription() ?? "").ToLower();
        string BodyText = (msDoc.GetDocumentTextCleaned() ?? "").ToLower();

        List<string> KeywordsList = new List<string>();
        List<KeyValuePair<string, KEYWORD_STATUS>> KeywordPresence = new List<KeyValuePair<string, KEYWORD_STATUS>>();

        foreach (string Keyword in Keywords.Split(','))
        {
            string KeywordCleaned = MacroscopeStringTools.CleanWhiteSpace(Keyword);

            if (!string.IsNullOrEmpty(KeywordCleaned))
            {
                KeywordsList.Add(KeywordCleaned);
            }
        }

        if (KeywordsList.Count == 0)
        {
            KeywordPresence.Add(new KeyValuePair<string, KEYWORD_STATUS>("", KEYWORD_STATUS.KEYWORDS_METATAG_EMPTY));
            return (KeywordPresence);
        }

        foreach (string Keyword in KeywordsList)
        {
            try
            {
                string kw = this.GetPatternForLanguage(msDoc: msDoc, Keyword: Keyword);

                // Build the expression once and reuse it for all three texts.
                Regex KeywordRegex = new Regex(kw);

                // Evaluate every match before recording anything, so a failure
                // never leaves a partial set of statuses for this keyword.
                bool InTitle = KeywordRegex.IsMatch(TitleText);
                bool InDescription = KeywordRegex.IsMatch(DescriptionText);
                bool InBody = KeywordRegex.IsMatch(BodyText);

                KeywordPresence.Add(new KeyValuePair<string, KEYWORD_STATUS>(
                    Keyword,
                    InTitle ? KEYWORD_STATUS.PRESENT_IN_TITLE : KEYWORD_STATUS.MISSING_IN_TITLE));

                KeywordPresence.Add(new KeyValuePair<string, KEYWORD_STATUS>(
                    Keyword,
                    InDescription ? KEYWORD_STATUS.PRESENT_IN_DESCRIPTION : KEYWORD_STATUS.MISSING_IN_DESCRIPTION));

                KeywordPresence.Add(new KeyValuePair<string, KEYWORD_STATUS>(
                    Keyword,
                    InBody ? KEYWORD_STATUS.PRESENT_IN_BODY : KEYWORD_STATUS.MISSING_IN_BODY));
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
                KeywordPresence.Add(new KeyValuePair<string, KEYWORD_STATUS>(Keyword, KEYWORD_STATUS.MALFORMED_KEYWORDS_METATAG));
            }
        }

        return (KeywordPresence);
    }
/**************************************************************************/

    /// <summary>
    /// Adds a worksheet listing the description of every internal,
    /// non-redirect HTML or PDF document, and converts the written range into
    /// a table.
    /// </summary>
    /// <remarks>
    /// Columns: URL, Page Language, Detected Language, Occurrences,
    /// Description, Description Length. The "Detected Language" column holds
    /// the language detected for the description, as the descriptions list
    /// view does. Cells are coloured red where the declared and detected
    /// languages differ, where the description is missing, or where its
    /// length falls outside the configured minimum/maximum; an occurrence
    /// count above one is coloured orange.
    /// </remarks>
    /// <param name="JobMaster">Job whose document collection is reported.</param>
    /// <param name="wb">Workbook receiving the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetPageDescriptions(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        var ws = wb.Worksheets.Add(WorksheetLabel);

        int iRow = 1;
        int iCol = 1;
        int iColMax = 1;

        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

        {
            ws.Cell(iRow, iCol).Value = "URL";
            iCol++;
            ws.Cell(iRow, iCol).Value = "Page Language";
            iCol++;
            ws.Cell(iRow, iCol).Value = "Detected Language";
            iCol++;
            ws.Cell(iRow, iCol).Value = "Occurrences";
            iCol++;
            ws.Cell(iRow, iCol).Value = "Description";
            iCol++;
            ws.Cell(iRow, iCol).Value = "Description Length";
        }

        iColMax = iCol;
        iRow++;

        foreach (string Url in DocCollection.DocumentKeys())
        {
            MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

            if (msDoc.GetIsExternal())
            {
                continue;
            }

            if (msDoc.GetIsRedirect())
            {
                continue;
            }

            if (!msDoc.GetIsHtml() && !msDoc.GetIsPdf())
            {
                continue;
            }

            iCol = 1;

            string Description = msDoc.GetDescription();
            string PageLanguage = msDoc.GetIsoLanguageCode();
            // This sheet reports descriptions, so compare against the language detected
            // for the description rather than the title.
            string DetectedLanguage = msDoc.GetDescriptionLanguage();
            int Occurrences = 0;
            int DescriptionLength = msDoc.GetDescriptionLength();

            if (DescriptionLength > 0)
            {
                Occurrences = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
            }

            XLColor LanguageColour = (PageLanguage != DetectedLanguage) ? XLColor.Red : XLColor.Green;

            // URL
            this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray);
            iCol++;

            // Page Language
            this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(PageLanguage));
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(LanguageColour);
            iCol++;

            // Detected Language
            this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(DetectedLanguage));
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(LanguageColour);
            iCol++;

            // Occurrences: more than one means the description is shared with another document.
            this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);
            ws.Cell(iRow, iCol).Style.Font.SetFontColor((Occurrences > 1) ? XLColor.Orange : XLColor.Green);
            iCol++;

            // Description
            this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Description));

            if (DescriptionLength <= 0)
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                ws.Cell(iRow, iCol).Value = "MISSING";
            }
            else
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
            }

            iCol++;

            // Description Length
            this.InsertAndFormatContentCell(ws, iRow, iCol, DescriptionLength);

            bool LengthOutOfRange =
                (DescriptionLength < MacroscopePreferencesManager.GetDescriptionMinLen())
                || (DescriptionLength > MacroscopePreferencesManager.GetDescriptionMaxLen());

            ws.Cell(iRow, iCol).Style.Font.SetFontColor(LengthOutOfRange ? XLColor.Red : XLColor.Green);

            iRow++;
        }

        {
            var rangeData = ws.Range(1, 1, iRow - 1, iColMax);
            rangeData.CreateTable();
        }
    }
/**************************************************************************/

    /// <summary>
    /// Adds an overview worksheet with one row per document in the job's
    /// collection and converts the written range into a table.
    /// </summary>
    /// <remarks>
    /// The first row holds 23 bold column titles; each following row holds
    /// the matching values for one document, written left to right.
    /// </remarks>
    /// <param name="JobMaster">Job whose document collection is reported.</param>
    /// <param name="wb">Workbook receiving the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetOverview(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        var ws = wb.Worksheets.Add(WorksheetLabel);
        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

        string[] ColumnTitles = new string[]
        {
            "URL",
            "Status Code",
            "Status",
            "Redirect",
            "Duration",
            "Crawled Date",
            "Server Date",
            "Modified Date",
            "Expires Date",
            "Content-Type",
            "Locale",
            "Language",
            "Canonical",
            "Page Depth",
            "Links In",
            "Links Out",
            "Hyperlinks In",
            "Hyperlinks Out",
            "Title",
            "Title Length",
            "Description",
            "Description Length",
            "Error Condition"
        };

        int LastColumn = ColumnTitles.Length;
        int RowNumber = 1;

        // Header row: write every title first, then embolden the cells.
        for (int Column = 1; Column <= LastColumn; Column++)
        {
            ws.Cell(RowNumber, Column).Value = ColumnTitles[Column - 1];
        }

        for (int Column = 1; Column <= LastColumn; Column++)
        {
            ws.Cell(RowNumber, Column).Style.Font.SetBold();
        }

        RowNumber++;

        foreach (string DocKey in DocCollection.DocumentKeys())
        {
            MacroscopeDocument msDoc = DocCollection.GetDocument(DocKey);

            // Column cursor; pre-incremented so the first cell lands in column 1.
            int Column = 0;

            this.InsertAndFormatUrlCell(ws, RowNumber, ++Column, msDoc);
            this.InsertAndFormatStatusCodeCell(ws, RowNumber, ++Column, msDoc);
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));
            this.InsertAndFormatRedirectCell(ws, RowNumber, ++Column, msDoc);
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, msDoc.GetDurationInSecondsFormatted());

            this.InsertAndFormatDateCell(ws, RowNumber, ++Column, msDoc.GetCrawledDate());
            this.InsertAndFormatDateCell(ws, RowNumber, ++Column, msDoc.GetDateServer());
            this.InsertAndFormatDateCell(ws, RowNumber, ++Column, msDoc.GetDateModified());
            this.InsertAndFormatDateCell(ws, RowNumber, ++Column, msDoc.GetDateExpires());

            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, this.FormatIfMissing(msDoc.GetMimeType()));
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, this.FormatIfMissing(msDoc.GetLocale()));
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, this.FormatIfMissing(msDoc.GetIsoLanguageCode()));
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, this.FormatIfMissing(msDoc.GetCanonical()));

            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, msDoc.GetDepth().ToString());
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, msDoc.CountInlinks());
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, msDoc.CountOutlinks());
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, msDoc.CountHyperlinksIn());
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, msDoc.CountHyperlinksOut());

            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, this.FormatIfMissing(msDoc.GetTitle()));
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, msDoc.GetTitleLength().ToString());
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, this.FormatIfMissing(msDoc.GetDescription()));
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, msDoc.GetDescriptionLength());
            this.InsertAndFormatContentCell(ws, RowNumber, ++Column, this.FormatIfMissing(msDoc.GetErrorCondition()));

            RowNumber++;
        }

        // Everything written so far (header plus data rows) becomes one table.
        ws.Range(1, 1, RowNumber - 1, LastColumn).CreateTable();
    }
/**************************************************************************/

    /// <summary>
    /// Writes the overview report as CSV: a header record followed by one
    /// record per document in the job's collection.
    /// </summary>
    /// <remarks>
    /// Each record holds 23 fields in the same order as the header. Apart
    /// from the URL, status-code and redirect fields, which have dedicated
    /// writers, every value is passed through <c>FormatIfMissing</c> before
    /// it is written.
    /// </remarks>
    /// <param name="JobMaster">Job whose document collection is reported.</param>
    /// <param name="ws">CSV writer receiving the records.</param>
    private void BuildWorksheetOverview(
        MacroscopeJobMaster JobMaster,
        CsvWriter ws
    )
    {
        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

        string[] ColumnTitles = new string[]
        {
            "URL",
            "Status Code",
            "Status",
            "Redirect",
            "Duration",
            "Crawled Date",
            "Server Date",
            "Modified Date",
            "Expires Date",
            "Content-Type",
            "Locale",
            "Language",
            "Canonical",
            "Page Depth",
            "Links In",
            "Links Out",
            "Hyperlinks In",
            "Hyperlinks Out",
            "Title",
            "Title Length",
            "Description",
            "Description Length",
            "Error Condition"
        };

        foreach (string ColumnTitle in ColumnTitles)
        {
            ws.WriteField(ColumnTitle);
        }

        ws.NextRecord();

        foreach (string Key in DocCollection.DocumentKeys())
        {
            MacroscopeDocument msDoc = DocCollection.GetDocument(Key);

            this.InsertAndFormatUrlCell(ws, msDoc);
            this.InsertAndFormatStatusCodeCell(ws, msDoc);
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));
            this.InsertAndFormatRedirectCell(ws, msDoc);

            // Remaining fields are plain text, listed in header order.
            string[] TextFields = new string[]
            {
                msDoc.GetDurationInSecondsFormatted(),
                msDoc.GetCrawledDate(),
                msDoc.GetDateServer(),
                msDoc.GetDateModified(),
                msDoc.GetDateExpires(),
                msDoc.GetMimeType(),
                msDoc.GetLocale(),
                msDoc.GetIsoLanguageCode(),
                msDoc.GetCanonical(),
                msDoc.GetDepth().ToString(),
                msDoc.CountInlinks().ToString(),
                msDoc.CountOutlinks().ToString(),
                msDoc.CountHyperlinksIn().ToString(),
                msDoc.CountHyperlinksOut().ToString(),
                msDoc.GetTitle(),
                msDoc.GetTitleLength().ToString(),
                msDoc.GetDescription(),
                msDoc.GetDescriptionLength().ToString(),
                msDoc.GetErrorCondition()
            };

            foreach (string TextField in TextFields)
            {
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(TextField));
            }

            ws.NextRecord();
        }
    }
/**************************************************************************/

    /// <summary>
    /// Writes the page descriptions report as CSV: a header record followed
    /// by one record per internal, non-redirect HTML or PDF document.
    /// </summary>
    /// <remarks>
    /// Fields: URL, Page Language, Detected Language, Occurrences,
    /// Description, Description Length. The "Detected Language" field holds
    /// the language detected for the description, as the descriptions list
    /// view does. The occurrence count is only looked up for documents with a
    /// non-empty description; otherwise it is written as 0.
    /// </remarks>
    /// <param name="JobMaster">Job whose document collection is reported.</param>
    /// <param name="ws">CSV writer receiving the records.</param>
    private void BuildWorksheetPageDescriptions(
        MacroscopeJobMaster JobMaster,
        CsvWriter ws
    )
    {
        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

        {
            ws.WriteField("URL");
            ws.WriteField("Page Language");
            ws.WriteField("Detected Language");
            ws.WriteField("Occurrences");
            ws.WriteField("Description");
            ws.WriteField("Description Length");
            ws.NextRecord();
        }

        foreach (string Url in DocCollection.DocumentKeys())
        {
            MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

            if (msDoc.GetIsExternal())
            {
                continue;
            }

            if (msDoc.GetIsRedirect())
            {
                continue;
            }

            if (!msDoc.GetIsHtml() && !msDoc.GetIsPdf())
            {
                continue;
            }

            string Description = msDoc.GetDescription();
            string PageLanguage = msDoc.GetIsoLanguageCode();
            // This report covers descriptions, so use the language detected for the
            // description rather than the title.
            string DetectedLanguage = msDoc.GetDescriptionLanguage();
            int Occurrences = 0;
            int DescriptionLength = msDoc.GetDescriptionLength();

            if (DescriptionLength > 0)
            {
                Occurrences = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
            }

            this.InsertAndFormatUrlCell(ws, msDoc);
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(PageLanguage));
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DetectedLanguage));
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Occurrences.ToString()));
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Description));
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DescriptionLength.ToString()));

            ws.NextRecord();
        }
    }