/**************************************************************************/

/// <summary>
/// Creates a readability analyzer for the ISO language code reported by
/// the given document.
/// </summary>
/// <param name="msDoc">Document whose ISO language code selects the analyzer.</param>
/// <returns>
/// The analyzer produced by the string-keyed factory overload, or
/// <c>null</c> when the document reports a null or empty language code.
/// </returns>
public static IMacroscopeAnalyzeReadability AnalyzerFactory(MacroscopeDocument msDoc)
{

    string LanguageCode = msDoc.GetIsoLanguageCode();

    // Without a language code there is nothing to dispatch on.
    if (string.IsNullOrEmpty(LanguageCode))
    {
        return (null);
    }

    return (MacroscopeAnalyzeReadability.AnalyzerFactory(IsoLanguageCode: LanguageCode));

}
/**************************************************************************/

/// <summary>
/// Writes the "page text" report as CSV: one header record followed by one
/// record per document that is neither external nor a redirect and whose
/// document type is HTML or PDF.
/// </summary>
/// <param name="JobMaster">Source of the document collection to report on.</param>
/// <param name="ws">CSV writer that receives the fields and records.</param>
private void BuildWorksheetPageText(
    MacroscopeJobMaster JobMaster,
    CsvWriter ws
)
{

    string[] ColumnHeadings =
    {
        "URL",
        "Page Locale",
        "Page Language",
        "Detected Language",
        "Word Count",
        "Readability Method",
        "Readability Grade",
        "Readability Grade Description"
    };

    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

    // Header record.
    foreach (string Heading in ColumnHeadings)
    {
        ws.WriteField(Heading);
    }

    ws.NextRecord();

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {

        if (msDoc.GetIsExternal())
        {
            continue;
        }

        if (msDoc.GetIsRedirect())
        {
            continue;
        }

        // Only HTML and PDF documents are reported.
        MacroscopeConstants.DocumentType DocType = msDoc.GetDocumentType();

        if (
            (DocType != MacroscopeConstants.DocumentType.HTML)
            && (DocType != MacroscopeConstants.DocumentType.PDF))
        {
            continue;
        }

        // Null values are normalised to the empty string before formatting.
        string PageLocale = msDoc.GetLocale() ?? "";
        string PageLanguage = msDoc.GetIsoLanguageCode() ?? "";
        string DetectedLanguage = msDoc.GetDocumentTextLanguage() ?? "";
        int WordCount = msDoc.GetWordCount();
        string ReadabilityGradeType = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
            ReadabilityMethod: msDoc.GetReadabilityGradeMethod()
        );
        string ReadabilityGrade = msDoc.GetReadabilityGrade().ToString("00.00");
        string ReadabilityGradeDescription = msDoc.GetReadabilityGradeDescription();

        // Content cells, in the same order as the headings after "URL".
        string[] ContentCells =
        {
            PageLocale,
            PageLanguage,
            DetectedLanguage,
            WordCount.ToString(),
            ReadabilityGradeType,
            ReadabilityGrade,
            ReadabilityGradeDescription
        };

        this.InsertAndFormatUrlCell(ws, msDoc);

        foreach (string CellText in ContentCells)
        {
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(CellText));
        }

        ws.NextRecord();

    }

}
/**************************************************************************/

/// <summary>
/// Builds an ordered list of label/value pairs describing this document.
/// </summary>
/// <remarks>
/// Entries are appended in a fixed order. Heading entries ("H1".."H6" and
/// their "Length" companions) are only added for levels whose first heading
/// exists and is not null.
/// </remarks>
/// <returns>A new list of label/value pairs; values are passed through as returned by the getters.</returns>
public List<KeyValuePair<string, string>> DetailDocumentDetails()
{

    List<KeyValuePair<string, string>> DetailsList = new List<KeyValuePair<string, string>>();

    DetailsList.Add(new KeyValuePair<string, string>("URL", this.GetUrl()));
    DetailsList.Add(new KeyValuePair<string, string>("Status Code", ((int)this.GetStatusCode()).ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Status", this.GetStatusCode().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Robots", this.GetAllowedByRobotsAsString()));
    DetailsList.Add(new KeyValuePair<string, string>("Crawled Date", this.GetCrawledDate()));
    DetailsList.Add(new KeyValuePair<string, string>("Error Condition", this.GetErrorCondition()));
    DetailsList.Add(new KeyValuePair<string, string>("Duration (seconds)", this.GetDurationInSecondsFormatted()));

    // NOTE(review): the usual acronym for HTTP Strict Transport Security is
    // "HSTS"; the label is left unchanged here in case other views use the
    // same spelling - confirm before renaming.
    DetailsList.Add(new KeyValuePair<string, string>("HTST Policy Enabled", this.HypertextStrictTransportPolicy.ToString()));

    DetailsList.Add(new KeyValuePair<string, string>("Content Type", this.GetMimeType()));
    DetailsList.Add(new KeyValuePair<string, string>("Content Length", this.ContentLength.ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Encoding", this.ContentEncoding));
    DetailsList.Add(new KeyValuePair<string, string>("Compressed", this.GetIsCompressed().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Compression Method", this.GetCompressionMethod()));
    DetailsList.Add(new KeyValuePair<string, string>("Date", this.GetDateServer()));
    DetailsList.Add(new KeyValuePair<string, string>("Date Modified", this.GetDateModified()));
    DetailsList.Add(new KeyValuePair<string, string>("Expires", this.GetDateExpires()));
    DetailsList.Add(new KeyValuePair<string, string>("Locale", this.GetLocale()));
    DetailsList.Add(new KeyValuePair<string, string>("Language", this.GetIsoLanguageCode()));

    {
        // The character encoding may be unknown; report an empty value then.
        Encoding TextEncoding = this.GetCharacterEncoding();
        string TextEncodingValue = "";

        if (TextEncoding != null)
        {
            TextEncodingValue = TextEncoding.EncodingName;
        }

        DetailsList.Add(new KeyValuePair<string, string>("Character Encoding", TextEncodingValue));
    }

    DetailsList.Add(new KeyValuePair<string, string>("Character Set", this.GetCharacterSet()));
    DetailsList.Add(new KeyValuePair<string, string>("Canonical", this.GetCanonical()));
    DetailsList.Add(new KeyValuePair<string, string>("Link: Shortlink", this.GetLinkShortLink()));
    DetailsList.Add(new KeyValuePair<string, string>("Link: First", this.GetLinkFirst()));
    DetailsList.Add(new KeyValuePair<string, string>("Link: Prev", this.GetLinkPrev()));
    DetailsList.Add(new KeyValuePair<string, string>("Link: Next", this.GetLinkNext()));
    DetailsList.Add(new KeyValuePair<string, string>("Link: Last", this.GetLinkLast()));
    DetailsList.Add(new KeyValuePair<string, string>("Redirect", this.GetIsRedirect().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Redirected From", this.UrlRedirectFrom));
    DetailsList.Add(new KeyValuePair<string, string>("Referrer Meta Tag", this.GetMetaTag("referrer")));
    DetailsList.Add(new KeyValuePair<string, string>("Hyperlinks In Count", this.CountHyperlinksIn().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Hyperlinks Out Count", this.CountHyperlinksOut().ToString()));

    {
        // Element 0 is reported as the "in" ratio and element 1 as the "out" ratio.
        List<decimal> HyperlinkRatio = this.DocCollection.GetDocumentHyperlinksRatio(Url: this.GetUrl());

        DetailsList.Add(new KeyValuePair<string, string>("Hyperlinks In Ratio", string.Format("{0:0.00}%", HyperlinkRatio[0])));
        DetailsList.Add(new KeyValuePair<string, string>("Hyperlinks Out Ratio", string.Format("{0:0.00}%", HyperlinkRatio[1])));
    }

    DetailsList.Add(new KeyValuePair<string, string>("HrefLang Count", this.GetHrefLangs().Count.ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Title", this.GetTitle()));
    DetailsList.Add(new KeyValuePair<string, string>("Title Length", this.GetTitleLength().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Title Pixel Width", this.GetTitlePixelWidth().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Probable Title Language", this.GetTitleLanguage()));
    DetailsList.Add(new KeyValuePair<string, string>("Author", this.GetAuthor()));
    DetailsList.Add(new KeyValuePair<string, string>("Description", this.GetDescription()));
    DetailsList.Add(new KeyValuePair<string, string>("Description Length", this.GetDescriptionLength().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Probable Description Language", this.GetDescriptionLanguage()));
    DetailsList.Add(new KeyValuePair<string, string>("Keywords", this.GetKeywords()));
    DetailsList.Add(new KeyValuePair<string, string>("Keywords Length", this.GetKeywordsLength().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Keywords Count", this.GetKeywordsCount().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Probable Document Text Language", this.GetDocumentTextLanguage()));

    // Label spelling corrected (was "Levenshtein Fingerpring").
    DetailsList.Add(new KeyValuePair<string, string>("Levenshtein Fingerprint", this.GetLevenshteinFingerprint()));

    {
        // "<method> : <grade> : <description>"
        string ReadabilityGradeText = string.Format(
            "{0} : {1} : {2}",
            MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
                ReadabilityMethod: this.GetReadabilityGradeMethod()
            ),
            this.GetReadabilityGrade().ToString("00.00"),
            this.GetReadabilityGradeDescription()
        );

        DetailsList.Add(new KeyValuePair<string, string>("Text Readability", ReadabilityGradeText));
    }

    DetailsList.Add(new KeyValuePair<string, string>("AltText", this.GetAltText()));
    DetailsList.Add(new KeyValuePair<string, string>("Checksum", this.GetChecksum()));
    DetailsList.Add(new KeyValuePair<string, string>("ETag", this.GetEtag()));

    for (ushort HeadingLevel = 1; HeadingLevel <= 6; HeadingLevel++)
    {

        // Fetch the headings for this level once instead of once for the
        // count check and again for the element access.
        var Headings = this.GetHeadings(HeadingLevel);

        if (Headings.Count <= 0)
        {
            continue;
        }

        // Only the first heading of each level is reported.
        string HeadingText = Headings[0];

        if (HeadingText != null)
        {
            DetailsList.Add(new KeyValuePair<string, string>(string.Format("H{0}", HeadingLevel), HeadingText));
            DetailsList.Add(new KeyValuePair<string, string>(string.Format("H{0} Length", HeadingLevel), HeadingText.Length.ToString()));
        }

    }

    DetailsList.Add(new KeyValuePair<string, string>("Page Depth", this.GetDepth().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Server Name", this.GetServerName()));
    DetailsList.Add(new KeyValuePair<string, string>("Scheme", this.GetScheme()));
    DetailsList.Add(new KeyValuePair<string, string>("Host and Port", this.GetHostAndPort()));
    DetailsList.Add(new KeyValuePair<string, string>("Host", this.GetHostname()));
    DetailsList.Add(new KeyValuePair<string, string>("Port", this.GetPort().ToString()));
    DetailsList.Add(new KeyValuePair<string, string>("Path", this.GetPath()));
    DetailsList.Add(new KeyValuePair<string, string>("Query", this.GetQueryString()));
    DetailsList.Add(new KeyValuePair<string, string>("Fragment", this.GetFragment()));
    DetailsList.Add(new KeyValuePair<string, string>("Server Addresses", this.GetHostAddressesAsCsv()));

    return (DetailsList);

}
/**************************************************************************/

/// <summary>
/// Adds a worksheet holding the "page text" report to the workbook: a
/// header row, then one row per document that is neither external nor a
/// redirect and whose document type is HTML or PDF. The filled range is
/// finally turned into a table.
/// </summary>
/// <param name="JobMaster">Source of the document collection to report on.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
private void BuildWorksheetPageText(
    MacroscopeJobMaster JobMaster,
    XLWorkbook wb,
    string WorksheetLabel
)
{

    var ws = wb.Worksheets.Add(WorksheetLabel);

    string[] ColumnHeadings =
    {
        "URL",
        "Page Locale",
        "Page Language",
        "Detected Language",
        "Word Count",
        "Readability Method",
        "Readability Grade",
        "Readability Grade Description"
    };

    int iRow = 1;
    int iColMax = ColumnHeadings.Length;

    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

    // Header row: columns are 1-based.
    for (int HeadingIndex = 0; HeadingIndex < ColumnHeadings.Length; HeadingIndex++)
    {
        ws.Cell(iRow, HeadingIndex + 1).Value = ColumnHeadings[HeadingIndex];
    }

    iRow++;

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {

        if (msDoc.GetIsExternal())
        {
            continue;
        }

        if (msDoc.GetIsRedirect())
        {
            continue;
        }

        // Only HTML and PDF documents are reported.
        MacroscopeConstants.DocumentType DocType = msDoc.GetDocumentType();

        if (
            (DocType != MacroscopeConstants.DocumentType.HTML)
            && (DocType != MacroscopeConstants.DocumentType.PDF))
        {
            continue;
        }

        int iCol = 1;

        // Null values are normalised to the empty string.
        string PageLocale = msDoc.GetLocale() ?? "";
        string PageLanguage = msDoc.GetIsoLanguageCode() ?? "";
        string DetectedLanguage = msDoc.GetDocumentTextLanguage() ?? "";
        int WordCount = msDoc.GetWordCount();
        string ReadabilityGradeType = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
            ReadabilityMethod: msDoc.GetReadabilityGradeMethod()
        );
        string ReadabilityGrade = msDoc.GetReadabilityGrade().ToString("00.00");
        string ReadabilityGradeDescription = msDoc.GetReadabilityGradeDescription();

        // URL: green for internal documents, gray otherwise.
        this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);
        XLColor UrlColor = msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray;
        ws.Cell(iRow, iCol).Style.Font.SetFontColor(UrlColor);
        iCol++;

        // Page locale: same colouring rule as the URL.
        this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(PageLocale));
        XLColor LocaleColor = msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray;
        ws.Cell(iRow, iCol).Style.Font.SetFontColor(LocaleColor);
        iCol++;

        // Declared and detected language: red when they disagree.
        XLColor LanguageColor = (PageLanguage != DetectedLanguage) ? XLColor.Red : XLColor.Green;

        this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(PageLanguage));
        ws.Cell(iRow, iCol).Style.Font.SetFontColor(LanguageColor);
        iCol++;

        this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(DetectedLanguage));
        ws.Cell(iRow, iCol).Style.Font.SetFontColor(LanguageColor);
        iCol++;

        // Word count: for internal documents, red flags a zero count.
        this.InsertAndFormatContentCell(ws, iRow, iCol, WordCount);
        XLColor WordCountColor = msDoc.GetIsInternal()
            ? ((WordCount > 0) ? XLColor.Green : XLColor.Red)
            : XLColor.Gray;
        ws.Cell(iRow, iCol).Style.Font.SetFontColor(WordCountColor);
        iCol++;

        // Readability columns carry no colouring of their own.
        this.InsertAndFormatContentCell(ws, iRow, iCol, ReadabilityGradeType);
        iCol++;

        this.InsertAndFormatContentCell(ws, iRow, iCol, ReadabilityGrade);
        iCol++;

        this.InsertAndFormatContentCell(ws, iRow, iCol, ReadabilityGradeDescription);

        iRow++;

    }

    // iRow points one past the last written row.
    ws.Range(1, 1, iRow - 1, iColMax).CreateTable();

}
/**************************************************************************/

/// <summary>
/// Creates or refreshes the list view row for one document, then colours
/// its sub-items. Documents that are external, redirects, or not of type
/// HTML or PDF are ignored.
/// </summary>
/// <param name="ListViewItems">Receives the row when a new one is created.</param>
/// <param name="DocCollection">Not read by this method.</param>
/// <param name="msDoc">Document supplying the displayed values.</param>
/// <param name="Url">URL shown in the row; also used to derive the row key.</param>
protected override void RenderListView(
    List<ListViewItem> ListViewItems,
    MacroscopeDocumentCollection DocCollection,
    MacroscopeDocument msDoc,
    string Url
)
{

    if (msDoc.GetIsExternal())
    {
        return;
    }

    if (msDoc.GetIsRedirect())
    {
        return;
    }

    // Only HTML and PDF documents are listed.
    MacroscopeConstants.DocumentType DocType = msDoc.GetDocumentType();

    if (
        (DocType != MacroscopeConstants.DocumentType.HTML)
        && (DocType != MacroscopeConstants.DocumentType.PDF))
    {
        return;
    }

    // Null values are normalised to the empty string.
    string PageLocale = msDoc.GetLocale() ?? "";
    string PageLanguage = msDoc.GetIsoLanguageCode() ?? "";
    string DetectedLanguage = msDoc.GetDocumentTextLanguage() ?? "";
    int WordCount = msDoc.GetWordCount();
    string ReadabilityGradeType = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
        ReadabilityMethod: msDoc.GetReadabilityGradeMethod()
    );
    string ReadabilityGrade = msDoc.GetReadabilityGrade().ToString("00.00");
    string ReadabilityGradeDescription = msDoc.GetReadabilityGradeDescription();
    string PairKey = string.Join("", Url);
    string WordCountText = WordCount.ToString();

    ListViewItem lvItem = null;

    if (this.DisplayListView.Items.ContainsKey(PairKey))
    {

        // Existing row: overwrite every column in place.
        try
        {
            lvItem = this.DisplayListView.Items[PairKey];
            lvItem.SubItems[ColUrl].Text = Url;
            lvItem.SubItems[ColLocale].Text = PageLocale;
            lvItem.SubItems[ColPageLanguage].Text = PageLanguage;
            lvItem.SubItems[ColDetectedLanguage].Text = DetectedLanguage;
            lvItem.SubItems[ColWordCount].Text = WordCountText;
            lvItem.SubItems[ColReadabilityGradeType].Text = ReadabilityGradeType;
            lvItem.SubItems[ColReadabilityGrade].Text = ReadabilityGrade;
            lvItem.SubItems[ColReadabilityGradeDescription].Text = ReadabilityGradeDescription;
        }
        catch (Exception ex)
        {
            DebugMsg(string.Format("MacroscopeDisplayPageText 1: {0}", ex.Message));
        }

    }
    else
    {

        // New row: the sub-items are appended in column order after the URL.
        try
        {
            string[] TrailingColumns =
            {
                PageLocale,
                PageLanguage,
                DetectedLanguage,
                WordCountText,
                ReadabilityGradeType,
                ReadabilityGrade,
                ReadabilityGradeDescription
            };

            lvItem = new ListViewItem(PairKey);
            lvItem.UseItemStyleForSubItems = false;
            lvItem.Name = PairKey;
            lvItem.SubItems[ColUrl].Text = Url;

            foreach (string ColumnText in TrailingColumns)
            {
                lvItem.SubItems.Add(ColumnText);
            }

            ListViewItems.Add(lvItem);
        }
        catch (Exception ex)
        {
            DebugMsg(string.Format("MacroscopeDisplayPageText 2: {0}", ex.Message));
        }

    }

    if (lvItem == null)
    {
        return;
    }

    lvItem.ForeColor = Color.Blue;

    // URL -------------------------------------------------------------//

    Color UrlColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
    lvItem.SubItems[ColUrl].ForeColor = UrlColor;

    // Page Locale -----------------------------------------------------//

    Color LocaleColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
    lvItem.SubItems[ColLocale].ForeColor = LocaleColor;

    // Page Language ---------------------------------------------------//

    // For internal documents, red marks a mismatch between the declared
    // and the detected language.
    Color LanguageColor = msDoc.GetIsInternal()
        ? ((DetectedLanguage != PageLanguage) ? Color.Red : Color.Green)
        : Color.Gray;
    lvItem.SubItems[ColPageLanguage].ForeColor = LanguageColor;
    lvItem.SubItems[ColDetectedLanguage].ForeColor = LanguageColor;

    // Word Count ------------------------------------------------------//

    // For internal documents, red marks a zero word count.
    Color WordCountColor = msDoc.GetIsInternal()
        ? ((WordCount > 0) ? Color.Green : Color.Red)
        : Color.Gray;
    lvItem.SubItems[ColWordCount].ForeColor = WordCountColor;

}