/**************************************************************************/

        /// <summary>
        /// Picks a readability analyzer for a document, based on the ISO
        /// language code that the document reports.
        /// </summary>
        /// <param name="msDoc">Document whose ISO language code is consulted.</param>
        /// <returns>
        /// Whatever <c>MacroscopeAnalyzeReadability.AnalyzerFactory</c> returns for
        /// the document's language code, or <c>null</c> when the document reports
        /// a null or empty language code.
        /// </returns>
        public static IMacroscopeAnalyzeReadability AnalyzerFactory(MacroscopeDocument msDoc)
        {
            string languageCode = msDoc.GetIsoLanguageCode();

            // Without a language code there is nothing to select an analyzer by.
            if (string.IsNullOrEmpty(languageCode))
            {
                return null;
            }

            return MacroscopeAnalyzeReadability.AnalyzerFactory(IsoLanguageCode: languageCode);
        }
Example #2
0
        /**************************************************************************/

        /// <summary>
        /// Writes the page-text report to a CSV writer: one header record followed
        /// by one record per qualifying document in the job's document collection.
        /// </summary>
        /// <remarks>
        /// External documents, redirects, and documents whose type is neither HTML
        /// nor PDF are skipped.
        /// </remarks>
        /// <param name="JobMaster">Job whose document collection is exported.</param>
        /// <param name="ws">CSV writer that receives the fields and records.</param>
        private void BuildWorksheetPageText(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

            string[] columnHeadings =
            {
                "URL",
                "Page Locale",
                "Page Language",
                "Detected Language",
                "Word Count",
                "Readability Method",
                "Readability Grade",
                "Readability Grade Description"
            };

            // Header record.
            foreach (string heading in columnHeadings)
            {
                ws.WriteField(heading);
            }

            ws.NextRecord();

            // One record per internal, non-redirect HTML or PDF document.
            foreach (MacroscopeDocument msDoc in documents.IterateDocuments())
            {
                if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
                {
                    continue;
                }

                switch (msDoc.GetDocumentType())
                {
                    case MacroscopeConstants.DocumentType.HTML:
                    case MacroscopeConstants.DocumentType.PDF:
                        break;

                    default:
                        continue;
                }

                string pageLocale       = msDoc.GetLocale();
                string pageLanguage     = msDoc.GetIsoLanguageCode();
                string detectedLanguage = msDoc.GetDocumentTextLanguage();
                int    wordCount        = msDoc.GetWordCount();
                string gradeMethod      = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
                    ReadabilityMethod: msDoc.GetReadabilityGradeMethod()
                    );
                string gradeValue       = msDoc.GetReadabilityGrade().ToString("00.00");
                string gradeDescription = msDoc.GetReadabilityGradeDescription();

                // Normalise null to the empty string before formatting.
                pageLocale       = pageLocale ?? "";
                pageLanguage     = pageLanguage ?? "";
                detectedLanguage = detectedLanguage ?? "";

                this.InsertAndFormatUrlCell(ws, msDoc);

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(pageLocale));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(pageLanguage));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(detectedLanguage));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(wordCount.ToString()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(gradeMethod));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(gradeValue));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(gradeDescription));

                ws.NextRecord();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Builds an ordered list of label/value pairs describing this document.
        /// </summary>
        /// <returns>
        /// A new list of pairs in a fixed order. Heading entries (H1 to H6 and
        /// their lengths) are only present for levels that have a non-null first
        /// heading.
        /// </returns>
        public List<KeyValuePair<string, string>> DetailDocumentDetails()
        {
            List<KeyValuePair<string, string>> details = new List<KeyValuePair<string, string>>();

            // NOTE(review): the labels "HTST Policy Enabled" and "Levenshtein Fingerpring"
            // look misspelled; they are kept verbatim because they are emitted at runtime
            // and consumers of these labels are not visible from here.

            AppendDetail(details, "URL", this.GetUrl());

            AppendDetail(details, "Status Code", ((int)this.GetStatusCode()).ToString());
            AppendDetail(details, "Status", this.GetStatusCode().ToString());

            AppendDetail(details, "Robots", this.GetAllowedByRobotsAsString());

            AppendDetail(details, "Crawled Date", this.GetCrawledDate());

            AppendDetail(details, "Error Condition", this.GetErrorCondition());

            AppendDetail(details, "Duration (seconds)", this.GetDurationInSecondsFormatted());

            AppendDetail(details, "HTST Policy Enabled", this.HypertextStrictTransportPolicy.ToString());

            AppendDetail(details, "Content Type", this.GetMimeType());
            AppendDetail(details, "Content Length", this.ContentLength.ToString());
            AppendDetail(details, "Encoding", this.ContentEncoding);

            AppendDetail(details, "Compressed", this.GetIsCompressed().ToString());
            AppendDetail(details, "Compression Method", this.GetCompressionMethod());

            AppendDetail(details, "Date", this.GetDateServer());
            AppendDetail(details, "Date Modified", this.GetDateModified());
            AppendDetail(details, "Expires", this.GetDateExpires());

            AppendDetail(details, "Locale", this.GetLocale());
            AppendDetail(details, "Language", this.GetIsoLanguageCode());

            // The character encoding may be unavailable; report an empty value then.
            Encoding characterEncoding = this.GetCharacterEncoding();
            string characterEncodingName = (characterEncoding != null) ? characterEncoding.EncodingName : "";
            AppendDetail(details, "Character Encoding", characterEncodingName);

            AppendDetail(details, "Character Set", this.GetCharacterSet());

            AppendDetail(details, "Canonical", this.GetCanonical());

            AppendDetail(details, "Link: Shortlink", this.GetLinkShortLink());
            AppendDetail(details, "Link: First", this.GetLinkFirst());
            AppendDetail(details, "Link: Prev", this.GetLinkPrev());
            AppendDetail(details, "Link: Next", this.GetLinkNext());
            AppendDetail(details, "Link: Last", this.GetLinkLast());

            AppendDetail(details, "Redirect", this.GetIsRedirect().ToString());
            AppendDetail(details, "Redirected From", this.UrlRedirectFrom);

            AppendDetail(details, "Referrer Meta Tag", this.GetMetaTag("referrer"));

            AppendDetail(details, "Hyperlinks In Count", this.CountHyperlinksIn().ToString());
            AppendDetail(details, "Hyperlinks Out Count", this.CountHyperlinksOut().ToString());

            // Element 0 is reported as the "in" ratio and element 1 as the "out" ratio.
            List<decimal> linkRatios = this.DocCollection.GetDocumentHyperlinksRatio(Url: this.GetUrl());
            AppendDetail(details, "Hyperlinks In Ratio", string.Format("{0:0.00}%", linkRatios[0]));
            AppendDetail(details, "Hyperlinks Out Ratio", string.Format("{0:0.00}%", linkRatios[1]));

            AppendDetail(details, "HrefLang Count", this.GetHrefLangs().Count.ToString());

            AppendDetail(details, "Title", this.GetTitle());
            AppendDetail(details, "Title Length", this.GetTitleLength().ToString());
            AppendDetail(details, "Title Pixel Width", this.GetTitlePixelWidth().ToString());
            AppendDetail(details, "Probable Title Language", this.GetTitleLanguage());

            AppendDetail(details, "Author", this.GetAuthor());

            AppendDetail(details, "Description", this.GetDescription());
            AppendDetail(details, "Description Length", this.GetDescriptionLength().ToString());
            AppendDetail(details, "Probable Description Language", this.GetDescriptionLanguage());

            AppendDetail(details, "Keywords", this.GetKeywords());
            AppendDetail(details, "Keywords Length", this.GetKeywordsLength().ToString());
            AppendDetail(details, "Keywords Count", this.GetKeywordsCount().ToString());

            AppendDetail(details, "Probable Document Text Language", this.GetDocumentTextLanguage());

            AppendDetail(details, "Levenshtein Fingerpring", this.GetLevenshteinFingerprint());

            // Readability is reported as a single "method : grade : description" value.
            string readabilityMethod = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
                ReadabilityMethod: this.GetReadabilityGradeMethod()
                );
            string readabilityGrade       = this.GetReadabilityGrade().ToString("00.00");
            string readabilityDescription = this.GetReadabilityGradeDescription();
            AppendDetail(
                details,
                "Text Readability",
                string.Format("{0} : {1} : {2}", readabilityMethod, readabilityGrade, readabilityDescription)
                );

            AppendDetail(details, "AltText", this.GetAltText());

            AppendDetail(details, "Checksum", this.GetChecksum());
            AppendDetail(details, "ETag", this.GetEtag());

            // Only the first heading of each level is reported; levels with no
            // headings (or a null first heading) contribute no entries.
            for (ushort level = 1; level <= 6; level++)
            {
                string firstHeading = (this.GetHeadings(level).Count > 0)
                    ? this.GetHeadings(level)[0]
                    : null;

                if (firstHeading == null)
                {
                    continue;
                }

                AppendDetail(details, string.Format("H{0}", level), firstHeading);
                AppendDetail(details, string.Format("H{0} Length", level), firstHeading.Length.ToString());
            }

            AppendDetail(details, "Page Depth", this.GetDepth().ToString());

            AppendDetail(details, "Server Name", this.GetServerName());

            AppendDetail(details, "Scheme", this.GetScheme());
            AppendDetail(details, "Host and Port", this.GetHostAndPort());
            AppendDetail(details, "Host", this.GetHostname());
            AppendDetail(details, "Port", this.GetPort().ToString());
            AppendDetail(details, "Path", this.GetPath());
            AppendDetail(details, "Query", this.GetQueryString());
            AppendDetail(details, "Fragment", this.GetFragment());

            AppendDetail(details, "Server Addresses", this.GetHostAddressesAsCsv());

            return details;
        }

        /// <summary>
        /// Appends a single label/value pair to <paramref name="target"/>.
        /// </summary>
        /// <param name="target">List that receives the new pair.</param>
        /// <param name="label">Key of the pair.</param>
        /// <param name="value">Value of the pair; stored as given.</param>
        private static void AppendDetail(List<KeyValuePair<string, string>> target, string label, string value)
        {
            target.Add(new KeyValuePair<string, string>(label, value));
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> to the workbook
        /// and fills it with the page-text report: a header row, then one row per
        /// qualifying document, and finally wraps the written range in a table.
        /// </summary>
        /// <remarks>
        /// External documents, redirects, and documents whose type is neither HTML
        /// nor PDF are skipped. Font colours flag internal/non-internal documents,
        /// a mismatch between page language and detected language, and a word
        /// count of zero or less.
        /// </remarks>
        /// <param name="JobMaster">Job whose document collection is exported.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
        private void BuildWorksheetPageText(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            string[] columnHeadings =
            {
                "URL",
                "Page Locale",
                "Page Language",
                "Detected Language",
                "Word Count",
                "Readability Method",
                "Readability Grade",
                "Readability Grade Description"
            };

            var ws = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

            int lastColumn = columnHeadings.Length;
            int row        = 1;

            // Header row: columns are 1-based.
            for (int index = 0; index < columnHeadings.Length; index++)
            {
                ws.Cell(row, index + 1).Value = columnHeadings[index];
            }

            row++;

            foreach (MacroscopeDocument msDoc in documents.IterateDocuments())
            {
                if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
                {
                    continue;
                }

                switch (msDoc.GetDocumentType())
                {
                    case MacroscopeConstants.DocumentType.HTML:
                    case MacroscopeConstants.DocumentType.PDF:
                        break;

                    default:
                        continue;
                }

                int col = 1;

                string pageLocale       = msDoc.GetLocale();
                string pageLanguage     = msDoc.GetIsoLanguageCode();
                string detectedLanguage = msDoc.GetDocumentTextLanguage();
                int    wordCount        = msDoc.GetWordCount();
                string gradeMethod      = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
                    ReadabilityMethod: msDoc.GetReadabilityGradeMethod()
                    );
                string gradeValue       = msDoc.GetReadabilityGrade().ToString("00.00");
                string gradeDescription = msDoc.GetReadabilityGradeDescription();

                // Normalise null to the empty string before comparing and formatting.
                pageLocale       = pageLocale ?? "";
                pageLanguage     = pageLanguage ?? "";
                detectedLanguage = detectedLanguage ?? "";

                // Both language columns share one colour: red when they disagree.
                XLColor languageColor = (pageLanguage != detectedLanguage) ? XLColor.Red : XLColor.Green;

                // URL
                this.InsertAndFormatUrlCell(ws, row, col, msDoc);
                XLColor urlColor = msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray;
                ws.Cell(row, col).Style.Font.SetFontColor(urlColor);
                col++;

                // Page Locale
                this.InsertAndFormatContentCell(ws, row, col, this.FormatIfMissing(pageLocale));
                XLColor localeColor = msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray;
                ws.Cell(row, col).Style.Font.SetFontColor(localeColor);
                col++;

                // Page Language
                this.InsertAndFormatContentCell(ws, row, col, this.FormatIfMissing(pageLanguage));
                ws.Cell(row, col).Style.Font.SetFontColor(languageColor);
                col++;

                // Detected Language
                this.InsertAndFormatContentCell(ws, row, col, this.FormatIfMissing(detectedLanguage));
                ws.Cell(row, col).Style.Font.SetFontColor(languageColor);
                col++;

                // Word Count: grey when not internal, otherwise green only if positive.
                this.InsertAndFormatContentCell(ws, row, col, wordCount);
                XLColor wordCountColor;
                if (!msDoc.GetIsInternal())
                {
                    wordCountColor = XLColor.Gray;
                }
                else if (wordCount > 0)
                {
                    wordCountColor = XLColor.Green;
                }
                else
                {
                    wordCountColor = XLColor.Red;
                }
                ws.Cell(row, col).Style.Font.SetFontColor(wordCountColor);
                col++;

                // Readability columns carry no colour coding.
                this.InsertAndFormatContentCell(ws, row, col, gradeMethod);
                col++;

                this.InsertAndFormatContentCell(ws, row, col, gradeValue);
                col++;

                this.InsertAndFormatContentCell(ws, row, col, gradeDescription);

                row++;
            }

            // Turn everything written (header plus data rows) into a table.
            var writtenRange = ws.Range(1, 1, row - 1, lastColumn);
            writtenRange.CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes the list-view row for a single document and applies
        /// its foreground colours.
        /// </summary>
        /// <remarks>
        /// Nothing happens for external documents, redirects, or documents whose
        /// type is neither HTML nor PDF. When <c>DisplayListView</c> already holds
        /// an item keyed by the URL, that item's sub-item text is updated in
        /// place; otherwise a new item is created and appended to
        /// <paramref name="ListViewItems"/>. Exceptions raised while updating or
        /// creating the item are caught and reported through <c>DebugMsg</c>.
        /// </remarks>
        /// <param name="ListViewItems">Receives newly created items.</param>
        /// <param name="DocCollection">Not referenced by this method body.</param>
        /// <param name="msDoc">Document whose text details are rendered.</param>
        /// <param name="Url">URL shown in the row and used as the item key.</param>
        protected override void RenderListView(
            List<ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
            {
                return;
            }

            switch (msDoc.GetDocumentType())
            {
                case MacroscopeConstants.DocumentType.HTML:
                case MacroscopeConstants.DocumentType.PDF:
                    break;

                default:
                    return;
            }

            string pageLocale       = msDoc.GetLocale();
            string pageLanguage     = msDoc.GetIsoLanguageCode();
            string detectedLanguage = msDoc.GetDocumentTextLanguage();
            int    wordCount        = msDoc.GetWordCount();
            string gradeMethod      = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
                ReadabilityMethod: msDoc.GetReadabilityGradeMethod()
                );
            string gradeValue       = msDoc.GetReadabilityGrade().ToString("00.00");
            string gradeDescription = msDoc.GetReadabilityGradeDescription();
            string itemKey          = string.Join("", Url);
            string wordCountText    = wordCount.ToString();

            ListViewItem row = null;

            // Normalise null to the empty string before display and comparison.
            pageLocale       = pageLocale ?? "";
            pageLanguage     = pageLanguage ?? "";
            detectedLanguage = detectedLanguage ?? "";

            if (this.DisplayListView.Items.ContainsKey(itemKey))
            {
                // Existing row: overwrite the text of each column.
                try
                {
                    row = this.DisplayListView.Items[itemKey];

                    row.SubItems[ColUrl].Text                         = Url;
                    row.SubItems[ColLocale].Text                      = pageLocale;
                    row.SubItems[ColPageLanguage].Text                = pageLanguage;
                    row.SubItems[ColDetectedLanguage].Text            = detectedLanguage;
                    row.SubItems[ColWordCount].Text                   = wordCountText;
                    row.SubItems[ColReadabilityGradeType].Text        = gradeMethod;
                    row.SubItems[ColReadabilityGrade].Text            = gradeValue;
                    row.SubItems[ColReadabilityGradeDescription].Text = gradeDescription;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayPageText 1: {0}", ex.Message));
                }
            }
            else
            {
                // New row: the remaining columns are appended in display order.
                try
                {
                    row = new ListViewItem(itemKey);
                    row.UseItemStyleForSubItems = false;
                    row.Name = itemKey;

                    row.SubItems[ColUrl].Text = Url;

                    string[] remainingColumns =
                    {
                        pageLocale,
                        pageLanguage,
                        detectedLanguage,
                        wordCountText,
                        gradeMethod,
                        gradeValue,
                        gradeDescription
                    };

                    foreach (string columnText in remainingColumns)
                    {
                        row.SubItems.Add(columnText);
                    }

                    ListViewItems.Add(row);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayPageText 2: {0}", ex.Message));
                }
            }

            if (row == null)
            {
                return;
            }

            row.ForeColor = Color.Blue;

            // URL -------------------------------------------------------------//

            Color urlColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
            row.SubItems[ColUrl].ForeColor = urlColor;

            // Page Locale -----------------------------------------------------//

            Color localeColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
            row.SubItems[ColLocale].ForeColor = localeColor;

            // Page Language ---------------------------------------------------//

            // Both language columns share one colour; for internal documents a
            // mismatch between declared and detected language is shown in red.
            Color languageColor;
            if (!msDoc.GetIsInternal())
            {
                languageColor = Color.Gray;
            }
            else if (detectedLanguage != pageLanguage)
            {
                languageColor = Color.Red;
            }
            else
            {
                languageColor = Color.Green;
            }

            row.SubItems[ColPageLanguage].ForeColor     = languageColor;
            row.SubItems[ColDetectedLanguage].ForeColor = languageColor;

            // Word Count ------------------------------------------------------//

            Color wordCountColor;
            if (!msDoc.GetIsInternal())
            {
                wordCountColor = Color.Gray;
            }
            else if (wordCount > 0)
            {
                wordCountColor = Color.Green;
            }
            else
            {
                wordCountColor = Color.Red;
            }

            row.SubItems[ColWordCount].ForeColor = wordCountColor;
        }