Exemple #1
0
        /// <summary>
        /// Fills a metrics report with the basic HTML metrics of a crawled page: the meta report,
        /// outer/inner link counts, and one entry per tag set (list, table, heading, structure,
        /// multimedia).
        /// </summary>
        /// <param name="page">Crawled page whose links and HTML document are evaluated.</param>
        /// <param name="settings">Metrics settings; its flags enable optional adjustments of the entries.</param>
        /// <param name="output">Report to fill. When <c>null</c>, a new report is created over the page's HTML document.</param>
        /// <returns>The report passed in as <paramref name="output"/>, or the newly created one.</returns>
        public static metricsReport getHtmlMetrics(crawledPage page, metricsSettings settings,
                                                   metricsReport output = null)
        {
            if (output == null)
            {
                output = new metricsReport(page.result.HtmlDocument as IXPathNavigable);
            }

            // Meta entries are written first, into the same report instance.
            getMetaReport(page, output);

            int linkInner = page.links.byScope[linkScope.inner].Count;

            output.report("FV01_linkOuter", page.links.byScope[linkScope.outer].Count);
            output.report("FV02_linkInner", linkInner);

            // Each tag-set entry is reported exactly once.
            output.report("FV31_cListStructures", htmlDefinitions.HTMLTags_listStructureTags, true);
            output.report("FV32_cTableStructures", htmlDefinitions.HTMLTags_tableStructureTags, true);

            output.report("FV41_cHeadingTags", htmlDefinitions.HTMLTags_headingTags, true);
            output.report("FV42_cStructureTags", htmlDefinitions.HTMLTags_allStructureTags, true);

            reportEntryBase multimediaEntry = output.report("FV43_cMultiMediaTags", htmlDefinitions.HTMLTags_multimediaTags, true);

            if (settings.flags.HasFlag(metricsFlag.downloadPluginLinkAsMultimediaTag))
            {
                // With this flag set, every outer link whose domain is www.adobe.com
                // increments the multimedia entry's value by one.
                int multimediaCount = (int)multimediaEntry.Value;
                foreach (link outerLink in page.links.byScope[linkScope.outer])
                {
                    if (outerLink.domain == "www.adobe.com")
                    {
                        multimediaCount++;
                    }
                }
                multimediaEntry.Value = multimediaCount;
            }

            // NOTE(review): FV44_cImageTags is fed the multimedia tag set, the same input as
            // FV43_cMultiMediaTags. This looks like a copy-paste slip; confirm whether a dedicated
            // image tag set was intended here.
            output.report("FV44_cImageTags", htmlDefinitions.HTMLTags_multimediaTags, true);

            return output;
        }
Exemple #2
0
        /// <summary>
        /// Builds a report from the page's non-standard meta information and stores the standard
        /// items on the page object. Called automatically from crawlerAgentContextOperations.
        /// </summary>
        /// <remarks>
        /// The title is stored in <c>page.pageCaption</c> and also reported; meta "keywords" and
        /// "description" are stored in <c>page.pageKeywords</c> / <c>page.pageDescription</c> only;
        /// every other meta tag with a non-empty name is reported under that (lower-cased) name.
        /// </remarks>
        /// <param name="page">Crawled page that receives the title, keywords and description.</param>
        /// <param name="output">Report to fill. When <c>null</c>, a new report is created over the page's HTML document.</param>
        /// <returns>The report passed in as <paramref name="output"/>, or the newly created one.</returns>
        public static metricsReport getMetaReport(this crawledPage page, metricsReport output = null)
        {
            if (output == null)
            {
                output = new metricsReport(page.result.HtmlDocument as IXPathNavigable);
            }
            var rt = output.report("META_metanodes", htmlDefinitions.HTMLTags_metaTags);

            foreach (IXPathNavigable Ixn in rt.nodes)
            {
                // Use the node directly when it already is a navigator; otherwise create one.
                XPathNavigator xn = (Ixn as XPathNavigator) ?? Ixn.CreateNavigator();

                // Tag and attribute names are protocol identifiers, so they are lower-cased with the
                // invariant culture; culture-sensitive lowering breaks e.g. "TITLE" under tr-TR.
                switch (xn.Name.ToLowerInvariant())
                {
                case "title":
                    page.pageCaption = xn.Value;

                    output.report("title", page.pageCaption, reportEntryGroups.META);
                    break;

                case "meta":
                {
                    // A meta tag may have no name attribute (e.g. charset / http-equiv);
                    // a missing value is treated as an empty name.
                    string metaName = (xn.getAttributeValue("name") ?? "").ToLowerInvariant();

                    switch (metaName)
                    {
                    case "keywords":
                        // A missing content attribute yields an empty keyword list.
                        string keywordsContent = xn.getAttributeValue("content") ?? "";
                        page.pageKeywords =
                            Enumerable.ToList <string>(keywordsContent.Split(htmlDefinitions.HTMLMeta_keywordsSepparators,
                                                                             StringSplitOptions.RemoveEmptyEntries));
                        break;

                    case "description":
                        page.pageDescription = xn.getAttributeValue("content");
                        break;

                    default:
                        // Everything else (application-name, generator, author,
                        // google-site-verification, ...) is reported under its own name.
                        if (!string.IsNullOrEmpty(metaName))
                        {
                            output.report(metaName, xn.getAttributeValue("content"), reportEntryGroups.META);
                        }
                        break;
                    }
                    break;
                }
                }
            }
            return output;
        }