Ejemplo n.º 1
0
        /// <summary>
        /// Builds the web page repository using <see cref="ISpiderTarget"/> crawl information
        /// </summary>
        /// <remarks>
        /// The page is obtained through <c>GetWebPage</c>, populated from the crawl data carried by
        /// <paramref name="target"/>, registered in <c>site.pageTable</c> and finally saved under <c>site.folder</c>.
        /// </remarks>
        /// <param name="target">Target information</param>
        /// <param name="site">The site to build page for</param>
        /// <param name="output">The output for console/log</param>
        /// <returns>Built or updated web page repository</returns>
        /// <exception cref="System.ArgumentNullException">
        /// Thrown when <paramref name="target"/> or <paramref name="site"/> is <c>null</c>.
        /// </exception>
        public imbMCWebPage BuildWebPage(ISpiderTarget target, imbMCWebSite site, ILogBuilder output = null)
        {
            // Fail fast, before GetWebPage is invoked, instead of hitting a NullReferenceException half-way through.
            if (target == null)
            {
                throw new System.ArgumentNullException(nameof(target));
            }

            if (site == null)
            {
                throw new System.ArgumentNullException(nameof(site));
            }

            // NOTE(review): the 'true' flag presumably asks GetWebPage to create the page when it is missing - confirm.
            imbMCWebPage page  = GetWebPage(site, target.url, true, output);
            ISpiderPage  sPage = target.page;

            // Crawl metadata stored on the page entry.
            page.entry.AnchorTextAll       = sPage.captions.toCsvInLine(",");
            page.entry.ClickDepth          = sPage.iterationDiscovery;
            page.entry.ResolvedRelativeURL = site.domainInfo.GetURLWithoutDomainName(target.url);

            page.deploy(page.entry);

            page.indexEntry = target.GetIndexPage();

            page.TextContent = target.pageText;
            page.name        = sPage.name;

            // The HTML source is only set when the target provides a parsed document.
            var htmlDoc = target.GetHtmlDocument();

            if (htmlDoc != null)
            {
                page.HtmlSourceCode = htmlDoc.DocumentNode.OuterHtml;
            }

            page.TermTable = target.tokens.GetCompiledTable(output);

            // Start from a fresh list, then copy over the target's content blocks.
            page.Blocks = new List<imbCommonModels.contentBlock.nodeBlock>();

            foreach (var block in target.contentBlocks)
            {
                page.Blocks.Add(block);
            }

            site.pageTable.AddOrUpdate(page.entry);

            page.SaveDataStructure(site.folder, output);

            return page;
        }