/// <summary>
/// Renders the aspects of a document selected by <paramref name="flags"/> into <paramref name="sb"/>.
/// </summary>
/// <remarks>
/// Aspects are appended in a fixed order: title, meta description, body content, URL tokens, XPath selection.
/// Nothing is rendered when <paramref name="doc"/> is <c>null</c>. When <paramref name="htmlRelDoc"/> is
/// <c>null</c>, the aspects that read the parsed HTML (title, description, content, XPath) are skipped;
/// URL tokens are taken from <c>doc.path</c> and are still rendered.
/// </remarks>
/// <param name="sb">Builder that receives the rendered text.</param>
/// <param name="doc">The document whose aspects are rendered.</param>
/// <param name="htmlRelDoc">Parsed HTML of the document; may be <c>null</c>.</param>
/// <param name="flags">Flags selecting which aspects are rendered.</param>
/// <param name="code">
/// Instruction code forwarded to <c>BodyTextRender</c> and, for the <c>page_xpath</c> aspect, to <c>Render</c>
/// (presumably an XPath expression - confirm against <c>Render</c>). The XPath aspect is skipped when it is empty.
/// </param>
public void RenderDocumentAspect(StringBuilder sb, WebSiteDocument doc, HtmlDocument htmlRelDoc, DocumentRenderInstructionFlags flags, String code = "")
{
    if (doc == null)
    {
        return;
    }

    // Callers may have no parsed HTML for the document (e.g. related pages); guard every
    // HTML-derived aspect instead of failing on htmlRelDoc.DocumentNode.
    bool hasHtml = htmlRelDoc != null;

    if (hasHtml && flags.HasFlag(DocumentRenderInstructionFlags.page_title))
    {
        HtmlNode titleNode = htmlRelDoc.DocumentNode.Descendants("title").FirstOrDefault();
        if (titleNode != null)
        {
            sb.AppendLine(titleNode.InnerText);
        }
    }

    if (hasHtml && flags.HasFlag(DocumentRenderInstructionFlags.page_description))
    {
        // Concatenate the content of every <meta name="description"> element.
        StringBuilder description = new StringBuilder();
        foreach (HtmlNode metaNode in htmlRelDoc.DocumentNode.Descendants("meta"))
        {
            String metaName = metaNode.GetAttributeValue("name", "");

            // Ordinal comparison: attribute names are identifiers, and culture-aware casing
            // (e.g. Turkish dotted/dotless I) would fail to match "DESCRIPTION".
            if (metaName.Equals("description", StringComparison.OrdinalIgnoreCase))
            {
                description.Append(metaNode.GetAttributeValue("content", ""));
            }
        }

        if (description.Length > 0)
        {
            sb.AppendLine(description.ToString());
        }
    }

    if (hasHtml && flags.HasFlag(DocumentRenderInstructionFlags.page_content))
    {
        sb.AppendLine(BodyTextRender(htmlRelDoc, code));
    }

    if (flags.HasFlag(DocumentRenderInstructionFlags.url_tokens))
    {
        // Tokens are space separated; no line break is appended after the last one.
        List<string> urlTokens = doc.path.getTokens(true, false, true, true, 1);
        foreach (String urlToken in urlTokens)
        {
            sb.Append(urlToken).Append(" ");
        }
    }

    if (hasHtml && flags.HasFlag(DocumentRenderInstructionFlags.page_xpath))
    {
        if (!code.isNullOrEmpty())
        {
            sb.AppendLine(Render(htmlRelDoc.DocumentNode, code));
        }
    }
}
/// <summary>
/// Renders the web page into a collection of text layers, one layer per rendering instruction.
/// </summary>
/// <param name="webPage">The web page to render; its <c>AssignedID</c> becomes the name of the returned collection.</param>
/// <param name="site">The site the page belongs to; its web graph and documents are used by link-selecting instructions.</param>
/// <param name="EnableRendering">if set to <c>false</c>, no rendering is performed and every layer is created with empty content.</param>
/// <param name="htmlRelDoc">Parsed HTML of <paramref name="webPage"/>; when <c>null</c> it is obtained from the default document cache.</param>
/// <returns>A collection holding one layer (name, content, weight) for each instruction.</returns>
/// <exception cref="ArgumentNullException"><paramref name="webPage"/> is <c>null</c>.</exception>
/// <exception cref="nlpException">An instruction selects links but the site has no web graph.</exception>
public TextDocumentLayerCollection RenderText(WebSiteDocument webPage, WebSiteDocuments site, Boolean EnableRendering = true, HtmlDocument htmlRelDoc = null)
{
    if (webPage == null)
    {
        throw new ArgumentNullException("webPage");
    }

    TextDocumentLayerCollection output = new TextDocumentLayerCollection();
    output.name = webPage.AssignedID;

    if (htmlRelDoc == null)
    {
        htmlRelDoc = HtmlDocumentCache.DefaultDocumentCache.GetDocument(webPage.AssignedID, webPage.HTMLSource);
    }

    foreach (DocumentRenderInstruction instruction in instructions)
    {
        String content = "";

        if (EnableRendering)
        {
            StringBuilder sb = new StringBuilder();
            DocumentRenderInstructionFlags flags = instruction.instructionFlags;

            if (flags.HasFlag(DocumentRenderInstructionFlags.cur_page))
            {
                RenderDocumentAspect(sb, webPage, htmlRelDoc, flags, instruction.code);
            }

            if (flags.HasFlag(DocumentRenderInstructionFlags.select_links))
            {
                if (site.extensions.graph == null)
                {
                    throw new nlpException("WebGraph is null - can't render instruction [" + instruction.name + "]", "Graph is null for site [" + site.domain + "]");
                }

                imbSCI.Graph.FreeGraph.freeGraphNodeAndLinks selection = site.extensions.graph.GetLinks(
                    webPage.AssignedID,
                    flags.HasFlag(DocumentRenderInstructionFlags.select_outbound_links),
                    flags.HasFlag(DocumentRenderInstructionFlags.select_inbound_links));

                if (flags.HasFlag(DocumentRenderInstructionFlags.link_caption))
                {
                    foreach (var link in selection.links)
                    {
                        // Links without a caption contribute nothing; skip them instead of appending blank lines.
                        if (!link.linkLabel.isNullOrEmpty())
                        {
                            sb.AppendLine(link.linkLabel);
                        }
                    }
                }

                if (flags.HasFlag(DocumentRenderInstructionFlags.select_rel_page))
                {
                    foreach (var node in selection.linkedNodeClones.Values)
                    {
                        WebSiteDocument relatedPage = site.documents.FirstOrDefault(x => x.AssignedID == node.name);
                        if (relatedPage == null)
                        {
                            // The linked node has no matching document in this site.
                            continue;
                        }

                        // The related page needs its own parsed HTML; passing null here made
                        // RenderDocumentAspect dereference a null document.
                        HtmlDocument relatedHtml = HtmlDocumentCache.DefaultDocumentCache.GetDocument(relatedPage.AssignedID, relatedPage.HTMLSource);
                        RenderDocumentAspect(sb, relatedPage, relatedHtml, flags, instruction.code);
                    }
                }
            }

            content = sb.ToString();

            if (flags.HasFlag(DocumentRenderInstructionFlags.lower_case))
            {
                content = content.ToLower();
            }

            if (flags.HasFlag(DocumentRenderInstructionFlags.unique_tokens))
            {
                // Keep the first occurrence of each token, preserving the original order.
                List<string> tokens = content.getTokens(true, false, true, true, 1);
                HashSet<string> seenTokens = new HashSet<string>();
                StringBuilder uniqueTokens = new StringBuilder();

                foreach (String token in tokens)
                {
                    if (seenTokens.Add(token))
                    {
                        uniqueTokens.Append(token).Append(" ");
                    }
                }

                content = uniqueTokens.ToString();
            }

            content = TrimEmptySpace(content);
        }

        output.CreateLayer(instruction.name, content, Convert.ToInt32(instruction.weight));
    }

    return output;
}