/// <summary>
/// Recursively searches the section tree, starting at this node, for the section
/// whose header matches a given HTML node.
/// </summary>
/// <param name="element">The HTML node to search for.</param>
/// <param name="aNameElement">Anchor name to look up in <c>AnchorNames</c>. When it is the
/// empty string, the match is done by comparing the outer HTML of the header tags instead.</param>
/// <returns>The section node that matches (this node or a descendant). Null if none was found.</returns>
public ChmDocumentNode BuscarNodo(HtmlNode element, string aNameElement)
{
    // First check whether this very node is the one we are looking for
    if (this.HeaderTag != null && element != null)
    {
        bool isMatch;
        if (aNameElement.Equals(""))
        {
            // No anchor name available: compare the HTML. The "about:" prefixes are removed
            // to avoid the about:blank error in the src attribute of images.
            string ownHtml = HeaderTag.OuterHtml.Replace("about:blank", "").Replace("about:", "");
            string otherHtml = element.OuterHtml.Replace("about:blank", "").Replace("about:", "");
            isMatch = ownHtml.Equals(otherHtml);
        }
        else
        {
            isMatch = this.AnchorNames.Contains(aNameElement);
        }

        if (isMatch)
        {
            return this;
        }
    }

    // Otherwise, search in the children (depth first)
    foreach (ChmDocumentNode child in Children)
    {
        ChmDocumentNode found = child.BuscarNodo(element, aNameElement);
        if (found != null)
        {
            return found;
        }
    }

    return null;
}
/// <summary>
/// Repairs or removes a broken internal link.
/// Given a broken internal link, it searches the document for a section title with the
/// same text as the link. If one is found, the link destination is changed to point to
/// that section. If no matching section is found, the link element is removed and its
/// content is kept in place.
/// Any exception raised while repairing is logged and not propagated.
/// </summary>
/// <param name="link">The broken link</param>
private void ReplaceBrokenLink(HtmlNode link)
{
    try
    {
        // Get the text of the link
        string linkText = UnescapedInnerText(link).Trim();

        // Search a title with the same text as the link
        ChmDocumentNode destinationTitle = Document.SearchBySectionTitle(linkText);
        if (destinationTitle != null)
        {
            // Replace the original broken internal link with the section one
            SetAttributeValue(link, "href", destinationTitle.Href);
            return;
        }

        // No candidate title was found. Remove the link and keep its content.
        // Iterate over a snapshot: the children are being moved to another parent, so the
        // live collection must not be enumerated while it may change.
        // NOTE(review): whether InsertBefore detaches the node from its previous parent
        // depends on the HTML library version — the snapshot is safe either way.
        HtmlNode parent = link.ParentNode;
        List<HtmlNode> content = new List<HtmlNode>(link.ChildNodes);
        foreach (HtmlNode child in content)
        {
            parent.InsertBefore(child, link);
        }
        parent.RemoveChild(link);
    }
    catch (Exception ex)
    {
        UI.Log("Error reparining a broken link", ChmLogLevel.ERROR);
        UI.Log(ex);
    }
}
/// <summary>
/// Creates a node of the section tree.
/// If the header tag contains no named anchor, an artificial one ("nodeN") is created and
/// inserted as the first child of the header tag.
/// </summary>
/// <param name="document">The document that owns this node. Used to create the artificial anchor element.</param>
/// <param name="parent">Parent section of the new section. null if the new node is the root section.</param>
/// <param name="node">HTML header tag for this section. It can be null.</param>
/// <param name="ui">Application log. It can be null. Not used by this constructor.</param>
public ChmDocumentNode(ChmDocument document, ChmDocumentNode parent, HtmlNode node, UserInterface ui)
{
    this.Parent = parent;
    this.HeaderTag = node;
    Children = new List<ChmDocumentNode>();
    HeaderLevel = HeaderTagLevel(node);
    DestinationFileName = "";
    AnchorNames = new List<string>();

    if (node == null)
    {
        // No header tag: there are no anchors to collect or create
        return;
    }

    // Collect the anchor names declared directly inside the header tag
    foreach (HtmlNode headerChild in node.ChildNodes)
    {
        string anchorName = ChmDocumentParser.GetAnchorName(headerChild);
        if (anchorName != null && anchorName.Trim().Length > 0)
        {
            AnchorNames.Add(anchorName);
        }
    }

    if (AnchorNames.Count > 0)
    {
        return;
    }

    // The header has no name at all: give it an artificial one
    int anchorNumber = LatestCustomAnchorNumber++;
    string artificialName = "node" + anchorNumber.ToString();

    // XHTML/HTML5 uses the "id" attribute, and HTML4 "name".
    // There is no safe way to check which one applies, so both are set.
    HtmlNode anchorTag = document.HtmlDoc.CreateElement("a");
    anchorTag.SetAttributeValue("name", artificialName);
    anchorTag.SetAttributeValue("id", artificialName);
    node.ChildNodes.Insert(0, anchorTag);

    AnchorNames.Add(artificialName);
}
/// <summary>
/// Compares this node with another one by title, ignoring letter case.
/// Titles are lower-cased with the invariant culture and then compared ordinally, so the
/// resulting order does not depend on the culture of the running thread.
/// </summary>
/// <param name="obj">Object to compare with.</param>
/// <returns>A negative, zero or positive value if this title sorts before, equal to or after
/// the other title. Zero if <paramref name="obj"/> is not a <c>ChmDocumentNode</c>
/// (including null).</returns>
public int CompareTo(object obj)
{
    ChmDocumentNode other = obj as ChmDocumentNode;
    if (other == null)
    {
        // Not comparable: keep the historical behaviour of reporting "equal"
        return 0;
    }

    return String.CompareOrdinal(Title.ToLowerInvariant(), other.Title.ToLowerInvariant());
}
/// <summary>
/// Recursively collects, without duplicates, the destination file names of a node and all
/// its descendants. Nodes with an empty destination file name are not added.
/// </summary>
/// <param name="lista">List where the file names are accumulated.</param>
/// <param name="nodo">Current node of the recursive walk.</param>
private void ListaArchivosGenerados(List<string> lista, ChmDocumentNode nodo)
{
    string fileName = nodo.DestinationFileName;
    bool skip = fileName.Equals("") || lista.Contains(fileName);
    if (!skip)
    {
        lista.Add(fileName);
    }

    foreach (ChmDocumentNode child in nodo.Children)
    {
        ListaArchivosGenerados(lista, child);
    }
}
/// <summary>
/// Walks the section tree and, for every section that has a splitted body, processes the
/// links of that body through the HTML-node overload of this method. The node itself is
/// handled before its children.
/// </summary>
/// <param name="node">Current node of the recursive walk</param>
private void ChangeInternalLinks(ChmDocumentNode node)
{
    HtmlNode body = node.SplittedPartBody;
    if (body != null)
    {
        // Delegates to the overload that walks the HTML tree
        ChangeInternalLinks(body);
    }

    foreach (ChmDocumentNode section in node.Children)
    {
        ChangeInternalLinks(section);
    }
}
/// <summary>
/// Recursively assigns a destination file to every section whose header is a cut header
/// (see <c>IsCutHeader</c>). Sections that are not cut headers are left untouched.
/// </summary>
/// <param name="node">Node to check recursively</param>
/// <param name="Cnt">Counter used to build unique file names. It is incremented each time a
/// file name is assigned.</param>
private void SplitFilesStructure(ChmDocumentNode node, ref int Cnt)
{
    HtmlNode header = node.HeaderTag;
    if (header != null && IsCutHeader(header))
    {
        // Consume the counter first, then build the name from the consumed value
        int fileNumber = Cnt++;
        node.StoredAt(node.NombreArchivo(fileNumber));
    }

    foreach (ChmDocumentNode child in node.Children)
    {
        SplitFilesStructure(child, ref Cnt);
    }
}
/// <summary>
/// Tells whether a node is a non empty HTML header tag whose level is lower than or equal to
/// the cut level defined by the project settings.
/// </summary>
/// <param name="node">HTML node to check</param>
/// <returns>true if the tag is a non empty cut header</returns>
private bool IsCutHeader(HtmlNode node)
{
    // The level is only evaluated for non empty headers (short-circuit)
    return IsNonEmptyHeader(node)
        && ChmDocumentNode.HeaderTagLevel(node) <= Project.CutLevel;
}
/// <summary>
/// Stores a splitted document part on the section node it belongs to.
/// The owner section is located through the first cut header found in the part; parts
/// without any cut header are stored on the initial (untitled) node.
/// If no owner section can be found, an error is logged and the part is not stored.
/// </summary>
/// <param name="newBody">Body part to store</param>
private void StoreBodyPart(HtmlNode newBody)
{
    // Main header of this content part
    HtmlNode sectionHeader = SearchFirstCutNode(newBody);

    ChmDocumentNode nodeToStore;
    if (sectionHeader != null)
    {
        // Prefer locating the section by the "name" of its anchor; an empty name makes
        // BuscarNodo fall back to comparing the header HTML.
        HtmlNode a = BuscarNodoA(sectionHeader);
        bool hasName = a != null && GetAttributeValue(a, "name") != null;
        string aName = hasName ? GetAttributeValue(a, "name") : "";
        nodeToStore = Document.RootNode.BuscarNodo(sectionHeader, aName);
    }
    else
    {
        // No section header: it is either the first part of the document or there are no
        // cut headers at all. (An alternative that routed this content to the first titled
        // section when CutLevel is 0 was considered and left disabled.)
        nodeToStore = InitialNode;
    }

    if (nodeToStore != null)
    {
        nodeToStore.SplittedPartBody = newBody;
        // Store the A name's tags of the body
        nodeToStore.BuildListOfContainedANames();
        return;
    }

    string description = sectionHeader != null ? UnescapedInnerText(sectionHeader) : "<empty>";
    UI.Log(new Exception("Error searching node " + description));
}
/// <summary>
/// Recursively builds the plain pages index of the document: the destination file name of
/// each node is appended unless it equals the last entry already in the index.
/// </summary>
/// <param name="node">Current document tree node</param>
private void CreatePagesIndex(ChmDocumentNode node)
{
    int indexedPages = Document.PagesIndex.Count;

    string previousPage = null;
    if (indexedPages > 0)
    {
        previousPage = Document.PagesIndex[indexedPages - 1];
    }

    // Only consecutive repetitions are skipped
    if (previousPage != node.DestinationFileName)
    {
        Document.PagesIndex.Add(node.DestinationFileName);
    }

    foreach (ChmDocumentNode section in node.Children)
    {
        CreatePagesIndex(section);
    }
}
/// <summary>
/// Recursively saves the content of a node and its descendants through
/// <c>ChmDocumentNode.SaveContent</c>, recording every file name it returns.
/// </summary>
/// <param name="node">Current node of the recursive walk</param>
/// <param name="savedFiles">List that receives the names of the saved content files</param>
/// <param name="directoryDstPath">Directory path where the content files will be stored</param>
/// <param name="decorator">Tool to generate and decorate the HTML content files</param>
/// <param name="indexer">Tool to index the saved content files. It can be null, if the content
/// does not need to be indexed.</param>
private void SaveContentFiles(ChmDocumentNode node, List<string> savedFiles, string directoryDstPath, HtmlPageDecorator decorator, WebIndex indexer)
{
    string savedFile = node.SaveContent(this, directoryDstPath, decorator, indexer);
    if (savedFile != null)
    {
        // A null name means nothing was saved for this node
        savedFiles.Add(savedFile);
    }

    foreach (ChmDocumentNode section in node.Children)
    {
        SaveContentFiles(section, savedFiles, directoryDstPath, decorator, indexer);
    }
}
/// <summary>
/// Recursively adds the nodes of the document tree to the document index.
/// When the project defines a maximum header index (non zero), nodes deeper than that level,
/// and all their descendants, are left out.
/// </summary>
/// <param name="node">Current document node of the recursive walk</param>
/// <param name="nodeLevel">Depth of the node in the document tree</param>
private void CreateDocumentIndex(ChmDocumentNode node, int nodeLevel)
{
    // A MaxHeaderIndex of zero means "no depth limit"
    bool depthLimited = Project.MaxHeaderIndex != 0;
    if (depthLimited && nodeLevel > Project.MaxHeaderIndex)
    {
        return;
    }

    Document.Index.Add(node);

    int childLevel = nodeLevel + 1;
    foreach (ChmDocumentNode section in node.Children)
    {
        CreateDocumentIndex(section, childLevel);
    }
}
/// <summary>
/// Searches this section and its descendants, depth first, for the first one with a given
/// title. The comparison ignores letter case and is ordinal, so it does not depend on the
/// culture of the running thread.
/// </summary>
/// <param name="sectionTitle">The section title to search</param>
/// <returns>The first section with that title. null if no section was found.</returns>
public ChmDocumentNode SearchBySectionTitle(string sectionTitle)
{
    // string.Equals is null-safe and avoids allocating lower-cased copies on every node
    if (string.Equals(this.Title, sectionTitle, StringComparison.OrdinalIgnoreCase))
    {
        return this;
    }

    foreach (ChmDocumentNode child in Children)
    {
        ChmDocumentNode result = child.SearchBySectionTitle(sectionTitle);
        if (result != null)
        {
            return result;
        }
    }

    return null;
}
/// <summary>
/// Searches depth first, starting at the given node, for the first node that has a splitted
/// body part.
/// </summary>
/// <param name="node">Node where the search starts</param>
/// <returns>The first node with content. null if none was found.</returns>
private ChmDocumentNode FirstNodeWithContentSearch(ChmDocumentNode node)
{
    if (node.SplittedPartBody != null)
    {
        return node;
    }

    ChmDocumentNode found = null;
    foreach (ChmDocumentNode section in node.Children)
    {
        found = FirstNodeWithContentSearch(section);
        if (found != null)
        {
            // Stop at the first hit
            break;
        }
    }

    return found;
}
/// <summary>
/// Recursively searches the tree for the section that holds an A tag with a given name,
/// either among its own anchor names or among its descendant anchor names.
/// </summary>
/// <param name="aName">Name of the A tag to search</param>
/// <returns>The node found for that name. null if it was not found.</returns>
public ChmDocumentNode BuscarEnlace(string aName)
{
    bool ownAnchor = this.AnchorNames.Contains(aName);
    if (ownAnchor || (this.DescendantAnchorNames != null && this.DescendantAnchorNames.Contains(aName)))
    {
        return this;
    }

    // Not in this node: try each child in order
    foreach (ChmDocumentNode child in Children)
    {
        ChmDocumentNode found = child.BuscarEnlace(aName);
        if (found != null)
        {
            return found;
        }
    }

    return null;
}
/// <summary>
/// Recursively joins empty sections (a section whose body text is just its title) with their
/// first child: the child's body is appended to the empty section and the child is redirected
/// to the file of the empty section.
/// Stops immediately if the user requested a cancellation. Errors are logged, not propagated.
/// </summary>
/// <param name="nodo">Current node of the recursive walk</param>
private void JoinEmptyNodes(ChmDocumentNode nodo)
{
    try
    {
        if (UI.CancellRequested())
        {
            return;
        }

        bool hasHeaderAndBody = nodo.HeaderTag != null
            && nodo.HeaderTag.InnerText != null
            && nodo.SplittedPartBody != null;

        if (hasHeaderAndBody)
        {
            // The section is "empty" when its whole body text is only the title text
            string titleText = UnescapedInnerText(nodo.HeaderTag).Trim();
            string bodyText = UnescapedInnerText(nodo.SplittedPartBody).Trim();

            if (titleText == bodyText && nodo.Children.Count > 0)
            {
                ChmDocumentNode firstChild = nodo.Children[0];
                if (firstChild.SplittedPartBody != null)
                {
                    // The child has a body: merge it into the empty section
                    nodo.SplittedPartBody.AppendChildren(firstChild.SplittedPartBody.ChildNodes);
                    firstChild.SplittedPartBody = null;
                    firstChild.ReplaceFile(nodo.DestinationFileName);
                }
            }
        }

        foreach (ChmDocumentNode child in nodo.Children)
        {
            JoinEmptyNodes(child);
        }
    }
    catch (Exception ex)
    {
        string message = "There was some problem when we tried to join the empty section " + nodo.Title + " with their children";
        UI.Log(new Exception(message, ex));
    }
}
/// <summary>
/// Adds a section to the section tree.
/// Level 1 headers (and the first header found) are added under the document root. Any other
/// header is added as a child of the last inserted section if it is deeper than it; otherwise
/// it is added under the closest ancestor of the last inserted section with a lower header
/// level, or under the root if there is none.
/// Empty headers are ignored.
/// </summary>
/// <param name="node">HTML header tag with the title of the section</param>
private void AddHeaderNode(HtmlNode node)
{
    // Ignore empty headers (line breaks, etc)
    if (!IsNonEmptyHeader(node))
    {
        return;
    }

    int headerLevel = ChmDocumentNode.HeaderTagLevel(node);

    bool isMainSection = LastedNodeInserted == null || headerLevel == 1;
    if (isMainSection)
    {
        // A document main section: it hangs from the root
        LastedNodeInserted = new ChmDocumentNode(Document, null, node, UI);
        Document.RootNode.AddChild(LastedNodeInserted);
        return;
    }

    // A subsection
    ChmDocumentNode section = new ChmDocumentNode(Document, LastedNodeInserted, node, UI);
    if (LastedNodeInserted.HeaderLevel < headerLevel)
    {
        // Deeper than the last inserted node: it is one of its children
        LastedNodeInserted.Children.Add(section);
    }
    else
    {
        // Same or upper level: climb until an ancestor with a lower level (or the root)
        ChmDocumentNode ancestor = LastedNodeInserted.Parent;
        while (ancestor != Document.RootNode && ancestor.HeaderLevel >= headerLevel)
        {
            ancestor = ancestor.Parent;
        }
        ancestor.AddChild(section);
    }

    LastedNodeInserted = section;
}
/// <summary>
/// Recursively walks an HTML tree and rewrites the internal links ("#anchor") so they point
/// to the splitted file that contains the anchor ("file#anchor").
/// Links whose anchor cannot be found are logged as broken and, if
/// <c>ReplaceBrokenLinks</c> is set, repaired or removed by <c>ReplaceBrokenLink</c>.
/// Exceptions are logged and not propagated.
/// </summary>
/// <param name="node">Current node of the recursive walk</param>
private void ChangeInternalLinks(HtmlNode node)
{
    try
    {
        if (string.Equals(node.Name, "a", StringComparison.OrdinalIgnoreCase))
        {
            HtmlNode link = node;
            string href = GetAttributeValue(link, "href");

            // Only hyperlinks to anchors of this same document are handled
            if (href != null && href.StartsWith("#", StringComparison.Ordinal))
            {
                // Replace it to point to the right splitted file
                string safeRef = href.Substring(1);
                ChmDocumentNode targetSection = Document.RootNode.BuscarEnlace(safeRef);
                if (targetSection != null)
                {
                    SetAttributeValue(link, "href", targetSection.DestinationFileName + "#" + safeRef);
                }
                else
                {
                    LogBrokenLinkWarning(link);
                    if (ReplaceBrokenLinks)
                    {
                        ReplaceBrokenLink(link);
                    }
                }
            }

            // NOTE(review): the original code also had a branch for bookmarks (A tags with a
            // "name" and no "href") guarded by `!anchor.Equals(anchor)`, which is always
            // false, so it never ran. It was removed as unreachable; if bookmark
            // normalization was intended, the real condition must be recovered.
        }

        // DO NOT USE AN ENUMERATOR HERE: children can be modified by ChangeInternalLinks
        for (int i = 0; i < node.ChildNodes.Count; i++)
        {
            ChangeInternalLinks(node.ChildNodes[i]);
        }
    }
    catch (Exception ex)
    {
        UI.Log(ex);
    }
}

/// <summary>
/// Logs a warning for a broken internal link, with the link text and up to 200 characters
/// of the text of its parent node as context.
/// </summary>
/// <param name="link">The broken link</param>
private void LogBrokenLinkWarning(HtmlNode link)
{
    UI.Log("WARNING: Broken link with text: '" + link.InnerText + "'", ChmLogLevel.WARNING);

    if (link.ParentNode == null)
    {
        return;
    }

    String inText = UnescapedInnerText(link.ParentNode);
    if (inText == null)
    {
        return;
    }

    if (inText.Length > 200)
    {
        inText = inText.Substring(0, 200) + "...";
    }
    UI.Log(" near of text: '" + inText + "'", ChmLogLevel.WARNING);
}
/// <summary>
/// Parses the headers structure of the document and splits its content by sections.
/// The steps run in a fixed order: build the section tree, assign destination files, split
/// the content, join empty sections, fix internal links, extract the style tags, clean up the
/// initial untitled section and build the document and pages indexes. A cancellation request
/// is checked between steps.
/// </summary>
/// <returns>The parsed document. null if the user cancelled the process.</returns>
/// <exception cref="Exception">The document has no body tag.</exception>
public ChmDocument ParseDocument()
{
    UI.Log("Searching sections", ChmLogLevel.INFO);

    // Node for the content that appears before any title
    InitialNode = new ChmDocumentNode(Document, Document.RootNode, null, UI);
    Document.RootNode.Children.Add(InitialNode);

    if (Document.Body == null)
    {
        throw new Exception("The document does not have a body tag. It's not valid HTML");
    }

    // Build the section tree from the header tags
    ParseHeaderStructure(Document.Body);
    if (UI.CancellRequested())
    {
        return null;
    }

    // By default, the whole document goes to the file of the untitled section
    foreach (ChmDocumentNode section in Document.RootNode.Children)
    {
        section.StoredAt(ChmDocument.INITIALSECTIONFILENAME);
    }

    // Then assign the file where each cut section will be stored
    int fileCounter = 1;
    foreach (ChmDocumentNode section in Document.RootNode.Children)
    {
        SplitFilesStructure(section, ref fileCounter);
    }
    if (UI.CancellRequested())
    {
        return null;
    }

    // TODO: This method content and all descendants need a rewrite
    UI.Log("Splitting file", ChmLogLevel.INFO);
    SplitContent();
    if (UI.CancellRequested())
    {
        return null;
    }

    UI.Log("Joining empty document sections", ChmLogLevel.INFO);
    JoinEmptyNodes();
    if (UI.CancellRequested())
    {
        return null;
    }

    // Point internal links to the splitted files. Optionally repair broken links.
    UI.Log("Changing internal links", ChmLogLevel.INFO);
    ChangeInternalLinks(Document.RootNode);
    if (UI.CancellRequested())
    {
        return null;
    }

    // Extract the embedded CSS styles of the document
    UI.Log("Extracting CSS STYLE header tags", ChmLogLevel.INFO);
    CheckForStyleTags();

    // Clean up the initial untitled node. There are two cases:
    if (Project.CutLevel == 0)
    {
        // A single page will be created: all the content is in the InitialNode.
        // If there is some title, move the content to the first one and drop the initial node.
        if (Document.RootNode.Children.Count >= 2)
        {
            ChmDocumentNode firstTitle = Document.RootNode.Children[1];
            Document.RootNode.Children.Remove(InitialNode);
            firstTitle.SplittedPartBody = InitialNode.SplittedPartBody;
        }
    }
    else if (InitialNode.EmptyTextContent)
    {
        // Several pages will be created and the initial node has no content: drop it
        Document.RootNode.Children.Remove(InitialNode);
    }
    else if (InitialNode.Title == ChmDocument.DEFAULTTILE && !string.IsNullOrEmpty(Project.HelpTitle))
    {
        // The initial content had no title: use the project help title
        InitialNode.CustomNodeTitle = Project.HelpTitle;
    }

    if (UI.CancellRequested())
    {
        return null;
    }

    UI.Log("Creating document index", ChmLogLevel.INFO);
    CreateDocumentIndex();
    CreatePagesIndex();

    return Document;
}
/// <summary>
/// Appends a node at the end of the children of this node and makes this node its parent.
/// </summary>
/// <param name="node">Child node to add</param>
public void AddChild(ChmDocumentNode node)
{
    // Re-parent first, then register it in the children list
    node.Parent = this;
    this.Children.Add(node);
}
/// <summary>
/// Creates a document with an empty root node: no parent, no header tag, no log and
/// header level zero.
/// TODO: Remove this constructor
/// </summary>
public ChmDocument()
{
    ChmDocumentNode noParent = null;
    HtmlNode noHeaderTag = null;
    UserInterface noLog = null;

    RootNode = new ChmDocumentNode(this, noParent, noHeaderTag, noLog);
    // The root is above every real header (H1, H2, ...)
    RootNode.HeaderLevel = 0;
}