/// <summary>
 /// Recursively searches the section tree, starting at this node, for the node that owns
 /// an HTML header tag.
 /// </summary>
 /// <remarks>
 /// This node is only tested when both its own header tag and <paramref name="element"/> are
 /// not null. If <paramref name="aNameElement"/> is not empty, this node matches only when that
 /// name is one of its anchor names. Otherwise it matches when the outer HTML of its header tag
 /// equals the outer HTML of <paramref name="element"/>, once the "about:blank" and "about:"
 /// texts have been removed from both. Children are searched in order when this node does not match.
 /// </remarks>
 /// <param name="element">The HTML node to search for</param>
 /// <param name="aNameElement">Anchor name of the header to search for. Null or empty to
 /// compare by HTML content instead of by anchor name.</param>
 /// <returns>The tree node that owns the header. Null if it was not found.</returns>
 public ChmDocumentNode BuscarNodo(HtmlNode element, string aNameElement)
 {
     // Check if it is this same node:
     if (this.HeaderTag != null && element != null)
     {
         // A null name is handled as "no name" instead of failing with a NullReferenceException
         if (!string.IsNullOrEmpty(aNameElement))
         {
             if (this.AnchorNames.Contains(aNameElement))
             {
                 return this;
             }
         }
         else
         {
             // Remove the "about:blank" / "about:" texts to avoid the about:blank error on image "src" attributes:
             string ownHtml       = HeaderTag.OuterHtml.Replace("about:blank", "").Replace("about:", "");
             string candidateHtml = element.OuterHtml.Replace("about:blank", "").Replace("about:", "");
             if (ownHtml.Equals(candidateHtml))
             {
                 return this;
             }
         }
     }

     // Otherwise, search on the children:
     foreach (ChmDocumentNode child in Children)
     {
         ChmDocumentNode found = child.BuscarNodo(element, aNameElement);
         if (found != null)
         {
             return found;
         }
     }
     return null;
 }
 /// <summary>
 /// Repairs or removes a broken internal link.
 /// The trimmed text of the link is searched with Document.SearchBySectionTitle. If a section
 /// is returned, the "href" of the link is changed to point to that section. If none is returned,
 /// the link tag is removed from its parent and its child nodes are inserted where the link was.
 /// Any exception is logged and not rethrown.
 /// </summary>
 /// <param name="link">The broken link</param>
 private void ReplaceBrokenLink(HtmlNode link)
 {
     try
     {
         // Search a section title with the same text as the link:
         string          title  = UnescapedInnerText(link).Trim();
         ChmDocumentNode target = Document.SearchBySectionTitle(title);

         if (target != null)
         {
             // Point the broken link to the section found
             SetAttributeValue(link, "href", target.Href);
             return;
         }

         // No candidate title was found: keep the link content, drop the link itself
         foreach (HtmlNode content in link.ChildNodes)
         {
             link.ParentNode.InsertBefore(content, link);
         }
         link.ParentNode.RemoveChild(link);
     }
     catch (Exception ex)
     {
         UI.Log("Error reparining a broken link", ChmLogLevel.ERROR);
         UI.Log(ex);
     }
 }
        /// <summary>
        /// Tree section node constructor.
        /// Collects the anchor names found on the direct children of the header tag. If the header
        /// has no named anchor, an artificial one ("node" + a counter) is created and inserted as the
        /// first child of the header tag.
        /// </summary>
        /// <param name="document">The document owner of this node. It is only used to create the artificial anchor.</param>
        /// <param name="parent">Parent section of the section to create. null if the node to create is the root section.</param>
        /// <param name="node">HTML header tag for this section. It can be null.</param>
        /// <param name="ui">Application log. It can be null. It is not used by this constructor.</param>
        public ChmDocumentNode(ChmDocument document, ChmDocumentNode parent, HtmlNode node, UserInterface ui)
        {
            this.Parent = parent;
            this.HeaderTag = node;
            Children = new List<ChmDocumentNode>();
            HeaderLevel = HeaderTagLevel(node);
            DestinationFileName = "";
            AnchorNames = new List<string>();

            if (node == null)
            {
                // No header tag: there are no anchors to collect or create
                return;
            }

            // Collect the non blank anchor names of the header tag children
            foreach (HtmlNode headerChild in node.ChildNodes)
            {
                string anchorName = ChmDocumentParser.GetAnchorName(headerChild);
                if (anchorName == null || anchorName.Trim().Length == 0)
                {
                    continue;
                }
                AnchorNames.Add(anchorName);
            }

            if (AnchorNames.Count > 0)
            {
                return;
            }

            // The header has no name at all: give it an artificial one
            int      anchorNumber     = LatestCustomAnchorNumber++;
            string   artificialName   = "node" + anchorNumber.ToString();
            HtmlNode artificialAnchor = document.HtmlDoc.CreateElement("a");

            // XHTML/HTML5 uses the "id" attribute, and HTML4 "name".
            // There is no safe way to check if it is XHTML/HTML5 or a lower version, so put both:
            artificialAnchor.SetAttributeValue("name", artificialName);
            artificialAnchor.SetAttributeValue("id", artificialName);
            node.ChildNodes.Insert(0, artificialAnchor);
            AnchorNames.Add(artificialName);
        }
        /// <summary>
        /// Compares this node with another one by title: both titles are converted to lower case
        /// and then compared ordinally.
        /// </summary>
        /// <param name="obj">Object to compare with this node</param>
        /// <returns>Zero if <paramref name="obj"/> is not a ChmDocumentNode. Otherwise, the result
        /// of the ordinal comparison of the lower case titles.</returns>
        public int CompareTo(object obj)
        {
            if (obj is ChmDocumentNode)
            {
                ChmDocumentNode other = (ChmDocumentNode)obj;

                string ownTitle   = Title.ToLower();
                string otherTitle = other.Title.ToLower();
                return String.CompareOrdinal(ownTitle, otherTitle);
            }

            // Objects of other types (and null) are handled as equal
            return 0;
        }
Beispiel #5
0
 /// <summary>
 /// Recursively collects the destination file names of a node and all of its descendants.
 /// Empty file names and names already present in the list are not added.
 /// </summary>
 /// <param name="lista">List that receives the file names</param>
 /// <param name="nodo">Current node on the recursive search</param>
 private void ListaArchivosGenerados(List <string> lista, ChmDocumentNode nodo)
 {
     bool skipFile = nodo.DestinationFileName.Equals("") || lista.Contains(nodo.DestinationFileName);
     if (!skipFile)
     {
         lista.Add(nodo.DestinationFileName);
     }

     foreach (ChmDocumentNode child in nodo.Children)
     {
         ListaArchivosGenerados(lista, child);
     }
 }
        /// <summary>
        /// Recursive walk over the section tree: for each section that has a split body, the
        /// internal links of that body are processed by the HTML node overload of this method.
        /// </summary>
        /// <param name="node">Current section node on the recursive search</param>
        private void ChangeInternalLinks(ChmDocumentNode node)
        {
            bool hasBody = node.SplittedPartBody != null;
            if (hasBody)
            {
                // Process the HTML content stored on this section
                ChangeInternalLinks(node.SplittedPartBody);
            }

            // Then go down to the subsections
            foreach (ChmDocumentNode subsection in node.Children)
            {
                ChangeInternalLinks(subsection);
            }
        }
        /// <summary>
        /// Recursively assigns a destination file to each node whose header tag is a cut header
        /// (see IsCutHeader). The file name is obtained from the node with the current counter
        /// value, and the counter is incremented for each file assigned.
        /// </summary>
        /// <param name="node">Node to check recursively</param>
        /// <param name="Cnt">Counter used to assign unique file names</param>
        private void SplitFilesStructure(ChmDocumentNode node, ref int Cnt)
        {
            bool startsNewFile = node.HeaderTag != null && IsCutHeader(node.HeaderTag);
            if (startsNewFile)
            {
                int fileNumber = Cnt++;
                node.StoredAt(node.NombreArchivo(fileNumber));
            }

            foreach (ChmDocumentNode child in node.Children)
            {
                SplitFilesStructure(child, ref Cnt);
            }
        }
        /// <summary>
        /// Checks if an HTML node is a non empty header (see IsNonEmptyHeader) whose header level
        /// is lower than or equal to the cut level defined by the project settings.
        /// </summary>
        /// <param name="node">HTML node to check</param>
        /// <returns>true if the tag is a non empty cut header</returns>
        private bool IsCutHeader(HtmlNode node)
        {
            // The header level is only read for non empty headers
            return IsNonEmptyHeader(node) &&
                   ChmDocumentNode.HeaderTagLevel(node) <= Project.CutLevel;
        }
        /// <summary>
        /// Stores a split document part on the section tree node that owns the first cut header
        /// of that part. A part without any cut header is stored on the initial node.
        /// If the owner node is not found, an error is logged and the part is not stored.
        /// </summary>
        /// <param name="newBody">Body part to store</param>
        private void StoreBodyPart(HtmlNode newBody)
        {
            // Get the main header of this content part:
            HtmlNode        cutHeader = SearchFirstCutNode(newBody);
            ChmDocumentNode owner;

            if (cutHeader != null)
            {
                // Search the section by the anchor name of the header, if it has one
                string   anchorName = "";
                HtmlNode anchorTag  = BuscarNodoA(cutHeader);
                if (anchorTag != null && GetAttributeValue(anchorTag, "name") != null)
                {
                    anchorName = GetAttributeValue(anchorTag, "name");
                }
                owner = Document.RootNode.BuscarNodo(cutHeader, anchorName);
            }
            else
            {
                // No section was found: it can be the first section of the document, or the
                // document may have no cut headers. The content goes to the initial node.
                owner = InitialNode;
            }

            if (owner != null)
            {
                owner.SplittedPartBody = newBody;
                owner.BuildListOfContainedANames();  // Store the A name's tags of the body.
                return;
            }

            // The owner section was not found: report it
            string headerText = cutHeader != null ? UnescapedInnerText(cutHeader) : "<empty>";
            UI.Log(new Exception("Error searching node " + headerText));
        }
        /// <summary>
        /// Creates the plain HTML pages index for the document: the destination file name of
        /// each node is appended to Document.PagesIndex, parent before children, unless it is
        /// equal to the last name already on the index.
        /// </summary>
        /// <param name="node">Current document tree node</param>
        private void CreatePagesIndex(ChmDocumentNode node)
        {
            // Last page added to the index, or null if the index is still empty
            string previousPage = null;
            if (Document.PagesIndex.Count > 0)
            {
                previousPage = Document.PagesIndex[Document.PagesIndex.Count - 1];
            }

            if (previousPage != node.DestinationFileName)
            {
                Document.PagesIndex.Add(node.DestinationFileName);
            }

            foreach (ChmDocumentNode subsection in node.Children)
            {
                CreatePagesIndex(subsection);
            }
        }
Beispiel #11
0
        /// <summary>
        /// Recursively saves the content of a node and of all its descendants by calling
        /// SaveContent on each one. Each non null file name returned is added to the saved files list.
        /// </summary>
        /// <param name="node">Current node on the recursive search</param>
        /// <param name="savedFiles">List that receives the content file names saved</param>
        /// <param name="directoryDstPath">Directory path where the content files will be stored</param>
        /// <param name="decorator">Tool to generate and decorate the HTML content files</param>
        /// <param name="indexer">Tool to index the saved content files. It can be null, if the content
        /// does not need to be indexed.</param>
        private void SaveContentFiles(ChmDocumentNode node, List <string> savedFiles, string directoryDstPath, HtmlPageDecorator decorator, WebIndex indexer)
        {
            // Save this node first: a null result means there is no file name to register
            string writtenFile = node.SaveContent(this, directoryDstPath, decorator, indexer);
            if (writtenFile != null)
            {
                savedFiles.Add(writtenFile);
            }

            // Then save the subsections
            foreach (ChmDocumentNode subsection in node.Children)
            {
                SaveContentFiles(subsection, savedFiles, directoryDstPath, decorator, indexer);
            }
        }
        /// <summary>
        /// Fills the plain index of the document: adds the node and, recursively, its descendants
        /// to Document.Index. If Project.MaxHeaderIndex is not zero, nodes deeper than that level
        /// (and their descendants) are not added.
        /// </summary>
        /// <param name="node">Current document node on the recursive search</param>
        /// <param name="nodeLevel">The node depth on the document tree</param>
        private void CreateDocumentIndex(ChmDocumentNode node, int nodeLevel)
        {
            // A zero MaxHeaderIndex means there is no depth limit
            bool tooDeep = Project.MaxHeaderIndex != 0 && nodeLevel > Project.MaxHeaderIndex;
            if (!tooDeep)
            {
                // Add to the content index
                Document.Index.Add(node);

                int childLevel = nodeLevel + 1;
                foreach (ChmDocumentNode subsection in node.Children)
                {
                    CreateDocumentIndex(subsection, childLevel);
                }
            }
        }
 /// <summary>
 /// Searches this section and then, depth-first, its descendants for the first section with
 /// a given title. The comparison ignores letter case and is ordinal, so the result does not
 /// depend on the current culture.
 /// </summary>
 /// <param name="sectionTitle">The section title to search. A null title matches no section.</param>
 /// <returns>The first section found with that title. null if no section was
 /// found.</returns>
 public ChmDocumentNode SearchBySectionTitle(string sectionTitle)
 {
     if (sectionTitle == null)
     {
         // Nothing can match: avoid walking the whole tree
         return null;
     }

     // Case-insensitive comparison without creating lower case copies of both strings on each node
     if (string.Equals(this.Title, sectionTitle, System.StringComparison.OrdinalIgnoreCase))
     {
         return this;
     }

     foreach (ChmDocumentNode child in Children)
     {
         ChmDocumentNode result = child.SearchBySectionTitle(sectionTitle);
         if (result != null)
         {
             return result;
         }
     }
     return null;
 }
Beispiel #14
0
        /// <summary>
        /// Makes a recursive, parent first, search to get the first node that has a split body.
        /// </summary>
        /// <param name="node">Current node on the recursive search</param>
        /// <returns>The first node found with content. null if there is none.</returns>
        private ChmDocumentNode FirstNodeWithContentSearch(ChmDocumentNode node)
        {
            if (node.SplittedPartBody != null)
            {
                return node;
            }

            // Keep the first hit found on the children, in order
            ChmDocumentNode found = null;
            foreach (ChmDocumentNode subsection in node.Children)
            {
                found = FirstNodeWithContentSearch(subsection);
                if (found != null)
                {
                    break;
                }
            }
            return found;
        }
 /// <summary>
 /// Recursively searches the tree for the section node that contains an A tag with a given name.
 /// A node matches if the name is in its own anchor names or in its list of descendant anchor
 /// names (when that list is not null).
 /// </summary>
 /// <param name="aName">Name of the A tag to search for</param>
 /// <returns>The node found with this name. null if it was not found.</returns>
 public ChmDocumentNode BuscarEnlace(string aName)
 {
     bool ownsAnchor = this.AnchorNames.Contains(aName) ||
                       (this.DescendantAnchorNames != null && this.DescendantAnchorNames.Contains(aName));
     if (ownsAnchor)
     {
         return this;
     }

     // Not on this node: try with the children, in order
     foreach (ChmDocumentNode child in Children)
     {
         ChmDocumentNode found = child.BuscarEnlace(aName);
         if (found != null)
         {
             return found;
         }
     }
     return null;
 }
        /// <summary>
        /// Makes a recursive search on the document to join empty nodes (nodes whose body text is
        /// just their title) with their first child, when that child has a body.
        /// The child body nodes are appended to the parent body, the child body is set to null and
        /// the child is moved to the parent destination file.
        /// Nothing is done if a cancellation was requested. Exceptions are logged and not rethrown.
        /// </summary>
        /// <param name="nodo">Current node on the recursive search to check</param>
        private void JoinEmptyNodes(ChmDocumentNode nodo)
        {
            try
            {
                if (UI.CancellRequested())
                {
                    return;
                }

                bool hasTitleAndBody = nodo.HeaderTag != null &&
                                       nodo.HeaderTag.InnerText != null &&
                                       nodo.SplittedPartBody != null;
                if (hasTitleAndBody)
                {
                    // The node is empty if its whole body text is equal to its title text
                    string titleText = UnescapedInnerText(nodo.HeaderTag).Trim();
                    string bodyText  = UnescapedInnerText(nodo.SplittedPartBody).Trim();

                    if (titleText == bodyText && nodo.Children.Count > 0)
                    {
                        // Empty node with children
                        ChmDocumentNode firstChild = nodo.Children[0];
                        if (firstChild.SplittedPartBody != null)
                        {
                            // The child has a body: join them.
                            nodo.SplittedPartBody.AppendChildren(firstChild.SplittedPartBody.ChildNodes);
                            firstChild.SplittedPartBody = null;
                            firstChild.ReplaceFile(nodo.DestinationFileName);
                        }
                    }
                }

                foreach (ChmDocumentNode child in nodo.Children)
                {
                    JoinEmptyNodes(child);
                }
            }
            catch (Exception ex)
            {
                string message = "There was some problem when we tried to join the empty section " +
                                 nodo.Title + " with their children";
                UI.Log(new Exception(message, ex));
            }
        }
        /// <summary>
        /// Adds a section to the section tree.
        /// A level 1 header, or the first header added, becomes a child of the document root node.
        /// Any other header is added as child of the last inserted section if that section has a lower
        /// header level; otherwise it is added to the closest ancestor of the last inserted section
        /// with a header level lower than the new one (or to the root node).
        /// Empty headers are ignored.
        /// </summary>
        /// <param name="node">HTML header tag with the title of the section</param>
        private void AddHeaderNode(HtmlNode node)
        {
            // Ignore empty headers (line breaks, etc)
            if (!IsNonEmptyHeader(node))
            {
                return;
            }

            int level = ChmDocumentNode.HeaderTagLevel(node);

            if (LastedNodeInserted == null || level == 1)
            {
                // Add a document main section
                LastedNodeInserted = new ChmDocumentNode(Document, null, node, UI);
                Document.RootNode.AddChild(LastedNodeInserted);
                return;
            }

            // Add a subsection
            ChmDocumentNode section = new ChmDocumentNode(Document, LastedNodeInserted, node, UI);
            if (LastedNodeInserted.HeaderLevel < level)
            {
                // It is a child section of the last inserted node
                LastedNodeInserted.Children.Add(section);
            }
            else
            {
                // Go up from the last inserted node until a section with a lower level, or the root, is found
                ChmDocumentNode ancestor = LastedNodeInserted.Parent;
                while (ancestor != Document.RootNode && ancestor.HeaderLevel >= level)
                {
                    ancestor = ancestor.Parent;
                }
                ancestor.AddChild(section);
            }
            LastedNodeInserted = section;
        }
        /// <summary>
        /// Makes a recursive search on an HTML subtree to change the internal document links
        /// (A tags with an "href" that starts with "#") to point to the split file of the section that
        /// contains the destination anchor.
        /// If the destination anchor is not found, the broken link is logged as a warning and,
        /// if ReplaceBrokenLinks is set, it is repaired or removed with ReplaceBrokenLink.
        /// Any exception is logged and not rethrown.
        /// </summary>
        /// <param name="node">Current HTML node on the recursive search</param>
        private void ChangeInternalLinks(HtmlNode node)
        {
            try
            {
                if (string.Equals(node.Name, "a", System.StringComparison.OrdinalIgnoreCase))
                {
                    string href = GetAttributeValue(node, "href");

                    // Ordinal check: "#" is markup, it must not be matched with culture rules
                    if (href != null && href.StartsWith("#", System.StringComparison.Ordinal))
                    {
                        // An internal link.
                        // Replace it to point to the right split file.
                        string          safeRef  = href.Substring(1);
                        ChmDocumentNode treeNode = Document.RootNode.BuscarEnlace(safeRef);
                        if (treeNode != null)
                        {
                            SetAttributeValue(node, "href", treeNode.DestinationFileName + "#" + safeRef);
                        }
                        else
                        {
                            // Broken link.
                            UI.Log("WARNING: Broken link with text: '" + node.InnerText + "'", ChmLogLevel.WARNING);
                            if (node.ParentNode != null)
                            {
                                // Log some context (at most 200 characters) to help locating the link
                                string nearText = UnescapedInnerText(node.ParentNode);
                                if (nearText != null)
                                {
                                    if (nearText.Length > 200)
                                    {
                                        nearText = nearText.Substring(0, 200) + "...";
                                    }
                                    UI.Log(" near of text: '" + nearText + "'", ChmLogLevel.WARNING);
                                }
                            }
                            if (ReplaceBrokenLinks)
                            {
                                ReplaceBrokenLink(node);
                            }
                        }
                    }

                    // HTML "bookmarks" (A tags with a "name" and without "href") are kept unchanged.
                    // The former branch for them compared the anchor name with itself, so it never ran.
                }

                // DO NOT USE AN ENUMERATOR HERE: the children can be modified by ChangeInternalLinks
                for (int i = 0; i < node.ChildNodes.Count; i++)
                {
                    ChangeInternalLinks(node.ChildNodes[i]);
                }
            }
            catch (Exception ex)
            {
                UI.Log(ex);
            }
        }
        /// <summary>
        /// Parses the headers structure of the document and splits its content by sections.
        /// The steps are run in this order: build the section tree from the document body, assign
        /// the destination file names, split the content, join the empty sections, change the internal
        /// links, extract the style tags, remove or rename the initial (untitled) node, and create the
        /// document and pages indexes. A cancellation request is checked between most of the steps.
        /// </summary>
        /// <returns>The parsed document. null if a cancellation was requested while parsing.</returns>
        /// <exception cref="Exception">If the document does not have a body tag</exception>
        public ChmDocument ParseDocument()
        {
            UI.Log("Searching sections", ChmLogLevel.INFO);

            // Build a node for the content without an initial title.
            // It is added as the first child of the root node, before the body is checked.
            InitialNode = new ChmDocumentNode(Document, Document.RootNode, null, UI);
            Document.RootNode.Children.Add(InitialNode);

            if (Document.Body == null)
            {
                throw new Exception("The document does not have a body tag. It's not valid HTML");
            }

            // Parse recursively the document headers structure by header sections
            ParseHeaderStructure(Document.Body);

            if (UI.CancellRequested())
            {
                return(null);
            }

            // By default, every top level node is assigned to the initial section file
            foreach (ChmDocumentNode child in Document.RootNode.Children)
            {
                child.StoredAt(ChmDocument.INITIALSECTIONFILENAME);
            }

            // Now assign the file names where each section will be stored.
            // The counter is shared by all the top level nodes, and it starts at 1.
            int cnt = 1;

            foreach (ChmDocumentNode child in Document.RootNode.Children)
            {
                SplitFilesStructure(child, ref cnt);
            }

            if (UI.CancellRequested())
            {
                return(null);
            }

            // Split the document content:
            UI.Log("Splitting file", ChmLogLevel.INFO);
            // TODO: SplitContent and all the methods it uses need a rewrite
            SplitContent();

            if (UI.CancellRequested())
            {
                return(null);
            }

            // Join empty nodes:
            UI.Log("Joining empty document sections", ChmLogLevel.INFO);
            JoinEmptyNodes();

            if (UI.CancellRequested())
            {
                return(null);
            }

            // Change internal document links to point to the split files. Optionally repair broken links.
            UI.Log("Changing internal links", ChmLogLevel.INFO);
            ChangeInternalLinks(Document.RootNode);

            if (UI.CancellRequested())
            {
                return(null);
            }

            // Extract the embedded CSS styles of the document:
            UI.Log("Extracting CSS STYLE header tags", ChmLogLevel.INFO);
            CheckForStyleTags();

            // Decide what to do with the initial node (the content without title).
            // There are two cases:
            if (Project.CutLevel == 0)
            {
                // A single page will be created: all the content is in the InitialNode.
                // If there is some title (the root has a second child), move the content to that
                // first title and remove the initial node
                if (Document.RootNode.Children.Count >= 2)
                {
                    ChmDocumentNode firstTitle = Document.RootNode.Children[1];
                    Document.RootNode.Children.Remove(InitialNode);
                    firstTitle.SplittedPartBody = InitialNode.SplittedPartBody;
                }
            }
            else
            {
                // More than one page will be created. If the initial node has no content, remove it.
                if (InitialNode.EmptyTextContent)
                {
                    Document.RootNode.Children.Remove(InitialNode);
                }
                else
                {
                    // If the initial content still has the default title, and the project defines a
                    // help title, use the project help title as the node title
                    if (InitialNode.Title == ChmDocument.DEFAULTTILE &&
                        !string.IsNullOrEmpty(Project.HelpTitle))
                    {
                        InitialNode.CustomNodeTitle = Project.HelpTitle;
                    }
                }
            }

            if (UI.CancellRequested())
            {
                return(null);
            }

            // Create the document and pages index
            UI.Log("Creating document index", ChmLogLevel.INFO);
            CreateDocumentIndex();
            CreatePagesIndex();

            return(Document);
        }
 /// <summary>
 /// Appends a child to this node, and sets this node as the parent of the child.
 /// </summary>
 /// <param name="node">Child node to add. It can not be null.</param>
 /// <exception cref="System.ArgumentNullException">If <paramref name="node"/> is null</exception>
 public void AddChild(ChmDocumentNode node)
 {
     // Fail with a clear error instead of a NullReferenceException on the assignment below
     if (node == null)
     {
         throw new System.ArgumentNullException("node");
     }

     node.Parent = this;
     Children.Add(node);
 }
Beispiel #21
0
 /// <summary>
 /// Constructor. Creates the root node of the section tree, with header level zero.
 /// TODO: Remove this constructor
 /// </summary>
 public ChmDocument()
 {
     // The root node has no parent, no header tag and no application log
     ChmDocumentNode root = new ChmDocumentNode(this, null, null, null);
     root.HeaderLevel = 0;
     RootNode = root;
 }