/// <summary>
/// Builds a list tree by scanning the input HTML for list tags
/// (<c>&lt;ol&gt;</c>, <c>&lt;ul&gt;</c>, <c>&lt;li&gt;</c> and their closing tags).
/// </summary>
/// <param name="ordered"><c>true</c> for an ordered list (determines the type of the root list).</param>
/// <param name="html">The input HTML.</param>
/// <returns>The list tree.</returns>
private static HtmlList BuildListTree(bool ordered, string html) {
    // The position of each tag in this array is the value reported through 'stringFound'
    string[] tags = new string[] { "<ol>", "<ul>", "<li>", "</li>", "</ul>", "</ol>" };

    // Item text starts right after the opening "<li>" tag
    int itemTagLength = tags[2].Length;

    // IE seems to add new-lines after some elements
    // \r\n are never added by the Formatter, so it is safe to remove all of them
    html = html.Replace("\r", "");
    html = html.Replace("\n", "");

    int index = 0;
    // Position of the most recent "<li>"; -1 after a list has been closed,
    // meaning that the next "</li>" closes an item that contained a sub-list
    int lastOpenListItemIndex = 0;
    int stringFound;

    HtmlList root = new HtmlList(ordered ? HtmlListType.Ordered : HtmlListType.Unordered);
    HtmlList currentList = root;

    while((index = FirstIndexOfAny(html, index, out stringFound, tags)) != -1) {
        switch(stringFound) {
            case 0: // <ol>
            case 1: // <ul>
                // Unless at the beginning, start a new sub-list
                if(index != 0) {
                    // Sub-lists are added into the previous item
                    HtmlListElement parentItem = currentList.Elements[currentList.Elements.Count - 1];

                    // Set text of the item that hosts the sub-list
                    if(lastOpenListItemIndex != -1) {
                        int textStart = lastOpenListItemIndex + itemTagLength;
                        parentItem.Text = html.Substring(textStart, index - textStart);
                    }

                    parentItem.SubList = new HtmlList(stringFound == 0 ? HtmlListType.Ordered : HtmlListType.Unordered);
                    currentList = parentItem.SubList;
                }
                break;
            case 2: // <li>
                lastOpenListItemIndex = index;
                currentList.Elements.Add(new HtmlListElement());
                break;
            case 3: // </li>
                // If lastOpenListItemIndex != -1 (i.e. there are no sub-lists) extract item text and set it to the last list element
                // Otherwise, navigate upwards to parent list (if any)
                if(lastOpenListItemIndex != -1) {
                    int textStart = lastOpenListItemIndex + itemTagLength;
                    currentList.Elements[currentList.Elements.Count - 1].Text = html.Substring(textStart, index - textStart);
                }
                else {
                    // FindAnchestor returns null when the current list has no parent
                    // (e.g. it is the root); stay on the current list in that case
                    // rather than dereferencing null at the next tag
                    HtmlList parent = FindAnchestor(root, currentList);
                    if(parent != null) {
                        currentList = parent;
                    }
                }
                break;
            case 4: // </ul>
            case 5: // </ol>
                // Close last open list: only record that no item text is pending
                lastOpenListItemIndex = -1;
                break;
            default:
                throw new NotSupportedException();
        }

        // Resume the search just past the start of the tag that was handled
        index++;
    }

    return root;
}
/// <summary>
/// Searches a list tree, depth-first, for the list that directly contains
/// <b>current</b> as the sub-list of one of its elements.
/// </summary>
/// <param name="root">The root of the tree (or sub-tree) to search.</param>
/// <param name="current">The list whose parent is looked for.</param>
/// <returns>The list that owns <b>current</b> as a sub-list, or <c>null</c> if no
/// element in the tree has <b>current</b> as its sub-list.</returns>
private static HtmlList FindAnchestor(HtmlList root, HtmlList current) {
    foreach(HtmlListElement item in root.Elements) {
        HtmlList child = item.SubList;

        // Direct hit: this level is the parent
        if(child == current) {
            return root;
        }

        // Nothing to descend into
        if(child == null) {
            continue;
        }

        // Look deeper; keep scanning siblings when this branch has no match
        HtmlList match = FindAnchestor(child, current);
        if(match != null) {
            return match;
        }
    }

    return null;
}
/// <summary>
/// Builds the WikiMarkup for a list, recursing into sub-lists.
/// </summary>
/// <param name="list">The root list.</param>
/// <param name="previousBullets">The bullet prefix inherited from the upper levels;
/// the marker of <b>list</b> itself (<c>#</c> for ordered, <c>*</c> otherwise) is appended to it.</param>
/// <returns>The WikiMarkup, one item per line with lines separated by CR+LF,
/// or an empty string when nothing was produced.</returns>
private static string BuildListWikiMarkup(HtmlList list, string previousBullets) {
    // Every nesting level contributes one marker character to the prefix
    string bullets = previousBullets + (list.Type == HtmlListType.Ordered ? "#" : "*");

    StringBuilder sb = new StringBuilder(500);

    foreach(HtmlListElement elem in list.Elements) {
        sb.Append(bullets);
        sb.Append(" ");
        sb.Append(elem.Text);
        sb.Append("\r\n");

        // Items of a sub-list follow their parent item, using the extended prefix
        if(elem.SubList != null) {
            sb.Append(BuildListWikiMarkup(elem.SubList, bullets));
        }
    }

    // Remove empty lines in the middle of the list
    string raw = sb.ToString().Replace("\r", "");
    string[] lines = raw.Split(new char[] { '\n' }, StringSplitOptions.RemoveEmptyEntries);

    // All '\r' have been stripped from raw, so a trailing line break can only be '\n';
    // check the last character directly (ordinal, culture-independent)
    bool endsWithLineBreak = raw.Length > 0 && raw[raw.Length - 1] == '\n';

    return string.Join("\r\n", lines) + (endsWithLineBreak ? "\r\n" : "");
}