/// <summary>
/// Renders a parsed wiki table as an HTML <c>table</c> element.
/// </summary>
/// <remarks>
/// Rows without cells are skipped. Rows and cells whose type is still undefined are
/// defaulted to <c>tbody</c> / <c>td</c>; this updates the passed table in place.
/// Section wrappers (<c>thead</c>, <c>tbody</c>, <c>tfoot</c>) are only emitted when the
/// table mixes more than one row type.
/// </remarks>
/// <param name="table">The table to render.</param>
/// <returns>
/// The HTML markup, or an empty string when the table has no cells and no caption.
/// </returns>
private static string GenerateTableHtml(WikiTable table)
{
    bool hasCaption = !string.IsNullOrEmpty(table.Caption);

    // First pass: collect the rows that actually have cells, give every one of them a
    // concrete type, and find out whether the table is "simple" (a single row type).
    // Every row is normalized here (no early exit) so that no row can reach the output
    // stage with an undefined type.
    List<WikiRow> rows = new List<WikiRow>();
    bool isSimple = true;
    for (int i = 0; i < table.Rows.Count; ++i)
    {
        WikiRow row = table.Rows[i];
        if (row.Cells.Count == 0)
            continue;

        if (row.Type == WikiRowType.undefined)
            row.Type = WikiRowType.tbody;

        if (rows.Count > 0 && rows[0].Type != row.Type)
            isSimple = false;

        rows.Add(row);
    }

    // An empty table without a caption produces no markup at all. This has to be decided
    // from the complete row list; checking inside the row loop can never detect it.
    if (rows.Count == 0 && !hasCaption)
        return string.Empty;

    StringBuilder html = new StringBuilder();
    html.Append(Tools.Repeat("<dl><dd>", table.Indent));
    html.Append("<table");
    if (table.Attributes.Count > 0)
        html.Append(table.Attributes.ToString());
    html.Append(">");

    if (hasCaption)
        html.Append(string.Concat(Environment.NewLine, "<caption>", table.Caption, Environment.NewLine, "</caption>"));

    // A caption-only table still needs one row to be a valid table.
    if (rows.Count == 0)
        html.Append(Environment.NewLine + "<tr><td></td></tr>");

    for (int i = 0; i < rows.Count; ++i)
    {
        WikiRow row = rows[i];

        // Sections are opened and closed by comparing with the neighbouring non-empty
        // rows, so every opened section is closed exactly once.
        bool opensSection = !isSimple && (i == 0 || rows[i - 1].Type != row.Type);
        bool closesSection = !isSimple && (i + 1 == rows.Count || rows[i + 1].Type != row.Type);

        if (opensSection)
            html.Append(string.Concat(Environment.NewLine, "<", row.Type, ">"));

        html.Append(Environment.NewLine);
        html.Append("<tr");
        if (row.Attributes.Count > 0)
            html.Append(row.Attributes.ToString());
        html.Append(">");

        for (int j = 0; j < row.Cells.Count; ++j)
        {
            WikiCell cell = row.Cells[j];
            if (cell.Type == WikiCellType.undefined)
                cell.Type = WikiCellType.td;

            html.Append(Environment.NewLine);
            html.Append("<");
            html.Append(cell.Type);
            if (cell.Attributes.Count > 0)
                html.Append(cell.Attributes.ToString());
            html.Append(">");

            // Non-empty content is followed by a line break before the closing tag.
            if (!string.IsNullOrEmpty(cell.Content))
            {
                html.Append(cell.Content);
                html.Append(Environment.NewLine);
            }

            html.Append(string.Concat("</", cell.Type, ">"));
        }

        html.Append(Environment.NewLine);
        html.Append("</tr>");

        if (closesSection)
            html.Append(string.Concat("</", row.Type, ">"));
    }

    html.Append(Environment.NewLine);
    html.Append("</table>");
    html.Append(Tools.Repeat("</dd></dl>", table.Indent));
    return html.ToString();
}
/// <summary>
/// Replaces wiki table markup (<c>{|</c> ... <c>|}</c>) in <paramref name="text"/> with HTML tables.
/// </summary>
/// <remarks>
/// The text is processed line by line (split on <see cref="Environment.NewLine"/>).
/// Open tables are kept on a stack so that tables nested inside cells are rendered into
/// the content of the enclosing cell. Tables still open at the end of the text are
/// rendered and appended to the output.
/// </remarks>
/// <param name="text">The wiki text to convert; may be null or empty.</param>
/// <returns>The text with tables converted to HTML; an empty string for null or empty input.</returns>
private static string ParseTables(string text)
{
    if (string.IsNullOrEmpty(text))
        return string.Empty;

    StringBuilder result = new StringBuilder();

    // True while free text (blank lines, plain lines, finished tables) goes to the
    // result; false while it belongs to a cell or caption of an open table.
    bool appendToResult = true;

    List<WikiTable> tables = new List<WikiTable>(); // Stack of open tables, innermost last.
    WikiTable currentTable = null;
    WikiRow currentRow = null;
    WikiCell currentCell = null;
    bool isInCell = false;
    bool isInCaption = false;
    Regex tableStart = new Regex(@"^(:*)\s*\{\|(.*)$"); // ":: {|" where : is the indent character

    string[] lines = text.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
    foreach (string line in lines)
    {
        string trimmedLine = line.Trim();
        if (trimmedLine.Length == 0)
        {
            // Blank lines are only kept while output goes to the result.
            if (appendToResult)
                result.AppendLine(line);
            continue;
        }

        // Prefix - the first character, plus the second one when it is '}', '+' or '-'.
        string prefix = trimmedLine[0].ToString();
        if (trimmedLine.Length > 1
            && (trimmedLine[1] == '}' || trimmedLine[1] == '+' || trimmedLine[1] == '-'))
        {
            prefix += trimmedLine[1].ToString();
        }

        MatchCollection tableStartMatches = tableStart.Matches(trimmedLine);
        if (tableStartMatches.Count > 0) // OPEN TABLE
        {
            currentTable = new WikiTable();
            currentTable.Indent = tableStartMatches[0].Groups[1].Length;
            foreach (WikiAttribute attribute in WikiAttributes.ParseAttributes(tableStartMatches[0].Groups[2].Value))
                currentTable.Attributes.Add(attribute);

            currentRow = new WikiRow();
            currentTable.Rows.Add(currentRow);
            tables.Add(currentTable);
        }
        else if (tables.Count == 0) // OUTSIDE
        {
            result.AppendLine(line);
        }
        else if (prefix == "|}") // CLOSE TABLE
        {
            // Whatever follows the |} on the same line is emitted after the table.
            string trailingText = trimmedLine.Remove(0, 2);

            // A thead at the end becomes a tfoot, unless there is only one row.
            // Do this before deleting an empty last row to allow headers at the bottom of tables.
            WikiRow lastRow = currentTable.LastRow;
            if (lastRow != null && lastRow.Type == WikiRowType.thead && currentTable.Rows.Count > 1)
            {
                lastRow.Type = WikiRowType.tfoot;
                for (int i = 0; i < lastRow.Cells.Count; ++i)
                    lastRow.Cells[i].Type = WikiCellType.th;
            }

            // Delete an empty last row so the final section of the table is closed properly.
            // NOTE(review): relies on Rows being a list-like collection with RemoveAt.
            int lastRowIndex = currentTable.Rows.Count - 1;
            if (lastRowIndex >= 0 && currentTable.Rows[lastRowIndex].Cells.Count == 0)
                currentTable.Rows.RemoveAt(lastRowIndex);

            // Pop the table that is being closed.
            int lastTableIndex = tables.Count - 1;
            WikiTable closedTable = tables[lastTableIndex];
            tables.RemoveAt(lastTableIndex);

            // Add a line-ending before the table, but only if there isn't one already.
            string tableHtml = string.Empty;
            if (result.Length > 0 && !EndsWithNewLine(result))
                tableHtml += Environment.NewLine;
            tableHtml += string.Concat(GenerateTableHtml(closedTable), trailingText, Environment.NewLine);

            isInCaption = false;
            if (tables.Count > 0)
            {
                // Nested table: continue in the enclosing table and put the markup
                // into the cell that contained the nested table.
                currentTable = tables[tables.Count - 1];
                currentRow = currentTable.LastRow;
                currentCell = currentRow != null ? currentRow.LastCell : null;
            }
            else
            {
                currentCell = null;
            }

            if (currentCell != null)
            {
                isInCell = true;
                appendToResult = false;
                currentCell.Content += tableHtml;
            }
            else
            {
                // Top-level table, or a nested table that was opened outside of any
                // cell: the markup goes straight to the result instead of being lost.
                isInCell = false;
                appendToResult = true;
                result.Append(tableHtml);
            }
        }
        else if (prefix == "|-") // ROW
        {
            // Start a new row element but only when we haven't started one already.
            if (currentRow != null && currentRow.Cells.Count != 0)
            {
                currentRow = new WikiRow();
                currentTable.Rows.Add(currentRow);
            }

            // Get the attributes, there's nothing else useful in the line now.
            foreach (WikiAttribute attribute in WikiAttributes.ParseAttributes(trimmedLine.Substring(2)))
                currentRow.Attributes.Add(attribute);
        }
        else if (prefix == "|+") // CAPTION
        {
            // A table caption, but only proceed if there isn't one already.
            if (string.IsNullOrEmpty(currentTable.Caption))
            {
                currentTable.Caption = trimmedLine.Substring(2);
                appendToResult = false;
                isInCaption = true;
                isInCell = false;
            }
        }
        else if (prefix == "|" || prefix == "!" || prefix == "!+") // CELL
        {
            // Which kind of cells are we dealing with?
            WikiCellType currentTag = WikiCellType.td;
            string cellLine = trimmedLine.Substring(1);
            if (prefix == "!" || prefix == "!+")
            {
                cellLine = cellLine.Replace("!!", "||");
                currentTag = WikiCellType.th;
            }

            // Split up multiple cells on the same line.
            string[] cells = cellLine.Split(new string[] { "||" }, StringSplitOptions.None);

            // Decide whether thead or tbody: the first cell line of a row decides, and
            // any later data-cell line turns the row into a body row.
            if (currentRow.Type == WikiRowType.undefined)
                currentRow.Type = prefix == "!" ? WikiRowType.thead : WikiRowType.tbody;
            else if (prefix == "|")
                currentRow.Type = WikiRowType.tbody;

            // Loop through each table cell.
            foreach (string cell in cells)
            {
                WikiAttributes attributes = new WikiAttributes();
                currentCell = new WikiCell();
                currentCell.Type = currentTag;
                currentCell.Content = ParseTableCell(cell, ref attributes);
                currentCell.Attributes = attributes;
                currentRow.Cells.Add(currentCell);
            }

            appendToResult = false;
            isInCell = true;
            isInCaption = false;
        }
        else // NORMAL LINE INSIDE A TABLE
        {
            if (isInCell)
                currentCell.Content += Environment.NewLine + line;
            else if (isInCaption)
                currentTable.Caption += Environment.NewLine + line;

            if (appendToResult)
                result.Append(Environment.NewLine + line);
        }
    }

    // Remove trailing line-ending (b/c).
    if (EndsWithNewLine(result))
        result.Remove(result.Length - Environment.NewLine.Length, Environment.NewLine.Length);

    // Close any unclosed tables, innermost first. Loop on the remaining count: an index
    // loop over a shrinking list would stop after half of the tables.
    while (tables.Count > 0)
    {
        int lastTableIndex = tables.Count - 1;
        string tableHtml = GenerateTableHtml(tables[lastTableIndex]);
        tables.RemoveAt(lastTableIndex);

        // Add a line-ending before the table, but only if there isn't one already.
        if (result.Length > 0 && !EndsWithNewLine(result) && !string.IsNullOrEmpty(tableHtml))
            result.Append(Environment.NewLine);

        result.Append(tableHtml);
    }

    return result.ToString();
}

/// <summary>
/// Tells whether <paramref name="builder"/> ends with <see cref="Environment.NewLine"/>,
/// without assuming a particular line-ending length and without failing on short content.
/// </summary>
private static bool EndsWithNewLine(StringBuilder builder)
{
    int newLineLength = Environment.NewLine.Length;
    return builder.Length >= newLineLength
        && builder.ToString(builder.Length - newLineLength, newLineLength) == Environment.NewLine;
}