/// <summary>
/// Writes a textual dump of this table to <paramref name="sw"/>.
/// Every cell is labelled with its position ("row,column"), prepended with
/// <paramref name="prefix"/> and a comma when the prefix is non-empty.
/// Cells that are themselves ParsedTable instances are dumped recursively,
/// using the cell label as the nested prefix.
/// </summary>
/// <param name="prefix">Label of this table; may be null or empty for the outermost table.</param>
/// <param name="sw">Writer that receives the dump.</param>
public void PrintTable(string prefix, TextWriter sw)
{
    sw.WriteLine("[" + prefix + "] START TABLE");

    for (int row = 0; row < content.Count; row++)
    {
        List<object> cells = content[row];

        for (int col = 0; col < cells.Count; col++)
        {
            // Build the label for this cell: "row,col" or "prefix,row,col".
            string position = row + "," + col;
            string label = string.IsNullOrEmpty(prefix)
                ? position
                : prefix + "," + position;

            object cell = cells[col];

            // Nested tables print themselves with the cell label as prefix.
            ParsedTable nested = cell as ParsedTable;
            if (nested != null)
            {
                nested.PrintTable(label, sw);
                continue;
            }

            // Anything else is printed via ToString(); a null cell prints the literal NULL.
            sw.WriteLine("[" + label + "] START CONTENT");
            sw.WriteLine(cell == null ? "NULL" : cell.ToString());
            sw.WriteLine("[" + label + "] END CONTENT");
        }
    }

    sw.WriteLine("[" + prefix + "] END TABLE");
}
/// <summary>
/// Splits the inner markup of a table element into rows.
/// The caller is expected to pass only the text that appeared between the
/// opening and the closing table tag; this method locates each row ("tr")
/// element, hands its inner text to parseHtmlAsTableRow, and stores the
/// returned objects as the columns of a new row.
/// </summary>
/// <param name="mainString">Markup found directly inside a table element.</param>
/// <returns>A ParsedTable with one row per "tr" element found.</returns>
private static ParsedTable parseHtmlAsTable(string mainString)
{
    ParsedTable result = new ParsedTable();
    int cursor = 0;

    for (;;)
    {
        // A row may open as a bare tag or as a tag followed by attributes.
        int bareTag = mainString.IndexOf("<tr>", cursor, StringComparison.OrdinalIgnoreCase);
        int tagWithAttributes = mainString.IndexOf("<tr ", cursor, StringComparison.OrdinalIgnoreCase);

        if (bareTag == -1 && tagWithAttributes == -1)
        {
            // No further rows.
            break;
        }

        // Take whichever form occurs first.
        int rowStart = bareTag == -1
            ? tagWithAttributes
            : (tagWithAttributes == -1 ? bareTag : Math.Min(bareTag, tagWithAttributes));

        int openingTagEnd = mainString.IndexOf(">", rowStart);

        // NOTE(review): findMatchingEndTag is defined elsewhere; it is used here as
        // the index at which the matching closing row tag begins - confirm.
        int rowEnd = findMatchingEndTag(mainString, "tr", rowStart);

        int rowIndex = result.addRow();

        // Text strictly between the opening tag's '>' and the closing tag.
        int innerStart = openingTagEnd + 1;
        string rowMarkup = mainString
            .Substring(innerStart, rowEnd - openingTagEnd - 1)
            .Trim(' ', '\t', '\r', '\n');

        List<object> cells = parseHtmlAsTableRow(rowMarkup);
        foreach (object cell in cells)
        {
            result.addColumn(rowIndex, cell);
        }

        // Resume scanning just past the closing row tag.
        cursor = rowEnd + "</tr>".Length;
    }

    return result;
}
/// <summary>
/// Follows a path of (row, column) index pairs through nested tables and
/// returns the text of the cell the path ends at.
/// </summary>
/// <param name="parsedTable">Outermost table the path starts from.</param>
/// <param name="indices">
/// Flat sequence of row/column pairs: indices[0], indices[1] address a cell of
/// <paramref name="parsedTable"/>, the next pair addresses a cell of the table
/// found there, and so on. An empty array addresses the table itself.
/// </param>
/// <returns>
/// The ToString() text of the addressed object with non-breaking spaces
/// converted to ordinary spaces and surrounding whitespace trimmed.
/// </returns>
/// <exception cref="ArgumentNullException">An argument is null.</exception>
/// <exception cref="InvalidOperationException">
/// <paramref name="indices"/> has an odd length, or the path runs into an empty (null) cell.
/// </exception>
/// <exception cref="InvalidCastException">
/// The path continues through a cell that is not a ParsedTable.
/// </exception>
public static string GetValue(ParsedTable parsedTable, int[] indices)
{
    if (indices == null)
    {
        throw new ArgumentNullException("indices");
    }

    if (indices.Length % 2 != 0)
    {
        throw new InvalidOperationException(
            "indices must hold (row, column) pairs but has " + indices.Length + " element(s).");
    }

    if (parsedTable == null)
    {
        throw new ArgumentNullException("parsedTable");
    }

    object current = parsedTable;

    for (int i = 0; i < indices.Length; i += 2)
    {
        ParsedTable table = current as ParsedTable;
        if (table == null)
        {
            if (current == null)
            {
                // The previous pair landed on an empty cell, so there is nothing to descend into.
                throw new InvalidOperationException(
                    "Cannot descend into an empty cell at index pair starting at position " + (i - 2) + ".");
            }

            // Same exception type an explicit cast would raise, with a usable message.
            throw new InvalidCastException(
                "Cell reached before index position " + i + " is a " + current.GetType().Name
                + ", not a nested table.");
        }

        current = table[indices[i], indices[i + 1]];
    }

    if (current == null)
    {
        // Cells may legitimately be null; report it instead of failing on ToString().
        throw new InvalidOperationException("The addressed cell is empty (null).");
    }

    string text = current.ToString();

    // Non-breaking space written as an escape so it is distinguishable from a
    // plain space when reading the source.
    text = text.Replace('\u00A0', ' ');

    return text.Trim();
}
/// <summary>
/// Scans markup for top-level table elements and returns the pieces found.
/// Text between tables is kept as trimmed strings; each table element is
/// converted with parseHtmlAsTable.
/// </summary>
/// <param name="mainString">Markup to scan; may be null or empty.</param>
/// <param name="encapsulateInTable">
/// Only relevant when exactly one non-table piece is found: when true that
/// piece is wrapped in a one-row, one-column ParsedTable, otherwise it is
/// returned as is.
/// </param>
/// <returns>
/// null when the input is null/empty or yields no pieces; the single
/// ParsedTable when exactly one table was found; the single text piece (or its
/// wrapping table, see <paramref name="encapsulateInTable"/>); otherwise a
/// one-row ParsedTable whose columns are the pieces in document order.
/// </returns>
public static Object ParseHtmlIntoTables(string mainString, bool encapsulateInTable)
{
    if (string.IsNullOrEmpty(mainString))
    {
        return null;
    }

    char[] blanks = { ' ', '\t', '\r', '\n' };
    string html = mainString.Trim(blanks);

    List<object> pieces = new List<object>();
    int cursor = 0;

    // Nothing to scan when trimming left an empty string.
    while (html.Length > 0)
    {
        // A table may open as a bare tag or as a tag followed by attributes.
        int bareTag = html.IndexOf("<table>", cursor, StringComparison.OrdinalIgnoreCase);
        int tagWithAttributes = html.IndexOf("<table ", cursor, StringComparison.OrdinalIgnoreCase);

        if (bareTag == -1 && tagWithAttributes == -1)
        {
            break;
        }

        // Take whichever form occurs first.
        int tableStart = bareTag == -1
            ? tagWithAttributes
            : (tagWithAttributes == -1 ? bareTag : Math.Min(bareTag, tagWithAttributes));

        int openingTagEnd = html.IndexOf(">", tableStart);

        // NOTE(review): findMatchingEndTag is defined elsewhere; it is used here as
        // the index at which the matching closing table tag begins - confirm.
        int tableEnd = findMatchingEndTag(html, "table", tableStart);

        if (tableStart > cursor)
        {
            // Text preceding the table is recorded even if trimming leaves it empty.
            string leadingText = html.Substring(cursor, tableStart - cursor).Trim(blanks);
            pieces.Add(leadingText);

            // Text outside a table should never contain closing table markup.
            Debug.Assert(leadingText.IndexOf("</td>", StringComparison.OrdinalIgnoreCase) == -1);
            Debug.Assert(leadingText.IndexOf("</tr>", StringComparison.OrdinalIgnoreCase) == -1);
            Debug.Assert(leadingText.IndexOf("</table>", StringComparison.OrdinalIgnoreCase) == -1);
        }

        // Text strictly between the opening tag's '>' and the closing tag.
        string tableMarkup = html
            .Substring(openingTagEnd + 1, tableEnd - openingTagEnd - 1)
            .Trim(blanks);
        pieces.Add(parseHtmlAsTable(tableMarkup));

        // Resume scanning just past the closing table tag.
        cursor = tableEnd + "</table>".Length;
    }

    // Whatever follows the last table is kept only when non-empty.
    string trailingText = html.Substring(cursor).Trim(blanks);
    if (trailingText.Length != 0)
    {
        pieces.Add(trailingText);

        Debug.Assert(trailingText.IndexOf("</td>", StringComparison.OrdinalIgnoreCase) == -1);
        Debug.Assert(trailingText.IndexOf("</tr>", StringComparison.OrdinalIgnoreCase) == -1);
        Debug.Assert(trailingText.IndexOf("</table>", StringComparison.OrdinalIgnoreCase) == -1);
    }

    if (pieces.Count == 0)
    {
        return null;
    }

    if (pieces.Count == 1)
    {
        object only = pieces[0];

        // A lone table is returned directly; a lone text piece only when wrapping was not requested.
        if (only is ParsedTable || !encapsulateInTable)
        {
            return only;
        }
    }

    // One row holding every piece as a column (also covers the wrapped single piece).
    ParsedTable wrapper = new ParsedTable();
    wrapper.addRow();
    foreach (object piece in pieces)
    {
        wrapper.addColumn(0, piece);
    }

    return wrapper;
}