// Verifies that a table can be resolved from a path that points at one of its cells,
// and that the resolved table exposes the expected TABLE/TBODY elements and row count.
public void Creation()
{
    // The path addresses a single TD; the lookup is expected to yield the enclosing table.
    var cellPath = HtmlPath.Parse("/BODY[0]/DIV[5]/DIV[0]/DIV[1]/TABLE[7]/TBODY[0]/TR[6]/TD[1]");

    var resolvedTable = myDocument.GetTableByPath(cellPath);

    Assert.AreEqual("TABLE", resolvedTable.TableElement.TagName);
    Assert.AreEqual("TBODY", resolvedTable.TableBody.TagName);
    Assert.AreEqual(9, resolvedTable.Rows.Count());
}
/// <summary>
/// Extracts the complete HTML table the given path is pointing to. If the path points
/// to a cell of a table, the complete table is still extracted.
/// </summary>
/// <remarks>
/// Currently thead and tfoot cannot be handled. The number of columns is defined by the
/// HTML table row with the most TD/TH cells; rows with fewer cells leave the remaining
/// columns unset.
/// </remarks>
/// <param name="doc">the HTML document</param>
/// <param name="path">the path to the table</param>
/// <param name="textOnly">set this to true to get only the text of the cell, otherwise the
/// cell itself as HtmlElement is returned</param>
/// <returns>
/// A success result wrapping the extracted <see cref="DataTable"/>, or a failure result
/// if no table could be found by the path or the table contains no rows.
/// </returns>
public static FallibleActionResult<DataTable> ExtractTable(this IHtmlDocument doc, HtmlPath path, bool textOnly)
{
    doc.Require(x => doc != null);
    path.Require(x => path != null);

    HtmlTable htmlTable = doc.GetTableByPath(path);
    if (htmlTable == null)
    {
        return FallibleActionResult<DataTable>.CreateFailureResult("Could not get table by path");
    }

    DataTable table = new DataTable();
    // TODO: should we get the culture from the HTML page somehow?
    table.Locale = CultureInfo.InvariantCulture;

    // Cell value is either the plain text or the element itself, depending on textOnly.
    Func<IHtmlElement, object> getContent = element => (textOnly ? (object)element.InnerText : element);

    foreach (var tr in htmlTable.Rows)
    {
        // Only TD/TH children count as cells; everything else inside the row is ignored.
        var htmlRow = new List<IHtmlElement>();
        foreach (var td in tr.Children)
        {
            if (td.TagName == "TD" || td.TagName == "TH")
            {
                htmlRow.Add(td);
            }
        }

        // add columns if necessary
        if (htmlRow.Count > table.Columns.Count)
        {
            (htmlRow.Count - table.Columns.Count).Times(x => table.Columns.Add(string.Empty, typeof(object)));
        }

        // add new row to table
        DataRow row = table.NewRow();
        table.Rows.Add(row);

        // add data
        htmlRow.ForeachIndex((element, idx) => row[idx] = getContent(element));
    }

    if (table.Rows.Count == 0)
    {
        table.Dispose();
        return FallibleActionResult<DataTable>.CreateFailureResult("Table was empty");
    }

    // Commit once, after every row has been filled. Accepting inside the loop walks the
    // whole table for each row and, because it would run before the cells are assigned,
    // leaves the final row in the Modified state while all other rows are Unchanged.
    table.AcceptChanges();

    return FallibleActionResult<DataTable>.CreateSuccessResult(table);
}