/// <summary>
/// Builds a <see cref="SourceTable"/> from the rows selected under <paramref name="divNode"/>.
/// </summary>
/// <param name="divNode">Node to extract from; its <c>OwnerDocument</c> is handed to every cell that gets populated.</param>
/// <returns>
/// The table sized to the widest row and the number of rows that produced at least one cell.
/// When the table reports itself as not valid and <c>DoThrowException</c> is off, it is still returned.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The constructed table is not valid and <c>DoThrowException</c> is set.
/// </exception>
public override SourceTable MakeSourceTable(HtmlNode divNode)
{
    // Prefer the node matched by the "div[not(*)]" query; use divNode itself when nothing matches.
    var selectionRoot = divNode.SelectSingleNode(divNode.XPath.add("div[not(*)]", "//")) ?? divNode;

    // Earlier selection approach, kept for reference:
    //divNode.SelectNodesInDepthRange(x => x.Name.Equals(TableSelectionTag, StringComparison.InvariantCultureIgnoreCase), TableSelectionDepthLimit, TableSelectionDepthStart, false);
    List<HtmlNode> candidateRows = AdaptiveRowSelection(selectionRoot, AdaptiveSteps);

    var populatedRows = new List<TableHtmlSourceRow>();
    int columnCount = 0;

    foreach (HtmlNode candidate in candidateRows)
    {
        TableHtmlSourceRow extracted = SetSourceRow(candidate);
        columnCount = Math.Max(columnCount, extracted.Count);

        // Rows without any cell do not contribute to the table height.
        if (extracted.Count > 0)
        {
            populatedRows.Add(extracted);
        }
    }

    int rowCount = populatedRows.Count;
    SourceTable result = new SourceTable(columnCount, rowCount);

    if (!result.IsValid && DoThrowException)
    {
        throw new ArgumentOutOfRangeException(nameof(columns), "Number of columns and rows can't be less than 1. Specified values: width [" + columnCount.ToString() + "], height [" + rowCount.ToString() + "]");
    }

    // Copy every cell node into the table; the table indexer takes [column, row].
    int rowIndex = 0;
    foreach (TableHtmlSourceRow populated in populatedRows)
    {
        for (int columnIndex = 0; columnIndex < populated.RowCells.Count; columnIndex++)
        {
            SetSourceTableCell(result[columnIndex, rowIndex], populated.RowCells[columnIndex], divNode.OwnerDocument);
        }

        rowIndex++;
    }

    return result;
}
//public override MetaTableSchema GetTableSchema()
//{
//    return null;
//    //throw new NotImplementedException();
//}

/// <summary>
/// Wraps <paramref name="row"/> in a <see cref="TableHtmlSourceRow"/> and fills its cells.
/// </summary>
/// <remarks>
/// A node is taken as a cell when its name equals <c>RowSelectionTag</c> (ignoring case) and none of
/// its direct children carries that same name. The search is delegated to
/// <c>SelectNodesInDepthRange</c> with <c>RowSelectionDepthLimit</c> and <c>RowSelectionDepthStart</c>.
/// </remarks>
/// <param name="row">Node that represents one row.</param>
/// <returns>The row wrapper; its <c>RowCells</c> stays empty when no node qualified.</returns>
protected TableHtmlSourceRow SetSourceRow(HtmlNode row)
{
    TableHtmlSourceRow sourceRow = new TableHtmlSourceRow(row);

    // Tag names are identifiers rather than linguistic text, so they are compared ordinally.
    var html_cells = sourceRow.RowNode.SelectNodesInDepthRange(
        x => x.Name.Equals(RowSelectionTag, StringComparison.OrdinalIgnoreCase)
             && !x.ChildNodes.Any(y => y.Name.Equals(RowSelectionTag, StringComparison.OrdinalIgnoreCase)),
        RowSelectionDepthLimit, RowSelectionDepthStart, false);
    // Earlier selection approach, kept for reference:
    //sourceRow.RowNode.SelectChildrenOnDepth("div", 2);

    // Iterating an empty result is a no-op, so no separate emptiness check is needed.
    foreach (var n in html_cells)
    {
        sourceRow.RowCells.Add(n);
    }

    return sourceRow;
}
/// <summary>
/// Builds a <see cref="SourceTable"/> from the row nodes found under <paramref name="tableNode"/>.
/// </summary>
/// <param name="tableNode">Node to extract from; its <c>OwnerDocument</c> is handed to every cell that gets populated.</param>
/// <returns>
/// <c>null</c> when no row produced any cell; otherwise the table sized to the widest row and the
/// number of rows that produced at least one cell.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The constructed table is not valid and <c>DoThrowException</c> is set.
/// </exception>
public override SourceTable MakeSourceTable(HtmlNode tableNode)
{
    // Earlier selection approach, kept for reference:
    //.SelectNodes(HtmlExtractionTools.XPATH_SELECT_TABLEROWS);
    var candidateRows = tableNode.SelectByTagName(tagName_row, 5);

    var populatedRows = new List<TableHtmlSourceRow>();
    int columnCount = 0;

    foreach (HtmlNode candidate in candidateRows)
    {
        TableHtmlSourceRow extracted = SetSourceRow(candidate);
        columnCount = Math.Max(columnCount, extracted.Count);

        // Rows without any cell do not contribute to the table height.
        if (extracted.Count > 0)
        {
            populatedRows.Add(extracted);
        }
    }

    int rowCount = populatedRows.Count;

    // Nothing was extracted at all: signal that with null instead of an empty table.
    if (rowCount + columnCount == 0)
    {
        return null;
    }

    SourceTable result = new SourceTable(columnCount, rowCount);

    if (!result.IsValid && DoThrowException)
    {
        throw new ArgumentOutOfRangeException(nameof(columns), "Number of columns and rows can't be less than 1. Specified values: width [" + columnCount.ToString() + "], height [" + rowCount.ToString() + "]");
    }

    // Copy every cell node into the table; the table indexer takes [column, row].
    int rowIndex = 0;
    foreach (TableHtmlSourceRow populated in populatedRows)
    {
        for (int columnIndex = 0; columnIndex < populated.RowCells.Count; columnIndex++)
        {
            SetSourceTableCell(result[columnIndex, rowIndex], populated.RowCells[columnIndex], tableNode.OwnerDocument);
        }

        rowIndex++;
    }

    return result;
}
//public override MetaTableSchema GetTableSchema()
//{
//    return null;
//    //throw new NotImplementedException();
//}

/// <summary>
/// Wraps <paramref name="row"/> in a <see cref="TableHtmlSourceRow"/> and fills its cells.
/// </summary>
/// <remarks>
/// Cells are looked up by <c>tagName_normalcell</c> first; only when that yields nothing is
/// <c>tagName_headingcell</c> tried instead, so a row never mixes both kinds.
/// </remarks>
/// <param name="row">Node that represents one row.</param>
/// <returns>The row wrapper; its <c>RowCells</c> stays empty when neither lookup found a node.</returns>
protected TableHtmlSourceRow SetSourceRow(HtmlNode row)
{
    TableHtmlSourceRow sourceRow = new TableHtmlSourceRow(row);

    // NOTE(review): the literal 5 is presumably a search-depth limit of SelectByTagName — confirm.
    var html_cells = sourceRow.RowNode.SelectByTagName(tagName_normalcell, 5);

    if (html_cells.Count == 0)
    {
        html_cells = sourceRow.RowNode.SelectByTagName(tagName_headingcell, 5);
    }

    // Iterating an empty result is a no-op, so no separate emptiness check is needed.
    foreach (var n in html_cells)
    {
        sourceRow.RowCells.Add(n);
    }

    return sourceRow;
}
//public override MetaTableSchema GetTableSchema()
//{
//    return null;
//    //throw new NotImplementedException();
//}

/// <summary>
/// Wraps <paramref name="row"/> in a <see cref="TableHtmlSourceRow"/> and adds the label and data
/// nodes described by <paramref name="dp"/> as its cells.
/// </summary>
/// <remarks>
/// When <paramref name="dp"/> has entries in <c>Properties</c>, each entry contributes its own
/// label/data pair; otherwise the XPaths of <paramref name="dp"/> itself are used. A node that is
/// not found is skipped, so a pair may contribute zero, one or two cells.
/// </remarks>
/// <param name="row">Node that represents one row; the relative XPaths are resolved against its <c>XPath</c>.</param>
/// <param name="dp">Map entry that supplies the label and data XPaths.</param>
/// <returns>The row wrapper with the located nodes in label-then-data order.</returns>
protected TableHtmlSourceRow SetSourceRow(HtmlNode row, DataPointMapEntry dp)
{
    TableHtmlSourceRow result = new TableHtmlSourceRow(row);

    // No nested properties: the entry itself describes the single label/data pair.
    if (dp.Properties.Count <= 0)
    {
        var ownLabel = row.selectSingleNode(dp.LabelXPathRelative.GetRelativeXPath(row.XPath));
        var ownData = row.selectSingleNode(dp.DataXPathRelative.GetRelativeXPath(row.XPath));

        if (ownLabel != null)
        {
            result.RowCells.Add(ownLabel);
        }

        if (ownData != null)
        {
            result.RowCells.Add(ownData);
        }

        return result;
    }

    foreach (var property in dp.Properties)
    {
        var propertyLabel = row.selectSingleNode(property.LabelXPathRelative.GetRelativeXPath(row.XPath));
        var propertyData = row.selectSingleNode(property.DataXPathRelative.GetRelativeXPath(row.XPath));

        if (propertyLabel != null)
        {
            result.RowCells.Add(propertyLabel);
        }

        if (propertyData != null)
        {
            result.RowCells.Add(propertyData);
        }
    }

    return result;
}