/// <summary>
        /// Builds a <see cref="SourceTable"/> from a div-based layout rooted at <paramref name="divNode"/>.
        /// </summary>
        /// <param name="divNode">Node whose descendants are scanned for row candidates.</param>
        /// <returns>
        /// The populated table. When the table reports itself as invalid and <c>DoThrowException</c>
        /// is false, the (unfilled) table instance is still returned.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The constructed table is not valid and <c>DoThrowException</c> is set.
        /// </exception>
        public override SourceTable MakeSourceTable(HtmlNode divNode)
        {
            // Look for a <div> without element children ("div[not(*)]") via the XPath built from divNode;
            // when nothing matches, the search starts at divNode itself.
            var rowSearchRoot = divNode.SelectSingleNode(divNode.XPath.add("div[not(*)]", "//")) ?? divNode;

            List <HtmlNode> candidateRows = AdaptiveRowSelection(rowSearchRoot, AdaptiveSteps);

            var populatedRows = new List <TableHtmlSourceRow>();
            Int32 columns = 0;

            foreach (HtmlNode candidate in candidateRows)
            {
                TableHtmlSourceRow parsedRow = SetSourceRow(candidate);

                // The table width is the widest row encountered.
                columns = Math.Max(columns, parsedRow.Count);

                // Rows without any cell are not part of the table.
                if (parsedRow.Count > 0)
                {
                    populatedRows.Add(parsedRow);
                }
            }

            Int32 rows = populatedRows.Count;
            var table = new SourceTable(columns, rows);

            if (!table.IsValid && DoThrowException)
            {
                string message = "Number of columns and rows can't be less than 1. Specified values: width [" + columns.ToString() + "], height [" + rows.ToString() + "]";
                throw new ArgumentOutOfRangeException(nameof(columns), message);
            }

            // Table cells are addressed as [column, row].
            for (int rowIndex = 0; rowIndex < populatedRows.Count; rowIndex++)
            {
                TableHtmlSourceRow current = populatedRows[rowIndex];

                for (int columnIndex = 0; columnIndex < current.RowCells.Count; columnIndex++)
                {
                    SetSourceTableCell(table[columnIndex, rowIndex], current.RowCells[columnIndex], divNode.OwnerDocument);
                }
            }

            return table;
        }
        //public override MetaTableSchema GetTableSchema()
        //{
        //    return null;
        //    //throw new NotImplementedException();
        //}

        /// <summary>
        /// Wraps <paramref name="row"/> in a <see cref="TableHtmlSourceRow"/> and collects its cell nodes.
        /// </summary>
        /// <param name="row">Node treated as one table row.</param>
        /// <returns>The source row; its <c>RowCells</c> holds every selected cell node (possibly none).</returns>
        protected TableHtmlSourceRow SetSourceRow(HtmlNode row)
        {
            var result = new TableHtmlSourceRow(row);

            // A cell is a node named RowSelectionTag (case-insensitive) none of whose
            // direct children carries that same tag name.
            var cellNodes = result.RowNode.SelectNodesInDepthRange(
                candidate => candidate.Name.Equals(RowSelectionTag, StringComparison.InvariantCultureIgnoreCase) &&
                             candidate.ChildNodes.All(child => !child.Name.Equals(RowSelectionTag, StringComparison.InvariantCultureIgnoreCase)),
                RowSelectionDepthLimit, RowSelectionDepthStart, false);

            // Iterating an empty selection adds nothing, so no explicit count check is needed.
            foreach (var cell in cellNodes)
            {
                result.RowCells.Add(cell);
            }

            return result;
        }
Beispiel #3
0
        /// <summary>
        /// Builds a <see cref="SourceTable"/> from the row nodes found under <paramref name="tableNode"/>.
        /// </summary>
        /// <param name="tableNode">Node searched for row elements (tag <c>tagName_row</c>).</param>
        /// <returns>
        /// The populated table, or <c>null</c> when no row with at least one cell was found.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The constructed table is not valid and <c>DoThrowException</c> is set.
        /// </exception>
        public override SourceTable MakeSourceTable(HtmlNode tableNode)
        {
            var rowNodes = tableNode.SelectByTagName(tagName_row, 5);
            var keptRows = new List <TableHtmlSourceRow>();

            Int32 columns = 0;

            foreach (HtmlNode rowNode in rowNodes)
            {
                TableHtmlSourceRow candidate = SetSourceRow(rowNode);

                // Track the widest row; it defines the table width.
                columns = Math.Max(columns, candidate.Count);

                if (candidate.Count > 0)
                {
                    keptRows.Add(candidate);
                }
            }

            Int32 rows = keptRows.Count;

            // Both values are non-negative, so a zero sum means both are zero: nothing to build.
            if (rows == 0 && columns == 0)
            {
                return null;
            }

            var table = new SourceTable(columns, rows);

            if (!table.IsValid && DoThrowException)
            {
                string message = "Number of columns and rows can't be less than 1. Specified values: width [" + columns.ToString() + "], height [" + rows.ToString() + "]";
                throw new ArgumentOutOfRangeException(nameof(columns), message);
            }

            // Table cells are addressed as [column, row].
            for (int y = 0; y < keptRows.Count; y++)
            {
                TableHtmlSourceRow current = keptRows[y];

                for (int x = 0; x < current.RowCells.Count; x++)
                {
                    SetSourceTableCell(table[x, y], current.RowCells[x], tableNode.OwnerDocument);
                }
            }

            return table;
        }
Beispiel #4
0
        //public override MetaTableSchema GetTableSchema()
        //{
        //    return null;
        //    //throw new NotImplementedException();
        //}

        /// <summary>
        /// Wraps <paramref name="row"/> in a <see cref="TableHtmlSourceRow"/> and collects its cell nodes.
        /// </summary>
        /// <remarks>
        /// Cells tagged <c>tagName_normalcell</c> are preferred; only when none are found are
        /// cells tagged <c>tagName_headingcell</c> used instead. The two kinds are never mixed.
        /// </remarks>
        /// <param name="row">Node treated as one table row.</param>
        /// <returns>The source row; its <c>RowCells</c> holds the selected cell nodes (possibly none).</returns>
        protected TableHtmlSourceRow SetSourceRow(HtmlNode row)
        {
            TableHtmlSourceRow sourceRow = new TableHtmlSourceRow(row);

            var html_cells = sourceRow.RowNode.SelectByTagName(tagName_normalcell, 5);

            // Fall back to heading cells when the row has no normal cells.
            if (html_cells.Count == 0)
            {
                html_cells = sourceRow.RowNode.SelectByTagName(tagName_headingcell, 5);
            }

            // Iterating an empty selection is a no-op, so no extra count check is required.
            foreach (var n in html_cells)
            {
                sourceRow.RowCells.Add(n);
            }

            return sourceRow;
        }
Beispiel #5
0
        //public override MetaTableSchema GetTableSchema()
        //{
        //    return null;
        //    //throw new NotImplementedException();
        //}


        /// <summary>
        /// Wraps <paramref name="row"/> in a <see cref="TableHtmlSourceRow"/> and fills its cells with the
        /// label and data nodes described by <paramref name="dp"/>.
        /// </summary>
        /// <remarks>
        /// When <paramref name="dp"/> has entries in <c>Properties</c>, each property contributes its own
        /// label/data pair; otherwise the entry's own label/data paths are used. Nodes that cannot be
        /// resolved are skipped, so a pair may contribute zero, one or two cells.
        /// </remarks>
        /// <param name="row">Node treated as one table row; XPaths are resolved relative to it.</param>
        /// <param name="dp">Data point entry supplying the relative label and data XPaths.</param>
        /// <returns>The source row with the resolved nodes appended to <c>RowCells</c>, label before data.</returns>
        protected TableHtmlSourceRow SetSourceRow(HtmlNode row, DataPointMapEntry dp)
        {
            var result = new TableHtmlSourceRow(row);

            if (dp.Properties.Count <= 0)
            {
                // No sub-properties: the entry itself describes a single label/data pair.
                var entryLabel = row.selectSingleNode(dp.LabelXPathRelative.GetRelativeXPath(row.XPath));
                var entryData  = row.selectSingleNode(dp.DataXPathRelative.GetRelativeXPath(row.XPath));

                if (entryLabel != null)
                {
                    result.RowCells.Add(entryLabel);
                }
                if (entryData != null)
                {
                    result.RowCells.Add(entryData);
                }

                return result;
            }

            // One label/data pair per property, in property order.
            foreach (var property in dp.Properties)
            {
                var propertyLabel = row.selectSingleNode(property.LabelXPathRelative.GetRelativeXPath(row.XPath));
                var propertyData  = row.selectSingleNode(property.DataXPathRelative.GetRelativeXPath(row.XPath));

                if (propertyLabel != null)
                {
                    result.RowCells.Add(propertyLabel);
                }
                if (propertyData != null)
                {
                    result.RowCells.Add(propertyData);
                }
            }

            return result;
        }