Exemple #1
0
        // Resolves a path that ends in a table cell (TD) and checks the table
        // returned for it: element tag names and the number of rows.
        public void Creation()
        {
            var cellPath = HtmlPath.Parse("/BODY[0]/DIV[5]/DIV[0]/DIV[1]/TABLE[7]/TBODY[0]/TR[6]/TD[1]");
            var resolvedTable = myDocument.GetTableByPath(cellPath);

            // structural elements of the resolved table
            Assert.AreEqual("TABLE", resolvedTable.TableElement.TagName);
            Assert.AreEqual("TBODY", resolvedTable.TableBody.TagName);

            // row count of the fixture table
            Assert.AreEqual(9, resolvedTable.Rows.Count());
        }
Exemple #2
0
        /// <summary>
        /// Extracts the complete HTML table the given path is pointing to. If the path points
        /// to a cell of a table, the complete table is still extracted.
        /// </summary>
        /// <remarks>
        /// Only the rows exposed by the resolved <c>HtmlTable</c> are read; thead and tfoot
        /// are currently not handled. The number of columns is defined by the HTML table row
        /// with the most TD/TH cells; rows with fewer cells leave their trailing columns unset.
        /// All rows of the returned table have their changes accepted.
        /// </remarks>
        /// <param name="doc">the HTML document</param>
        /// <param name="path">the path to the table (or to an element inside the table)</param>
        /// <param name="textOnly">set this to true to get only the inner text of each cell, otherwise
        /// the cell itself is stored as IHtmlElement</param>
        /// <returns>
        /// A success result carrying the extracted <see cref="DataTable"/>, or a failure result if
        /// no table could be found by the path or the table contains no rows.
        /// </returns>
        public static FallibleActionResult <DataTable> ExtractTable(this IHtmlDocument doc, HtmlPath path, bool textOnly)
        {
            doc.Require(x => doc != null);
            path.Require(x => path != null);

            HtmlTable htmlTable = doc.GetTableByPath(path);
            if (htmlTable == null)
            {
                return FallibleActionResult<DataTable>.CreateFailureResult("Could not get table by path");
            }

            DataTable table = new DataTable();
            try
            {
                // TODO: should we get the culture from the HTML page somehow?
                table.Locale = CultureInfo.InvariantCulture;

                foreach (var tr in htmlTable.Rows)
                {
                    // collect only real cells, skipping any other child nodes of the row
                    var cells = new List<IHtmlElement>();
                    foreach (var child in tr.Children)
                    {
                        if (child.TagName == "TD" || child.TagName == "TH")
                        {
                            cells.Add(child);
                        }
                    }

                    // grow the table so that it is as wide as the widest row seen so far
                    while (table.Columns.Count < cells.Count)
                    {
                        table.Columns.Add(string.Empty, typeof(object));
                    }

                    // fill the row while it is still detached, then attach it
                    DataRow row = table.NewRow();
                    for (int i = 0; i < cells.Count; ++i)
                    {
                        row[i] = textOnly ? (object)cells[i].InnerText : cells[i];
                    }
                    table.Rows.Add(row);
                }

                if (table.Rows.Count == 0)
                {
                    table.Dispose();
                    return FallibleActionResult<DataTable>.CreateFailureResult("Table was empty");
                }

                // Accept once, after all data is in place, so every row ends up in the same
                // (unchanged) state instead of the last row being left as modified.
                table.AcceptChanges();

                return FallibleActionResult<DataTable>.CreateSuccessResult(table);
            }
            catch
            {
                // do not leak the half-built table if reading the HTML fails
                table.Dispose();
                throw;
            }
        }