Exemplo n.º 1
0
        /// <summary>
        /// Builds the table model from an HTML table element: collects the heading rows, derives one
        /// title per column from them, stores the remaining rows as data rows, and indexes the titles.
        /// </summary>
        /// <param name="e">The table element whose <c>tr</c> descendants are read.</param>
        /// <exception cref="HtmlElementException">
        /// Thrown when the heading rows do not all contain the same number of cells.
        /// </exception>
        private HtmlTable(IHtmlTableElement e)
        {
            // A row counts as a heading row when every one of its children is a header cell.
            var headings = e.QuerySelectorAll("tr")
                           .Where(x => x.Children.All(y => y is IHtmlTableHeaderCellElement))
                           .Select(x => (IHtmlTableRowElement)x)
                           .ToList();

            if (headings.Any())
            {
                var rowChildren = headings.Select(x => (Row: x, Children: RowChildren(x).ToList())).ToList();

                // The first heading row fixes the expected cell count for all other heading rows.
                var num = rowChildren.First().Children.Count;
                foreach (var (row, children) in rowChildren.Skip(1))
                {
                    if (children.Count != num)
                    {
                        throw new HtmlElementException(row,
                                                       $"Expected all of the rows to have the same amount of cells ({num}). But this one has {children.Count}");
                    }
                }

                // For each column, join the trimmed text of that column's cell from every heading row.
                ColumnTitles = Linq.Range(num)
                               .Select(i => rowChildren.Select(x => x.Children[i]))
                               .Select(x => x.Select(y => y.TextContent.Trim()).Join(" ").Trim())
                               .ToList();

                // Data rows are everything after the first headings.Count rows; this relies on the
                // heading rows being the leading rows of the table.
                _rows = e.QuerySelectorAll("tr").Skip(headings.Count).Select(row => (IHtmlTableRowElement)row).ToList();
            }
            else
            {
                ColumnTitles = Array.Empty<string>();

                _rows = e.QuerySelectorAll("tr").Select(row => (IHtmlTableRowElement)row).ToList();
            }

            // Map each title to the index of its first occurrence. Grouping by title keeps repeated
            // titles from reaching ToDictionary as duplicate keys.
            _columnTitleToIndex = ColumnTitles.Enumerate()
                                  .GroupBy(tup => tup.Elem)
                                  .ToDictionary(group => group.Key, group => group.First().Index);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Sanity check to ensure the page hasn't changed format: compares the text of every
        /// <c>th</c> cell found under <paramref name="given"/> with the expected header names,
        /// in order.
        /// </summary>
        /// <param name="expected">Header names in the order they are expected to appear.</param>
        /// <param name="given">Table element whose <c>th</c> cells are read.</param>
        /// <returns><c>true</c> when the headers match; a mismatch throws instead of returning <c>false</c>.</returns>
        /// <exception cref="Exception">Thrown when the found headers differ from <paramref name="expected"/>.</exception>
        private static bool verifyHeaderNames(List<string> expected, IHtmlTableElement given)
        {
            List<string> headerNames = new List<string>();

            foreach (IHtmlTableHeaderCellElement headerCell in given.QuerySelectorAll("th"))
            {
                // The cell text is compared as-is; no trimming is applied.
                headerNames.Add(headerCell.TextContent);
            }

            if (!expected.SequenceEqual(headerNames))
            {
                // Both lists are wrapped in '|' so leading/trailing whitespace is visible in the message.
                throw new Exception("Headers do not match. Expected: |" + String.Join(",", expected) + "| but found |" + String.Join(",", headerNames) + "|");
            }

            return true;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads one entry out of a table: takes the text of the cells at positions 2, 4, 6 and 7,
        /// resolves a file path from the collection name prefix and, if that file exists, loads its
        /// content and appends the entry to <paramref name="list"/>.
        /// </summary>
        /// <param name="table">Table element whose <c>td</c> cells describe the entry.</param>
        /// <param name="list">Receives the entry when its file exists; otherwise left untouched.</param>
        static void Inspect(IHtmlTableElement table, List<Entry> list)
        {
            var entry = new Entry();
            var cells = table.QuerySelectorAll("table td");

            // NOTE(review): the cell count is not checked; the indices below need at least eight cells.
            entry.Rules      = cells[2].TextContent;
            entry.FileName   = cells[4].TextContent;
            entry.Collection = cells[6].TextContent;
            entry.Text       = cells[7].TextContent.Trim();

            // The prefixes are fixed identifiers, so compare ordinally rather than by current culture.
            string path;

            if (entry.Collection.StartsWith("IBM", StringComparison.Ordinal))
            {
                path = IBMPath(entry.FileName);
            }
            else if (entry.Collection.StartsWith("James Clark", StringComparison.Ordinal))
            {
                path = XmlTestPath(entry.FileName);
            }
            else if (entry.Collection.StartsWith("Sun", StringComparison.Ordinal))
            {
                path = SunPath(entry.FileName);
            }
            else if (entry.Collection.StartsWith("OASIS", StringComparison.Ordinal))
            {
                path = OasisPath(entry.FileName);
            }
            else if (entry.Collection.StartsWith("Fuji", StringComparison.Ordinal))
            {
                path = XmlFujiPath(entry.FileName);
            }
            else
            {
                // Any other collection name falls back to the EdUni path helper.
                path = EdUniPath(entry.FileName);
            }

            // Entries whose file is missing are skipped silently.
            if (File.Exists(path))
            {
                entry.Content = File.ReadAllText(path);
                list.Add(entry);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds an <c>Entry</c> from the text of the table's cells at positions 2, 4, 6 and 7,
        /// picks the file path helper that matches the collection name prefix, and adds the entry
        /// (with the file's content) to <paramref name="list"/> only when that file exists.
        /// </summary>
        /// <param name="table">Table element whose <c>td</c> cells are read.</param>
        /// <param name="list">Collection the entry is appended to when its file is found.</param>
        static void Inspect(IHtmlTableElement table, List<Entry> list)
        {
            var cells = table.QuerySelectorAll("table td");

            // NOTE(review): no length check is made; indexing up to 7 needs at least eight cells.
            var entry = new Entry();
            entry.Rules = cells[2].TextContent;
            entry.FileName = cells[4].TextContent;
            entry.Collection = cells[6].TextContent;
            entry.Text = cells[7].TextContent.Trim();

            var collection = entry.Collection;
            string path;

            // Ordinal comparison: the prefixes are fixed names, not culture-dependent text.
            if (collection.StartsWith("IBM", StringComparison.Ordinal))
            {
                path = IBMPath(entry.FileName);
            }
            else if (collection.StartsWith("James Clark", StringComparison.Ordinal))
            {
                path = XmlTestPath(entry.FileName);
            }
            else if (collection.StartsWith("Sun", StringComparison.Ordinal))
            {
                path = SunPath(entry.FileName);
            }
            else if (collection.StartsWith("OASIS", StringComparison.Ordinal))
            {
                path = OasisPath(entry.FileName);
            }
            else if (collection.StartsWith("Fuji", StringComparison.Ordinal))
            {
                path = XmlFujiPath(entry.FileName);
            }
            else
            {
                // Unrecognised collection names use the EdUni path helper.
                path = EdUniPath(entry.FileName);
            }

            // Nothing is added when the resolved file is missing.
            if (!File.Exists(path))
            {
                return;
            }

            entry.Content = File.ReadAllText(path);
            list.Add(entry);
        }