/// <summary>
/// Builds the table model from a <c>table</c> element.
/// Every <c>tr</c> whose children are all header cells is treated as a heading row;
/// the text of the cells at the same position across all heading rows is joined
/// with a space to form that column's title. The remaining rows become the data rows.
/// </summary>
/// <param name="e">The table element to read rows from.</param>
/// <exception cref="HtmlElementException">
/// Thrown when the heading rows do not all have the same number of cells.
/// </exception>
private HtmlTable(IHtmlTableElement e)
{
    // Query the rows once; the heading scan and the data-row list both use this result.
    var allRows = e.QuerySelectorAll("tr");

    var headings = allRows
        .Where(x => x.Children.All(y => y is IHtmlTableHeaderCellElement))
        .Select(x => (IHtmlTableRowElement)x)
        .ToList();

    if (headings.Any())
    {
        var rowChildren = headings
            .Select(x => (Row: x, Children: RowChildren(x).ToList()))
            .ToList();

        // The first heading row fixes the expected column count for all the others.
        var num = rowChildren.First().Children.Count;
        foreach (var (row, children) in rowChildren.Skip(1))
        {
            if (children.Count != num)
            {
                throw new HtmlElementException(row, $"Expected all of the rows to have the same amount of cells ({num}). But this one has {children.Count}");
            }
        }

        // For column i, stack the i-th cell of every heading row into one title.
        ColumnTitles = Linq.Range(num)
            .Select(i => rowChildren.Select(x => x.Children[i]))
            .Select(x => x.Select(y => y.TextContent.Trim()).Join(" ").Trim())
            .ToList();
    }
    else
    {
        ColumnTitles = Array.Empty<string>();
    }

    // NOTE(review): this skips the first headings.Count rows, which assumes the heading
    // rows are the leading rows of the table - confirm against callers.
    _rows = allRows
        .Skip(headings.Count)
        .Select(row => (IHtmlTableRowElement)row)
        .ToList();

    // Deduplicate by title, not by index: indices are always unique, so comparing them
    // removed nothing and a repeated title made ToDictionary throw on the duplicate key.
    // Presumably the first occurrence of a title is the one kept - depends on the
    // project's Distinct extension; verify.
    _columnTitleToIndex = ColumnTitles.Enumerate()
        .Distinct((i1, i2) => i1.Elem == i2.Elem)
        .ToDictionary(tup => tup.Elem, tup => tup.Index);
}
/// <summary>
/// Sanity check to ensure the page hasn't changed format: compares the text of every
/// <c>th</c> cell found under <paramref name="given"/> with <paramref name="expected"/>.
/// The comparison is positional and uses the cells' exact, untrimmed text.
/// </summary>
/// <param name="expected">Header texts the table is expected to contain, in order.</param>
/// <param name="given">Table whose header cells are inspected.</param>
/// <returns>Always <c>true</c>; a mismatch is reported by throwing instead.</returns>
/// <exception cref="Exception">Thrown when the header texts differ from the expected ones.</exception>
private static bool verifyHeaderNames(List<string> expected, IHtmlTableElement given)
{
    // Text of each header cell, in the order QuerySelectorAll returns them.
    List<string> headerNames = given.QuerySelectorAll("th")
        .Select(headerCell => headerCell.TextContent)
        .ToList();

    if (!expected.SequenceEqual(headerNames))
    {
        // Both lists are wrapped in |...| so leading/trailing whitespace or an empty
        // header is visible in the message.
        throw new Exception(
            "Headers do not match. Expected: |" + String.Join(",", expected) +
            "| but found |" + String.Join(",", headerNames) + "|");
    }

    return true;
}
/// <summary>
/// Reads one entry out of <paramref name="table"/> and appends it to
/// <paramref name="list"/> when the file it refers to exists on disk.
/// The cells at positions 2, 4, 6 and 7 of the matched <c>td</c> list supply the
/// entry's Rules, FileName, Collection and (trimmed) Text.
/// </summary>
/// <param name="table">Table element whose cells describe the entry.</param>
/// <param name="list">Receives the entry; left untouched when the file is missing.</param>
static void Inspect(IHtmlTableElement table, List<Entry> list)
{
    var cells = table.QuerySelectorAll("table td");

    var entry = new Entry();
    entry.Rules = cells[2].TextContent;
    entry.FileName = cells[4].TextContent;
    entry.Collection = cells[6].TextContent;
    entry.Text = cells[7].TextContent.Trim();

    // The collection name's prefix selects which path helper locates the file.
    // Ordinal comparison: these are fixed identifiers, not linguistic text.
    string path;
    if (entry.Collection.StartsWith("IBM", StringComparison.Ordinal))
    {
        path = IBMPath(entry.FileName);
    }
    else if (entry.Collection.StartsWith("James Clark", StringComparison.Ordinal))
    {
        path = XmlTestPath(entry.FileName);
    }
    else if (entry.Collection.StartsWith("Sun", StringComparison.Ordinal))
    {
        path = SunPath(entry.FileName);
    }
    else if (entry.Collection.StartsWith("OASIS", StringComparison.Ordinal))
    {
        path = OasisPath(entry.FileName);
    }
    else if (entry.Collection.StartsWith("Fuji", StringComparison.Ordinal))
    {
        path = XmlFujiPath(entry.FileName);
    }
    else
    {
        // Any other collection name falls back to this helper.
        path = EdUniPath(entry.FileName);
    }

    // Entries whose file is not present are skipped without error.
    if (File.Exists(path))
    {
        entry.Content = File.ReadAllText(path);
        list.Add(entry);
    }
}
/// <summary>
/// Builds an entry from the <c>td</c> cells of <paramref name="table"/>, resolves the
/// file path for it from its collection name, and adds the entry (with the file's text
/// as Content) to <paramref name="list"/> if that file exists.
/// </summary>
/// <param name="table">Table element holding the entry's cells.</param>
/// <param name="list">Target list; unchanged when the resolved file does not exist.</param>
static void Inspect(IHtmlTableElement table, List<Entry> list)
{
    var cells = table.QuerySelectorAll("table td");

    // Cell positions 2, 4, 6 and 7 carry the rules, file name, collection and text.
    var entry = new Entry();
    entry.Rules = cells[2].TextContent;
    entry.FileName = cells[4].TextContent;
    entry.Collection = cells[6].TextContent;
    entry.Text = cells[7].TextContent.Trim();

    var collection = entry.Collection;
    var fileName = entry.FileName;

    // First matching prefix wins; unknown collections fall through to EdUniPath.
    // Prefixes are fixed identifiers, so compare ordinally rather than by culture.
    var path =
        collection.StartsWith("IBM", StringComparison.Ordinal) ? IBMPath(fileName) :
        collection.StartsWith("James Clark", StringComparison.Ordinal) ? XmlTestPath(fileName) :
        collection.StartsWith("Sun", StringComparison.Ordinal) ? SunPath(fileName) :
        collection.StartsWith("OASIS", StringComparison.Ordinal) ? OasisPath(fileName) :
        collection.StartsWith("Fuji", StringComparison.Ordinal) ? XmlFujiPath(fileName) :
        EdUniPath(fileName);

    // A missing file means the entry is dropped without error.
    if (!File.Exists(path))
    {
        return;
    }

    entry.Content = File.ReadAllText(path);
    list.Add(entry);
}