// Loads the GetHtml() fixture, wraps the table at /BODY[0]/TABLE[0] and
// checks that its Rows collection reports a count of 4.
public void Rows_WhenCalled_AllTRElementsReturned()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml(GetHtml());
    var tablePath = HtmlPath.Parse("/BODY[0]/TABLE[0]");
    var htmlTable = HtmlTable.GetByPath(document, tablePath);

    // act
    var rowCount = htmlTable.Rows.Count;

    // assert
    Assert.That(rowCount, Is.EqualTo(4));
}
// For a single <div id='xx'> directly under <body>, GetPath() is expected
// to render as "/BODY[0]/DIV[0]".
public void GetPath_SimplePath_PathFound()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body><div id='xx'/></body></html>");
    var target = document.GetElementById("xx");

    // act
    var elementPath = target.GetPath();

    // assert
    var rendered = elementPath.ToString();
    Assert.That(rendered, Is.EqualTo("/BODY[0]/DIV[0]"));
}
// The target <p id='xx'> is the third P inside the third DIV of the body;
// the rendered path is expected to carry those positions: "/BODY[0]/DIV[2]/P[2]".
public void GetPath_MultipleElementsWithSameTagName_CorrectPathWithCorrectChildrenPosFound()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body><div/><div/><div><P/><P/><p id='xx'/></div></body></html>");
    var target = document.GetElementById("xx");

    // act
    var elementPath = target.GetPath();

    // assert
    var rendered = elementPath.ToString();
    Assert.That(rendered, Is.EqualTo("/BODY[0]/DIV[2]/P[2]"));
}
// Smoke test: an HtmlDocumentLoader can be constructed and the resulting
// instance is not null.
public void Smoke()
{
    //arrange
    var loader = new HtmlDocumentLoader();

    //act/assert
    Assert.That(loader, Is.Not.Null);
}
// A single form carries both name='xx' and id='xx'; the form located by
// name must wrap the same element that a lookup by id returns.
public void GetByName_FormExists_FormFound()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body><form name='xx' id='xx'/></body></html>");

    // act
    var found = HtmlForm.GetByName(document, "xx");

    // assert
    var actualElement = found.FormElement;
    var expectedElement = document.GetElementById("xx");
    Assert.That(actualElement, Is.EqualTo(expectedElement));
}
// Two forms are present ('NO' nested inside a <p>, 'xx' as its sibling's
// neighbour); looking up "xx" by name must yield the form whose id is 'xx'.
public void GetByName_MultipleForms_CorrectFormFound()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body><div><p><form name='NO' id='NO'/></p><form name='xx' id='xx'/></div></body></html>");

    // act
    var found = HtmlForm.GetByName(document, "xx");

    // assert
    var actualElement = found.FormElement;
    var expectedElement = document.GetElementById("xx");
    Assert.That(actualElement, Is.EqualTo(expectedElement));
}
// The document contains no form at all; asking for "not-existing" is
// expected to produce null.
public void GetByName_NoFormWithThisName_ReturnsNull()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body><div id='xx'/></body></html>");

    // act
    var lookupResult = HtmlForm.GetByName(document, "not-existing");

    // assert
    Assert.That(lookupResult, Is.Null);
}
// GetRoot() called on a <div> inside <body> must return the same element
// as the parent of the document body.
public void GetRoot_SomeChild_RootReturned()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body><div id='xx'/></body></html>");
    var child = document.GetElementById("xx");

    // act
    var actualRoot = child.GetRoot();

    // assert
    var expectedRoot = document.Body.Parent;
    Assert.That(actualRoot, Is.EqualTo(expectedRoot));
}
// Constructing an HtmlTable around a <div> must throw ArgumentException
// whose message contains "not a html table element".
public void Ctor_NotATableElement_Throws()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body><div id='xx'/></body></html>");
    var divElement = document.GetElementById("xx");

    // act
    var thrown = Assert.Throws<ArgumentException>(() => new HtmlTable(divElement));

    // assert
    var message = thrown.Message;
    Assert.That(message, Does.Contain("not a html table element"));
}
// GetCell(1, 2) on the fixture table is expected to return the element
// whose id is "c12".
public void GetCell_WithRowAndColumn_ReturnsCorrectCell()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml(GetHtml());
    var tablePath = HtmlPath.Parse("/BODY[0]/TABLE[0]");
    var htmlTable = HtmlTable.GetByPath(document, tablePath);

    // act
    var cell = htmlTable.GetCell(1, 2);

    // assert
    var cellId = cell.Id;
    Assert.That(cellId, Is.EqualTo("c12"));
}
// GetColumn() for cell "c12" must yield cells whose ids are equivalent
// (order-insensitive) to c02, c12, c22 and c32.
public void GetColumn_WithCell_ReturnsCorrectTDs()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml(GetHtml());
    var tablePath = HtmlPath.Parse("/BODY[0]/TABLE[0]");
    var htmlTable = HtmlTable.GetByPath(document, tablePath);
    var anchorCell = document.GetElementById("c12");

    // act
    var columnCells = htmlTable.GetColumn(anchorCell);

    // assert
    var actualIds = columnCells.Select(cell => cell.Id);
    var expectedIds = new[] { "c02", "c12", "c22", "c32" };
    Assert.That(actualIds, Is.EquivalentTo(expectedIds));
}
// GetRow() for cell "c12" must return cells whose first entry has a parent
// with id "row1".
public void GetRow_WithCell_ReturnsCorrectTDs()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml(GetHtml());
    var tablePath = HtmlPath.Parse("/BODY[0]/TABLE[0]");
    var htmlTable = HtmlTable.GetByPath(document, tablePath);
    var anchorCell = document.GetElementById("c12");

    // act
    var rowCells = htmlTable.GetRow(anchorCell);

    // assert
    var owningRowId = rowCells[0].Parent.Id;
    Assert.That(owningRowId, Is.EqualTo("row1"));
}
// ColumnIndexOf() for cell "c12" in the fixture table is expected to be 2.
public void ColumnIndexOf_WithCell_ReturnsCorrectIndex()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml(GetHtml());
    var tablePath = HtmlPath.Parse("/BODY[0]/TABLE[0]");
    var htmlTable = HtmlTable.GetByPath(document, tablePath);
    var anchorCell = document.GetElementById("c12");

    // act
    var columnIndex = htmlTable.ColumnIndexOf(anchorCell);

    // assert
    Assert.That(columnIndex, Is.EqualTo(2));
}
// Resolving "/BODY[0]/div[0]" (note the mixed tag casing in the path) must
// return the same element as a lookup by id 'xx'.
public void GetElementByPath_SimplePath_ElementFound()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body><div id='xx'/></body></html>");
    var elementPath = HtmlPath.Parse("/BODY[0]/div[0]");

    // act
    var resolved = document.GetElementByPath(elementPath);

    // assert
    var expected = document.GetElementById("xx");
    Assert.That(resolved, Is.EqualTo(expected));
}
// With several same-named siblings, "/BODY[0]/div[2]/P[2]" must resolve to
// the <p id='xx'> that sits third inside the third <div>.
public void GetElementByPath_MultipleElementsWithSameTagName_ElementFound()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body><div/><div/><div><P/><P/><p id='xx'/></div></body></html>");
    var elementPath = HtmlPath.Parse("/BODY[0]/div[2]/P[2]");

    // act
    var resolved = document.GetElementByPath(elementPath);

    // assert
    var expected = document.GetElementById("xx");
    Assert.That(resolved, Is.EqualTo(expected));
}
// The path "/html[0]/BODY[0]/P[0]" is expected not to resolve in this
// document, so GetElementByPath() should return null.
public void GetElementByPath_NoElementWithSuchPath_ReturnsNull()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body><div id='xx'/></body></html>");
    var missingPath = HtmlPath.Parse("/html[0]/BODY[0]/P[0]");

    // act
    var resolved = document.GetElementByPath(missingPath);

    // assert
    Assert.That(resolved, Is.Null);
}
// Resolving "/body[0]" must hand back the document's Body element.
public void GetElementByPath_PathPointsToBody_BodyReturned()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body></body></html>");
    var bodyPath = HtmlPath.Parse("/body[0]");

    // act
    var resolved = document.GetElementByPath(bodyPath);

    // assert
    var expectedBody = document.Body;
    Assert.That(resolved, Is.EqualTo(expectedBody));
}
// Checks that LoadDocument() returns a non-null document for the URL
// "http://cnn.com".
// NOTE(review): this presumably performs a live HTTP request, which would make
// the test depend on network availability - confirm against HtmlDocumentLoader.
public void LoadDocument()
{
    //arrange
    var loader = new HtmlDocumentLoader();

    //act
    var document = loader.LoadDocument(@"http://cnn.com");

    //assert
    Assert.That(document, Is.Not.Null);
}
// Program entry point: creates a console logger, a document loader and a
// crawler repository, then runs every configured crawler against them.
// Any exception escaping the crawl is caught here and written to the logger
// (message first, then stack trace) instead of propagating out of Main.
// The args parameter is not used.
static void Main(string[] args)
{
    var logger = new ConsoleLogger();
    try
    {
        var loader = new HtmlDocumentLoader();
        var repository = new CrawlerRepository();

        // Alternative crawler set, currently disabled:
        //var crawlers = new ICrawler[] { new RabotaUaCrawler(logger), new CareersStackoverfowComCrawler(logger) };
        var crawlers = new ICrawler[] { new CareersStackoverfowComCrawler(logger) };

        foreach (var crawler in crawlers)
        {
            crawler.Crawl(loader, repository);
        }
    }
    catch (Exception e)
    {
        // Top-level catch-all: report the failure rather than let it escape Main.
        logger.Log("FAILED exception caught in Main() method. Exception message: " + e.Message);
        logger.Log(e.StackTrace);
    }
}
// Thin wrapper that forwards the given HTML string to
// HtmlDocumentLoader.LoadHtml and returns its result unchanged.
// NOTE(review): the enclosing type is not visible here; if this method is
// itself declared on HtmlDocumentLoader the call would be self-recursive - confirm.
public static IHtmlDocument LoadHtml(string html)
{
    var document = HtmlDocumentLoader.LoadHtml(html);
    return document;
}
// GetRoot() called on the body element must return the same element as
// the body's Parent.
public void GetRoot_WithRoot_RootReturned()
{
    // arrange
    var document = HtmlDocumentLoader.LoadHtml("<html><body></body></html>");

    // act
    var actualRoot = document.Body.GetRoot();

    // assert
    var expectedRoot = document.Body.Parent;
    Assert.That(actualRoot, Is.EqualTo(expectedRoot));
}