/// <summary>
/// Checks that LastChild of a newly constructed HtmlDocumentNode is null.
/// </summary>
public void LastChildOfEmptyElementIsNull()
{
    var emptyNode = new HtmlDocumentNode();

    Assert.Null(emptyNode.LastChild);
}
/// <summary>
/// Parses a tag mixing a quoted attribute, a valueless attribute and an
/// empty single-quoted attribute, then checks names, values and flags.
/// </summary>
public void MixedAttributes()
{
    var formatter = new TextFormatter("<name attr1=\"value\" attr2 attr3=\'\'>");
    var tagParser = new NormalTagParser(formatter);
    if (tagParser.CanParse())
    {
        tagParser.Parse();
    }

    HtmlDocumentNode parsed = tagParser.ParsedNode;

    Assert.AreEqual("name", parsed.Name);
    Assert.AreEqual(3, parsed.Attributes.Count);

    // Attribute names, in document order.
    Assert.AreEqual("attr1", parsed.Attributes[0].Name);
    Assert.AreEqual("attr2", parsed.Attributes[1].Name);
    Assert.AreEqual("attr3", parsed.Attributes[2].Name);

    // A valueless attribute is expected to carry null, an empty quoted one "".
    Assert.AreEqual("value", parsed.Attributes[0].Value);
    Assert.AreEqual(null, parsed.Attributes[1].Value);
    Assert.AreEqual("", parsed.Attributes[2].Value);

    Assert.AreEqual(1, parsed.Flags.Count);
    Assert.AreEqual(true, parsed.Flags.Contains(Flags.NormalTag));
}
/// <summary>
/// Deletes an element with children, a childless element and a text node
/// in turn, checking the root's InnerHtml after each deletion.
/// </summary>
public void NodeDeleteTest()
{
    string input = "<html><head>Header<br></head><body>Body<div>Content<hr></div></body></html>";
    HtmlDocument doc = new HtmlDocument(input);
    doc.Parse();
    HtmlDocumentNode root = doc.RootNode;

    // Expected value goes first so that a failure reports expected/actual the right way round.
    Assert.AreEqual("<html><head>Header<br></head><body>Body<div>Content<hr></div></body></html>", root.InnerHtml);

    HtmlDocumentNode nodeToDelete = root.Descendants.FirstOrDefault(x => x.Name == "head");
    root.DeleteNode(nodeToDelete);
    Assert.AreEqual("<html><body>Body<div>Content<hr></div></body></html>", root.InnerHtml);

    HtmlDocumentNode nodeToDelete2 = root.Descendants.FirstOrDefault(x => x.Name == "hr");
    root.DeleteNode(nodeToDelete2);
    Assert.AreEqual("<html><body>Body<div>Content</div></body></html>", root.InnerHtml);

    HtmlDocumentNode nodeToDelete3 = root.Descendants.FirstOrDefault(x => x.Name == "#text" && x.OwnText == "Content");
    root.DeleteNode(nodeToDelete3);
    Assert.AreEqual("<html><body>Body<div></div></body></html>", root.InnerHtml);
}
/// <summary>
/// Checks that FirstChild of a newly constructed HtmlDocumentNode is null.
/// </summary>
public void FirstChildOfEmptyElementIsNull()
{
    var emptyNode = new HtmlDocumentNode();

    Assert.Null(emptyNode.FirstChild);
}
/// <summary>
/// Checks that an element appended to a document node reports that
/// document node as its ParentNode.
/// </summary>
public void ParentNodeOfNotRootElementIsNotNull()
{
    var document = new HtmlDocumentNode();
    var anchor = new HtmlElementNode("a");

    document.AppendChild(anchor);

    Assert.Equal(document, anchor.ParentNode);
}
/// <summary>
/// Checks that LastChild returns the "br" element appended to the node.
/// </summary>
public void LastChildIsBr()
{
    var document = new HtmlDocumentNode();
    document.AppendChild(new HtmlElementNode("br"));

    Assert.Equal("br", document.LastChild.Name);
}
/// <summary>
/// Gets the value of the first attribute of a node whose name equals
/// <paramref name="attrName"/>.
/// </summary>
/// <param name="node">Node whose attributes are searched.</param>
/// <param name="attrName">Attribute name to look for (exact match).</param>
/// <returns>
/// The attribute value, or an empty string when the node has no such
/// attribute or the attribute carries no value. Never null.
/// </returns>
public static string GetAttributeValueByName(HtmlDocumentNode node, string attrName)
{
    // Single pass over the attributes instead of Any(...) followed by First(...).
    foreach (var attribute in node.Attributes)
    {
        if (attribute.Name == attrName)
        {
            // A valueless attribute can carry a null Value; normalise it so
            // callers can use string members on the result without a null check.
            return attribute.Value ?? string.Empty;
        }
    }

    return string.Empty;
}
/// <summary>
/// Checks that an only child has a null PreviousSibling.
/// </summary>
public void PreviousSiblingIsNull()
{
    var document = new HtmlDocumentNode();
    var anchor = new HtmlElementNode("a");
    document.AppendChild(anchor);

    Assert.Null(anchor.PreviousSibling);
}
/// <summary>
/// Checks that an only child has a null NextSibling.
/// </summary>
public void NextSiblingIsNull()
{
    var document = new HtmlDocumentNode();
    var anchor = new HtmlElementNode("a");
    document.AppendChild(anchor);

    Assert.Null(anchor.NextSibling);
}
/// <summary>
/// Checks that NextSibling of the first child is the "br" element
/// appended after it.
/// </summary>
public void NextSiblingIsBr()
{
    var document = new HtmlDocumentNode();
    var anchor = new HtmlElementNode("a");

    // Order matters: the anchor first, the line break second.
    document.AppendChild(anchor);
    document.AppendChild(new HtmlElementNode("br"));

    Assert.Equal("br", anchor.NextSibling.Name);
}
/// <summary>
/// Checks that PreviousSibling of the second child is the "br" element
/// appended before it.
/// </summary>
public void PreviousSiblingIsBr()
{
    var document = new HtmlDocumentNode();
    var anchor = new HtmlElementNode("a");

    // Order matters: the line break first, the anchor second.
    document.AppendChild(new HtmlElementNode("br"));
    document.AppendChild(anchor);

    Assert.Equal("br", anchor.PreviousSibling.Name);
}
/// <summary>
/// Loads the page at <paramref name="url"/>, collects the href of every
/// anchor with class "link group-header" into ResultGroups and returns the
/// chip list built by GetChipList().
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>The result of GetChipList().</returns>
public override List <Chip> Parse(string url)
{
    HtmlDocumentNode = Web.Load(url).DocumentNode;

    var groupHeaders = HtmlDocumentNode.SelectNodes("//a[@class='link group-header']");

    // The original relied on Select(...) throwing ArgumentNullException for a
    // null node set and caught it; an explicit check yields the same
    // ResultGroups value without using an exception as control flow.
    // NOTE(review): assumes SelectNodes reports "no match" by returning null
    // rather than by throwing - confirm against the HTML library in use.
    if (groupHeaders == null)
    {
        ResultGroups = null;
    }
    else
    {
        ResultGroups = groupHeaders.Select(n => n.GetAttributeValue("href", null));
    }

    return(GetChipList());
}
/// <summary>
/// Parses a fragment with nested and void elements and checks that the
/// root's InnerText is the concatenation of all text pieces in order.
/// </summary>
public void InnerTextTest()
{
    var document = new HtmlDocument("Text0<div id=\"divider\" class=\"big and small\">Text1<b>Text2</b>Text3<br/><i>Text4</i><img>Text5</div>Text6Text7<br>Text8");
    document.Parse();

    HtmlDocumentNode root = document.RootNode;

    Assert.AreEqual("Text0Text1Text2Text3Text4Text5Text6Text7Text8", root.InnerText);
}
/// <summary>
/// Creates ParsedNode for a special tag, stamping it with the configured
/// name and the source's current position and line, then runs the three
/// opener / content / closer steps in that order.
/// </summary>
public void Parse()
{
    // Position and line are read before any of the steps below run.
    var node = new HtmlDocumentNode();
    node.Name = _name;
    node.Position = _source.Position;
    node.Line = _source.Line;

    ParsedNode = node;
    ParsedNode.Flags.Add(Flags.SpecialTag);

    AddAndSkipTagOpener();
    AddAndSkipTagContent();
    AddAndSkipTagCloser();
}
/// <summary>
/// Parses a "/* ... */" block with a SpecialTagParser and checks the node
/// name and the extracted text.
/// </summary>
public void JsComment()
{
    var formatter = new TextFormatter("/*Comment Text*/");
    var configuration = new SpecialTagParserConfiguration("/*", "*/", false);
    var tagParser = new SpecialTagParser("#jscomment", formatter, configuration);

    if (tagParser.CanParse())
    {
        tagParser.Parse();
    }

    HtmlDocumentNode parsed = tagParser.ParsedNode;
    Assert.AreEqual("#jscomment", parsed.Name);
    Assert.AreEqual("Comment Text", parsed.OwnText);
}
/// <summary>
/// Parses a doctype declaration with a SpecialTagParser and checks the
/// node name and the extracted text.
/// </summary>
public void Doctype()
{
    var formatter = new TextFormatter("<!doctype doctype content>");
    var configuration = new SpecialTagParserConfiguration("<!doctype ", ">", false);
    var tagParser = new SpecialTagParser("#doctype", formatter, configuration);

    if (tagParser.CanParse())
    {
        tagParser.Parse();
    }

    HtmlDocumentNode parsed = tagParser.ParsedNode;
    Assert.AreEqual("#doctype", parsed.Name);
    Assert.AreEqual("doctype content", parsed.OwnText);
}
/// <summary>
/// Parses a script element with a SpecialTagParser and checks the node
/// name and the extracted script text.
/// </summary>
public void Script()
{
    var formatter = new TextFormatter("<script>script code</script>");
    var configuration = new SpecialTagParserConfiguration("<script", "</script>", false);
    var tagParser = new SpecialTagParser("#script", formatter, configuration);

    if (tagParser.CanParse())
    {
        tagParser.Parse();
    }

    HtmlDocumentNode parsed = tagParser.ParsedNode;
    Assert.AreEqual("#script", parsed.Name);
    Assert.AreEqual("script code", parsed.OwnText);
}
/// <summary>
/// Parses a conditional block containing a nested comment and checks that
/// the nested comment is kept verbatim in the node's text.
/// </summary>
public void Conditional()
{
    var formatter = new TextFormatter("<![if IE6]>Conditional Comment Text<!--Nested Comment--><![endif]>");
    var configuration = new SpecialTagParserConfiguration("<![if", "<![endif]>", false);
    var tagParser = new SpecialTagParser("#conditional", formatter, configuration);

    if (tagParser.CanParse())
    {
        tagParser.Parse();
    }

    HtmlDocumentNode parsed = tagParser.ParsedNode;
    Assert.AreEqual("#conditional", parsed.Name);
    Assert.AreEqual("Conditional Comment Text<!--Nested Comment-->", parsed.OwnText);
}
/// <summary>
/// Gets the outer HTML of the anchors found in the search result section
/// of the page, i.e. anchors whose href starts with
/// Constants.A__START_URL.
/// </summary>
/// <param name="bodyNode">
/// Node whose descendants are scanned; a null node yields no links.
/// </param>
/// <returns>
/// An array of at least 200 slots: the matching anchors in document order
/// followed by null padding. The 200-slot minimum is kept for compatibility
/// with existing callers; the array grows when more than 200 anchors match
/// (the previous fixed-size buffer overflowed in that case).
/// </returns>
public static string[] GetLinksFromSearchResult(HtmlDocumentNode bodyNode)
{
    const int MinimumSlots = 200;

    if (bodyNode == null)
    {
        return(new string[MinimumSlots]);
    }

    string[] matches = bodyNode.Descendants
        .Where(d => d.OwnHtml.StartsWith(Constants.A_START))
        //Note: comment out below "Where" filter to find all appearances of the searchLink(including in ads or social networking pages)
        .Where(d =>
        {
            // Guard against a null attribute value before calling StartsWith.
            string href = GetAttributeValueByName(d, Constants.HREF);
            return href != null && href.StartsWith(Constants.A__START_URL);
        })
        .Select(d => d.OuterHtml)
        .ToArray();

    if (matches.Length >= MinimumSlots)
    {
        return(matches);
    }

    // Pad with nulls up to the historical fixed length.
    string[] links = new string[MinimumSlots];
    for (int i = 0; i < matches.Length; i++)
    {
        links[i] = matches[i];
    }

    return(links);
}
/// <summary>
/// Parses an XML processing instruction with a SpecialTagParser and checks
/// that the content is kept including its surrounding spaces.
/// </summary>
public void XmlProcessingInstruction()
{
    var formatter = new TextFormatter("<? Instruction content ?>");
    var configuration = new SpecialTagParserConfiguration("<?", "?>", false);
    var tagParser = new SpecialTagParser("#xmlprocessinginstruction", formatter, configuration);

    if (tagParser.CanParse())
    {
        tagParser.Parse();
    }

    HtmlDocumentNode parsed = tagParser.ParsedNode;
    Assert.AreEqual("#xmlprocessinginstruction", parsed.Name);
    Assert.AreEqual(" Instruction content ", parsed.OwnText);
}
/// <summary>
/// Reads text from the formatter up to the next "&lt;" or "/*" stop string,
/// wraps it in a "#text" node flagged as text and adds that node to the
/// current parent.
/// </summary>
private void ParseAsText()
{
    // Read the location first; the text read below presumably moves the
    // formatter forward, so these must be captured before it.
    int startPosition = _textFormatter.Position;
    int startLine = _textFormatter.Line;
    string content = _textFormatter.GetTextFromCurrentPositionToAnyStopString("<", "/*");

    var textNode = new HtmlDocumentNode();
    textNode.Name = "#text";
    textNode.Position = startPosition;
    textNode.Line = startLine;
    textNode.OwnText = content;
    textNode.OuterHtml = content;
    textNode.ParentNode = _currentParent;

    _currentNode = textNode;
    _currentNode.Flags.Add(Flags.Text);

    AddCurrentNodeToCurrentParent();
}
/// <summary>
/// Creates ParsedNode stamped with the source's current position and line,
/// skips the tag opener and then parses either an end tag (when a slash
/// follows) or a start tag.
/// </summary>
public void Parse()
{
    var node = new HtmlDocumentNode();
    node.Position = _source.Position;
    node.Line = _source.Line;
    ParsedNode = node;

    SkipTagOpener();

    if (!IsFrontslash())
    {
        ParseStartTag();
        return;
    }

    ParseEndTag();
}
/// <summary>
/// Sets up the parsing state: the text formatter over the document HTML,
/// the "#root" node (also the initial current parent), the normal tag
/// parser and the list of special tag parsers.
/// </summary>
private void Initialize()
{
    _textFormatter = new TextFormatter(_documentHtml);

    var root = new HtmlDocumentNode();
    root.Name = "#root";
    RootNode = root;
    _currentParent = RootNode;

    _normalTagParser = new NormalTagParser(_textFormatter);

    // Registration order is preserved exactly; note that the longer openers
    // ("<!--[if") are registered before their shorter prefixes ("<!--").
    // NOTE(review): presumably the parsers are tried in list order - confirm.
    List <SpecialTagParser> specialParsers = new List <SpecialTagParser>();
    specialParsers.Add(new SpecialTagParser("#doctype", _textFormatter,
        new SpecialTagParserConfiguration("<!doctype ", ">", false)));
    specialParsers.Add(new SpecialTagParser("#conditional", _textFormatter,
        new SpecialTagParserConfiguration("<![if", "<![endif]>", false)));
    specialParsers.Add(new SpecialTagParser("#conditionalcomment", _textFormatter,
        new SpecialTagParserConfiguration("<!--[if", "<![endif]-->", false)));
    specialParsers.Add(new SpecialTagParser("#comment", _textFormatter,
        new SpecialTagParserConfiguration("<!--", "-->", false)));
    specialParsers.Add(new SpecialTagParser("#jscomment", _textFormatter,
        new SpecialTagParserConfiguration("/*", "*/", false)));
    specialParsers.Add(new SpecialTagParser("#xmlprocessinginstruction", _textFormatter,
        new SpecialTagParserConfiguration("<?", "?>", false)));
    specialParsers.Add(new SpecialTagParser("script", _textFormatter,
        new SpecialTagParserConfiguration("<script", "</script>", false)));

    _specialTagParsers = specialParsers;
}
/// <summary>
/// Parses a bare tag without attributes and checks its name, the empty
/// attribute list and the single NormalTag flag.
/// </summary>
public void NormalTag()
{
    var formatter = new TextFormatter("<name>");
    var tagParser = new NormalTagParser(formatter);
    if (tagParser.CanParse())
    {
        tagParser.Parse();
    }

    HtmlDocumentNode parsed = tagParser.ParsedNode;

    Assert.AreEqual("name", parsed.Name);
    Assert.AreEqual(0, parsed.Attributes.Count);
    Assert.AreEqual(1, parsed.Flags.Count);
    Assert.AreEqual(true, parsed.Flags.Contains(Flags.NormalTag));
}
/// <summary>
/// Main method to scrape a search result page and find the position(s) of
/// a particular URL among the result links.
/// </summary>
/// <param name="webUrl">Address of the search result page to download.</param>
/// <param name="searchLink">Link to look for among the result anchors.</param>
/// <returns>
/// The positions string produced by GetSearchURLPositions, or "0" when it
/// is null/empty or the page has no body node.
/// </returns>
static string Scrap(string webUrl, string searchLink)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(webUrl);

    // Dispose the response, its stream and the reader deterministically;
    // the original left all of them open.
    string pageHtml;
    using (HttpWebResponse resp = (HttpWebResponse)request.GetResponse())
    using (Stream resStream = resp.GetResponseStream())
    // ASCII decoding with BOM detection disabled matches the original
    // chunk-wise Encoding.ASCII.GetString behaviour.
    using (StreamReader reader = new StreamReader(resStream, Encoding.ASCII, false))
    {
        pageHtml = reader.ReadToEnd();
    }

    HtmlParser.HtmlDocumentStructure.HtmlDocument docx = new HtmlParser.HtmlDocumentStructure.HtmlDocument(pageHtml);
    docx.Parse();

    // No cast to List<> needed: Where works on the descendant sequence directly.
    // SingleOrDefault still throws if more than one node matches, as before.
    HtmlDocumentNode body = docx.RootNode.Descendants
                            .Where(candidate => candidate.OwnHtml.StartsWith(Constants.BODY_START))
                            .SingleOrDefault();

    // Without a body there is nothing to search; report "not found".
    if (body == null)
    {
        return("0");
    }

    //searching for links
    var anchors = GetLinksFromSearchResult(body);
    var positions = GetSearchURLPositions(anchors, searchLink);

    return(string.IsNullOrEmpty(positions) ? "0" : positions);
}
/// <summary>
/// Parses a tag whose attributes have empty double- and single-quoted
/// values and checks that both values come back as empty strings.
/// </summary>
public void AttributesWithQuotesWithoutValues()
{
    var formatter = new TextFormatter("<name attr=\"\" attr2=\'\'>");
    var tagParser = new NormalTagParser(formatter);
    if (tagParser.CanParse())
    {
        tagParser.Parse();
    }

    HtmlDocumentNode parsed = tagParser.ParsedNode;

    Assert.AreEqual("name", parsed.Name);
    Assert.AreEqual(2, parsed.Attributes.Count);

    Assert.AreEqual("attr", parsed.Attributes[0].Name);
    Assert.AreEqual("attr2", parsed.Attributes[1].Name);

    Assert.AreEqual("", parsed.Attributes[0].Value);
    Assert.AreEqual("", parsed.Attributes[1].Value);

    Assert.AreEqual(1, parsed.Flags.Count);
    Assert.AreEqual(true, parsed.Flags.Contains(Flags.NormalTag));
}
/// <summary>
/// Makes the current node the current parent.
/// </summary>
private void ChooseCurrentNodeAsCurrentParent()
{
    var newParent = _currentNode;
    _currentParent = newParent;
}
/// <summary>
/// Runs the current parser, takes its ParsedNode as the current node and
/// places it in the tree: an opening tag is added under the current parent
/// (optionally closing that parent first), while an end tag closes the
/// matching parent, walking up the parent chain when needed.
/// </summary>
private void CreateNewNode()
{
    _currentParser.Parse();
    _currentNode = _currentParser.ParsedNode;
    _currentNode.ParentNode = _currentParent;

    // ---- Opening tag ----
    if (!IsEndTag())
    {
        bool closesCurrentParent = CurrentParentCanClosedByOpeningTag() && CurrentNodeCanCloseCurrentParent();
        if (closesCurrentParent)
        {
            ChooseCurrentParentParentAsCurrentParent();
            _currentNode.ParentNode = _currentParent;
        }

        AddCurrentNodeToCurrentParent();

        if (CurrentNodeCanHaveChildren())
        {
            ChooseCurrentNodeAsCurrentParent();
        }

        return;
    }

    // ---- End tag ----
    if (!EndTagMatchesCurrentParent())
    {
        // Walk up until a parent matches; if none does, restore the parent
        // we started from and leave the tree untouched.
        HtmlDocumentNode startingParent = _currentParent;
        while (EndTagDoesNotMatchCurrentParent())
        {
            ChooseCurrentParentParentAsCurrentParent();
            if (EndTagDoesNotMatchAnyParent())
            {
                _currentParent = startingParent;
                //Can do something with this
                return;
            }
        }
    }

    // The current parent is now the element this end tag closes.
    _currentParent.Flags.Add(Flags.ContainsClosingTag);
    if (Configuration.IncludeClosingTagsInNodeTree)
    {
        _currentParent.Flags.Add(Flags.ClosingTagIncudedInNodeTree);
    }

    ChooseCurrentParentParentAsCurrentParent();

    if (Configuration.IncludeClosingTagsInNodeTree)
    {
        AddCurrentNodeToCurrentParent();
    }
}
/// <summary>
/// Returns the inner text of every div with class "denoPrice" under the
/// loaded document node, as a lazily evaluated sequence.
/// </summary>
/// <returns>One string per matching node.</returns>
internal override IEnumerable <string> GetResultPrice()
{
    // NOTE(review): if SelectNodes yields null when nothing matches, the
    // Select call below throws ArgumentNullException - confirm callers
    // expect that.
    var priceNodes = HtmlDocumentNode.SelectNodes("//div[@class='denoPrice']");
    IEnumerable <string> prices = priceNodes.Select(priceNode => priceNode.InnerText);
    return(prices);
}
/// <summary>
/// Checks that a newly constructed HtmlDocumentNode has a null ParentNode.
/// </summary>
public void ParentNodeOfRootElementIsNull()
{
    var rootNode = new HtmlDocumentNode();

    Assert.Null(rootNode.ParentNode);
}