/// <summary>
/// Builds a <see cref="Post"/> from the two HTML nodes that together describe one story.
/// </summary>
/// <param name="primaryNode">Node queried for the story link (inner HTML and <c>href</c>) and the rank span.</param>
/// <param name="secondaryNode">Node queried for the user link, the score span and the last link inside the subtext cell.</param>
/// <returns>A new <see cref="Post"/> whose properties are filled from the parsed fragments.</returns>
/// <exception cref="ValidationException">
/// Thrown with <c>MissingNodes</c> when any of the lookups raises a <see cref="NullReferenceException"/>.
/// </exception>
public virtual Post CreatePost(HtmlNodeWrapper primaryNode, HtmlNodeWrapper secondaryNode)
{
    const string StoryLinkPath = @".//a[@class='storylink']";

    string rawTitle;
    string rawAuthor;
    string rawHref;
    string rawPoints;
    string rawComments;
    string rawRank;

    try
    {
        // Lookups run in a fixed order; the first one that fails aborts the rest.
        rawTitle = primaryNode.GetInnerHtml(StoryLinkPath);
        rawAuthor = secondaryNode.GetInnerHtml(@".//a[@class='hnuser']");
        rawHref = primaryNode.GetAttributeValue("href", StoryLinkPath, null);
        rawPoints = secondaryNode.GetInnerHtml(@".//span[@class='score']");
        rawComments = secondaryNode.GetInnerHtml(@".//td[@class='subtext']/a[last()]");
        rawRank = primaryNode.GetInnerHtml(@".//span[@class='rank']");
    }
    catch (NullReferenceException)
    {
        // a node could not be found on the page
        throw new ValidationException(MissingNodes);
    }

    // Parsing happens outside the try block, so whatever the Parse* helpers throw
    // reaches the caller unchanged.
    var title = this.ParseTitle(rawTitle);
    var author = this.ParseAuthor(rawAuthor);
    var uri = this.ParseUri(rawHref);
    var points = this.ParsePositiveInt(rawPoints, PointsLessThanZero);
    var commentsCount = this.ParsePositiveInt(rawComments, CommentsLessThanZero);
    var rank = this.ParsePositiveInt(rawRank, RankLessThanZero);

    return new Post
    {
        Title = title,
        Author = author,
        Uri = uri,
        Points = points,
        CommentsCount = commentsCount,
        Rank = rank,
    };
}
/// <summary>
/// Parses <paramref name="input"/> as a single paragraph and asserts on the verses found,
/// the resulting HTML and the extracted text.
/// </summary>
/// <param name="input">HTML fragment handed to the document parser.</param>
/// <param name="expectedOutput">
/// Expected HTML after parsing. When null or empty, the document id is created with
/// <c>isReadonly</c> set to <c>true</c> and the output is expected to equal <paramref name="input"/>.
/// </param>
/// <param name="initDocParseContext">Optional callback invoked with <c>documentParseContext</c> before parsing; may be null.</param>
/// <param name="notFoundVerses">
/// Verses expected in <c>result.NotFoundVerses</c>. When null, the not-found verses are not checked.
/// </param>
/// <param name="verses">Verses expected in <c>result.VerseEntries</c>; null is treated as an empty list.</param>
/// <returns>A <see cref="TestResult"/> holding the parsed node and the paragraph parse result.</returns>
private TestResult CheckVerses(string input, string expectedOutput, Action<IDocumentParseContextEditor> initDocParseContext, string[] notFoundVerses, params string[] verses)
{
    var isReadonly = false;
    if (string.IsNullOrEmpty(expectedOutput))
    {
        isReadonly = true;
        expectedOutput = input;
    }

    var mockDocumentId = new FileDocumentId(0, null, isReadonly);

    if (verses == null)
    {
        verses = new string[0];
    }

    initDocParseContext?.Invoke(documentParseContext);

    var htmlDoc = new HtmlNodeWrapper(input);
    ParagraphParseResult result;
    using (var docParser = this.documentParserFactory.Create(documentProvider, mockDocumentId))
    {
        result = docParser.ParseParagraph(htmlDoc);
    }

    Assert.AreEqual(verses.Length, result.VerseEntries.Count, "Verses length is not the same. Expected: {0}. Found: {1}", verses.Length, result.VerseEntries.Count);

    // Materialize once: the projection is otherwise re-run over VerseEntries
    // for every expected verse checked in the loop below.
    var versePointers = result.VerseEntries.Select(ve => ve.VersePointer).ToList();
    foreach (var verse in verses)
    {
        Assert.IsTrue(versePointers.Contains(this.versePointerFactory.CreateVersePointer(verse)), "Can not find the verse: '{0}'", verse);
    }

    Assert.AreEqual(expectedOutput, htmlDoc.InnerXml, "The output html is wrong.");

    // NOTE(review): the two Replace arguments render identically here; presumably the
    // first is a non-breaking space being normalized to a regular space - confirm.
    Assert.AreEqual(new HtmlToTextConverter().SimpleConvert(input).Replace(" ", " "), result.Text, "Text parts do not contain the full input string.");

    if (notFoundVerses != null)
    {
        Assert.AreEqual(notFoundVerses.Length, result.NotFoundVerses.Count);
        foreach (var verse in notFoundVerses)
        {
            Assert.IsTrue(result.NotFoundVerses.Contains(this.versePointerFactory.CreateVersePointer(verse)));
        }
    }

    return new TestResult() { Node = htmlDoc, Result = result };
}
/// <summary>
/// Walks <paramref name="node"/> recursively and feeds it to <paramref name="docParser"/>.
/// Hierarchical elements are opened as a hierarchy scope and their child elements are visited;
/// any other element with non-blank text is parsed as a paragraph and its value is replaced
/// with the parsed markup.
/// </summary>
/// <param name="docParser">Parser that receives hierarchy scopes and paragraphs.</param>
/// <param name="node">Element to process; its <c>Value</c> may be overwritten.</param>
private void ParseNode(IDocumentParser docParser, XElement node)
{
    var paragraphType = GetParagraphType(node);

    if (!paragraphType.IsHierarchical())
    {
        var text = node.Value;
        if (string.IsNullOrEmpty(text.Trim()))
        {
            // Nothing but whitespace: leave the element untouched.
            return;
        }

        var paragraph = new HtmlNodeWrapper(text);
        docParser.ParseParagraph(paragraph);

        // Write the wrapper's markup back into the element after parsing.
        node.Value = paragraph.InnerXml;
        return;
    }

    // The hierarchy scope stays open while the child elements are visited.
    using (docParser.ParseHierarchyElement(paragraphType))
    {
        foreach (var child in node.Elements())
        {
            ParseNode(docParser, child);
        }
    }
}