/// <summary>
/// Produces the tokens for every "body" element (in the <c>_ns</c> namespace)
/// found directly under <c>_root</c>.
/// </summary>
/// <returns>
/// A lazily evaluated sequence: the bodies are only parsed while the result is enumerated.
/// </returns>
public override IEnumerable<TokenBase> GetTokens()
{
    // ParseNodes clones the top of this stack for every element, so it must
    // start out with one default entry.
    var styles = new Stack<TextVisualProperties>();
    styles.Push(new TextVisualProperties());

    // One counter is shared by all bodies so token indices keep increasing across them.
    var counter = new TokenIndex();

    return from body in _root.Elements(_ns + "body")
           from token in ParseNodes(body, styles, counter, 0)
           select token;
}
/// <summary>
/// Breaks <paramref name="text"/> into pieces with <c>BreakToWords</c> and yields one token per piece.
/// </summary>
/// <param name="text">Raw text to tokenize.</param>
/// <param name="top">Shared index counter; it is advanced by one for every token produced.</param>
/// <returns>
/// A <c>WhitespaceToken</c> for each null or empty piece and a <c>TextToken</c>
/// carrying the HTML-decoded piece otherwise.
/// </returns>
protected IEnumerable<TokenBase> ParseText(string text, TokenIndex top)
{
    foreach (string piece in text.BreakToWords())
    {
        // Every piece consumes exactly one index, whichever token type it becomes.
        var id = top.Index++;

        if (!string.IsNullOrEmpty(piece))
        {
            yield return new TextToken(id, HttpUtility.HtmlDecode(piece));
        }
        else
        {
            yield return new WhitespaceToken(id);
        }
    }
}
/// <summary>
/// Produces the tokens of every spine item returned by <c>GetSpineItems</c>, in order.
/// </summary>
/// <returns>
/// For each spine item: a <c>NewPageToken</c>, followed by the tokens of the item's content.
/// </returns>
public override IEnumerable<TokenBase> GetTokens()
{
    // The style stack starts with one default entry and is shared by all spine items.
    var styles = new Stack<TextVisualProperties>();
    styles.Push(new TextVisualProperties());

    var counter = new TokenIndex();

    foreach (EpubSpineItem entry in GetSpineItems())
    {
        yield return new NewPageToken(counter.Index++);

        // The anchor is registered after the page-break token was issued, so it
        // refers to the index that follows the page break.
        AddAnchor(counter, _opfPath + entry.Path);

        IEnumerable<TokenBase> itemTokens = ParseSpineItem(entry, styles, counter);
        foreach (TokenBase itemToken in itemTokens)
        {
            yield return itemToken;
        }
    }
}
/// <summary>
/// Reads <c>_reader</c> line by line and emits each non-empty line as one "p" paragraph.
/// </summary>
/// <returns>
/// For every non-empty line: a <c>TagOpenToken</c>, the tokens of the line's text,
/// and a <c>TagCloseToken</c>. Null or empty lines produce no tokens.
/// </returns>
public override IEnumerable<TokenBase> GetTokens()
{
    var counter = new TokenIndex();

    for (string line = _reader.ReadLine(); line != null; line = _reader.ReadLine())
    {
        if (string.IsNullOrEmpty(line))
        {
            continue;
        }

        var paragraph = new XElement("p");

        // Each paragraph is a non-inline element with a fixed text indent.
        var style = new TextVisualProperties();
        style.TextIndent = 32.0;
        style.Inline = false;

        // -1 is passed as the parent ID for both the open and the close token.
        yield return new TagOpenToken(counter.Index++, paragraph, style, -1);

        foreach (TokenBase wordToken in ParseText(line, counter))
        {
            yield return wordToken;
        }

        yield return new TagCloseToken(counter.Index++, -1);
    }
}
/// <summary>
/// Recursively converts the children of <paramref name="container"/> into tokens.
/// </summary>
/// <param name="container">Node whose child nodes are walked.</param>
/// <param name="propertiesStack">
/// Style stack; its top entry is cloned and updated for each non-text child, and the
/// result is pushed while that child's own children are processed.
/// </param>
/// <param name="top">Shared index counter, advanced once per emitted tag token.</param>
/// <param name="parentID">ID passed to the open and close tokens created at this level.</param>
/// <returns>
/// Text tokens for non-empty text nodes; for every other node except comments, a
/// <c>TagOpenToken</c>, the node's nested tokens, and a <c>TagCloseToken</c>.
/// </returns>
private IEnumerable<TokenBase> ParseTokens(HtmlNode container, Stack<TextVisualProperties> propertiesStack, TokenIndex top, int parentID = -1)
{
    foreach (HtmlNode child in container.ChildNodes)
    {
        var textNode = child as HtmlTextNode;
        if (textNode != null)
        {
            // Text nodes never become tags; empty ones produce nothing at all.
            if (string.IsNullOrEmpty(textNode.Text))
            {
                continue;
            }

            foreach (TokenBase wordToken in ParseText(textNode.Text, top))
            {
                yield return wordToken;
            }

            continue;
        }

        TextVisualProperties style = propertiesStack.Peek().Clone().Update(child, _css);

        // The link ID is reset here and only set again for an "a" element with an href.
        style.LinkID = string.Empty;

        bool isLink = child.Name == "a";
        if (isLink || child.Name == "span")
        {
            ParseAnchors(top, child);
        }

        if (isLink)
        {
            string target = child.GetAttributeValue("href", string.Empty);
            if (!string.IsNullOrEmpty(target))
            {
                style.LinkID = target;
            }
        }

        // TODO: add images support. The disabled draft compared child.Name with
        // imageParser.ImageTag and, on a match, yielded a PictureToken whose ImageID was
        // the "src" attribute value (empty when absent) instead of open/close tag tokens.

        if (child is HtmlCommentNode)
        {
            continue;
        }

        var openToken = new TagOpenToken(top.Index++, child, style, parentID);
        yield return openToken;

        propertiesStack.Push(style);
        foreach (TokenBase nested in ParseTokens(child, propertiesStack, top, openToken.ID))
        {
            yield return nested;
        }
        propertiesStack.Pop();

        yield return new TagCloseToken(top.Index++, parentID);
    }
}
/// <summary>
/// Maps <paramref name="path"/> to the current value of the token counter in <c>_anchors</c>.
/// An existing entry for the same key is overwritten.
/// </summary>
/// <param name="top">Counter whose current index is stored.</param>
/// <param name="path">Key under which the index is stored.</param>
private void AddAnchor(TokenIndex top, string path)
{
    var position = top.Index;
    _anchors[path] = position;
}
/// <summary>
/// Registers an anchor for each non-empty "id" and "name" attribute of <paramref name="node"/>.
/// </summary>
/// <param name="top">Counter whose current index the anchors point to.</param>
/// <param name="node">Node whose attributes are inspected.</param>
private void ParseAnchors(TokenIndex top, HtmlNode node)
{
    string[] anchorAttributes = { "id", "name" };

    for (int i = 0; i < anchorAttributes.Length; i++)
    {
        string anchorName = node.GetAttributeValue(anchorAttributes[i], string.Empty);
        if (string.IsNullOrEmpty(anchorName))
        {
            continue;
        }

        AddAnchor(top, anchorName);
    }
}
/// <summary>
/// Recursively converts the child nodes of <paramref name="container"/> into tokens.
/// </summary>
/// <param name="container">XML container whose nodes are walked.</param>
/// <param name="propertiesStack">
/// Style stack; its top entry is cloned and updated for each element, and the result
/// is pushed while that element's children are processed.
/// </param>
/// <param name="top">Shared index counter, advanced once per emitted non-text token.</param>
/// <param name="bookLevel">Chapter nesting level of the container; "section" elements add one.</param>
/// <param name="parentID">ID passed to the open and close tokens created at this level.</param>
/// <returns>
/// Text tokens for non-empty text nodes, a <c>PictureToken</c> for "image" elements, and
/// open/nested/close tokens for every other element. "section" elements are preceded
/// by a <c>NewPageToken</c>.
/// </returns>
private IEnumerable<TokenBase> ParseNodes(XContainer container, Stack<TextVisualProperties> propertiesStack, TokenIndex top, int bookLevel, int parentID = -1)
{
    foreach (XNode node in container.Nodes())
    {
        var textNode = node as XText;
        if (textNode != null && !string.IsNullOrEmpty(textNode.Value))
        {
            foreach (TokenBase wordToken in ParseText(textNode.Value, top))
            {
                yield return wordToken;
            }
        }

        // Anything that is not an element needs no further processing.
        var element = node as XElement;
        if (element == null)
        {
            continue;
        }

        TextVisualProperties style = propertiesStack.Peek().Clone().Update(element, _styleSheet);
        string tag = element.Name.LocalName;
        int level = bookLevel;

        if (tag == "a")
        {
            ProcessLinks(style, element);
        }

        ProcessAnchors(top, element);

        switch (tag)
        {
            case "section":
                // A section starts a new page and nests its content one level deeper.
                yield return new NewPageToken(top.Index++);
                level++;
                break;

            case "title":
                ProcessTitleData(top, element, level);
                break;
        }

        if (tag == "image")
        {
            // The picture reference is the first attribute whose local name is "href",
            // with any leading '#' characters removed.
            XAttribute hrefAttribute = element.Attributes().FirstOrDefault(a => a.Name.LocalName == "href");
            string href = string.Empty;
            if (hrefAttribute != null)
            {
                href = hrefAttribute.Value;
            }
            href = href.TrimStart('#');

            yield return new PictureToken(top.Index++, href);
            continue;
        }

        var openToken = new TagOpenToken(top.Index++, element, style, parentID);
        yield return openToken;

        propertiesStack.Push(style);
        foreach (TokenBase nested in ParseNodes(element, propertiesStack, top, level, openToken.ID))
        {
            yield return nested;
        }
        propertiesStack.Pop();

        yield return new TagCloseToken(top.Index++, parentID);
    }
}
/// <summary>
/// Appends a chapter entry for a title element to <c>_chapters</c>.
/// </summary>
/// <param name="top">Counter whose current index becomes the chapter's token ID.</param>
/// <param name="xelement">Title element; its text, as returned by <c>GetText</c>, becomes the chapter title.</param>
/// <param name="bookLevel">Nesting level stored on the chapter.</param>
private void ProcessTitleData(TokenIndex top, XElement xelement, int bookLevel)
{
    var chapter = new BookChapter();
    chapter.Level = bookLevel;
    chapter.Title = GetText(xelement);
    chapter.TokenID = top.Index;

    _chapters.Add(chapter);
}
/// <summary>
/// If <paramref name="xelement"/> has an attribute whose local name is "id", maps the
/// value of the first such attribute to the current token index in <c>_anchors</c>.
/// </summary>
/// <param name="top">Counter whose current index is stored.</param>
/// <param name="xelement">Element whose attributes are inspected.</param>
private void ProcessAnchors(TokenIndex top, XElement xelement)
{
    foreach (XAttribute candidate in xelement.Attributes())
    {
        if (candidate.Name.LocalName == "id")
        {
            _anchors[candidate.Value] = top.Index;

            // Only the first matching attribute is considered.
            return;
        }
    }
}
/// <summary>
/// Loads the body of a spine item and returns the tokens of its content.
/// </summary>
/// <param name="item">Spine item to parse.</param>
/// <param name="propertiesStack">Style stack handed on to <c>ParseNodes</c>.</param>
/// <param name="top">Shared index counter handed on to <c>ParseNodes</c>.</param>
/// <returns>The token sequence produced by <c>ParseNodes</c> for the item's body.</returns>
private IEnumerable<TokenBase> ParseSpineItem(EpubSpineItem item, Stack<TextVisualProperties> propertiesStack, TokenIndex top)
{
    // The body is fetched right away; only the returned token sequence is handed back.
    HtmlNode body = GetPageBody(item);

    // The item's path is combined with the OPF path before being passed down.
    var itemPath = _opfPath + item.Path;

    return ParseNodes(body, propertiesStack, top, itemPath);
}
/// <summary>
/// Recursively converts the children of <paramref name="container"/> into tokens.
/// </summary>
/// <param name="container">Node whose child nodes are walked.</param>
/// <param name="propertiesStack">
/// Style stack; its top entry is cloned and updated for each non-text child, and the
/// result is pushed while that child's own children are processed.
/// </param>
/// <param name="top">Shared index counter, advanced once per emitted non-text token.</param>
/// <param name="path">Path of the document being parsed; used to build link IDs, anchors and picture paths.</param>
/// <param name="parentID">ID passed to the open and close tokens created at this level.</param>
/// <returns>
/// Text tokens for non-empty text nodes, a <c>PictureToken</c> for "img" elements, and
/// open/nested/close tokens for every other node except comments and empty text nodes.
/// </returns>
private IEnumerable<TokenBase> ParseNodes(HtmlNode container, Stack<TextVisualProperties> propertiesStack, TokenIndex top, EpubPath path, int parentID = -1)
{
    foreach (HtmlNode child in container.ChildNodes)
    {
        var asText = child as HtmlTextNode;
        if (asText != null)
        {
            // A text node never becomes a tag. Previously an EMPTY text node fell through
            // to the element branch and produced a spurious open/close token pair.
            if (!string.IsNullOrEmpty(asText.Text))
            {
                foreach (TokenBase text in ParseText(asText.Text, top))
                {
                    yield return text;
                }
            }

            continue;
        }

        TextVisualProperties properties = propertiesStack.Peek().Clone().Update(child, _css);

        // The link ID is reset here and only set again for an "a" element with an href.
        properties.LinkID = string.Empty;

        if (child.Name == "a" || child.Name == "span")
        {
            ParseAnchors(top, path, child);
        }

        if (child.Name == "a")
        {
            string href = child.GetAttributeValue("href", string.Empty);
            if (!string.IsNullOrEmpty(href))
            {
                // Ordinal comparison: '#' is a syntactic marker, not linguistic text.
                if (href.StartsWith("#", System.StringComparison.Ordinal))
                {
                    // Fragment-only link: appended to the current file's path.
                    properties.LinkID = path.CurrentFilePath + href;
                }
                else
                {
                    properties.LinkID = path + href;
                }
            }
        }

        if (string.Equals(child.Name, "img"))
        {
            HtmlAttributeCollection attributes = child.Attributes;
            string src = attributes.Contains("src") ? attributes["src"].Value : string.Empty;

            // Pictures are emitted as a single token instead of an open/close pair.
            yield return new PictureToken(top.Index++, path + src);
            continue;
        }

        // Comments produce no tokens.
        if (child is HtmlCommentNode)
        {
            continue;
        }

        var tagOpenToken = new TagOpenToken(top.Index++, child, properties, parentID);
        yield return tagOpenToken;

        propertiesStack.Push(properties);
        foreach (TokenBase token in ParseNodes(child, propertiesStack, top, path, tagOpenToken.ID))
        {
            yield return token;
        }
        propertiesStack.Pop();

        yield return new TagCloseToken(top.Index++, parentID);
    }
}