/// <summary>
/// Unzips <paramref name="source"/>, locates the package (OPF) document through
/// <c>META-INF/container.xml</c>, and loads it for later queries.
/// </summary>
/// <remarks>
/// The path of the package document is read from the <c>full-path</c> attribute of the
/// first <c>rootfile</c> element found in the container document. The element is looked up
/// in the namespace declared by the root's <c>xmlns</c> attribute, or in no namespace when
/// that attribute is absent.
/// </remarks>
/// <param name="source">Stream handed to <c>ZipContainer.Unzip</c>.</param>
/// <exception cref="DataException">
/// Thrown with the <c>InvalidEpubMetaInfo</c> message when the container document has no root,
/// has no <c>rootfile</c> element, the <c>rootfile</c> has no <c>full-path</c> attribute,
/// or the package document has no root.
/// </exception>
public EpubSummaryParser(Stream source)
{
    _zip = ZipContainer.Unzip(source);

    XDocument xmlDocument = _zip.GetFileStream("META-INF/container.xml").GetXmlDocument();
    XElement root = xmlDocument.Root;
    if (root == null)
    {
        throw new DataException(InvalidEpubMetaInfo);
    }

    XAttribute attribute = root.Attribute("xmlns");
    XNamespace xmlns = (attribute != null) ? XNamespace.Get(attribute.Value) : XNamespace.None;

    // FirstOrDefault rather than First: a container without any <rootfile> is malformed
    // metadata and must surface as the same DataException as the other checks here,
    // not as an InvalidOperationException from LINQ.
    XElement rootFile = xmlDocument.Descendants(xmlns + "rootfile").FirstOrDefault();
    if (rootFile == null)
    {
        throw new DataException(InvalidEpubMetaInfo);
    }

    XAttribute fullPath = rootFile.Attribute("full-path");
    if (fullPath == null)
    {
        throw new DataException(InvalidEpubMetaInfo);
    }

    string path = fullPath.Value;
    _opfPath = path;
    _opf = _zip.GetFileStream(path).GetXmlDocument();
    _opfRoot = _opf.Root;
    if (_opfRoot == null)
    {
        throw new DataException(InvalidEpubMetaInfo);
    }

    _oebps = GetPath(path);
    _opfns = XNamespace.Get("http://www.idpf.org/2007/opf");
    _opfdc = XNamespace.Get("http://purl.org/dc/elements/1.1/");
    _coverHelper = new EpubCoverHelper(_zip, _opfns, _opfRoot, _oebps);
}
/// <summary>
/// Registers an anchor for each non-empty <c>id</c> and <c>name</c> attribute of
/// <paramref name="node"/>.
/// </summary>
/// <param name="top">Token index forwarded unchanged to <c>AddAnchor</c>.</param>
/// <param name="path">Supplies <c>CurrentFilePath</c>, which prefixes the anchor key.</param>
/// <param name="node">HTML node whose attributes are inspected.</param>
private void ParseAnchors(TokenIndex top, EpubPath path, HtmlNode node)
{
    string[] anchorAttributes = { "id", "name" };

    for (int i = 0; i < anchorAttributes.Length; i++)
    {
        string anchorValue = node.GetAttributeValue(anchorAttributes[i], string.Empty);
        if (string.IsNullOrEmpty(anchorValue))
        {
            // Attribute missing or empty: nothing to register.
            continue;
        }

        // Key format: "<current file path>#<attribute value>".
        AddAnchor(top, path.CurrentFilePath + "#" + anchorValue);
    }
}
/// <summary>
/// Stores the supplied package document and collaborators and caches the document root.
/// </summary>
/// <param name="opf">Package (OPF) document; its <c>Root</c> must not be null.</param>
/// <param name="opfPath">Path stored in <c>_opfPath</c>.</param>
/// <param name="zip">Archive container stored in <c>_zip</c>.</param>
/// <param name="css">Style information stored in <c>_css</c>.</param>
/// <param name="anchors">Anchor dictionary stored in <c>_anchors</c>.</param>
/// <exception cref="DataException">Thrown when <paramref name="opf"/> has no root element.</exception>
public EpubTokenParser(XDocument opf, EpubPath opfPath, ZipContainer zip, CSS css, Dictionary<string, int> anchors)
{
    // Collaborators handed in by the caller.
    _opf = opf;
    _opfPath = opfPath;
    _zip = zip;
    _css = css;
    _anchors = anchors;

    // Values derived from the package document.
    _opfns = XNamespace.Get("http://www.idpf.org/2007/opf");

    XElement packageRoot = _opf.Root;
    _opfRoot = packageRoot;
    if (packageRoot == null)
    {
        throw new DataException("Invalid epub meta info.");
    }
}
/// <summary>
/// Initializes the parser from an already-loaded package (OPF) document.
/// </summary>
/// <param name="opf">Package document whose root element is cached in <c>_opfRoot</c>.</param>
/// <param name="opfPath">Path value kept in <c>_opfPath</c>.</param>
/// <param name="zip">Archive container kept in <c>_zip</c>.</param>
/// <param name="css">Style information kept in <c>_css</c>.</param>
/// <param name="anchors">Anchor dictionary kept in <c>_anchors</c>.</param>
/// <exception cref="DataException">Thrown when the package document has no root element.</exception>
public EpubTokenParser(XDocument opf, EpubPath opfPath, ZipContainer zip, CSS css, Dictionary<string, int> anchors)
{
    _anchors = anchors;
    _css = css;
    _zip = zip;
    _opfPath = opfPath;
    _opf = opf;
    _opfns = XNamespace.Get("http://www.idpf.org/2007/opf");

    _opfRoot = _opf.Root;
    if (_opfRoot != null)
    {
        return;
    }

    // A package document without a root element cannot be parsed.
    throw new DataException("Invalid epub meta info.");
}
/// <summary>
/// Unzips <paramref name="source"/> and loads the package (OPF) document that
/// <c>META-INF/container.xml</c> points to.
/// </summary>
/// <remarks>
/// The package path comes from the <c>full-path</c> attribute of the first <c>rootfile</c>
/// element in the container document, looked up in the namespace named by the root's
/// <c>xmlns</c> attribute (or no namespace when that attribute is missing).
/// </remarks>
/// <param name="source">Stream handed to <c>ZipContainer.Unzip</c>.</param>
/// <exception cref="DataException">
/// Thrown with the <c>InvalidEpubMetaInfo</c> message when the container document has no root
/// or no <c>rootfile</c> element, when <c>full-path</c> is missing, or when the package
/// document has no root.
/// </exception>
public EpubSummaryParser(Stream source)
{
    _zip = ZipContainer.Unzip(source);

    XDocument xmlDocument = _zip.GetFileStream("META-INF/container.xml").GetXmlDocument();
    XElement root = xmlDocument.Root;
    if (root == null)
    {
        throw new DataException(InvalidEpubMetaInfo);
    }

    XAttribute attribute = root.Attribute("xmlns");
    XNamespace xmlns = (attribute != null) ? XNamespace.Get(attribute.Value) : XNamespace.None;

    // Use FirstOrDefault so a missing <rootfile> is reported as invalid metadata
    // (DataException) like every other validation failure in this constructor,
    // instead of leaking LINQ's InvalidOperationException.
    XElement rootFile = xmlDocument.Descendants(xmlns + "rootfile").FirstOrDefault();
    XAttribute fullPath = (rootFile != null) ? rootFile.Attribute("full-path") : null;
    if (fullPath == null)
    {
        throw new DataException(InvalidEpubMetaInfo);
    }

    string path = fullPath.Value;
    _opfPath = path;
    _opf = _zip.GetFileStream(path).GetXmlDocument();
    _opfRoot = _opf.Root;
    if (_opfRoot == null)
    {
        throw new DataException(InvalidEpubMetaInfo);
    }

    _oebps = GetPath(path);
    _opfns = XNamespace.Get("http://www.idpf.org/2007/opf");
    _opfdc = XNamespace.Get("http://purl.org/dc/elements/1.1/");
    _coverHelper = new EpubCoverHelper(_zip, _opfns, _opfRoot, _oebps);
}
/// <summary>
/// Walks the <c>navPoint</c> children of <paramref name="root"/> and adds a
/// <c>BookChapter</c> to <c>Chapters</c> for each one whose content source resolves to a
/// known anchor.
/// </summary>
/// <remarks>
/// A <c>navPoint</c> is skipped when it has no <c>navLabel/text</c> element, no
/// <c>content/@src</c> attribute, or when its source (with any <c>#fragment</c> removed and
/// prefixed by <paramref name="path"/>) is not a key of <c>Anchors</c>. Nested
/// <c>navPoint</c> elements are only visited when their parent was added.
/// </remarks>
/// <param name="root">Element whose direct <c>navPoint</c> children are processed.</param>
/// <param name="level">Nesting level recorded on the chapters created at this depth.</param>
/// <param name="ns">XML namespace used to qualify every element name looked up.</param>
/// <param name="path">Prefix combined with the cleaned source to form the anchor key.</param>
private void ParseItems(XElement root, int level, XNamespace ns, EpubPath path)
{
    foreach (XElement navPoint in root.Elements(ns + "navPoint"))
    {
        XElement text = navPoint.Elements(ns + "navLabel").Elements(ns + "text").FirstOrDefault();
        if (text == null)
        {
            continue;
        }

        XAttribute srcAttr = navPoint.Elements(ns + "content").Attributes("src").FirstOrDefault();
        if (srcAttr == null)
        {
            continue;
        }

        // Drop the "#fragment" part, if any, so the key identifies the file only.
        string cleanSource = srcAttr.Value;
        int fragmentStart = cleanSource.IndexOf("#", StringComparison.Ordinal);
        if (fragmentStart > -1)
        {
            cleanSource = cleanSource.Substring(0, fragmentStart);
        }

        string key = path + cleanSource;

        // Single lookup instead of ContainsKey followed by the indexer.
        int tokenId;
        if (!Anchors.TryGetValue(key, out tokenId))
        {
            continue;
        }

        var item = new BookChapter
        {
            Level = level,
            Title = CleanText(text.Value),
            TokenID = tokenId
        };
        Chapters.Add(item);

        ParseItems(navPoint, level + 1, ns, path);
    }
}
/// <summary>
/// Lazily converts the child nodes of <paramref name="container"/> into a flat token
/// sequence, recursing into nested elements.
/// </summary>
/// <remarks>
/// Per child node:
/// <list type="bullet">
/// <item>a text node with non-empty text yields the tokens produced by <c>ParseText</c>;</item>
/// <item>an <c>img</c> node yields one <c>PictureToken</c> for <c>path + src</c>;</item>
/// <item>a comment node yields nothing;</item>
/// <item>any other node yields a <c>TagOpenToken</c>, the tokens of its children, and a
/// <c>TagCloseToken</c>.</item>
/// </list>
/// <c>a</c> and <c>span</c> nodes also have their anchors registered through
/// <c>ParseAnchors</c>, and an <c>a</c> node with a non-empty <c>href</c> sets
/// <c>LinkID</c> on the node's visual properties.
/// </remarks>
/// <param name="container">Node whose children are tokenized.</param>
/// <param name="propertiesStack">
/// Stack of inherited visual properties; must be non-empty because its top is cloned for
/// every non-text child. It is pushed and popped around each recursive descent.
/// </param>
/// <param name="top">Running token index; incremented for each picture, open and close token created here.</param>
/// <param name="path">Path used to resolve anchors, link targets and image sources.</param>
/// <param name="parentID">ID passed to the open and close tokens created for the children of <paramref name="container"/>.</param>
/// <returns>The tokens in document order.</returns>
private IEnumerable<TokenBase> ParseNodes(HtmlNode container, Stack<TextVisualProperties> propertiesStack, TokenIndex top, EpubPath path, int parentID = -1)
{
    foreach (HtmlNode child in container.ChildNodes)
    {
        var asText = child as HtmlTextNode;
        if (asText != null && !string.IsNullOrEmpty(asText.Text))
        {
            foreach (TokenBase text in ParseText(asText.Text, top))
            {
                yield return text;
            }
        }
        else
        {
            TextVisualProperties properties = propertiesStack.Peek().Clone().Update(child, _css);
            properties.LinkID = string.Empty;

            if (child.Name == "a" || child.Name == "span")
            {
                ParseAnchors(top, path, child);
            }

            if (child.Name == "a")
            {
                string href = child.GetAttributeValue("href", string.Empty);
                if (!string.IsNullOrEmpty(href))
                {
                    // href is non-empty here, so indexing is safe. A char comparison is
                    // ordinal, unlike the culture-sensitive string.StartsWith(string).
                    if (href[0] == '#')
                    {
                        // Fragment-only link: target lives in the current file.
                        properties.LinkID = path.CurrentFilePath + href;
                    }
                    else
                    {
                        properties.LinkID = path + href;
                    }
                }
            }

            if (string.Equals(child.Name, "img"))
            {
                HtmlAttributeCollection attributes = child.Attributes;
                string src = attributes.Contains("src") ? attributes["src"].Value : string.Empty;
                var pictureToken = new PictureToken(top.Index++, path + src);
                yield return pictureToken;
            }
            else
            {
                if (child is HtmlCommentNode)
                {
                    continue;
                }

                var tagOpenToken = new TagOpenToken(top.Index++, child, properties, parentID);
                yield return tagOpenToken;

                // Children inherit this node's properties for the duration of the descent.
                propertiesStack.Push(properties);
                foreach (TokenBase token in ParseNodes(child, propertiesStack, top, path, tagOpenToken.ID))
                {
                    yield return token;
                }
                propertiesStack.Pop();

                // The close token receives the same parentID as the matching open token.
                yield return new TagCloseToken(top.Index++, parentID);
            }
        }
    }
}
/// <summary>
/// Adds a <c>BookChapter</c> to <c>Chapters</c> for every <c>navPoint</c> child of
/// <paramref name="root"/> whose content source maps to a known anchor, then descends
/// into that <c>navPoint</c>.
/// </summary>
/// <remarks>
/// Entries without a <c>navLabel/text</c> element or a <c>content/@src</c> attribute are
/// skipped. The anchor key is <paramref name="path"/> combined with the source value up to,
/// but not including, the first <c>#</c>. When the key is not present in <c>Anchors</c> the
/// entry and its nested <c>navPoint</c> elements are skipped.
/// </remarks>
/// <param name="root">Element whose direct <c>navPoint</c> children are processed.</param>
/// <param name="level">Level stored on chapters created from these children.</param>
/// <param name="ns">XML namespace applied to all element names looked up.</param>
/// <param name="path">Prefix combined with the cleaned source to build the anchor key.</param>
private void ParseItems(XElement root, int level, XNamespace ns, EpubPath path)
{
    foreach (XElement navPoint in root.Elements(ns + "navPoint"))
    {
        XElement text = navPoint.Elements(ns + "navLabel").Elements(ns + "text").FirstOrDefault();
        XAttribute srcAttr = (text != null)
            ? navPoint.Elements(ns + "content").Attributes("src").FirstOrDefault()
            : null;
        if (srcAttr == null)
        {
            // Either the label text or the content source is missing.
            continue;
        }

        // Keep only the file part of "file#fragment".
        string cleanSource = srcAttr.Value;
        int hashIndex = cleanSource.IndexOf("#", StringComparison.Ordinal);
        if (hashIndex > -1)
        {
            cleanSource = cleanSource.Substring(0, hashIndex);
        }

        string key = path + cleanSource;

        // TryGetValue performs one lookup where ContainsKey + indexer performed two.
        int num;
        if (Anchors.TryGetValue(key, out num))
        {
            Chapters.Add(new BookChapter
            {
                Level = level,
                Title = CleanText(text.Value),
                TokenID = num
            });
            ParseItems(navPoint, level + 1, ns, path);
        }
    }
}
/// <summary>
/// Lazily turns the children of <paramref name="container"/> into tokens, descending
/// recursively into nested elements.
/// </summary>
/// <remarks>
/// Text nodes with non-empty text are expanded through <c>ParseText</c>. <c>img</c> nodes
/// become a single <c>PictureToken</c>. Comment nodes produce no tokens. Every other node
/// is wrapped in a <c>TagOpenToken</c> / <c>TagCloseToken</c> pair around the tokens of its
/// own children. Anchors on <c>a</c> and <c>span</c> nodes are registered via
/// <c>ParseAnchors</c>; a non-empty <c>href</c> on an <c>a</c> node is stored as
/// <c>LinkID</c> on that node's visual properties.
/// </remarks>
/// <param name="container">Node whose children are tokenized.</param>
/// <param name="propertiesStack">
/// Inherited visual properties. Must contain at least one entry: the top entry is cloned
/// for each non-text child, and the stack is pushed/popped around each recursion.
/// </param>
/// <param name="top">Running token index, incremented for each picture, open and close token.</param>
/// <param name="path">Path used for anchor keys, link targets and image sources.</param>
/// <param name="parentID">ID given to open and close tokens created for the direct children.</param>
/// <returns>The tokens in document order.</returns>
private IEnumerable<TokenBase> ParseNodes(HtmlNode container, Stack<TextVisualProperties> propertiesStack, TokenIndex top, EpubPath path, int parentID = -1)
{
    foreach (HtmlNode child in container.ChildNodes)
    {
        var asText = child as HtmlTextNode;
        if (asText != null && !string.IsNullOrEmpty(asText.Text))
        {
            foreach (TokenBase text in ParseText(asText.Text, top))
            {
                yield return text;
            }

            continue;
        }

        TextVisualProperties properties = propertiesStack.Peek().Clone().Update(child, _css);
        properties.LinkID = string.Empty;

        if (child.Name == "a" || child.Name == "span")
        {
            ParseAnchors(top, path, child);
        }

        if (child.Name == "a")
        {
            string href = child.GetAttributeValue("href", string.Empty);
            if (!string.IsNullOrEmpty(href))
            {
                // Ordinal check on the first character (href is non-empty here) instead of
                // the culture-sensitive string.StartsWith(string) overload.
                bool isFragmentOnly = href[0] == '#';
                if (isFragmentOnly)
                {
                    properties.LinkID = path.CurrentFilePath + href;
                }
                else
                {
                    properties.LinkID = path + href;
                }
            }
        }

        if (string.Equals(child.Name, "img"))
        {
            HtmlAttributeCollection attributes = child.Attributes;
            string src = attributes.Contains("src") ? attributes["src"].Value : string.Empty;
            yield return new PictureToken(top.Index++, path + src);
            continue;
        }

        if (child is HtmlCommentNode)
        {
            continue;
        }

        var tagOpenToken = new TagOpenToken(top.Index++, child, properties, parentID);
        yield return tagOpenToken;

        // Nested nodes see this node's properties on top of the stack.
        propertiesStack.Push(properties);
        foreach (TokenBase token in ParseNodes(child, propertiesStack, top, path, tagOpenToken.ID))
        {
            yield return token;
        }
        propertiesStack.Pop();

        // The close token carries the same parentID as its open token.
        yield return new TagCloseToken(top.Index++, parentID);
    }
}
/// <summary>
/// Looks at the <c>id</c> and <c>name</c> attributes of <paramref name="node"/> and calls
/// <c>AddAnchor</c> for each one that has a non-empty value.
/// </summary>
/// <param name="top">Token index passed through to <c>AddAnchor</c>.</param>
/// <param name="path">Provides <c>CurrentFilePath</c>, the prefix of the anchor key.</param>
/// <param name="node">HTML node being inspected.</param>
private void ParseAnchors(TokenIndex top, EpubPath path, HtmlNode node)
{
    var candidateAttributes = new[] { "id", "name" };

    foreach (string attributeName in candidateAttributes)
    {
        string anchorId = node.GetAttributeValue(attributeName, string.Empty);
        bool hasAnchor = !string.IsNullOrEmpty(anchorId);
        if (hasAnchor)
        {
            // Anchor key is the current file path, a '#', then the attribute value.
            AddAnchor(top, path.CurrentFilePath + "#" + anchorId);
        }
    }
}