/// <summary>
/// Returns the meta element registered under <paramref name="metaDataName"/>,
/// building the lookup table from <c>MetaDataElements</c> on first use.
/// </summary>
/// <param name="metaDataName">
/// The meta tag's <c>name</c> (or <c>http-equiv</c>) value. It is upper-cased with
/// the invariant culture before the lookup, so matching is case-insensitive.
/// </param>
/// <returns>
/// Whatever <c>m_metaElements</c> holds under the upper-cased name, cast to
/// <c>IHTMLMetaElement</c> (presumably null when no such tag exists; confirm
/// against the declared type of <c>m_metaElements</c>).
/// </returns>
private IHTMLMetaElement GetElement(string metaDataName)
{
    metaDataName = metaDataName.ToUpper(CultureInfo.InvariantCulture);

    if (!m_metaElementsGenerated)
    {
        // Walk every meta tag and index it by name, falling back to http-equiv
        // when the tag has no name.
        IEnumerator tagEnumerator = MetaDataElements.GetEnumerator();
        while (tagEnumerator.MoveNext())
        {
            IHTMLMetaElement thisTag = (IHTMLMetaElement)tagEnumerator.Current;

            string key = thisTag.name;
            if (key == null)
            {
                key = thisTag.httpEquiv;
            }

            if (key == null)
            {
                // Neither attribute is present; nothing to index this tag by.
                continue;
            }

            // Normalize BEFORE the duplicate check. Probing with the raw key
            // while storing the upper-cased one lets "Author" and "author"
            // both pass the check and makes Add throw on the second.
            key = key.ToUpper(CultureInfo.InvariantCulture);
            if (!m_metaElements.ContainsKey(key))
            {
                // The first tag seen for a given key wins.
                m_metaElements.Add(key, thisTag);
            }
        }

        // Set the same flag that guards this block so the table is built once.
        // NOTE(review): the original set m_metaTableGenerated here instead, which
        // left this guard permanently false; confirm nothing relied on that.
        m_metaElementsGenerated = true;
    }

    return (IHTMLMetaElement)m_metaElements[metaDataName];
}
/// <summary>
/// Fills in the author, title, publisher and language of <paramref name="data"/>
/// from the meta tags and title of <paramref name="doc"/>. When the title or
/// author is still blank afterwards, it is derived from the document's file name,
/// which is read as "Author - Title" or just "Title".
/// </summary>
/// <param name="doc">The HTML document to read from.</param>
/// <param name="data">
/// Receives the extracted values. Its Author, Title, Publisher and Language are
/// reset to null before extraction starts.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="doc"/> or <paramref name="data"/> is null.
/// </exception>
public void ExtractBookMetaData(HtmlDocument doc, BookMetaData data)
{
    if (doc == null)
    {
        throw new ArgumentNullException("doc");
    }
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    data.BookInfo.Author = null;
    data.BookInfo.Title = null;
    data.BookInfo.Publisher = null;
    data.DocInfo.Language = null;

    IHTMLDocument2 dom = (IHTMLDocument2)doc.DomDocument;
    foreach (IHTMLDOMNode node in dom.all)
    {
        if (string.Equals(node.nodeName, "meta", StringComparison.OrdinalIgnoreCase))
        {
            IHTMLMetaElement meta = (IHTMLMetaElement)node;

            // Meta names are matched without regard to case or culture, so
            // <meta name="Author"> is recognized as well as name="author".
            if (string.Equals(meta.name, "author", StringComparison.OrdinalIgnoreCase))
            {
                data.BookInfo.Author = meta.content;
            }
            else if (string.Equals(meta.name, "publisher", StringComparison.OrdinalIgnoreCase))
            {
                data.BookInfo.Publisher = meta.content;
            }
            else if (string.Equals(meta.name, "content-language", StringComparison.OrdinalIgnoreCase)
                || string.Equals(meta.httpEquiv, "content-language", StringComparison.OrdinalIgnoreCase))
            {
                // content-language is normally declared through http-equiv,
                // so accept it there as well as under name.
                data.DocInfo.Language = meta.content;
            }
        }
        else if (string.Equals(node.nodeName, "body", StringComparison.OrdinalIgnoreCase))
        {
            // No more meta tags.
            break;
        }
    }

    // Get the title
    data.BookInfo.Title = GetDocumentTitle(doc);

    // If the title or the author is blank (null or empty) then try to extract
    // it from the filename.
    bool needTitle = string.IsNullOrEmpty(data.BookInfo.Title);
    bool needAuthor = string.IsNullOrEmpty(data.BookInfo.Author);
    Uri url = doc.Url;
    if ((needTitle || needAuthor) && url != null)
    {
        try
        {
            // Strip the directory and the extension from the local path.
            FileInfo finfo = new FileInfo(url.LocalPath);
            string filename = finfo.Name;
            if (finfo.Extension.Length > 0)
            {
                filename = filename.Substring(0, filename.Length - finfo.Extension.Length);
            }

            // "Author - Title" splits at the first separator; otherwise the
            // whole name is taken as the title.
            const string separator = " - ";
            string author = null;
            string title = filename;
            int separatorIndex = filename.IndexOf(separator, StringComparison.Ordinal);
            if (separatorIndex != -1)
            {
                author = filename.Substring(0, separatorIndex);
                title = filename.Substring(separatorIndex + separator.Length);
            }

            if (needTitle)
            {
                data.BookInfo.Title = title;
            }
            if (needAuthor && author != null)
            {
                data.BookInfo.Author = author;
            }
        }
        catch (NotSupportedException)
        {
            // If the URL doesn't map to a filename like "about:blank"
            // then this exception can be thrown. We just ignore it.
        }
        catch (ArgumentException)
        {
            // The local path contains characters that are not valid in a file
            // name. The fallback is best-effort, so leave the values as they are.
        }
    }
}