/// <summary>
/// Rebuilds a <see cref="SerializableDocument" /> from its serialized XML element list
/// together with the document's HTML and text
/// </summary>
/// <param name="documentHtml">The document HTML</param>
/// <param name="documentText">The document text</param>
/// <param name="documentXml">The XML holding the serialized element list</param>
/// <param name="displayHtml">(optional) The display HTML, assigned to the document as-is</param>
/// <returns>
/// the html document, rooted at the first deserialized element
/// </returns>
/// <exception cref="System.Runtime.Serialization.SerializationException">
/// Any failure while deserializing; the original exception is kept as the inner exception
/// </exception>
public virtual SerializableDocument Deserialize(string documentHtml, string documentText, string documentXml, string displayHtml = null)
{
    try
    {
        // Read the element list back from the XML string
        SerializableElementList elementList;
        using (var textReader = new StringReader(documentXml))
        {
            using (var reader = XmlReader.Create(textReader, _readerSettings))
            {
                elementList = (SerializableElementList)_serializer.Deserialize(reader);
            }
        }

        var nodes = elementList.SerializableElements;
        var lookup = SerializationUtils.CreateLookup(nodes);

        // Re-attach each element to the lookup, the document strings and the default styles
        foreach (var node in nodes)
        {
            node.Link(lookup);
            node.CropHtmlAndText(documentHtml, documentText);
            node.SetDefaultStyleLookup(_defaultStyleLookup);
        }

        var documentInfo = new HtmlDocumentInfo(
            elementList.Url,
            elementList.BrowserVersion,
            elementList.CodeVersion,
            elementList.CreationDate);

        // The first element of the list is used as the document root
        return new SerializableDocument(nodes[0], documentInfo, _defaultStyleLookup, documentHtml, documentText)
        {
            DisplayHtml = displayHtml
        };
    }
    catch (Exception failure)
    {
        throw new SerializationException("Error deserializing html document", failure);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="SerializableElementList" /> class
/// from a list of elements and the document information
/// </summary>
/// <param name="elements">The elements to store in the list</param>
/// <param name="info">The HTML document information whose values are copied onto this instance</param>
public SerializableElementList(List<SerializableElement> elements, HtmlDocumentInfo info)
{
    SerializableElements = elements;

    // Copy the document information field by field
    Url = info.Url;
    BrowserVersion = info.BrowserVersion;
    CodeVersion = info.CodeVersion;
    CreationDate = info.CreationDate;
}
/// <summary>
/// Creates the HTML document information for the given URL: the Internet Explorer
/// version read from the registry, the version of the executing assembly and the
/// current local time
/// </summary>
/// <param name="url">The URL</param>
/// <returns>
/// the HTML document information; the browser version is an empty string when
/// no Internet Explorer version can be read from the registry
/// </returns>
private HtmlDocumentInfo CreateInfo(string url)
{
    string browserVersion = string.Empty;

    // RegistryKey is disposable: release the handle as soon as the value has been read
    using (var explorerKey = Registry.LocalMachine.OpenSubKey(@"Software\Microsoft\Internet Explorer"))
    {
        // OpenSubKey returns null when the key does not exist
        if (explorerKey != null)
        {
            // "svcVersion" is only written by newer Internet Explorer releases;
            // fall back to "Version" when it is missing
            object versionValue = explorerKey.GetValue("svcVersion") ?? explorerKey.GetValue("Version");
            if (versionValue != null)
            {
                browserVersion = versionValue.ToString();
            }
        }
    }

    string codeVersion = Assembly.GetExecutingAssembly().GetName().Version.ToString();
    var creationDate = DateTime.Now;

    return new HtmlDocumentInfo(url, browserVersion, codeVersion, creationDate);
}
/// <summary>
/// Initializes a new instance of the <see cref="MsHtmlDocument" /> class,
/// forwarding all arguments to the base class constructor
/// </summary>
/// <param name="root">The root element of the document</param>
/// <param name="info">The HTML document information</param>
/// <param name="defaultStyleLookup">The default style lookup</param>
public MsHtmlDocument(
    MsHtmlElement root,
    HtmlDocumentInfo info,
    DefaultStyleLookup defaultStyleLookup)
    : base(root, info, defaultStyleLookup)
{
    // Nothing to initialize beyond the base class
}
/// <summary>
/// Initializes a new instance of the <see cref="MsHtmlDocument" /> class
/// and sets its display HTML
/// </summary>
/// <param name="root">The root element of the document</param>
/// <param name="info">The HTML document information</param>
/// <param name="defaultStyleLookup">The default style lookup</param>
/// <param name="displayHtml">The display HTML</param>
public MsHtmlDocument(
    MsHtmlElement root,
    HtmlDocumentInfo info,
    DefaultStyleLookup defaultStyleLookup,
    string displayHtml)
    : this(root, info, defaultStyleLookup)
{
    // The three-argument overload has already run the base class initialization
    DisplayHtml = displayHtml;
}
/// <summary>
/// Initializes a new instance of the <see cref="SerializableDocument" /> class
/// and stores the document's HTML and text
/// </summary>
/// <param name="root">The root element of the document</param>
/// <param name="info">The HTML document information</param>
/// <param name="defaultStyleLookup">The default style lookup</param>
/// <param name="html">The html of the document</param>
/// <param name="text">The text of the document</param>
public SerializableDocument(
    SerializableElement root,
    HtmlDocumentInfo info,
    DefaultStyleLookup defaultStyleLookup,
    string html,
    string text)
    : base(root, info, defaultStyleLookup)
{
    Html = html;
    Text = text;
}