/// <summary>
        /// Rebuilds a <see cref="SerializableDocument" /> from its serialized XML element list
        /// together with the HTML and text of the document
        /// </summary>
        /// <param name="documentHtml">The document HTML; passed to every element's <c>CropHtmlAndText</c> and to the resulting document</param>
        /// <param name="documentText">The document text; passed to every element's <c>CropHtmlAndText</c> and to the resulting document</param>
        /// <param name="documentXml">The document XML that is read to obtain the serialized element list</param>
        /// <param name="displayHtml">(optional) The display HTML assigned to the resulting document</param>
        /// <returns>
        /// the deserialized document, rooted at the first element of the deserialized list
        /// </returns>
        /// <exception cref="System.Runtime.Serialization.SerializationException">Wraps any exception raised while deserializing the html document</exception>
        public virtual SerializableDocument Deserialize(string documentHtml, string documentText, string documentXml, string displayHtml = null)
        {
            try
            {
                // Read the element list out of the XML payload.
                SerializableElementList parsedList;
                using (var textReader = new StringReader(documentXml))
                {
                    using (var reader = XmlReader.Create(textReader, _readerSettings))
                    {
                        parsedList = (SerializableElementList)_serializer.Deserialize(reader);
                    }
                }

                var allElements = parsedList.SerializableElements;
                var lookup      = SerializationUtils.CreateLookup(allElements);

                // Each element is linked through the lookup, given its slice of the html/text,
                // and handed the default style lookup, in that order.
                foreach (var current in allElements)
                {
                    current.Link(lookup);
                    current.CropHtmlAndText(documentHtml, documentText);
                    current.SetDefaultStyleLookup(_defaultStyleLookup);
                }

                var documentInfo = new HtmlDocumentInfo(parsedList.Url, parsedList.BrowserVersion, parsedList.CodeVersion, parsedList.CreationDate);

                // The first deserialized element is used as the document root.
                return new SerializableDocument(allElements[0], documentInfo, _defaultStyleLookup, documentHtml, documentText)
                {
                    DisplayHtml = displayHtml
                };
            }
            catch (Exception cause)
            {
                throw new SerializationException("Error deserializing html document", cause);
            }
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="SerializableElementList" /> class
 /// </summary>
 /// <param name="elements">The elements stored as the serializable elements of this list</param>
 /// <param name="info">The HTML document information whose URL, browser version, code version and creation date are copied</param>
 public SerializableElementList(List<SerializableElement> elements, HtmlDocumentInfo info)
 {
     SerializableElements = elements;

     // Copy the document metadata onto the list itself.
     Url            = info.Url;
     BrowserVersion = info.BrowserVersion;
     CodeVersion    = info.CodeVersion;
     CreationDate   = info.CreationDate;
 }
        /// <summary>
        /// Creates the HTML document information for the given URL, stamped with the installed
        /// Internet Explorer version, the version of the executing assembly and the current local time
        /// </summary>
        /// <param name="url">The URL</param>
        /// <returns>
        /// the HTML document information; its browser version is an empty string when no
        /// Internet Explorer version can be read from the registry
        /// </returns>
        private HtmlDocumentInfo CreateInfo(string url)
        {
            string browserVersion = string.Empty;

            // RegistryKey is disposable, so release the handle as soon as the value has been read.
            // OpenSubKey returns null when the key does not exist; a using statement tolerates that.
            using (var ieKey = Registry.LocalMachine.OpenSubKey(@"Software\Microsoft\Internet Explorer"))
            {
                if (ieKey != null)
                {
                    // GetValue returns null for a missing value. "svcVersion" is absent on older
                    // Internet Explorer releases, which only publish "Version", so fall back to it.
                    object rawVersion = ieKey.GetValue("svcVersion") ?? ieKey.GetValue("Version");
                    if (rawVersion != null)
                    {
                        browserVersion = rawVersion.ToString();
                    }
                }
            }

            string codeVersion  = Assembly.GetExecutingAssembly().GetName().Version.ToString();
            var    creationDate = DateTime.Now;

            return new HtmlDocumentInfo(url, browserVersion, codeVersion, creationDate);
        }
Beispiel #4
0
 /// <summary>
 /// Creates an <see cref="MsHtmlDocument" /> by forwarding all arguments to the base class constructor
 /// </summary>
 /// <param name="root">The root element of the document</param>
 /// <param name="info">The HTML document information</param>
 /// <param name="defaultStyleLookup">The default style lookup</param>
 public MsHtmlDocument(
     MsHtmlElement root,
     HtmlDocumentInfo info,
     DefaultStyleLookup defaultStyleLookup)
     : base(root, info, defaultStyleLookup)
 {
     // Nothing to initialize beyond what the base constructor does.
 }
Beispiel #5
0
 /// <summary>
 /// Creates an <see cref="MsHtmlDocument" /> and sets its display HTML
 /// </summary>
 /// <param name="root">The root element of the document</param>
 /// <param name="info">The HTML document information</param>
 /// <param name="defaultStyleLookup">The default style lookup</param>
 /// <param name="displayHtml">The display HTML assigned to <c>DisplayHtml</c> after the base constructor has run</param>
 public MsHtmlDocument(
     MsHtmlElement root,
     HtmlDocumentInfo info,
     DefaultStyleLookup defaultStyleLookup,
     string displayHtml)
     : base(root, info, defaultStyleLookup)
 {
     DisplayHtml = displayHtml;
 }
Beispiel #6
0
 /// <summary>
 /// Creates a <see cref="SerializableDocument" /> and stores the html and text it was built from
 /// </summary>
 /// <param name="root">The root element of the document</param>
 /// <param name="info">The HTML document information</param>
 /// <param name="defaultStyleLookup">The default style lookup</param>
 /// <param name="html">The html assigned to <c>Html</c></param>
 /// <param name="text">The text assigned to <c>Text</c></param>
 public SerializableDocument(
     SerializableElement root,
     HtmlDocumentInfo info,
     DefaultStyleLookup defaultStyleLookup,
     string html,
     string text)
     : base(root, info, defaultStyleLookup)
 {
     Html = html;
     Text = text;
 }