/// <summary>
/// Creates a replacer over the given HTML, remembering the URL and metadata it was created with.
/// </summary>
/// <param name="html">The HTML text, handed to the base class constructor.</param>
/// <param name="url">Stored in <c>_url</c>.</param>
/// <param name="metaData">Stored in <c>_metaData</c>.</param>
/// <param name="fragmentMode">
/// When true, the first-tag / head / body tracking flags are pre-set
/// (presumably so a fragment is handled as if those document landmarks were already passed - confirm against the flag consumers).
/// </param>
public LightWeightHTMLReplacer(string html, string url, LightWeightHTMLMetaData metaData, bool fragmentMode)
    : base(html)
{
    _url = url;
    _metaData = metaData;

    if (!fragmentMode)
    {
        return;
    }

    // Fragment mode: start out with the tracking flags already flipped.
    _firstTag = false;
    _seenHead = true;
    _seenBody = true;
}
/// <summary>
/// Builds a <see cref="LightWeightHTMLDocument"/> from the contents of a stream.
/// </summary>
/// <remarks>
/// A non-seekable stream is copied to a temp file and loaded through <c>FromFile</c>.
/// A seekable stream is decoded with <see cref="Encoding.Default"/> first; when the parsed document's
/// metadata declares a charset that resolves to a different encoding, the stream is rewound to where
/// the first read began and decoded again with that encoding.
/// On the seekable path the stream is wrapped in <see cref="StreamReader"/>s that are disposed before
/// returning, which closes the stream as well.
/// </remarks>
/// <param name="stream">The stream containing the HTML.</param>
/// <param name="url">Passed through to <c>FromFile</c> / <c>FromString</c>.</param>
/// <param name="name">Passed through to <c>FromFile</c> / <c>FromString</c>.</param>
/// <returns>The parsed document.</returns>
public static LightWeightHTMLDocument FromStream(Stream stream, string url, string name)
{
    if (!stream.CanSeek)
    {
        string filePath = TempFileManager.Instance.CreateTempFile();
        using (FileStream file = new FileStream(filePath, FileMode.Open))
        {
            StreamHelper.Transfer(stream, file);
        }

        return LightWeightHTMLDocument.FromFile(filePath, url, name);
    }

    // Remember where the first read starts so that a re-read decodes exactly the same bytes.
    long startPosition = stream.Position;

    Encoding currentEncoding = Encoding.Default;
    LightWeightHTMLDocument lwDoc;
    using (StreamReader reader = new StreamReader(stream, currentEncoding))
    {
        lwDoc = LightWeightHTMLDocument.FromString(reader.ReadToEnd(), url, name, true);

        // If there is no metadata that disagrees with our encoding, just return the DOM read with default decoding
        string charset = new LightWeightHTMLMetaData(lwDoc).Charset;
        if (charset != null)
        {
            try
            {
                // Encoding does not overload the != operator, so compare by value; a reference
                // comparison reports "different" for equivalent encodings held in distinct instances
                // and would trigger a pointless second decode.
                Encoding encoding = Encoding.GetEncoding(charset);
                if (!encoding.Equals(currentEncoding))
                {
                    // The decoding is different than the encoding used to read this document, reread it with correct encoding
                    reader.DiscardBufferedData();
                    stream.Seek(startPosition, SeekOrigin.Begin);
                    using (StreamReader reader2 = new StreamReader(stream, encoding))
                    {
                        lwDoc = LightWeightHTMLDocument.FromString(reader2.ReadToEnd(), url, name, true);
                    }
                }
            }
            catch (NotSupportedException)
            {
                // The encoding isn't supported on this system
            }
            catch (ArgumentException)
            {
                // The encoding isn't an encoding that the OS even knows about (its probably
                // not well formatted or misspelled or something)
            }
        }
    }

    return lwDoc;
}
/// <summary>
/// Any setting that is derived from the homepage HTML needs to be in this function. This function is turned
/// on and off when detecting blog settings through IncludeHomePageSettings. None of these checks will be run
/// if the internet is not active. As each check is made, it does not need to be applied back to the _content;
/// the detected values are collected and handed over once, at the end, through _context.HomePageOverrides
/// (per the original note, that is the point at which the settings are written back to the registry).
/// </summary>
private void DetectHomePageSettings()
{
    if (_homepageAccessor.HtmlDocument == null)
    {
        return;
    }

    IDictionary homepageSettings = new Hashtable();

    Debug.Assert(!UseManifestCache, "This code will not run correctly under the manifest cache, due to option overrides not being set");

    // One metadata instance serves both the charset and the doctype checks.
    LightWeightHTMLMetaData metaData = new LightWeightHTMLMetaData(_homepageAccessor.HtmlDocument);
    if (metaData.Charset != null)
    {
        try
        {
            homepageSettings.Add(BlogClientOptions.CHARACTER_SET, metaData.Charset);
        }
        catch (NotSupportedException)
        {
            //not an actual encoding
            // NOTE(review): nothing visible here validates the charset as an encoding, so this
            // catch looks vestigial - confirm before removing it.
        }
    }

    string docType = metaData.DocType;
    if (docType != null && docType.IndexOf("xhtml", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        homepageSettings.Add(BlogClientOptions.REQUIRES_XHTML, true.ToString(CultureInfo.InvariantCulture));
    }

    //checking whether blog is rtl
    HtmlExtractor extractor = new HtmlExtractor(_homepageAccessor.HtmlDocument.RawHtml);
    if (extractor.Seek(new OrPredicate(
        new SmartPredicate("<html dir>"),
        new SmartPredicate("<body dir>"))).Success)
    {
        BeginTag tag = (BeginTag)extractor.Element;
        string dir = tag.GetAttributeValue("dir");
        if (String.Equals(dir, "rtl", StringComparison.OrdinalIgnoreCase))
        {
            homepageSettings.Add(BlogClientOptions.TEMPLATE_IS_RTL, true.ToString(CultureInfo.InvariantCulture));
        }
    }

    // The homepage document was verified non-null on entry (and dereferenced above),
    // so no second null check is needed before looking for an image viewer.
    string html = _homepageAccessor.OriginalHtml;
    ImageViewer viewer = DhtmlImageViewers.DetectImageViewer(html, _context.HomepageUrl);
    if (viewer != null)
    {
        homepageSettings.Add(BlogClientOptions.DHTML_IMAGE_VIEWER, viewer.Name);
    }

    _context.HomePageOverrides = homepageSettings;
}
/// <summary>
/// Reads HTML from a stream and converts it into an <c>IHTMLDocument2</c>.
/// </summary>
/// <remarks>
/// A non-seekable stream is first buffered into a <see cref="MemoryStream"/>. The content is decoded
/// with <see cref="Encoding.Default"/>; when the parsed metadata declares a charset that resolves to a
/// different encoding, the stream is rewound to where the first read began and decoded again with that
/// encoding. The resulting text is then passed to <c>StringToHTMLDoc</c>.
/// The stream is wrapped in <see cref="StreamReader"/>s that are disposed before returning, which
/// closes the stream as well.
/// </remarks>
/// <param name="stream">The stream containing the HTML.</param>
/// <param name="baseUrl">Passed through to <c>FromString</c> and <c>StringToHTMLDoc</c>.</param>
/// <param name="escapePaths">Passed to <c>StringToHTMLDoc</c> for both of its trailing boolean arguments.</param>
/// <returns>The document produced by <c>StringToHTMLDoc</c>.</returns>
public static IHTMLDocument2 StreamToHTMLDoc(Stream stream, string baseUrl, bool escapePaths)
{
    if (!stream.CanSeek)
    {
        MemoryStream mStream = new MemoryStream();
        StreamHelper.Transfer(stream, mStream);
        mStream.Seek(0, SeekOrigin.Begin);
        stream = mStream;
    }

    // Remember where the first read starts so that a re-read decodes exactly the same bytes.
    long startPosition = stream.Position;

    string htmlContent;
    Encoding currentEncoding = Encoding.Default;
    using (StreamReader reader = new StreamReader(stream, currentEncoding))
    {
        htmlContent = reader.ReadToEnd();
        LightWeightHTMLDocument lwDoc = LightWeightHTMLDocument.FromString(htmlContent, baseUrl, true);

        // If there is no metadata that disagrees with our encoding, just return the DOM read with default decoding
        string charset = new LightWeightHTMLMetaData(lwDoc).Charset;
        if (charset != null)
        {
            try
            {
                // Encoding does not overload the != operator, so compare by value; a reference
                // comparison reports "different" for equivalent encodings held in distinct instances
                // and would trigger a pointless second decode.
                Encoding encoding = Encoding.GetEncoding(charset);
                if (!encoding.Equals(currentEncoding))
                {
                    // The decoding is different than the encoding used to read this document, reread it with correct encoding
                    reader.DiscardBufferedData();
                    stream.Seek(startPosition, SeekOrigin.Begin);
                    using (StreamReader reader2 = new StreamReader(stream, encoding))
                    {
                        htmlContent = reader2.ReadToEnd();
                    }
                }
            }
            catch (NotSupportedException)
            {
                // The encoding isn't supported on this system
            }
            catch (ArgumentException)
            {
                // The encoding isn't an encoding that the OS even knows about (its probably
                // not well formatted or misspelled or something)
            }
        }
    }

    //now that the html content is loaded in the right encoding, convert it into a document.
    IHTMLDocument2 doc2 = StringToHTMLDoc(htmlContent, baseUrl, escapePaths, escapePaths);
    return doc2;
}
/// <summary>
/// Builds a <see cref="LightWeightHTMLDocument"/> from the contents of a stream.
/// </summary>
/// <remarks>
/// A non-seekable stream is copied to a temp file and loaded through <c>FromFile</c>.
/// A seekable stream is decoded with <see cref="Encoding.Default"/> first; when the parsed document's
/// metadata declares a charset that resolves to a different encoding, the stream is rewound to where
/// the first read began and decoded again with that encoding.
/// On the seekable path the stream is wrapped in <see cref="StreamReader"/>s that are disposed before
/// returning, which closes the stream as well.
/// </remarks>
/// <param name="stream">The stream containing the HTML.</param>
/// <param name="url">Passed through to <c>FromFile</c> / <c>FromString</c>.</param>
/// <param name="name">Passed through to <c>FromFile</c> / <c>FromString</c>.</param>
/// <returns>The parsed document.</returns>
public static LightWeightHTMLDocument FromStream(Stream stream, string url, string name)
{
    if (!stream.CanSeek)
    {
        string filePath = TempFileManager.Instance.CreateTempFile();
        using (FileStream file = new FileStream(filePath, FileMode.Open))
        {
            StreamHelper.Transfer(stream, file);
        }

        return LightWeightHTMLDocument.FromFile(filePath, url, name);
    }

    // Remember where the first read starts so that a re-read decodes exactly the same bytes.
    long startPosition = stream.Position;

    Encoding currentEncoding = Encoding.Default;
    LightWeightHTMLDocument lwDoc;
    using (StreamReader reader = new StreamReader(stream, currentEncoding))
    {
        lwDoc = LightWeightHTMLDocument.FromString(reader.ReadToEnd(), url, name, true);

        // If there is no metadata that disagrees with our encoding, just return the DOM read with default decoding
        string charset = new LightWeightHTMLMetaData(lwDoc).Charset;
        if (charset != null)
        {
            try
            {
                // Encoding does not overload the != operator, so compare by value; a reference
                // comparison reports "different" for equivalent encodings held in distinct instances
                // and would trigger a pointless second decode.
                Encoding encoding = Encoding.GetEncoding(charset);
                if (!encoding.Equals(currentEncoding))
                {
                    // The decoding is different than the encoding used to read this document, reread it with correct encoding
                    reader.DiscardBufferedData();
                    stream.Seek(startPosition, SeekOrigin.Begin);
                    using (StreamReader reader2 = new StreamReader(stream, encoding))
                    {
                        lwDoc = LightWeightHTMLDocument.FromString(reader2.ReadToEnd(), url, name, true);
                    }
                }
            }
            catch (NotSupportedException)
            {
                // The encoding isn't supported on this system
            }
            catch (ArgumentException)
            {
                // The encoding isn't an encoding that the OS even knows about (its probably
                // not well formatted or misspelled or something)
            }
        }
    }

    return lwDoc;
}
/// <summary>
/// Creates a replacer with fragment mode switched off; delegates to the four-argument constructor.
/// </summary>
/// <param name="html">The HTML text to operate on.</param>
/// <param name="url">Forwarded unchanged to the four-argument constructor.</param>
/// <param name="metaData">Forwarded unchanged to the four-argument constructor.</param>
public LightWeightHTMLReplacer(string html, string url, LightWeightHTMLMetaData metaData)
    : this(html, url, metaData, false)
{
}