/// <summary>
/// Strips the first <c>&lt;meta name="viewport"&gt;</c> element found under the given head element, if any.
/// </summary>
/// <param name="headElement">Element whose "meta" children are searched.</param>
/// <param name="domSerializationParams">Serialization parameters; not consulted by this method.</param>
private static void ProcessMobileSpecificMetaElements(XElement headElement, DomSerializationParams domSerializationParams)
{
    // The "name" attribute is matched case-insensitively; a missing attribute is read as "".
    XElement viewportMeta = headElement
        .GetChildrenByTagName("meta")
        .FirstOrDefault(meta => string.Equals(
            "viewport",
            meta.GetAttributeValue("name", ""),
            StringComparison.OrdinalIgnoreCase));

    // Nothing to do when the document carries no viewport meta element.
    viewportMeta?.Remove();
}
/// <summary>
/// Fetches the page at <paramref name="url"/>, transcodes it to an XML document, appends the
/// follow-up page when the transcoder reports one, and serializes the resulting document.
/// </summary>
/// <param name="url">Address of the page to fetch and transcode.</param>
/// <param name="domSerializationParams">Parameters forwarded to the DOM serializer.</param>
/// <returns>
/// A <see cref="TranscodeResult"/> constructed with <c>false</c> when the fetched content is null or empty;
/// otherwise a result constructed with the transcoder's "main content extracted" flag, carrying the
/// serialized content and the extracted title.
/// </returns>
private async Task<TranscodeResult> DoTranscodeAsync(string url, DomSerializationParams domSerializationParams)
{
    // Reset the paging state kept on this instance before starting a new run.
    _curPageNum = 1;
    _parsedPages = new List<string>();

    // Record this URL (minus a single trailing slash) up front so the first page is not processed twice.
    string urlWithoutTrailingSlash = Regex.Replace(url, @"\/$", "");
    _parsedPages.Add(urlWithoutTrailingSlash);

    string fetchedHtml = await _urlFetcher.FetchAsync(url).ConfigureAwait(false);

    // Nothing was fetched, so there is nothing to transcode.
    if (string.IsNullOrEmpty(fetchedHtml))
    {
        return new TranscodeResult(false);
    }

    // Attempt to transcode the page.
    bool mainContentExtracted;
    string extractedTitle;
    string? nextPage;
    XDocument document = _transcoder.TranscodeToXml(
        fetchedHtml,
        url,
        out mainContentExtracted,
        out extractedTitle,
        out nextPage);

    if (nextPage != null)
    {
        // NOTE(review): presumably AppendNextPageAsync advances _curPageNum as pages are added - confirm.
        await AppendNextPageAsync(document, nextPage).ConfigureAwait(false);
    }

    // With more than one page, the first content div is tagged as page 1.
    bool hasMultiplePages = _curPageNum > 1;

    if (hasMultiplePages)
    {
        var firstPageContainer = document.GetElementById("readInner").Element("div");

        firstPageContainer.SetId(pageIdPrefix + "1");
        firstPageContainer.SetClass("page");
    }

    string serialized = _sgmlDomSerializer.Serialize(document, domSerializationParams);

    return new TranscodeResult(mainContentExtracted)
    {
        Content = serialized,
        Title = extractedTitle
    };
}
/// <summary>
/// Moves an existing <c>&lt;meta name="Generator"&gt;</c> element to the front of the given head element.
/// Does nothing when generator meta handling is disabled or when no such element exists.
/// </summary>
/// <param name="headElement">Element whose "meta" children are searched and reordered.</param>
/// <param name="domSerializationParams">
/// Serialization parameters; the step is skipped when <c>DontIncludeGeneratorMetaElement</c> is set.
/// </param>
private static void ProcessMetaGeneratorElement(XElement headElement, DomSerializationParams domSerializationParams)
{
    if (domSerializationParams.DontIncludeGeneratorMetaElement)
    {
        return;
    }

    // The "name" attribute is matched case-insensitively; only the first match is considered.
    XElement generatorMeta = headElement
        .GetChildrenByTagName("meta")
        .FirstOrDefault(meta => string.Equals(
            "Generator",
            meta.GetAttributeValue("name", ""),
            StringComparison.OrdinalIgnoreCase));

    if (generatorMeta == null)
    {
        // No generator element present: AddFirst(null) adds nothing, so there is no work to do.
        return;
    }

    // Detach the element from its current position and re-insert it as the first child.
    generatorMeta.Remove();
    headElement.AddFirst(generatorMeta);
}
/// <summary>
/// Removes the first <c>&lt;meta http-equiv="content-type"&gt;</c> element found under the given head element,
/// unless content-type meta handling is disabled.
/// </summary>
/// <param name="headElement">Element whose "meta" children are searched.</param>
/// <param name="domSerializationParams">
/// Serialization parameters; the step is skipped when <c>DontIncludeContentTypeMetaElement</c> is set.
/// </param>
private static void ProcessMetaContentTypeElement(XElement headElement, DomSerializationParams domSerializationParams)
{
    if (domSerializationParams.DontIncludeContentTypeMetaElement)
    {
        return;
    }

    // The "http-equiv" attribute is matched case-insensitively; a missing attribute is read as "".
    XElement contentTypeMeta = headElement
        .GetChildrenByTagName("meta")
        .FirstOrDefault(meta => string.Equals(
            "content-type",
            meta.GetAttributeValue("http-equiv", ""),
            StringComparison.OrdinalIgnoreCase));

    contentTypeMeta?.Remove();

    // No replacement element (e.g. <meta charset="utf-8"/>) is inserted here.
}
/// <summary>
/// Serializes given DOM (System.Xml.Linq.XDocument object) to a string.
/// </summary>
/// <param name="document">System.Xml.Linq.XDocument instance containing the DOM to be serialized.</param>
/// <param name="domSerializationParams">Contains parameters that modify the behaviour of the output serialization.</param>
/// <returns>Serialized representation of the DOM.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="document"/> or <paramref name="domSerializationParams"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Meta element processing is requested and the document has no root, or its root is not an html element.
/// </exception>
public string Serialize(XDocument document, DomSerializationParams domSerializationParams)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException further down.
    if (document == null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    if (domSerializationParams == null)
    {
        throw new ArgumentNullException(nameof(domSerializationParams));
    }

    // The head element is only touched when at least one kind of meta element handling is enabled.
    bool metaProcessingRequested =
        !domSerializationParams.DontIncludeContentTypeMetaElement
        || !domSerializationParams.DontIncludeMobileSpecificMetaElements
        || !domSerializationParams.DontIncludeGeneratorMetaElement;

    if (metaProcessingRequested)
    {
        var documentRoot = document.Root;

        if (documentRoot == null)
        {
            throw new ArgumentException("The document must have a root.");
        }

        // XElement.Name is never null, so only the local name needs checking.
        if (!"html".Equals(documentRoot.Name.LocalName, StringComparison.OrdinalIgnoreCase))
        {
            throw new ArgumentException("The document's root must be an html element.");
        }

        // add <head> element if not present
        var headElement = documentRoot.GetChildrenByTagName("head").FirstOrDefault();

        if (headElement == null)
        {
            headElement = new XElement("head");
            documentRoot.AddFirst(headElement);
        }

        ProcessMetaElements(headElement, domSerializationParams);
    }

    // PrettyPrint selects indented output; otherwise formatting is disabled.
    return document.ToString(domSerializationParams.PrettyPrint ? SaveOptions.None : SaveOptions.DisableFormatting);
}
/// <summary>
/// Fetches and transcodes the page at <paramref name="url"/>, serializing the output with the
/// parameters returned by <c>DomSerializationParams.CreateDefault()</c>.
/// </summary>
/// <param name="url">Address of the page to transcode.</param>
/// <returns>A task producing the result of the transcoding run.</returns>
public Task<TranscodeResult> TranscodeAsync(string url)
    => DoTranscodeAsync(url, DomSerializationParams.CreateDefault());
/// <summary>
/// Fetches and transcodes the page at <paramref name="url"/>, serializing the output with the
/// supplied serialization parameters.
/// </summary>
/// <param name="url">Address of the page to transcode.</param>
/// <param name="domSerializationParams">Parameters forwarded to the DOM serializer.</param>
/// <returns>A task producing the result of the transcoding run.</returns>
public Task<TranscodeResult> TranscodeAsync(string url, DomSerializationParams domSerializationParams)
    => DoTranscodeAsync(url, domSerializationParams);
/// <summary>
/// Runs each meta element processing step against the given head element.
/// </summary>
/// <param name="headElement">Element whose meta children are processed.</param>
/// <param name="domSerializationParams">Serialization parameters handed to every step.</param>
private static void ProcessMetaElements(XElement headElement, DomSerializationParams domSerializationParams)
{
    // Step 1: content-type meta element.
    ProcessMetaContentTypeElement(headElement, domSerializationParams);

    // Step 2: mobile-specific (viewport) meta element.
    ProcessMobileSpecificMetaElements(headElement, domSerializationParams);

    // Step 3: generator meta element; kept last because it re-inserts at the front of the head.
    ProcessMetaGeneratorElement(headElement, domSerializationParams);
}
/// <summary>
/// Serializes the given DOM (a System.Xml.Linq.XDocument) to a string, using the serialization
/// parameters returned by <c>DomSerializationParams.CreateDefault()</c>.
/// </summary>
/// <param name="document">System.Xml.Linq.XDocument instance containing the DOM to be serialized.</param>
/// <returns>Serialized representation of the DOM.</returns>
public string Serialize(XDocument document)
    => Serialize(document, DomSerializationParams.CreateDefault());