Example #1
0
        /// <summary>
        /// Removes the first <c>&lt;meta name="viewport"&gt;</c> child of the given head element, if there is one.
        /// </summary>
        /// <param name="headElement">The head element whose meta children are inspected.</param>
        /// <param name="domSerializationParams">Serialization options; not consulted by this method.</param>
        private static void ProcessMobileSpecificMetaElements(XElement headElement, DomSerializationParams domSerializationParams)
        {
            // The "name" attribute is matched case-insensitively; a missing attribute is treated as "".
            var viewportMeta = headElement
                .GetChildrenByTagName("meta")
                .FirstOrDefault(meta => "viewport".Equals(meta.GetAttributeValue("name", ""), StringComparison.OrdinalIgnoreCase));

            // Nothing to do when the head has no viewport meta element.
            viewportMeta?.Remove();
        }
        /// <summary>
        /// Fetches the page at <paramref name="url"/>, transcodes it to XML, appends the next page when the
        /// transcoder reports one, and serializes the resulting document.
        /// </summary>
        /// <param name="url">Address of the page to fetch and transcode.</param>
        /// <param name="domSerializationParams">Options handed to the DOM serializer.</param>
        /// <returns>
        /// A result constructed with <c>false</c> when the fetch yields null or empty content; otherwise a result
        /// carrying the serialized content, the extracted title and the main-content-extracted flag.
        /// </returns>
        private async Task <TranscodeResult> DoTranscodeAsync(string url, DomSerializationParams domSerializationParams)
        {
            // Reset the paging state kept on the instance.
            _curPageNum  = 1;
            _parsedPages = new List<string>();

            // Register this URL (minus a trailing slash) before anything else so the first page is not doubled up.
            _parsedPages.Add(Regex.Replace(url, @"\/$", ""));

            string pageHtml = await _urlFetcher.FetchAsync(url).ConfigureAwait(false);

            // Without any fetched markup there is nothing to transcode.
            if (string.IsNullOrEmpty(pageHtml))
            {
                return new TranscodeResult(false);
            }

            // Attempt to transcode the page.
            XDocument document = _transcoder.TranscodeToXml(
                pageHtml,
                url,
                out bool mainContentExtracted,
                out string extractedTitle,
                out string? nextPage);

            if (nextPage != null)
            {
                await AppendNextPageAsync(document, nextPage).ConfigureAwait(false);
            }

            // More than one page: give the first content div a page id and the "page" class.
            // NOTE(review): _curPageNum is presumably advanced by AppendNextPageAsync - confirm.
            if (_curPageNum > 1)
            {
                var readInner     = document.GetElementById("readInner");
                var firstPageDiv  = readInner.Element("div");

                firstPageDiv.SetId(pageIdPrefix + "1");
                firstPageDiv.SetClass("page");
            }

            string serialized = _sgmlDomSerializer.Serialize(document, domSerializationParams);

            return new TranscodeResult(mainContentExtracted)
            {
                Content = serialized,
                Title = extractedTitle
            };
        }
Example #3
0
        /// <summary>
        /// Moves the first existing <c>&lt;meta name="Generator"&gt;</c> child of the head element to the front of
        /// the head.
        /// </summary>
        /// <remarks>
        /// Does nothing when <c>DontIncludeGeneratorMetaElement</c> is set, or when the head contains no generator
        /// meta element. This method never creates a new generator element.
        /// </remarks>
        /// <param name="headElement">The head element whose meta children are inspected.</param>
        /// <param name="domSerializationParams">Serialization options controlling whether this step runs.</param>
        private static void ProcessMetaGeneratorElement(XElement headElement, DomSerializationParams domSerializationParams)
        {
            if (domSerializationParams.DontIncludeGeneratorMetaElement)
            {
                return;
            }

            // The "name" attribute is matched case-insensitively; a missing attribute is treated as "".
            var metaGeneratorElement = headElement
                .GetChildrenByTagName("meta")
                .FirstOrDefault(meta => "Generator".Equals(meta.GetAttributeValue("name", ""), StringComparison.OrdinalIgnoreCase));

            // Exit explicitly instead of handing a null reference to AddFirst and relying on it being ignored.
            if (metaGeneratorElement == null)
            {
                return;
            }

            // Detach the element from its current position, then re-insert it as the first child of the head.
            metaGeneratorElement.Remove();
            headElement.AddFirst(metaGeneratorElement);
        }
Example #4
0
        /// <summary>
        /// Removes the first <c>&lt;meta http-equiv="content-type"&gt;</c> child of the head element, if present.
        /// </summary>
        /// <remarks>
        /// Does nothing when <c>DontIncludeContentTypeMetaElement</c> is set.
        /// </remarks>
        /// <param name="headElement">The head element whose meta children are inspected.</param>
        /// <param name="domSerializationParams">Serialization options controlling whether this step runs.</param>
        private static void ProcessMetaContentTypeElement(XElement headElement, DomSerializationParams domSerializationParams)
        {
            if (domSerializationParams.DontIncludeContentTypeMetaElement)
            {
                return;
            }

            // The "http-equiv" attribute is matched case-insensitively; a missing attribute is treated as "".
            var contentTypeMeta = headElement
                .GetChildrenByTagName("meta")
                .FirstOrDefault(meta => "content-type".Equals(meta.GetAttributeValue("http-equiv", ""), StringComparison.OrdinalIgnoreCase));

            contentTypeMeta?.Remove();

            // No replacement element (e.g. <meta charset="utf-8"/>) is inserted at present.
        }
Example #5
0
        /// <summary>
        /// Turns the given DOM (a System.Xml.Linq.XDocument) into its string form.
        /// </summary>
        /// <remarks>
        /// Unless all three "DontInclude..." flags are set, the document root is validated, a head element is
        /// created when missing, and the head's meta elements are processed before the document is written out.
        /// </remarks>
        /// <param name="document">The System.Xml.Linq.XDocument holding the DOM to serialize.</param>
        /// <param name="domSerializationParams">Parameters that alter how the output is produced.</param>
        /// <returns>The DOM rendered as a string.</returns>
        /// <exception cref="ArgumentException">
        /// Meta processing is required and the document has no root, or its root is not an html element.
        /// </exception>
        public string Serialize(XDocument document, DomSerializationParams domSerializationParams)
        {
            bool allMetaProcessingDisabled =
                domSerializationParams.DontIncludeContentTypeMetaElement &&
                domSerializationParams.DontIncludeMobileSpecificMetaElements &&
                domSerializationParams.DontIncludeGeneratorMetaElement;

            if (!allMetaProcessingDisabled)
            {
                var root = document.Root;

                if (root == null)
                {
                    throw new ArgumentException("The document must have a root.");
                }

                bool rootIsHtml =
                    root.Name != null &&
                    "html".Equals(root.Name.LocalName, StringComparison.OrdinalIgnoreCase);

                if (!rootIsHtml)
                {
                    throw new ArgumentException("The document's root must be an html element.");
                }

                // Make sure there is a <head> to work on, creating one at the front of the root if needed.
                var head = root.GetChildrenByTagName("head").FirstOrDefault();

                if (head == null)
                {
                    head = new XElement("head");
                    root.AddFirst(head);
                }

                ProcessMetaElements(head, domSerializationParams);
            }

            // Pretty printing keeps the default formatting; otherwise formatting is disabled.
            SaveOptions saveOptions = domSerializationParams.PrettyPrint
                ? SaveOptions.None
                : SaveOptions.DisableFormatting;

            return document.ToString(saveOptions);
        }
 /// <summary>
 /// Transcodes the page at <paramref name="url"/> using the default DOM serialization parameters.
 /// </summary>
 /// <param name="url">Address of the page to transcode.</param>
 /// <returns>A task producing the transcoding result.</returns>
 public Task <TranscodeResult> TranscodeAsync(string url)
     => DoTranscodeAsync(url, DomSerializationParams.CreateDefault());
 /// <summary>
 /// Transcodes the page at <paramref name="url"/> using the supplied DOM serialization parameters.
 /// </summary>
 /// <param name="url">Address of the page to transcode.</param>
 /// <param name="domSerializationParams">Options handed to the DOM serializer.</param>
 /// <returns>A task producing the transcoding result.</returns>
 public Task <TranscodeResult> TranscodeAsync(string url, DomSerializationParams domSerializationParams)
     => DoTranscodeAsync(url, domSerializationParams);
Example #8
0
 /// <summary>
 /// Runs every meta-element processing step against the given head element.
 /// </summary>
 /// <param name="headElement">The head element to process.</param>
 /// <param name="domSerializationParams">Serialization options passed to each step.</param>
 private static void ProcessMetaElements(XElement headElement, DomSerializationParams domSerializationParams)
 {
     // Steps run in this fixed order: content-type, mobile-specific, generator.
     var steps = new Action<XElement, DomSerializationParams>[]
     {
         ProcessMetaContentTypeElement,
         ProcessMobileSpecificMetaElements,
         ProcessMetaGeneratorElement,
     };

     foreach (var step in steps)
     {
         step(headElement, domSerializationParams);
     }
 }
Example #9
0
 /// <summary>
 /// Turns the given DOM (a System.Xml.Linq.XDocument) into its string form using the default
 /// serialization parameters.
 /// </summary>
 /// <param name="document">The System.Xml.Linq.XDocument holding the DOM to serialize.</param>
 /// <returns>The DOM rendered as a string.</returns>
 public string Serialize(XDocument document)
     => Serialize(document, DomSerializationParams.CreateDefault());