Exemple #1
0
        /// <summary>
        /// Detaches the first "viewport" and the first "HandheldFriendly" meta element found under
        /// the given head element and, unless disabled, prepends a fresh HandheldFriendly meta element.
        /// </summary>
        /// <param name="headElement">Head element whose meta elements are processed.</param>
        /// <param name="domSerializationParams">
        /// Serialization settings; <c>DontIncludeMobileSpecificMetaElements</c> decides whether the
        /// HandheldFriendly element is added back.
        /// </param>
        private static void ProcessMobileSpecificMetaElements(XElement headElement, DomSerializationParams domSerializationParams)
        {
            // The removals below happen regardless of the serialization settings.
            // Names are compared case-insensitively; only the first match of each kind is detached.
            XElement existingViewport = headElement
                .GetChildrenByTagName("meta")
                .Where(meta => "viewport".Equals(meta.GetAttributeValue("name", ""), StringComparison.OrdinalIgnoreCase))
                .FirstOrDefault();

            if (existingViewport != null)
            {
                existingViewport.Remove();
            }

            XElement existingHandheldFriendly = headElement
                .GetChildrenByTagName("meta")
                .Where(meta => "HandheldFriendly".Equals(meta.GetAttributeValue("name", ""), StringComparison.OrdinalIgnoreCase))
                .FirstOrDefault();

            if (existingHandheldFriendly != null)
            {
                existingHandheldFriendly.Remove();
            }

            if (domSerializationParams.DontIncludeMobileSpecificMetaElements)
            {
                return;
            }

            // Create the new element in the same namespace as the head element.
            string metaNamespace = headElement.Name == null ? "" : (headElement.Name.NamespaceName ?? "");

            XElement freshHandheldFriendly = new XElement(
                XName.Get("meta", metaNamespace),
                new XAttribute("name", "HandheldFriendly"),
                new XAttribute("content", "true"));

            headElement.AddFirst(freshHandheldFriendly);
        }
Exemple #2
0
        /// <summary>
        /// Replaces the first meta element whose "http-equiv" attribute equals "content-type"
        /// (case-insensitively) with a fresh UTF-8 Content-Type meta element placed first in the head.
        /// Does nothing when <c>DontIncludeContentTypeMetaElement</c> is set.
        /// </summary>
        /// <param name="headElement">Head element whose meta elements are processed.</param>
        /// <param name="domSerializationParams">Serialization settings controlling whether the element is emitted.</param>
        private static void ProcessMetaContentTypeElement(XElement headElement, DomSerializationParams domSerializationParams)
        {
            if (domSerializationParams.DontIncludeContentTypeMetaElement)
            {
                return;
            }

            // Detach the existing content-type declaration (first match only), if there is one.
            XElement existingContentType = headElement
                .GetChildrenByTagName("meta")
                .Where(meta => "content-type".Equals(meta.GetAttributeValue("http-equiv", ""), StringComparison.OrdinalIgnoreCase))
                .FirstOrDefault();

            if (existingContentType != null)
            {
                existingContentType.Remove();
            }

            // Create <meta http-equiv="Content-Type" ... /> in the same namespace as the head element.
            string metaNamespace = headElement.Name == null ? "" : (headElement.Name.NamespaceName ?? "");

            XElement freshContentType = new XElement(
                XName.Get("meta", metaNamespace),
                new XAttribute("http-equiv", "Content-Type"),
                new XAttribute("content", "text/html; charset=utf-8"));

            headElement.AddFirst(freshContentType);
        }
        /// <summary>
        /// Serializes given DOM (System.Xml.Linq.XDocument object) to a string.
        /// </summary>
        /// <param name="document">System.Xml.Linq.XDocument instance containing the DOM to be serialized.</param>
        /// <param name="domSerializationParams">Contains parameters that modify the behaviour of the output serialization.</param>
        /// <returns>
        /// Serialized representation of the DOM.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="document"/> or <paramref name="domSerializationParams"/> is null.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// The document must have a root.
        /// or
        /// The document's root must be an html element.
        /// (Both are only checked when at least one meta element has to be emitted.)
        /// </exception>
        public string SerializeDocument(XDocument document, DomSerializationParams domSerializationParams)
        {
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }

            if (domSerializationParams == null)
            {
                throw new ArgumentNullException("domSerializationParams");
            }

            // The <head> element is only needed (and the root only validated) when some meta element is requested.
            if (!domSerializationParams.DontIncludeContentTypeMetaElement
                || !domSerializationParams.DontIncludeMobileSpecificMetaElements
                || !domSerializationParams.DontIncludeGeneratorMetaElement)
            {
                var documentRoot = document.Root;

                if (documentRoot == null)
                {
                    throw new ArgumentException("The document must have a root.");
                }

                if (documentRoot.Name == null || !"html".Equals(documentRoot.Name.LocalName, StringComparison.OrdinalIgnoreCase))
                {
                    throw new ArgumentException("The document's root must be an html element.");
                }

                // add <head> element if not present
                var headElement = documentRoot.GetChildrenByTagName("head").FirstOrDefault();

                if (headElement == null)
                {
                    headElement = new XElement("head");
                    documentRoot.AddFirst(headElement);
                }

                ProcessMetaElements(headElement, domSerializationParams);
            }

            string result = document.ToString(domSerializationParams.PrettyPrint ? SaveOptions.None : SaveOptions.DisableFormatting);

            if (!domSerializationParams.DontIncludeDocTypeMetaElement)
            {
                result = "<!DOCTYPE html>\r\n" + result;
            }

            // In body-only mode the whole-document output (including the DOCTYPE) is replaced
            // by the inner HTML of the first <body> element, when one exists.
            if (domSerializationParams.BodyOnly && document.Root != null)
            {
                var body = document.Root.GetElementsByTagName("body").FirstOrDefault();

                if (body != null)
                {
                    result = body.GetInnerHtml();
                }
            }

            // A document may legitimately reach this point without a root (when all meta elements
            // are disabled), so the root has to be checked here just like in the body-only branch.
            if (domSerializationParams.NoHeadline && document.Root != null)
            {
                var h1 = document.Root.GetElementsByTagName("h1").FirstOrDefault();

                if (h1 != null)
                {
                    // NOTE(review): h1.ToString() yields indented XML; if the headline is formatted
                    // differently inside 'result', the text will not match and nothing is removed - confirm.
                    result = result.Replace(h1.ToString(), "");
                }
            }

            return result;
        }
Exemple #4
0
        /// <summary>
        /// Serializes given DOM (System.Xml.Linq.XDocument object) to a string.
        /// </summary>
        /// <param name="document">System.Xml.Linq.XDocument instance containing the DOM to be serialized.</param>
        /// <param name="domSerializationParams">Contains parameters that modify the behaviour of the output serialization.</param>
        /// <returns>
        /// Serialized representation of the DOM.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="document"/> or <paramref name="domSerializationParams"/> is null.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// The document must have a root.
        /// or
        /// The document's root must be an html element.
        /// (Both are only checked when at least one meta element has to be emitted.)
        /// </exception>
        public string SerializeDocument(XDocument document, DomSerializationParams domSerializationParams)
        {
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }

            if (domSerializationParams == null)
            {
                throw new ArgumentNullException("domSerializationParams");
            }

            // The <head> element is only needed (and the root only validated) when some meta element is requested.
            if (!domSerializationParams.DontIncludeContentTypeMetaElement ||
                !domSerializationParams.DontIncludeMobileSpecificMetaElements ||
                !domSerializationParams.DontIncludeGeneratorMetaElement)
            {
                var documentRoot = document.Root;

                if (documentRoot == null)
                {
                    throw new ArgumentException("The document must have a root.");
                }

                if (documentRoot.Name == null || !"html".Equals(documentRoot.Name.LocalName, StringComparison.OrdinalIgnoreCase))
                {
                    throw new ArgumentException("The document's root must be an html element.");
                }

                // add <head> element if not present
                var headElement = documentRoot.GetChildrenByTagName("head").FirstOrDefault();

                if (headElement == null)
                {
                    headElement = new XElement("head");
                    documentRoot.AddFirst(headElement);
                }

                ProcessMetaElements(headElement, domSerializationParams);
            }

            string result = document.ToString(domSerializationParams.PrettyPrint ? SaveOptions.None : SaveOptions.DisableFormatting);

            if (!domSerializationParams.DontIncludeDocTypeMetaElement)
            {
                result = "<!DOCTYPE html>\r\n" + result;
            }

            // In body-only mode the whole-document output (including the DOCTYPE) is replaced
            // by the inner HTML of the first <body> element, when one exists.
            if (domSerializationParams.BodyOnly && document.Root != null)
            {
                var body = document.Root.GetElementsByTagName("body").FirstOrDefault();

                if (body != null)
                {
                    result = body.GetInnerHtml();
                }
            }

            // A document may legitimately reach this point without a root (when all meta elements
            // are disabled), so the root has to be checked here just like in the body-only branch.
            if (domSerializationParams.NoHeadline && document.Root != null)
            {
                var h1 = document.Root.GetElementsByTagName("h1").FirstOrDefault();

                if (h1 != null)
                {
                    // NOTE(review): h1.ToString() yields indented XML; if the headline is formatted
                    // differently inside 'result', the text will not match and nothing is removed - confirm.
                    result = result.Replace(h1.ToString(), "");
                }
            }

            return result;
        }
Exemple #5
0
        /// <summary>
        /// Replaces the first meta element named "Generator" (case-insensitive match) with a fresh one
        /// whose content is <c>Consts.NReadabilityFullName</c>, placed first in the head.
        /// Does nothing when <c>DontIncludeGeneratorMetaElement</c> is set.
        /// </summary>
        /// <param name="headElement">Head element whose meta elements are processed.</param>
        /// <param name="domSerializationParams">Serialization settings controlling whether the element is emitted.</param>
        private static void ProcessMetaGeneratorElement(XElement headElement, DomSerializationParams domSerializationParams)
        {
            if (domSerializationParams.DontIncludeGeneratorMetaElement)
            {
                return;
            }

            // Detach the existing generator declaration (first match only), if there is one.
            XElement existingGenerator = headElement
                .GetChildrenByTagName("meta")
                .Where(meta => "Generator".Equals(meta.GetAttributeValue("name", ""), StringComparison.OrdinalIgnoreCase))
                .FirstOrDefault();

            if (existingGenerator != null)
            {
                existingGenerator.Remove();
            }

            // Create <meta name="Generator" ... /> in the same namespace as the head element.
            string metaNamespace = headElement.Name == null ? "" : (headElement.Name.NamespaceName ?? "");

            XElement freshGenerator = new XElement(
                XName.Get("meta", metaNamespace),
                new XAttribute("name", "Generator"),
                new XAttribute("content", Consts.NReadabilityFullName));

            headElement.AddFirst(freshGenerator);
        }
        /// <summary>
        /// Transcodes the given HTML content to an XDocument via <c>TranscodeToXml</c> and returns
        /// that document serialized with the supplied serialization parameters.
        /// </summary>
        /// <param name="htmlContent">HTML content passed through to <c>TranscodeToXml</c>.</param>
        /// <param name="url">URL passed through to <c>TranscodeToXml</c>.</param>
        /// <param name="domSerializationParams">Parameters handed to the DOM serializer.</param>
        /// <param name="mainContentExtracted">Set by <c>TranscodeToXml</c>.</param>
        /// <param name="nextPageUrl">Set by <c>TranscodeToXml</c>.</param>
        /// <returns>The string produced by the DOM serializer.</returns>
        public string Transcode(string htmlContent, string url, DomSerializationParams domSerializationParams, out bool mainContentExtracted, out string nextPageUrl)
        {
            // The title reported by TranscodeToXml is not used here; the local only satisfies the out parameter.
            string unusedTitle;

            XDocument transcodedDocument = TranscodeToXml(htmlContent, url, out mainContentExtracted, out unusedTitle, out nextPageUrl);
            string serialized = _sgmlDomSerializer.SerializeDocument(transcodedDocument, domSerializationParams);

            return serialized;
        }
        /// <summary>
        /// Detaches the first "viewport" and the first "HandheldFriendly" meta element found under
        /// the given head element and, unless disabled, prepends a fresh HandheldFriendly meta element.
        /// </summary>
        /// <param name="headElement">Head element whose meta elements are processed.</param>
        /// <param name="domSerializationParams">
        /// Serialization settings; <c>DontIncludeMobileSpecificMetaElements</c> decides whether the
        /// HandheldFriendly element is added back.
        /// </param>
        private static void ProcessMobileSpecificMetaElements(XElement headElement, DomSerializationParams domSerializationParams)
        {
            // The removals below happen regardless of the serialization settings.
            // Names are compared case-insensitively; only the first match of each kind is detached.
            XElement existingViewport = headElement
                .GetChildrenByTagName("meta")
                .Where(meta => "viewport".Equals(meta.GetAttributeValue("name", ""), StringComparison.OrdinalIgnoreCase))
                .FirstOrDefault();

            if (existingViewport != null)
            {
                existingViewport.Remove();
            }

            XElement existingHandheldFriendly = headElement
                .GetChildrenByTagName("meta")
                .Where(meta => "HandheldFriendly".Equals(meta.GetAttributeValue("name", ""), StringComparison.OrdinalIgnoreCase))
                .FirstOrDefault();

            if (existingHandheldFriendly != null)
            {
                existingHandheldFriendly.Remove();
            }

            if (domSerializationParams.DontIncludeMobileSpecificMetaElements)
            {
                return;
            }

            // Create the new element in the same namespace as the head element.
            string metaNamespace = headElement.Name == null ? "" : (headElement.Name.NamespaceName ?? "");

            XElement freshHandheldFriendly = new XElement(
                XName.Get("meta", metaNamespace),
                new XAttribute("name", "HandheldFriendly"),
                new XAttribute("content", "true"));

            headElement.AddFirst(freshHandheldFriendly);
        }
        /// <summary>
        /// Replaces the first meta element named "Generator" (case-insensitive match) with a fresh one
        /// whose content is <c>Consts.NReadabilityFullName</c>, placed first in the head.
        /// Does nothing when <c>DontIncludeGeneratorMetaElement</c> is set.
        /// </summary>
        /// <param name="headElement">Head element whose meta elements are processed.</param>
        /// <param name="domSerializationParams">Serialization settings controlling whether the element is emitted.</param>
        private static void ProcessMetaGeneratorElement(XElement headElement, DomSerializationParams domSerializationParams)
        {
            if (domSerializationParams.DontIncludeGeneratorMetaElement)
            {
                return;
            }

            // Detach the existing generator declaration (first match only), if there is one.
            XElement existingGenerator = headElement
                .GetChildrenByTagName("meta")
                .Where(meta => "Generator".Equals(meta.GetAttributeValue("name", ""), StringComparison.OrdinalIgnoreCase))
                .FirstOrDefault();

            if (existingGenerator != null)
            {
                existingGenerator.Remove();
            }

            // Create <meta name="Generator" ... /> in the same namespace as the head element.
            string metaNamespace = headElement.Name == null ? "" : (headElement.Name.NamespaceName ?? "");

            XElement freshGenerator = new XElement(
                XName.Get("meta", metaNamespace),
                new XAttribute("name", "Generator"),
                new XAttribute("content", Consts.NReadabilityFullName));

            headElement.AddFirst(freshGenerator);
        }
 /// <summary>
 /// Runs the three meta-element processors on the given head element: content type first,
 /// then the mobile-specific elements, then the generator element.
 /// </summary>
 /// <param name="headElement">Head element passed to each processor.</param>
 /// <param name="domSerializationParams">Serialization settings passed to each processor.</param>
 private static void ProcessMetaElements(XElement headElement, DomSerializationParams domSerializationParams)
 {
     // Call order is kept as-is: each processor inserts its element with AddFirst,
     // so the order of the calls influences the order of the resulting elements.
     ProcessMetaContentTypeElement(headElement, domSerializationParams);
     ProcessMobileSpecificMetaElements(headElement, domSerializationParams);
     ProcessMetaGeneratorElement(headElement, domSerializationParams);
 }
        /// <summary>
        /// Replaces the first meta element whose "http-equiv" attribute equals "content-type"
        /// (case-insensitively) with a fresh UTF-8 Content-Type meta element placed first in the head.
        /// Does nothing when <c>DontIncludeContentTypeMetaElement</c> is set.
        /// </summary>
        /// <param name="headElement">Head element whose meta elements are processed.</param>
        /// <param name="domSerializationParams">Serialization settings controlling whether the element is emitted.</param>
        private static void ProcessMetaContentTypeElement(XElement headElement, DomSerializationParams domSerializationParams)
        {
            if (domSerializationParams.DontIncludeContentTypeMetaElement)
            {
                return;
            }

            // Detach the existing content-type declaration (first match only), if there is one.
            XElement existingContentType = headElement
                .GetChildrenByTagName("meta")
                .Where(meta => "content-type".Equals(meta.GetAttributeValue("http-equiv", ""), StringComparison.OrdinalIgnoreCase))
                .FirstOrDefault();

            if (existingContentType != null)
            {
                existingContentType.Remove();
            }

            // Create <meta http-equiv="Content-Type" ... /> in the same namespace as the head element.
            string metaNamespace = headElement.Name == null ? "" : (headElement.Name.NamespaceName ?? "");

            XElement freshContentType = new XElement(
                XName.Get("meta", metaNamespace),
                new XAttribute("http-equiv", "Content-Type"),
                new XAttribute("content", "text/html; charset=utf-8"));

            headElement.AddFirst(freshContentType);
        }
Exemple #11
0
 /// <summary>
 /// Runs the three meta-element processors on the given head element: content type first,
 /// then the mobile-specific elements, then the generator element.
 /// </summary>
 /// <param name="headElement">Head element passed to each processor.</param>
 /// <param name="domSerializationParams">Serialization settings passed to each processor.</param>
 private static void ProcessMetaElements(XElement headElement, DomSerializationParams domSerializationParams)
 {
     // Call order is kept as-is: changing it could change the order of the emitted meta elements.
     ProcessMetaContentTypeElement(headElement, domSerializationParams);

     ProcessMobileSpecificMetaElements(headElement, domSerializationParams);

     ProcessMetaGeneratorElement(headElement, domSerializationParams);
 }
Exemple #12
0
 /// <summary>
 /// Serializes the given DOM (a System.Xml.Linq.XDocument) to a string using the
 /// parameters returned by <c>DomSerializationParams.CreateDefault()</c>.
 /// </summary>
 /// <param name="document">System.Xml.Linq.XDocument instance containing the DOM to be serialized.</param>
 /// <returns>Serialized representation of the DOM.</returns>
 public string SerializeDocument(XDocument document)
 {
     // Delegate to the two-argument overload with default serialization parameters.
     var defaultParams = DomSerializationParams.CreateDefault();

     return SerializeDocument(document, defaultParams);
 }