예제 #1
1
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and returns it as an
        /// <see cref="XmlDocument"/>.
        /// </summary>
        /// <param name="web">The <see cref="HtmlWeb"/> instance used to fetch the page.</param>
        /// <param name="url">
        /// The address of the page to load. It is also handed to the link
        /// absolutizer when <paramref name="absolutizeLinks"/> is set.
        /// </param>
        /// <param name="format">
        /// When equal to <c>"html"</c>, the page is first written out through
        /// <c>HtmlWeb.LoadHtmlAsXml</c> and that text is re-parsed; any other value
        /// (including <c>null</c>) loads the page directly with <c>HtmlWeb.Load</c>.
        /// </param>
        /// <param name="absolutizeLinks">
        /// if set to <c>true</c>, <c>AttributeReferenceAbsolutizer.ExecuteDefaultAbsolutization</c>
        /// is run on the document node before it is converted.
        /// </param>
        /// <returns>
        /// A new <see cref="XmlDocument"/> loaded from the <c>OuterHtml</c> of the
        /// parsed document node.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="web"/> is <c>null</c>.</exception>
        public static XmlDocument LoadHtmlAsXml(HtmlWeb web, string url, string format,
            bool absolutizeLinks)
        {
            if (web == null)
            {
                throw new System.ArgumentNullException("web");
            }

            HtmlDocument doc;

            if (format == "html")
            {
                string content;

                // The buffer and writer are only needed on this path, so they are
                // created here and released as soon as the text has been read back.
                using (MemoryStream buffer = new MemoryStream())
                using (XmlTextWriter xtw = new XmlTextWriter(buffer, null))
                {
                    web.LoadHtmlAsXml(url, xtw);

                    // Make sure everything the writer buffered is in the stream
                    // before rewinding and reading it.
                    xtw.Flush();
                    buffer.Position = 0;

                    // The reader is not disposed on its own: it shares the buffer,
                    // which the enclosing using statements release.
                    content = new StreamReader(buffer).ReadToEnd();
                }

                doc = new HtmlDocument();
                doc.OptionOutputAsXml = true;
                doc.LoadHtml(content);
            }
            else
            {
                doc = web.Load(url);
                doc.OptionOutputAsXml = true;
            }

            if (absolutizeLinks)
            {
                AttributeReferenceAbsolutizer.ExecuteDefaultAbsolutization(doc.DocumentNode, url);
            }

            // Create, fill, and return the xml document
            XmlDocument xdoc = new XmlDocument();
            xdoc.LoadXml(doc.DocumentNode.OuterHtml);

            return xdoc;
        }
예제 #2
0
        /// <summary>
        /// Fetches <paramref name="url"/> with <paramref name="web"/> and converts the
        /// parsed page into an <see cref="XmlDocument"/>.
        /// </summary>
        /// <param name="web">The <see cref="HtmlWeb"/> used to download the page.</param>
        /// <param name="url">
        /// The page address; also passed to the link absolutizer when
        /// <paramref name="absolutizeLinks"/> is <c>true</c>.
        /// </param>
        /// <param name="format">
        /// <c>"html"</c> routes the download through <c>HtmlWeb.LoadHtmlAsXml</c> and
        /// re-parses its output; every other value (including <c>null</c>) uses
        /// <c>HtmlWeb.Load</c> directly.
        /// </param>
        /// <param name="absolutizeLinks">
        /// if set to <c>true</c>, <c>AttributeReferenceAbsolutizer.ExecuteDefaultAbsolutization</c>
        /// is applied to the document node before conversion.
        /// </param>
        /// <returns>
        /// A new <see cref="XmlDocument"/> loaded from the document node's <c>OuterHtml</c>.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="web"/> is <c>null</c>.</exception>
        public static XmlDocument LoadHtmlAsXml(HtmlWeb web, string url, string format,
                                                bool absolutizeLinks)
        {
            if (web == null)
            {
                throw new System.ArgumentNullException("web");
            }

            HtmlDocument doc;

            if (format == "html")
            {
                doc = LoadThroughXmlWriter(web, url);
            }
            else
            {
                doc = web.Load(url);
                doc.OptionOutputAsXml = true;
            }

            if (absolutizeLinks)
            {
                AttributeReferenceAbsolutizer.ExecuteDefaultAbsolutization
                    (doc.DocumentNode, url);
            }

            // Create, fill, and return the xml document
            XmlDocument xdoc = new XmlDocument();

            xdoc.LoadXml(doc.DocumentNode.OuterHtml);

            return(xdoc);
        }

        // Writes the page at url through HtmlWeb.LoadHtmlAsXml into a temporary
        // in-memory buffer and re-parses that text into an HtmlDocument that has
        // OptionOutputAsXml enabled.
        private static HtmlDocument LoadThroughXmlWriter(HtmlWeb web, string url)
        {
            string content;

            // Both objects are scoped to this helper so they are released even
            // when the download or the read throws.
            using (MemoryStream  m   = new MemoryStream())
            using (XmlTextWriter xtw = new XmlTextWriter(m, null))
            {
                web.LoadHtmlAsXml(url, xtw);

                // Push any buffered writer output into the stream, then rewind it
                xtw.Flush();
                m.Position = 0;

                // No separate disposal for the reader: it wraps the same stream,
                // which the surrounding using statements close.
                content = new StreamReader(m).ReadToEnd();
            }

            HtmlDocument doc = new HtmlDocument();

            doc.OptionOutputAsXml = true;
            doc.LoadHtml(content);

            return(doc);
        }
예제 #3
0
    /// <summary>
    /// Downloads the fixed demo page, has it written out as XML into an in-memory
    /// buffer, and returns the document that <c>LoadFromStream</c> builds from
    /// that buffer.
    /// </summary>
    /// <returns>The <see cref="XmlDocument"/> returned by <c>LoadFromStream</c>.</returns>
    public static XmlDocument GetHtmlAsXml()
    {
        HtmlWeb htmlWeb = new HtmlWeb();

        // In-memory buffer that receives the converted page, written as UTF-8.
        MemoryStream buffer = new MemoryStream();
        XmlTextWriter xmlWriter = new XmlTextWriter(buffer, Encoding.UTF8);

        // Fetch the HTML over the web and emit it as XML into the buffer.
        htmlWeb.LoadHtmlAsXml("http://haacked.com/Demos/screen.html", xmlWriter);

        // Hand the filled buffer to LoadFromStream, which produces the XmlDocument.
        return LoadFromStream(buffer);
    }
예제 #4
0
        /*[STAThread]
         * static void Main(string[] args)
         * {
         *  HtmlWeb hw = new HtmlWeb();
         *
         *  // we are going to use cache, for demonstration purpose only.
         *  string cachePath = Path.GetFullPath(@".\cache");
         *  if (!Directory.Exists(cachePath))
         *  {
         *      Directory.CreateDirectory(cachePath);
         *  }
         *  hw.CachePath = cachePath;
         *  hw.UsingCache = true;
         *
         *  // set the following to true, if you don't want to use the Internet at all and if you are sure something is available in the cache (for testing purposes for example).
         *  //			hw.CacheOnly = true;
         *
         *  // this is the URL we want to scrape
         *  // note: check the Terms of Service, copyrights and other legal issues if you plan to use this for non-personal work.
         *  string url = @"http://www.asp.net/Modules/MoreArticles.aspx?tabindex=0&mid=64";
         *
         *  // there are two methods to do the work
         *  // 1st method: use XSLT
         *  ElegantWay(hw, url);
         *
         *  // 2nd method: use C# code
         *  //			ManualWay(hw, url);
         * }*/


        /// <summary>
        /// Downloads <paramref name="url"/>, passes it through the
        /// <c>www.asp.net.ToRss.xsl</c> stylesheet and writes the result to
        /// <c>rss.xml</c>.
        /// </summary>
        /// <param name="hw">The <see cref="HtmlWeb"/> used to fetch and transform the page.</param>
        /// <param name="url">The address of the page to fetch.</param>
        public static void ElegantWay(HtmlWeb hw, string url)
        {
            string xslt = "www.asp.net.ToRss.xsl";

            // copy the stylesheet from two directories up into the current directory
            // (expected to be the folder that holds the .exe), overwriting any old copy
            File.Copy(@"..\..\" + xslt, xslt, true);

            // create an XML file; the using statement closes it even when the
            // download or the transformation throws
            using (XmlTextWriter writer = new XmlTextWriter("rss.xml", System.Text.Encoding.UTF8))
            {
                // get an Internet resource and write it as an XML file, after an XSLT transformation
                // if www.asp.net ever changes its HTML format, just change the XSL file. No need for recompilation.
                hw.LoadHtmlAsXml(url, xslt, null, writer);

                writer.Flush();
            }
        }
예제 #5
0
        /// <summary>
        /// Fetches <paramref name="url"/>, transforms it with the
        /// <c>www.asp.net.ToRss.xsl</c> stylesheet and saves the output as
        /// <c>rss.xml</c>.
        /// </summary>
        /// <param name="hw">The <see cref="HtmlWeb"/> that performs the download and transformation.</param>
        /// <param name="url">The address of the page to fetch.</param>
        static void ElegantWay(HtmlWeb hw, string url)
        {
            string xslt = "www.asp.net.ToRss.xsl";

            // copy the file so it exists aside the .exe
            File.Copy(@"..\..\" + xslt, xslt, true);

            // create an XML file; disposing the writer through using releases the
            // file handle even if LoadHtmlAsXml throws
            using (XmlTextWriter writer = new XmlTextWriter("rss.xml", System.Text.Encoding.UTF8))
            {
                // get an Internet resource and write it as an XML file, after an XSLT transformation
                // if www.asp.net ever changes its HTML format, just change the XSL file. No need for recompilation.
                hw.LoadHtmlAsXml(url, xslt, null, writer);

                writer.Flush();
            }
        }
예제 #6
0
        /// <summary>
        /// Downloads <paramref name="url"/> as XML and stores the parsed document in
        /// <c>xDoc</c>.
        /// </summary>
        /// <param name="url">The address of the page to fetch.</param>
        public XPathRequest(string url)
        {
            MemoryStream  buffer = new MemoryStream();
            XmlTextWriter xmlOut = new XmlTextWriter(buffer, null);

            // fetch the page and have it written as XML into the buffer
            new HtmlWeb().LoadHtmlAsXml(url, xmlOut);

            // go back to the start of the buffer and read everything that was written
            buffer.Position = 0;
            StreamReader bufferReader = new StreamReader(buffer);
            string       xmlText      = bufferReader.ReadToEnd();

            // parse the text and keep the resulting document
            XmlDocument parsed = new XmlDocument();
            parsed.LoadXml(xmlText);

            xDoc = parsed;
        }
예제 #7
0
        /// <summary>
        /// Requests the CVRF document on <c>api.msrc.microsoft.com</c> whose id is the
        /// first eight characters of <paramref name="fileName"/>, once as XML and once
        /// as HTML, and writes a message to the console if either attempt throws.
        /// </summary>
        /// <param name="fileName">
        /// A name whose first eight characters are appended to the CVRF endpoint URL.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="fileName"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="fileName"/> has fewer than eight characters.
        /// </exception>
        public static void HandleFailureCall(string fileName)
        {
            if (fileName == null)
            {
                throw new ArgumentNullException("fileName");
            }

            if (fileName.Length < 8)
            {
                throw new ArgumentOutOfRangeException("fileName",
                    "fileName must contain at least 8 characters.");
            }

            // Substring(0, 8) keeps the same leading eight characters as Remove(8)
            // but also accepts a name that is exactly eight characters long, which
            // Remove(8) can reject depending on the runtime.
            string url = "https://api.msrc.microsoft.com/cvrf/v2.0/cvrf/" + fileName.Substring(0, 8);

            try
            {
                HtmlWeb web = new HtmlWeb();

                using (MemoryStream  memory = new MemoryStream())
                using (XmlTextWriter xtw    = new XmlTextWriter(memory, null))
                {
                    web.LoadHtmlAsXml(url, xtw);
                    xtw.Flush();
                    memory.Position = 0;

                    // The parsed document is not used afterwards; loading it is kept
                    // so that malformed output still surfaces as an exception below.
                    // The reader shares the stream that the using statements release.
                    XmlDocument xmlDoc = new XmlDocument();
                    xmlDoc.LoadXml(new StreamReader(memory).ReadToEnd());
                }

                // Second request for the same URL; the result is discarded, only a
                // failure is of interest.
                web.Load(url);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception while Parcing Agility Parser Occured : " + ex.Message);
            }
        }