Ejemplo n.º 1
0
        /// <summary>
        /// Converts HTML content to XML with <see cref="SgmlReader"/> and loads the
        /// result into an <see cref="XmlDocument"/>. The content is wrapped in a
        /// synthetic <c>&lt;root&gt;</c> element before it is parsed.
        /// </summary>
        /// <param name="sContent">HTML markup placed between <c>&lt;root&gt;</c> and <c>&lt;/root&gt;</c>.</param>
        /// <returns>A whitespace-preserving document with no XML resolver attached.</returns>
        public static XmlDocument ParseHtml(string sContent)
        {
            string xml;

            // "using" guarantees the reader and writers are released even when the
            // conversion throws part-way through.
            using (StringReader sr = new StringReader("<root>" + sContent + "</root>"))
            using (SgmlReader reader = new SgmlReader())
            {
                reader.WhitespaceHandling = WhitespaceHandling.All;
                reader.CaseFolding        = Sgml.CaseFolding.ToLower;
                reader.InputStream        = sr;

                using (StringWriter sw = new StringWriter())
                {
                    using (XmlTextWriter w = new XmlTextWriter(sw))
                    {
                        w.Formatting = Formatting.Indented;
                        w.WriteStartDocument();

                        // WriteNode copies the current node and leaves the reader on the
                        // following one, so the loop only has to test for end of input.
                        reader.Read();
                        while (!reader.EOF)
                        {
                            w.WriteNode(reader, true);
                        }
                    }

                    // The text is still available after the XML writer has closed the StringWriter.
                    xml = sw.ToString();
                }
            }

            // create document
            XmlDocument doc = new XmlDocument();

            doc.PreserveWhitespace = true;
            doc.XmlResolver        = null;
            doc.LoadXml(xml);

            return doc;
        }
        /// <summary>
        /// Reads <c>resources/extrato1.ofx</c> under the current directory, converts it
        /// from SGML to XML and passes the node selected by <c>TRANSACTIONS_XPATH</c>
        /// to <c>GetTransactions</c>.
        /// </summary>
        public void ReadAccountOFX()
        {
            string ofxPath = Path.Combine(Directory.GetCurrentDirectory(), "resources", "extrato1.ofx");

            var sgml = new SgmlReader();
            sgml.InputStream = new StringReader(ClearHeader(ofxPath));
            sgml.DocType     = DOCUMENT_TYPE;

            var buffer = new StringWriter();
            var writer = new XmlTextWriter(buffer);

            // Copy node after node until the SGML input is exhausted.
            while (!sgml.EOF)
            {
                writer.WriteNode(sgml, true);
            }

            writer.Flush();
            writer.Close();

            // Strip leading whitespace, then glue the non-empty lines back together
            // without any line breaks.
            string[] lines = buffer.ToString()
                                   .TrimStart()
                                   .Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);

            var document = new XmlDocument();
            document.Load(new StringReader(string.Concat(lines)));

            GetTransactions(document.SelectSingleNode(TRANSACTIONS_XPATH));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates the reader used to parse an OFX message body.
        /// </summary>
        /// <param name="ofxBody">Text of the OFX body.</param>
        /// <returns>
        /// An <see cref="XmlTextReader"/> when the header's major version is 200 or higher,
        /// or when the converter is in use. Otherwise an SGML reader when the code is
        /// compiled with <c>USE_SGML_PARSER</c>, and <c>null</c> when it is not.
        /// </returns>
        protected XmlReader CreateReader(string ofxBody)
        {
            // XML-based OFX (or converted input) can be read with the plain XML reader.
            if (this.header.MajorVersion >= 200 || useConverter)
            {
                return new XmlTextReader(new System.IO.StringReader(ofxBody));
            }

            XmlReader result = null;

#if USE_SGML_PARSER
            SgmlReader sgReader = new SgmlReader
            {
                CaseFolding = CaseFolding.ToUpper,
                DocType     = null,
                InputStream = new System.IO.StringReader(ofxBody),
                //SystemLiteral = dtdSystemLiteral,
                WhitespaceHandling = WhitespaceHandling.None,
            };

            // The DTD is looked up as a resource embedded in the SgmlReader assembly;
            // when it is missing the reader is returned without an explicit DTD.
            Assembly a    = typeof(SgmlReader).Assembly;
            string   name = "PX.Objects.CA.ofx160.dtd";
            Stream   stm  = a.GetManifestResourceStream(name);
            if (stm != null)
            {
                // NOTE(review): the resource stream is left open, as before. It can be
                // disposed here once it is confirmed that SgmlDtd.Parse reads it eagerly.
                StreamReader sr = new StreamReader(stm);
                sgReader.Dtd = SgmlDtd.Parse(null, sgReader.DocType, sr, null, sgReader.WebProxy, null);
            }
            result = sgReader;
#endif
            return result;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Parses the HTML held in <paramref name="stream"/> (decoded as UTF-8) with
        /// <see cref="SgmlReader"/> and returns it as an <see cref="XDocument"/>.
        /// </summary>
        /// <param name="stream">Stream containing the HTML; it is closed together with the reader wrapped around it.</param>
        /// <returns>The loaded document.</returns>
        public XDocument ConvertToXML(MemoryStream stream)
        {
            // Exceptions propagate unchanged; the former catch block only re-threw.
            // Disposing the StreamReader also closes the underlying stream, exactly as
            // the earlier explicit sr.Close() did, but now on the failure path as well.
            using (StreamReader sr = new StreamReader(stream, Encoding.UTF8))
            using (SgmlReader sgml = new SgmlReader())
            {
                sgml.IgnoreDtd   = true;
                sgml.DocType     = "HTML";
                sgml.InputStream = sr;

                return XDocument.Load(sgml);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Gets the favicon URL of a page.
        /// </summary>
        /// <param name="url">Address of the page to download and inspect.</param>
        /// <returns>
        /// The <c>href</c> of the first <c>link</c> element under <c>head</c> whose
        /// <c>rel</c> attribute contains the token <c>icon</c>, resolved against
        /// <paramref name="url"/>. When no such link exists (or the page has no
        /// <c>head</c>), <c>DefaultFaviconPath</c> resolved against <paramref name="url"/>.
        /// </returns>
        /// <exception cref="HttpRequestException">The request failed or returned a non-success status code.</exception>
        public static async Task <Uri> GetFaviconUrl(Uri url)
        {
            // TODO: HttpClientFactory
            using var client   = new HttpClient();
            using var request  = new HttpRequestMessage(HttpMethod.Get, url);
            using var response = await client.SendAsync(request).ConfigureAwait(false);

            response.EnsureSuccessStatusCode();

            // Read the body asynchronously instead of blocking inside an async method.
            using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);

            using var sgmlReader = new SgmlReader
            {
                DocType     = "html",
                IgnoreDtd   = true,
                CaseFolding = CaseFolding.ToLower,
                InputStream = new StreamReader(stream, Encoding.UTF8),
            };

            var rootElement = XElement.Load(sgmlReader, LoadOptions.PreserveWhitespace);

            // A document without <head> simply yields no link and falls back to the default path.
            // The rel tokens are compared ordinally and case-insensitively so the match
            // does not depend on the current culture's lower-casing rules.
            var linkElement = rootElement.Element("head")?
                              .Elements()
                              .FirstOrDefault(e => e.Name == "link"
                                                   && (e.Attribute("rel")?.Value
                                                        .Split(' ')
                                                        .Contains("icon", StringComparer.OrdinalIgnoreCase) ?? false));
            var favicon = linkElement?.Attribute("href")?.Value;

            return !string.IsNullOrWhiteSpace(favicon)
                ? new Uri(url, favicon)
                : new Uri(url, DefaultFaviconPath);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Converts HTML markup to XML markup. Callers are expected to invoke this
        /// method inside a try-catch block.
        /// </summary>
        /// <param name="html">HTML markup to convert.</param>
        /// <returns>The converted markup, or an empty string when <paramref name="html"/> is null or whitespace.</returns>
        public static string HtmlToXml(string html)
        {
            if (string.IsNullOrWhiteSpace(html))
            {
                return string.Empty;
            }

            // Pre-process constructs that would otherwise not survive the conversion.
            string markup = StringUtils.ReplaceIgnoreCase(html, "<br>", "<br />");
            markup = StringUtils.ReplaceIgnoreCase(markup, "&#", "&amp;#");
            markup = markup.Replace(" @", " hexadecimal-value-0x40"); // Vue.js shorthand @click
            markup = markup.Replace(" :", " hexadecimal-value-0x3a"); // Vue.js shorthand :href

            var sgml = new SgmlReader
            {
                DocType     = "HTML",
                InputStream = new System.IO.StringReader(markup)
            };

            var buffer    = new System.IO.StringWriter();
            var xmlWriter = new XmlTextWriter(buffer);

            // Position on the first node, then copy node by node until the input ends.
            for (sgml.Read(); !sgml.EOF;)
            {
                xmlWriter.WriteNode(sgml, true);
            }

            xmlWriter.Flush();
            xmlWriter.Close();

            return buffer.ToString();
        }
 /// <summary>
 /// Converts an HTML document to an <see cref="XmlDocument"/> using SgmlReader.
 /// If that fails, the text is parsed as plain XML instead.
 /// </summary>
 /// <param name="html">Markup to load.</param>
 /// <returns>The loaded document, or <c>null</c> when both attempts fail.</returns>
 XmlDocument convertHTMLtoXML(string html)
 {
     try
     {
         XmlDocument docSites = new XmlDocument();
         try
         {
             // Dispose the SGML reader once the document is loaded (or loading failed).
             using (SgmlReader xhtmlConverter = new SgmlReader())
             {
                 xhtmlConverter.InputStream = new System.IO.StringReader(html);
                 xhtmlConverter.DocType     = "text/html";
                 docSites.Load(xhtmlConverter);
             }
         }
         catch (Exception)
         {
             // Fallback: the input may already be well-formed XML.
             docSites.LoadXml(html);
         }
         return docSites;
     }
     catch (Exception)
     {
         // Deliberate best-effort contract: callers receive null instead of an exception.
         return null;
     }
 }
Ejemplo n.º 8
0
        /************************************************************************/

        #region Private methods
        /// <summary>
        /// Runs OFX text in SGML form through <see cref="SgmlReader"/> and returns the
        /// equivalent XML on a single line.
        /// </summary>
        /// <param name="inputStr">OFX file content (SGML format).</param>
        /// <returns>The OFX content as XML, with leading whitespace and all line breaks removed.</returns>
        private static string SgmlToXml(string inputStr)
        {
            var sgml = new SgmlReader();
            sgml.InputStream = new StringReader(inputStr);
            sgml.DocType     = "OFX";

            var buffer = new StringWriter();
            var writer = new XmlTextWriter(buffer);

            // Pump every node from the SGML reader into the XML writer.
            while (!sgml.EOF)
            {
                writer.WriteNode(sgml, true);
            }

            writer.Flush();
            writer.Close();

            // Removing every CR and LF is equivalent to splitting on them, discarding
            // empty pieces and concatenating what remains.
            string xmlText = buffer.ToString().TrimStart();

            return xmlText.Replace("\r", string.Empty).Replace("\n", string.Empty);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Resolves <paramref name="inputUri"/> against <paramref name="baseUri"/>, runs
        /// the tests found at the resulting local path and prints a summary of the
        /// passed, failed and ignored counts to the console.
        /// </summary>
        /// <param name="baseUri">Base URI used for resolution.</param>
        /// <param name="inputUri">URI of the test input, possibly relative.</param>
        void RunTest(Uri baseUri, string inputUri)
        {
            string path = new Uri(baseUri, inputUri).LocalPath;

            // Reset the counters for this run.
            this.passed  = 0;
            this.tests   = 0;
            this.ignored = 0;

            var sgml = new SgmlReader();
            if (verbose)
            {
                sgml.ErrorLog = Console.Error;
            }

            RunTest(sgml, path);

            // Whatever was neither passed nor ignored counts as failed.
            int failed = this.tests - (this.passed + this.ignored);

            Console.WriteLine("{0} Tests passed", this.passed);
            if (failed != 0)
            {
                Console.WriteLine("{0} Tests failed", failed);
            }
            if (this.ignored != 0)
            {
                Console.WriteLine("{0} Tests ignored", this.ignored);
            }
            Console.WriteLine();
        }
        /// <summary>
        /// Converts an OFX file from SGML to XML and returns the transactions found
        /// under <c>NiboConstants.TRANSACTIONS_XPATH</c>.
        /// </summary>
        /// <param name="filePath">Value handed to <c>ClearHeader</c> to obtain the SGML text.</param>
        /// <returns>The result of <c>GetTransactions</c> for the selected node.</returns>
        public IEnumerable <Transaction> ReadOfxFile(string filePath)
        {
            var sgml = new SgmlReader();
            sgml.InputStream = new StringReader(ClearHeader(filePath));
            sgml.DocType     = NiboConstants.DOCUMENT_TYPE;

            var buffer = new StringWriter();
            var writer = new XmlTextWriter(buffer);

            // Copy node after node until the SGML input is exhausted.
            while (!sgml.EOF)
            {
                writer.WriteNode(sgml, true);
            }

            writer.Flush();
            writer.Close();

            // Trim the start and rebuild the text from its non-empty lines, without line breaks.
            string[] lines = buffer.ToString()
                                   .TrimStart()
                                   .Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);

            var document = new XmlDocument();
            document.Load(new StringReader(string.Concat(lines)));

            XmlNode transactions = document.SelectSingleNode(NiboConstants.TRANSACTIONS_XPATH);

            return GetTransactions(transactions);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Converts the input of <paramref name="reader"/> to XML and writes it to the
        /// configured output, or to the console when no output is configured.
        /// </summary>
        /// <param name="reader">SGML reader to read from.</param>
        /// <param name="uri">Location assigned to the reader's <c>Href</c>; when null, standard input is read instead.</param>
        void Process(SgmlReader reader, string uri)
        {
            // Select the input source.
            if (uri != null)
            {
                reader.Href = uri;
            }
            else
            {
                reader.InputStream = Console.In;
            }

            // Unless an encoding was already chosen, use the one reported by the reader.
            if (this.encoding == null)
            {
                this.encoding = reader.GetEncoding();
            }

            XmlTextWriter w;
            if (output != null)
            {
                w = new XmlTextWriter(output, this.encoding);
            }
            else
            {
                w = new XmlTextWriter(Console.Out);
            }

            if (formatted)
            {
                w.Formatting = Formatting.Indented;
            }
            if (!noxmldecl)
            {
                w.WriteStartDocument();
            }

            // Position on the first node, then copy until the end of the input.
            for (reader.Read(); !reader.EOF;)
            {
                w.WriteNode(reader, true);
            }

            w.Flush();
            w.Close();
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Parses HTML from <paramref name="reader"/> and stores the resulting
        /// document as the page content.
        /// </summary>
        /// <param name="reader">
        /// Source of the HTML text.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="reader"/> is <c>null</c>.
        /// </exception>
        protected void SetContent(TextReader reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            // Target document: keeps whitespace and has no XML resolver attached.
            var document = new XmlDocument();
            document.PreserveWhitespace = true;
            document.XmlResolver        = null;

            // HTML parser: lower-case names, all whitespace reported, DTD ignored.
            using (var sgmlReader = new SgmlReader
            {
                DocType            = "HTML",
                IgnoreDtd          = true,
                WhitespaceHandling = WhitespaceHandling.All,
                CaseFolding        = CaseFolding.ToLower,
                InputStream        = reader
            })
            {
                document.Load(sgmlReader);

                _content = document;
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Converts HTML to well-formed, indented XML and optionally extracts the
        /// values of the nodes selected by an XPath expression.
        /// </summary>
        /// <param name="html">HTML markup to convert.</param>
        /// <param name="xpathNavPath">
        /// XPath expression whose matching node values are returned one per line;
        /// <c>null</c> returns the whole converted markup.
        /// </param>
        /// <returns>
        /// The converted markup or the selected values, with every CR replaced by LF and
        /// pairs of LFs collapsed in one pass. When anything fails, the exception message
        /// is returned instead.
        /// </returns>
        public static string GetWellFormedHTML(string html, string xpathNavPath)
        {
            try
            {
                string xml;

                // "using" releases reader and writers on every path, so the handler
                // below never has to touch objects that may not have been created.
                using (StringWriter sw = new StringWriter())
                {
                    using (SgmlReader reader = new SgmlReader())
                    using (XmlTextWriter writer = new XmlTextWriter(sw))
                    {
                        reader.DocType     = "HTML";
                        reader.InputStream = new StringReader(html);
                        writer.Formatting  = Formatting.Indented;

                        // WriteNode already advances the reader to the next node, so Read()
                        // is only called explicitly to step over whitespace. Calling Read()
                        // on every iteration would silently skip the node WriteNode stopped on.
                        reader.Read();
                        while (!reader.EOF)
                        {
                            if (reader.NodeType == XmlNodeType.Whitespace)
                            {
                                reader.Read();
                            }
                            else
                            {
                                writer.WriteNode(reader, true);
                            }
                        }
                    }

                    xml = sw.ToString();
                }

                string result;
                if (xpathNavPath == null)
                {
                    result = xml;
                }
                else
                {
                    // Filter out nodes from the converted markup.
                    StringBuilder     sb    = new StringBuilder();
                    XPathDocument     doc   = new XPathDocument(new StringReader(xml));
                    XPathNavigator    nav   = doc.CreateNavigator();
                    XPathNodeIterator nodes = nav.Select(xpathNavPath);
                    while (nodes.MoveNext())
                    {
                        sb.Append(nodes.Current.Value).Append('\n');
                    }
                    result = sb.ToString();
                }

                return result.Replace("\r", "\n").Replace("\n\n", "\n");
            }
            catch (Exception exp)
            {
                // Contract kept from the original: failures are reported as text.
                return exp.Message;
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Loads the page at <paramref name="uri"/> through <see cref="SgmlReader"/> and
        /// merges every row returned by <c>LoadItemColumn</c> into the item table.
        /// </summary>
        /// <param name="uri">Location assigned to the reader's <c>Href</c>.</param>
        /// <param name="offset">Value added to the converted id to form the table key; <c>ItemId</c> keeps the id without it.</param>
        /// <param name="p">Supplies the id converter, the item converter and the optional post-processing step.</param>
        /// <param name="existsItems">Receives every item encountered during this load.</param>
        /// <param name="memo">Stored in <c>Memo1</c> of items created by this call.</param>
        public void Load(string uri, int offset, ItemLoadParameter p, HashSet <Item> existsItems, string memo)
        {
            // The SgmlReader fetches the page itself via Href; the WebClient that used
            // to be created here was never used and has been removed.
            using (var sgml = new SgmlReader {
                Href = uri
            })
            {
                var doc = new XmlDocument();
                doc.Load(sgml);

                foreach (var strs in LoadItemColumn(doc))
                {
                    var id = p.IdConverter(strs) + offset;

                    // Reuse the existing item for this id or register a new one.
                    Item item;
                    if (!_items.TryGetValue(id, out item))
                    {
                        _items.Add(id, item = new Item
                        {
                            Id     = id,
                            ItemId = id - offset,
                            Memo1  = memo,
                        });
                    }

                    p.ItemConverter(strs, item);
                    p.PostProcess?.Invoke(item);

                    existsItems.Add(item);
                }
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Processes HTML markup.
        /// </summary>
        /// <param name="input">The string to process.</param>
        /// <param name="skipHtmlNode">Whether to skip the html node.</param>
        /// <param name="clearTag">Whether to strip HTML tags and output plain text only.</param>
        /// <param name="maxCount">Number of text characters to copy; when <c>maxCount</c> &lt;= 0 all text is copied.</param>
        /// <param name="endStr">Text appended after the copied part when only part of the text was copied, e.g. "...".</param>
        /// <returns>The processed HTML markup; a null or empty <paramref name="input"/> is returned unchanged.</returns>
        public static string ProcessHtml(string input, bool skipHtmlNode, bool clearTag, int maxCount, string endStr)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            var buffer    = new StringWriter();
            var xmlWriter = new XmlTextWriter(buffer)
            {
                Formatting = Formatting.Indented
            };

            var sgml = new SgmlReader
            {
                DocType     = "HTML",
                InputStream = new StringReader(input)
            };

            // The actual conversion is delegated to WriteXml.
            WriteXml(xmlWriter, sgml, true, skipHtmlNode, clearTag, maxCount, endStr);

            xmlWriter.Flush();
            xmlWriter.Close();
            sgml.Close();

            return buffer.ToString();
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Parses HTML text into an <see cref="XDocument"/> using <see cref="SgmlReader"/>
        /// with lower-cased names and whitespace handling set to none.
        /// </summary>
        /// <param name="htmlContent">HTML markup to parse.</param>
        /// <returns>The loaded document.</returns>
        private static XDocument LoadDocument(ReadOnlySpan <char> htmlContent)
        {
            using var reader = new SgmlReader {
                      CaseFolding        = CaseFolding.ToLower,
                      DocType            = "HTML",
                      WhitespaceHandling = WhitespaceHandling.None
                  };

            // Ask the encoder for the exact size. A UTF-16 code unit can need up to three
            // UTF-8 bytes, so a buffer of Length * 2 can be too small and make GetBytes throw.
            int requiredBytes = Encoding.UTF8.GetByteCount(htmlContent);
            var buffer        = ArrayPool <byte> .Shared.Rent(requiredBytes);

            try
            {
                int byteCount = Encoding.UTF8.GetBytes(htmlContent, buffer);

                using var sr = new StreamReader(new MemoryStream(buffer, 0, byteCount));

                reader.InputStream = sr;

                // The document is fully loaded before the pooled buffer is handed back.
                return XDocument.Load(reader);
            }
            finally
            {
                ArrayPool <byte> .Shared.Return(buffer);
            }
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Converts HTML to well-formed XHTML with the help of <see cref="SgmlReader"/>.
 /// </summary>
 /// <param name="input">The text to convert; it is wrapped in an <c>html</c> element for parsing.</param>
 /// <returns>
 /// The converted markup with the first 6 and the last 7 characters (the wrapping
 /// <c>html</c> tags) removed; an empty string for <c>null</c> input; or a message starting
 /// with "Error converting HTML to XHTML: " when the conversion fails.
 /// </returns>
 public static string HtmlToXhtml(string input)
 {
     if (input == null)
     {
         return string.Empty;
     }

     const string openTag  = "<html>";
     const string closeTag = "</html>";

     try
     {
         var sgml = new SgmlReader();
         sgml.DocType            = "HTML";
         sgml.InputStream        = new StringReader(openTag + input + closeTag);
         sgml.CaseFolding        = CaseFolding.ToLower;
         sgml.WhitespaceHandling = WhitespaceHandling.None;

         var buffer      = new StringWriter(new StringBuilder(), null);
         var xhtmlWriter = new XmlTextWriter(buffer);
         xhtmlWriter.Formatting = Formatting.Indented;
         xhtmlWriter.IndentChar = '\t';

         // Copy node after node until the input is exhausted.
         while (!sgml.EOF)
         {
             xhtmlWriter.WriteNode(sgml, true);
         }
         xhtmlWriter.Close();

         // Cut the wrapper tags off both ends of the output.
         string converted = buffer.ToString();
         int    innerSize = converted.Length - (openTag.Length + closeTag.Length);

         return converted.Substring(openTag.Length, innerSize);
     }
     catch (Exception ex)
     {
         return "Error converting HTML to XHTML: " + ex.Message;
     }
 }
Ejemplo n.º 18
0
        /// <summary>
        /// Turns OFX content in SGML form into XML.
        /// </summary>
        /// <param name="file">OFX file (SGML format); it is passed through <c>ParseHeader</c> before conversion.</param>
        /// <returns>The OFX content as XML without leading whitespace or line breaks.</returns>
        private string SGMLToXML(string file)
        {
            // Set up the SGML reader.
            var sgml = new SgmlReader
            {
                InputStream = new StringReader(ParseHeader(file)),
                DocType     = "OFX"
            };

            var buffer = new StringWriter();
            var writer = new XmlTextWriter(buffer);

            // Feed the reader's nodes into the XML writer until none are left.
            while (!sgml.EOF)
            {
                writer.WriteNode(sgml, true);
            }

            writer.Flush();
            writer.Close();

            // Rebuild the text from its non-empty lines, without any line breaks.
            string[] lines = buffer.ToString()
                                   .TrimStart()
                                   .Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);

            return string.Concat(lines);
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Turns OFX content in SGML form into XML, using the DTD read from the
        /// <c>OfxSharpLib.ofx160.dtd</c> resource of this type's assembly.
        /// </summary>
        /// <param name="file">OFX file (SGML format); it is passed through <c>ParseHeader</c> before conversion.</param>
        /// <returns>The OFX content as XML without leading whitespace or line breaks.</returns>
        private string SgmltoXml(string file)
        {
            Stream dtdStream = this.GetType().Assembly.GetManifestResourceStream("OfxSharpLib.ofx160.dtd");

            var sgml = new SgmlReader();
            sgml.InputStream = new StringReader(ParseHeader(file));
            sgml.DocType     = "OFX";
            sgml.Dtd         = SgmlDtd.Parse(null, "OFX", new StreamReader(dtdStream), null, null, null);

            var buffer = new StringWriter();
            var writer = new XmlTextWriter(buffer);

            // Feed the reader's nodes into the XML writer until none are left.
            while (!sgml.EOF)
            {
                writer.WriteNode(sgml, true);
            }

            writer.Flush();
            writer.Close();

            // Removing every CR and LF is equivalent to splitting on them, discarding
            // empty pieces and joining the rest with an empty separator.
            string xmlText = buffer.ToString().TrimStart();

            return xmlText.Replace("\r", string.Empty).Replace("\n", string.Empty);
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Runs one reader test: lets <paramref name="callback"/> copy
        /// <paramref name="source"/> from an <see cref="SgmlReader"/> to an XML writer,
        /// checks that the output loads as XML and returns it.
        /// </summary>
        /// <param name="caseFolding">Case folding applied by the reader.</param>
        /// <param name="doctype">Document type assigned to the reader.</param>
        /// <param name="format">When true, whitespace is dropped on input and the output is indented.</param>
        /// <param name="source">SGML/HTML text to parse.</param>
        /// <param name="callback">Performs the transfer from reader to writer.</param>
        /// <returns>The produced markup, trimmed and with carriage returns removed.</returns>
        private static string RunTest(CaseFolding caseFolding, string doctype, bool format, string source, SgmlReaderTestCallback callback)
        {
            // Reader side.
            var sgml = new SgmlReader();
            sgml.CaseFolding        = caseFolding;
            sgml.DocType            = doctype;
            sgml.InputStream        = new StringReader(source);
            sgml.WhitespaceHandling = format ? WhitespaceHandling.None : WhitespaceHandling.All;

            // Writer side.
            var buffer = new StringWriter();
            var writer = new XmlTextWriter(buffer);
            if (format)
            {
                writer.Formatting = Formatting.Indented;
            }

            callback(sgml, writer);
            writer.Close();

            string actual = buffer.ToString();

            // The output must load as XML; otherwise the test fails.
            try
            {
                using (var check = new StringReader(actual))
                {
                    new XmlDocument().Load(check);
                }
            }
            catch (Exception)
            {
                Assert.Fail("unable to parse sgml reader output:\n{0}", actual);
            }

            return actual.Trim().Replace("\r", "");
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Converts HTML to XML markup on a best-effort basis: if a node cannot be
        /// copied, conversion stops and whatever was written so far is returned.
        /// </summary>
        /// <param name="strInputHtml">HTML markup to convert.</param>
        /// <returns>The converted markup, or <c>&lt;html&gt;&lt;/html&gt;</c> for null or empty input.</returns>
        private string ProcessString(string strInputHtml)
        {
            if (string.IsNullOrEmpty(strInputHtml))
            {
                return "<html></html>";
            }

            using (StringWriter sw = new StringWriter())
            {
                // Reader and writer are released even if the initial Read() throws.
                using (SgmlReader rd = new SgmlReader())
                using (XmlTextWriter xw = new XmlTextWriter(sw))
                {
                    rd.DocType     = "HTML";
                    rd.InputStream = new System.IO.StringReader(strInputHtml);

                    rd.Read();
                    while (!rd.EOF)
                    {
                        try
                        {
                            xw.WriteNode(rd, true);
                        }
                        catch (Exception)
                        {
                            // Deliberate: keep the partial output instead of failing.
                            break;
                        }
                    }
                }

                // Closing the XML writer has flushed it; the text remains readable.
                return sw.ToString();
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Converts HTML markup to XML markup. Callers are expected to invoke this
        /// method inside a try-catch block.
        /// </summary>
        /// <param name="strInputHtml">HTML markup to convert.</param>
        /// <returns>The converted markup.</returns>
        public static string HtmlToXml(string strInputHtml)
        {
            // Pre-process constructs that would otherwise not survive the conversion.
            string markup = StringUtils.ReplaceIgnoreCase(strInputHtml, "<br>", "<br />");
            markup = StringUtils.ReplaceIgnoreCase(markup, "&#", "&amp;#");

            var sgml = new SgmlReader
            {
                DocType     = "HTML",
                InputStream = new System.IO.StringReader(markup)
            };

            var buffer    = new System.IO.StringWriter();
            var xmlWriter = new XmlTextWriter(buffer);

            // Position on the first node, then copy node by node until the input ends.
            for (sgml.Read(); !sgml.EOF;)
            {
                xmlWriter.WriteNode(sgml, true);
            }

            xmlWriter.Flush();
            xmlWriter.Close();

            return buffer.ToString();
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Loads the page whose address is in <c>textBox2</c>, takes the last table in it
        /// and prints the first and last cell of every row after the first to the console.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        private void button2_Click(object sender, EventArgs e)
        {
            XDocument page;

            // HTML to XML in just three lines.
            using (var sgml = new SgmlReader { Href = this.textBox2.Text, IgnoreDtd = true })
            {
                page = XDocument.Load(sgml);
            }

            var ns        = page.Root.Name.Namespace;
            var lastTable = page.Descendants(ns + "table").Last();

            // The first table row holds the column descriptions, so it is skipped.
            var rows = from tr in lastTable.Descendants(ns + "tr").Skip(1)
                       let cells = tr.Elements(ns + "td").ToList()
                       select new
                       {
                           Title       = cells.First().Value,
                           ReleaseDate = cells.Last().Value
                       };

            // Write out the result.
            foreach (var row in rows)
            {
                Console.WriteLine(string.Concat(row.Title, " - ", row.ReleaseDate));
            }
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Converts HTML to indented XML with upper-cased names.
        /// </summary>
        /// <param name="html">HTML markup to convert.</param>
        /// <returns>The converted markup, or an empty string when the conversion throws (the exception is written to the debug output).</returns>
        public static string ParseHtml(string html)
        {
            try
            {
                var sgml = new SgmlReader();
                sgml.DocType            = "HTML";
                sgml.InputStream        = new StringReader(html);
                sgml.CaseFolding        = CaseFolding.ToUpper;
                sgml.WhitespaceHandling = WhitespaceHandling.None;

                var buffer    = new StringWriter();
                var xmlWriter = new XmlTextWriter(buffer)
                {
                    Formatting = Formatting.Indented
                };

                // Copy node after node until the input is exhausted.
                while (!sgml.EOF)
                {
                    xmlWriter.WriteNode(sgml, true);
                }
                xmlWriter.Close();

                return buffer.ToString();
            }
            catch (Exception ex)
            {
                System.Diagnostics.Debug.WriteLine(ex.ToString());
                return string.Empty;
            }
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Cleanses the HTML with <c>CleanseSourceHtml</c>, converts it to XML and removes
        /// the XHTML namespace declaration from the output.
        /// </summary>
        /// <param name="htmlData">HTML markup to convert.</param>
        /// <returns>The converted markup without <c>xmlns="http://www.w3.org/1999/xhtml"</c>.</returns>
        public static string ToXml(string htmlData)
        {
            string cleansed = CleanseSourceHtml(htmlData);

            using (var sgml = new SgmlReader())
            {
                sgml.InputStream = new StringReader(cleansed);

                using (var buffer = new StringWriter())
                {
                    using (XmlWriter writer = new XmlTextWriter(buffer))
                    {
                        // Copy node after node until the input is exhausted.
                        while (!sgml.EOF)
                        {
                            writer.WriteNode(sgml, true);
                        }
                    }

                    return buffer.ToString().Replace("xmlns=\"http://www.w3.org/1999/xhtml\"", "");
                }
            }
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Sets up the access-point loader: it downloads a fixed page, takes the last
        /// table on it and maps every row after the first to an <c>AccessPoint</c>,
        /// geocoding the address cell of each row.
        /// </summary>
        public AccessPointRepository()
        {
            _getAccessPoints = () =>
            {
                const string urlString = "http://www.pref.ehime.jp/h12600/wifi/osirase260822.html";

                XDocument page;
                using (var sgml = new SgmlReader { Href = urlString, IgnoreDtd = true })
                {
                    page = XDocument.Load(sgml);
                }

                using (var geocoder = new Geocoder())
                {
                    var ns   = page.Root.Name.Namespace;
                    var rows = page.Descendants(ns + "table")
                                   .Last()
                                   .Descendants(ns + "tr")
                                   .Skip(1); // skip the title row

                    // Materialized here so the geocoder is still alive while rows are mapped.
                    return rows.Select(row =>
                    {
                        var cells = row.Elements(ns + "td").ToList();

                        return new AccessPoint
                        {
                            Place           = cells[1].Value,
                            Address         = cells[2].Value,
                            ServiceProvider = cells[3].Value,
                            Location        = geocoder.GetLocationFromAddress(cells[2].Value)
                        };
                    }).ToList();
                }
            };
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Reads a <c>FictionBook</c> from text through an <see cref="SgmlReader"/>. The
        /// DTD obtained on the first call is kept in <c>fb2Dtd</c> and reused afterwards.
        /// </summary>
        /// <param name="stream">Text source of the book.</param>
        /// <returns>
        /// The book; its <c>ModificationType</c> is set to <c>Body</c> when the reader
        /// counted markup errors.
        /// </returns>
        private FictionBook ReadFictionBook(TextReader stream)
        {
            var sgml = new SgmlReader { InputStream = stream };

            if (this.fb2Dtd != null)
            {
                // Reuse the DTD kept from an earlier call.
                sgml.Dtd = this.fb2Dtd;
            }
            else
            {
                // First call: point the reader at the DTD file and keep what it yields.
                sgml.SystemLiteral = "fb2.dtd";
                this.fb2Dtd        = sgml.Dtd;
            }

            FictionBook book = ReadFictionBook(sgml);

            if (sgml.MarkupErrorsCount > 0)
            {
                book.ModificationType = ModificationType.Body;
            }

            return book;
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Attaches the input identified by <paramref name="uri"/> to
        /// <paramref name="reader"/> and then, depending on the configured mode, runs the
        /// debug dump, starts a crawl, or converts the input to XML on the configured output.
        /// </summary>
        /// <param name="reader">SGML reader to read from.</param>
        /// <param name="uri">Input location; when null, standard input is read.</param>
        /// <param name="loadAsStream">
        /// When true the input is opened here (local file or web request) and handed to the
        /// reader as a stream; when false the reader's <c>Href</c> is set instead.
        /// </param>
        void Process(SgmlReader reader, string uri, bool loadAsStream) {
            if (uri == null) {
                reader.InputStream = Console.In;
            }
            else if (loadAsStream) {
                Uri location = new Uri(uri);
                if (location.IsFile) {
                    // StreamReader expects a file-system path, so pass the local path
                    // rather than the raw text, which may be a file:// URI.
                    reader.InputStream = new StreamReader(location.LocalPath);
                } else {
                    WebRequest wr = WebRequest.Create(location);
                    reader.InputStream = new StreamReader(wr.GetResponse().GetResponseStream());
                }
            } else {
                reader.Href = uri;
            }

            if (debug) {
                Debug(reader);
                reader.Close();
                return;
            }
            if (crawl) {
                StartCrawl(reader, uri, basify);
                return;
            }

            // Unless an encoding was already chosen, use the one reported by the reader.
            if (this.encoding == null) {
                this.encoding = reader.GetEncoding();
            }

            XmlTextWriter w = null;
            if (output != null) {
                w = new XmlTextWriter(output, this.encoding);
            }
            else {
                w = new XmlTextWriter(Console.Out);
            }
            if (formatted) w.Formatting = Formatting.Indented;
            if (!noxmldecl) {
                w.WriteStartDocument();
            }
            if (testdoc) {
                // Round-trip through XmlDocument; XML errors are reported, not thrown.
                XmlDocument doc = new XmlDocument();
                try {
                    doc.Load(reader);
                    doc.WriteTo(w);
                } catch (XmlException e) {
                    Console.WriteLine("Error:" + e.Message);
                    Console.WriteLine("at line " + e.LineNumber + " column " + e.LinePosition);
                }
            } else {
                // Position on the first node, then copy until the end of the input.
                reader.Read();
                while (!reader.EOF) {
                    w.WriteNode(reader, true);
                }
            }
            w.Flush();
            w.Close();
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Returns the page as XML, reading it from the cache file when possible.
        /// </summary>
        /// <param name="update">Whether to force a refresh instead of using the cache.</param>
        /// <param name="url">Address of the page fetched when the cache is not used.</param>
        /// <returns>The document loaded from the cache file, or the freshly fetched one (which is also saved to the cache file).</returns>
        private XDocument HtmlToXml(bool update, string url)
        {
            // Path of the cache file.
            string cachePath = GetFullPath();

            // Read from the cache when it exists and no refresh is forced.
            if (!update && File.Exists(cachePath))
            {
                return XDocument.Load(cachePath);
            }

            XDocument fetched;
            using (var sgml = new SgmlReader { Href = url, IgnoreDtd = true })
            {
                // HTML to XML in just three lines.
                fetched = XDocument.Load(sgml);
            }

            fetched.Save(cachePath);

            return fetched;
        }
Ejemplo n.º 30
0
 /// <summary>
 /// Creates the browser with an <see cref="SgmlReader"/> configured for HTML:
 /// all whitespace is reported and names are folded to lower case.
 /// </summary>
 public Browser()
 {
     _sgmlReader = new SgmlReader
     {
         DocType            = "HTML",
         WhitespaceHandling = WhitespaceHandling.All,
         CaseFolding        = CaseFolding.ToLower
     };
 }
Ejemplo n.º 31
0
        /// <summary>
        /// Reads a <c>FictionBook</c> from text through an <see cref="SgmlReader"/>. The
        /// DTD obtained on the first call (from <c>options.dtdFile</c>) is kept in
        /// <c>fb2Dtd</c> and reused afterwards.
        /// </summary>
        /// <param name="stream">Text source of the book.</param>
        /// <returns>
        /// The book; its <c>ModificationType</c> is set to <c>Body</c> when the reader
        /// counted markup errors.
        /// </returns>
        private FictionBook ReadFictionBook(TextReader stream)
        {
            var sgml = new SgmlReader { InputStream = stream };

            bool dtdCached = this.fb2Dtd != null;
            if (dtdCached)
            {
                // Reuse the DTD kept from an earlier call.
                sgml.Dtd = this.fb2Dtd;
            }
            else
            {
                // First call: point the reader at the configured DTD file and keep what it yields.
                sgml.SystemLiteral = options.dtdFile;
                this.fb2Dtd        = sgml.Dtd;
            }

            FictionBook book = ReadFictionBook(sgml);

            if (sgml.MarkupErrorsCount > 0)
            {
                book.ModificationType = ModificationType.Body;
            }

            return book;
        }
Ejemplo n.º 32
0
        /// <summary>
        /// Converts an HTML document to a string of XML markup.
        /// </summary>
        /// <param name="html">HTML source.</param>
        /// <returns>The markup produced by copying the parsed HTML to an XML writer.</returns>
        public static string ConvertHtmlToString(string html)
        {
            using (var sgml = new SgmlReader())
            {
                sgml.DocType     = "HTML";
                sgml.InputStream = new StringReader(html);

                using (var buffer = new StringWriter())
                {
                    using (var writer = new XmlTextWriter(buffer))
                    {
                        // Copy node after node until the input is exhausted.
                        while (!sgml.EOF)
                        {
                            writer.WriteNode(sgml, true);
                        }
                    }

                    return buffer.ToString();
                }
            }
        }