/// <summary>
        /// Converts an OFX document in SGML form to XML, validating against the
        /// embedded <c>OfxSharpLib.ofx160.dtd</c> resource.
        /// </summary>
        /// <remarks>
        /// The input is first passed through <c>ParseHeader</c>; the resulting text is read
        /// with an <c>SgmlReader</c> and re-emitted through an <see cref="XmlTextWriter"/>.
        /// All line breaks are removed from the produced XML, so the result is a single line.
        /// </remarks>
        /// <param name="file">OFX File (SGML Format)</param>
        /// <returns>OFX File in XML format, with leading whitespace and all line breaks removed</returns>
        /// <exception cref="InvalidOperationException">
        /// The embedded DTD resource cannot be found in the assembly of this type.
        /// </exception>
        private string SgmltoXml(string file)
        {
            const string dtdResourceName = "OfxSharpLib.ofx160.dtd";

            var assembly = this.GetType().Assembly;

            string xmlText;

            // The DTD stream and its reader are only needed while the DTD is parsed,
            // but keeping them in scope for the whole conversion is harmless and keeps
            // the nesting shallow. They are released even when the conversion throws.
            using (var docType = assembly.GetManifestResourceStream(dtdResourceName))
            {
                if (docType == null)
                {
                    // GetManifestResourceStream returns null for an unknown resource name;
                    // fail with a message that names the missing resource.
                    throw new InvalidOperationException(
                        "Embedded resource '" + dtdResourceName + "' was not found in assembly '" + assembly.FullName + "'.");
                }

                using (var dtdReader = new StreamReader(docType))
                using (var sw = new StringWriter())
                {
                    var reader = new SgmlReader
                    {
                        InputStream = new StringReader(ParseHeader(file)),
                        DocType     = "OFX",
                        Dtd         = SgmlDtd.Parse(null, "OFX", dtdReader, null, null, null),
                    };

                    using (var xml = new XmlTextWriter(sw))
                    {
                        // Write output of the SGML reader to the XML text writer.
                        while (!reader.EOF)
                        {
                            xml.WriteNode(reader, true);
                        }

                        xml.Flush();
                    }

                    // The XML writer is closed at this point, so the text is complete.
                    xmlText = sw.ToString();
                }
            }

            // Drop leading whitespace, then remove every CR/LF by splitting on them
            // and gluing the non-empty pieces back together.
            var temp = xmlText.TrimStart().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);

            return String.Join("", temp);
        }
        /// <summary>
        /// Reads a <c>FictionBook</c> from text input by wrapping it in an <c>SgmlReader</c>
        /// and delegating to the reader-based overload.
        /// </summary>
        /// <remarks>
        /// The DTD is cached in <c>fb2Dtd</c>: on the first call the reader is pointed at
        /// <c>"fb2.dtd"</c> and whatever its <c>Dtd</c> property then yields is stored;
        /// later calls hand the stored DTD straight to the new reader.
        /// </remarks>
        /// <param name="stream">Text source assigned to the reader's <c>InputStream</c>.</param>
        /// <returns>
        /// The parsed book. Its <c>ModificationType</c> is set to <c>ModificationType.Body</c>
        /// when the reader reports one or more markup errors.
        /// </returns>
        private FictionBook ReadFictionBook(TextReader stream)
        {
            SgmlReader sgml = new SgmlReader { InputStream = stream };

            bool dtdCached = this.fb2Dtd != null;

            if (dtdCached)
            {
                sgml.Dtd = this.fb2Dtd;
            }
            else
            {
                // First use: let the reader resolve the DTD, then remember it.
                sgml.SystemLiteral = "fb2.dtd";
                this.fb2Dtd        = sgml.Dtd;
            }

            FictionBook book = ReadFictionBook(sgml);

            bool hadMarkupErrors = sgml.MarkupErrorsCount > 0;

            if (hadMarkupErrors)
            {
                book.ModificationType = ModificationType.Body;
            }

            return book;
        }
        /// <summary>
        /// Creates the reader used to parse an OFX body.
        /// </summary>
        /// <remarks>
        /// When the header's major version is below 200 and <c>useConverter</c> is false,
        /// an <c>SgmlReader</c> is created, but only if the code is compiled with
        /// <c>USE_SGML_PARSER</c>; without that symbol this branch yields <c>null</c>.
        /// In every other case a plain <see cref="XmlTextReader"/> is returned.
        /// </remarks>
        /// <param name="ofxBody">The OFX body text to read.</param>
        /// <returns>
        /// A reader over <paramref name="ofxBody"/>, or <c>null</c> when the SGML branch is
        /// selected and <c>USE_SGML_PARSER</c> is not defined.
        /// </returns>
        protected XmlReader CreateReader(string ofxBody)
        {
            XmlReader result = null;

            if (this.header.MajorVersion < 200 && !useConverter)
            {
#if USE_SGML_PARSER
                SgmlReader sgReader = new SgmlReader
                {
                    CaseFolding = CaseFolding.ToUpper,
                    DocType     = null,
                    InputStream = new System.IO.StringReader(ofxBody),
                    //SystemLiteral = dtdSystemLiteral,
                    WhitespaceHandling = WhitespaceHandling.None,
                };

                // NOTE(review): the resource is looked up in the assembly that declares
                // SgmlReader although its name starts with "PX.Objects.CA" - confirm that
                // this is the assembly the DTD is actually embedded in.
                Assembly a    = typeof(SgmlReader).Assembly;
                string   name = "PX.Objects.CA.ofx160.dtd";

                // A using statement accepts a null resource, so a missing DTD still just
                // leaves sgReader.Dtd unset, as before.
                using (Stream stm = a.GetManifestResourceStream(name))
                {
                    if (stm != null)
                    {
                        // The DTD is parsed inside the using block so the stream and reader
                        // are released as soon as parsing returns.
                        using (StreamReader sr = new StreamReader(stm))
                        {
                            sgReader.Dtd = SgmlDtd.Parse(null, sgReader.DocType, sr, null, sgReader.WebProxy, null);
                        }
                    }
                }
                result = sgReader;
#endif
            }
            else
            {
                // Reached exactly when MajorVersion >= 200 or useConverter is set
                // (the negation of the condition above).
                result = new XmlTextReader(new System.IO.StringReader(ofxBody));
            }
            return result;
        }
Exemple #4
0
 /// <summary>
 /// Loads a DTD from the embedded resource <c>"SgmlTests." + name</c> of the test assembly.
 /// </summary>
 /// <param name="docType">Value assigned to the <c>Name</c> of the parsed DTD.</param>
 /// <param name="name">
 /// Resource file name; its name without extension is passed to <c>SgmlDtd.Parse</c>
 /// as the DTD name.
 /// </param>
 /// <returns>The parsed DTD with <c>Name</c> set to <paramref name="docType"/>.</returns>
 private static SgmlDtd LoadDtd(string docType, string name)
 {
     string resourceName = "SgmlTests." + name;

     using (Stream resource = typeof(SGMLTests.Tests).Assembly.GetManifestResourceStream(resourceName))
     {
         string       parseName = System.IO.Path.GetFileNameWithoutExtension(name);
         StreamReader input     = new StreamReader(resource);

         SgmlDtd parsed = SgmlDtd.Parse(null, parseName, input, "", new NameTable(), new DesktopEntityResolver());

         // The DTD is parsed under the file-derived name, then renamed to the requested doc type.
         parsed.Name = docType;
         return parsed;
     }
 }
Exemple #5
0
        /// <summary>
        /// Parses the HTML DTD embedded in the assembly that declares <c>SgmlDtd</c>.
        /// </summary>
        /// <remarks>
        /// The resource name is built as
        /// <c>{namespace of WebPage}.{namespace of SgmlDtd}.Html.dtd</c>.
        /// The resource stream and its reader are released once parsing has returned.
        /// </remarks>
        /// <param name="nt">Name table handed to <c>SgmlDtd.Parse</c>.</param>
        /// <returns>The DTD parsed under the name <c>"HTML"</c>.</returns>
        private SgmlDtd ParseDtd(XmlNameTable nt)
        {
            string name = string.Format("{0}.{1}.Html.dtd",
                                        typeof(WebPage).Namespace, typeof(SgmlDtd).Namespace);

            // A missing resource still fails in the StreamReader constructor, as it did
            // before; the using blocks only add deterministic cleanup.
            using (Stream stream = typeof(SgmlDtd).Assembly.GetManifestResourceStream(name))
            using (StreamReader reader = new StreamReader(stream))
            {
                return SgmlDtd.Parse(null, "HTML", null, reader, null, null, nt);
            }
        }
Exemple #6
0
        /// <summary>
        /// Parses the <c>fb2.dtd</c> resource embedded in the executing assembly, using the
        /// doc type, proxy and name table of the supplied reader.
        /// </summary>
        /// <param name="sgml">Reader whose <c>DocType</c>, <c>WebProxy</c> and <c>NameTable</c> are passed to the parser; must not be null.</param>
        /// <returns>The parsed DTD (declared non-null by the code contract).</returns>
        private static SgmlDtd LoadFb2Dtd(SgmlReader sgml)
        {
            Contract.Requires(sgml != null);
            Contract.Ensures(Contract.Result <SgmlDtd>() != null);

            Assembly self         = Assembly.GetExecutingAssembly();
            string   resourceName = self.GetName().Name + ".Resources.fb2.dtd";

            using (Stream dtdStream = self.GetManifestResourceStream(resourceName))
            using (StreamReader dtdText = new StreamReader(dtdStream))
            {
                // The base URI is a fixed placeholder; the DTD text itself comes from the resource.
                Uri     baseUri = new Uri("http://localhost");
                SgmlDtd parsed  = SgmlDtd.Parse(baseUri, sgml.DocType, null, dtdText, null, sgml.WebProxy, sgml.NameTable);
                return parsed;
            }
        }
Exemple #7
0
        /// <summary>
        /// Recursively follows the <c>&lt;a href&gt;</c> links of <paramref name="doc"/>,
        /// loading each not-yet-visited HTML page through an <c>SgmlReader</c> and crawling
        /// it in turn. Links are only followed while the nesting depth is below 5.
        /// </summary>
        /// <remarks>
        /// Uses the instance members <c>depth</c>, <c>count</c>, <c>domain</c>, <c>proxy</c>
        /// and <c>visited</c>. <c>depth</c> is incremented on entry and always restored on
        /// exit, including when an exception escapes, so a caller that catches a failure of
        /// a nested crawl continues with its own correct depth.
        /// Links ending in .jpg/.gif/.mpg are skipped, as are links to other hosts when
        /// <c>domain</c> is set. Failures to open or parse a page are written to
        /// <paramref name="log"/> and do not stop the crawl.
        /// </remarks>
        /// <param name="dtd">DTD assigned to every reader created for a downloaded page.</param>
        /// <param name="doc">Document whose links are followed; its <c>BaseURI</c> (or a <c>/html/head/base</c> href) resolves relative links.</param>
        /// <param name="log">Receives progress and error messages.</param>
        /// <returns><c>true</c>, unless a nested crawl returned <c>false</c>.</returns>
        bool Crawl(SgmlDtd dtd, XmlDocument doc, TextWriter log) {
            depth++;
            try {
                string indent = new string(' ', Math.Max(depth, 0));

                count++;
                Uri baseUri = new Uri(doc.BaseURI);
                XmlElement baseElmt = (XmlElement)doc.SelectSingleNode("/html/head/base");
                if (baseElmt != null) {
                    string baseHref = baseElmt.GetAttribute("href");
                    if (baseHref != "") {
                        try {
                            baseUri = new Uri(baseHref);
                        }
                        catch (Exception ) {
                            Console.WriteLine("### Error parsing BASE href '"+baseHref+"'");
                        }
                    }
                }
                foreach (XmlElement a in doc.SelectNodes("//a")) {
                    string href = a.GetAttribute("href");
                    if (string.IsNullOrEmpty(href) || depth >= 5) {
                        continue;
                    }

                    // A malformed href must not abort the remaining links of this page.
                    Uri local;
                    if (!Uri.TryCreate(baseUri, href, out local)) {
                        log.WriteLine(indent+"### Error parsing href '"+href+"'");
                        log.Flush();
                        continue;
                    }
                    if (domain && baseUri.Host != local.Host) {
                        continue;
                    }

                    // Ordinal, case-insensitive comparison: independent of the current culture.
                    string ext = Path.GetExtension(local.AbsolutePath);
                    if (string.Equals(ext, ".jpg", StringComparison.OrdinalIgnoreCase) ||
                        string.Equals(ext, ".gif", StringComparison.OrdinalIgnoreCase) ||
                        string.Equals(ext, ".mpg", StringComparison.OrdinalIgnoreCase)) {
                        continue;
                    }

                    string url = local.AbsoluteUri;
                    if (visited.ContainsKey(url)) {
                        continue;
                    }
                    visited.Add(url, url);
                    log.WriteLine(indent+"Loading '"+url+"'");
                    log.Flush();

                    StreamReader stm = null;
                    WebResponse resp = null;
                    try {
                        HttpWebRequest wr = (HttpWebRequest)WebRequest.Create(url);
                        wr.Timeout = 10000;
                        if (proxy != null) wr.Proxy = new WebProxy(proxy);
                        wr.PreAuthenticate = false;
                        // Pass the credentials of the process.
                        wr.Credentials = CredentialCache.DefaultCredentials;

                        resp = wr.GetResponse();
                        Uri actual = resp.ResponseUri;
                        if (actual.AbsoluteUri != url) {
                            local = new Uri(actual.AbsoluteUri);
                            log.WriteLine(indent+"Redirected to '"+actual.AbsoluteUri+"'");
                            log.Flush();
                        }

                        // The header may carry parameters ("text/html; charset=utf-8"),
                        // so only the media type in front of the first ';' is compared.
                        string contentType = resp.ContentType ?? "";
                        int semicolon = contentType.IndexOf(';');
                        string mediaType = (semicolon >= 0 ? contentType.Substring(0, semicolon) : contentType).Trim();
                        if (!string.Equals(mediaType, "text/html", StringComparison.OrdinalIgnoreCase)) {
                            log.WriteLine(indent+"Skipping ContentType="+resp.ContentType);
                            log.Flush();
                            resp.Close();
                        }
                        else {
                            stm = new StreamReader(resp.GetResponseStream());
                        }
                    }
                    catch (Exception e) {
                        log.WriteLine(indent+"### Error opening URL: " + e.Message);
                        log.Flush();
                        // Release the connection if the failure happened after the response arrived.
                        if (resp != null) {
                            resp.Close();
                        }
                    }
                    if (stm == null) {
                        continue;
                    }

                    SgmlReader reader = new SgmlReader();
                    reader.Dtd = dtd;
                    reader.SetBaseUri(local.AbsoluteUri);
                    reader.InputStream = stm;
                    reader.WebProxy = proxy;

                    XmlDocument d2 = new XmlDocument();
                    d2.XmlResolver = null; // don't do any downloads!
                    try {
                        try {
                            d2.Load(reader);
                        }
                        finally {
                            // Close both before recursing, whether or not loading succeeded.
                            reader.Close();
                            stm.Close();
                        }
                        if (!Crawl(dtd, d2, log)) {
                            return false;
                        }
                    }
                    catch (Exception e) {
                        log.WriteLine(indent+"### Error parsing document '"+local.AbsoluteUri+"', "+e.Message);
                        log.Flush();
                    }
                }
                return true;
            }
            finally {
                depth--;
            }
        }