Beispiel #1
0
        /// <summary>
        /// Opens this entity for reading and records <paramref name="parent"/> as its parent.
        /// </summary>
        /// <remarks>
        /// An internal entity is read from <c>Literal</c> (when it is not null). For an external
        /// entity, <c>Uri</c> is resolved against <paramref name="baseUri"/>; a <c>file</c> URI is
        /// opened as a read-only <c>FileStream</c> and every other scheme is fetched through
        /// <c>HttpWebRequest</c>. The resulting stream is wrapped in an <c>HtmlStream</c> whose
        /// encoding is stored on this entity. An external entity without a <c>Uri</c> is reported
        /// through <c>Error</c>.
        /// </remarks>
        /// <param name="parent">The parent entity; may be null. When not null, its <c>Html</c> flag is copied.</param>
        /// <param name="baseUri">Base URI used to resolve <c>Uri</c>; when null, <c>Uri</c> is used as is.</param>
        public void Open(Entity parent, Uri baseUri)
        {
            Parent = parent;
            if (parent != null)
            {
                this.Html = parent.Html;
            }
            this.Line = 1;
            if (Internal)
            {
                if (this.Literal != null)
                {
                    this.stm = new StringReader(this.Literal);
                }
            }
            else if (this.Uri == null)
            {
                this.Error("Unresolvable entity '{0}'", this.Name);
            }
            else
            {
                if (baseUri != null)
                {
                    this.resolvedUri = new Uri(baseUri, this.Uri);
                }
                else
                {
                    this.resolvedUri = new Uri(this.Uri);
                }

                Stream stream;
                // Used unless the HTTP response declares a charset we can resolve.
                Encoding e = Encoding.Default;
                switch (this.resolvedUri.Scheme)
                {
                    case "file":
                        {
                            string path = this.resolvedUri.LocalPath;
                            stream = new FileStream(path, FileMode.Open, FileAccess.Read);
                        }
                        break;
                    default:
                        HttpWebRequest wr = (HttpWebRequest) WebRequest.Create(ResolvedUri);
                        wr.UserAgent = "Mozilla/4.0 (compatible;);";
                        wr.Timeout = 10000; // in case this is running in an ASPX page.
                        if (Proxy != null)
                        {
                            wr.Proxy = new WebProxy(Proxy);
                        }
                        wr.PreAuthenticate = false;
                        // Pass the credentials of the process.
                        wr.Credentials = CredentialCache.DefaultCredentials;

                        WebResponse resp = wr.GetResponse();
                        // Follow redirects: remember the URI that actually served the content.
                        Uri actual = resp.ResponseUri;
                        if (actual.AbsoluteUri != this.resolvedUri.AbsoluteUri)
                        {
                            this.resolvedUri = actual;
                        }

                        // Header tokens are ASCII; lower-case them culture-invariantly so that
                        // e.g. a Turkish locale does not turn "ISO" into a dotless-i variant.
                        string contentType = (resp.ContentType ?? string.Empty).ToLowerInvariant();
                        string mimeType = contentType;
                        int i = contentType.IndexOf(';');
                        if (i >= 0)
                        {
                            mimeType = contentType.Substring(0, i);
                        }
                        if (StringUtilities.EqualsIgnoreCase(mimeType.Trim(), "text/html"))
                        {
                            this.Html = true;
                        }

                        i = contentType.IndexOf("charset", StringComparison.Ordinal);
                        if (i >= 0)
                        {
                            // Only look for the terminating ';' once '=' has been found;
                            // searching from index -1 would throw ArgumentOutOfRangeException.
                            int j = contentType.IndexOf('=', i);
                            if (j >= 0)
                            {
                                int k = contentType.IndexOf(';', j);
                                if (k < 0)
                                {
                                    k = contentType.Length;
                                }
                                j++;
                                // The value may be quoted, e.g. charset="utf-8".
                                string charset = contentType.Substring(j, k - j).Trim().Trim('"', '\'');
                                try
                                {
                                    e = Encoding.GetEncoding(charset);
                                }
                                catch (ArgumentException)
                                {
                                    // Unknown or empty charset name: keep the default encoding.
                                }
                                catch (NotSupportedException)
                                {
                                    // Charset not available on this platform: keep the default encoding.
                                }
                            }
                        }
                        stream = resp.GetResponseStream();
                        break;
                }
                this.weOwnTheStream = true;
                HtmlStream html;
                try
                {
                    html = new HtmlStream(stream, e);
                }
                catch
                {
                    // Do not leak the file handle or network connection when wrapping fails.
                    if (stream != null)
                    {
                        stream.Close();
                    }
                    throw;
                }
                this.encoding = html.Encoding;
                this.stm = html;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads an fb2 document from <paramref name="stream"/>, optionally validates it against
        /// the configured schema and saves it, falling back to UTF-8 when the chosen encoding
        /// cannot represent the document.
        /// </summary>
        /// <param name="stream">Stream the document is read from.</param>
        /// <param name="filename">Source file name; used in log messages and passed to <c>GetFilename</c> to build the output path.</param>
        /// <param name="lastModifiedTime">Assigned to <c>ContainerDateTime</c> when the document's modification type is <c>ModificationType.None</c>.</param>
        /// <exception cref="InvalidFictionBookFormatException">
        /// Reading the document raised <see cref="InvalidOperationException"/> or
        /// <see cref="XmlException"/>, or no character encoding was detected.
        /// </exception>
        private void ProcessDocument(Stream stream, string filename, DateTime lastModifiedTime)
        {
            Encoding encoding = null;
            FictionBook document = null;

            Logger.WriteInformation("Processing fb2 document '{0}'.", filename);

            try
            {
                using (HtmlStream htmlStream = new HtmlStream(stream, Encoding.Default))
                {
                    encoding = htmlStream.Encoding;
                    document = ReadFictionBook(htmlStream);

                    ChangeDocumentVersion(document);

                    if (document.ModificationType == ModificationType.None)
                    {
                        document.ContainerDateTime = lastModifiedTime;
                    }
                }
            }
            catch (InvalidOperationException exp)
            {
                throw new InvalidFictionBookFormatException(exp.Message, exp);
            }
            catch (XmlException exp)
            {
                throw new InvalidFictionBookFormatException(exp.Message, exp);
            }

            try
            {
                if (encoding == null)
                {
                    throw new InvalidFictionBookFormatException("Can't detect a character encoding.");
                }

                // Passed to the encoder fallback; a quarter of the document's text length.
                long threshold = (long) (document.Document.InnerText.Length * 0.25);

                if (this.preferedCodepage != null)
                {
                    encoding = Encoding.GetEncoding((int) this.preferedCodepage, new EncoderCharEntityFallback(threshold), new DecoderExceptionFallback());
                }
                else if (encoding.IsSingleByte)
                {
                    encoding = Encoding.GetEncoding(encoding.CodePage, new EncoderCharEntityFallback(threshold), new DecoderExceptionFallback());
                }

                // First attempt uses the selected encoding, the second one UTF-8.
                const int MaxSaveAttempts = 2;
                bool done = false;
                int retryCount = 0;

                do
                {
                    try
                    {
                        if (++retryCount > MaxSaveAttempts)
                        {
                            // Previously the document was dropped here without any trace.
                            Logger.WriteWarning("Unable to save document '{0}': encoding failed after {1} attempts.", filename, MaxSaveAttempts);
                            break;
                        }

                        string outputFullPath = GetFilename(this.outputDirectoryGood, filename, document);
                        string outputDirectory = Path.GetDirectoryName(outputFullPath).Trim();
                        string outputFilename = Path.GetFileName(outputFullPath).Trim();

                        if (options.validate)
                        {
                            try
                            {
                                XmlParserContext context = new XmlParserContext(null, null, "", XmlSpace.None);

                                XmlReaderSettings settings = new XmlReaderSettings();
                                settings.ValidationType = ValidationType.Schema;
                                settings.Schemas = this.xsdSchema;

                                // Dispose both readers even when validation throws.
                                using (XmlTextReader nodeReader = new XmlTextReader(document.Document.InnerXml, XmlNodeType.Document, context))
                                using (XmlReader reader = XmlReader.Create(nodeReader, settings))
                                {
                                    // Reading to the end is what triggers schema validation.
                                    while (reader.Read())
                                    {
                                    }
                                }
                            }
                            catch (XmlSchemaValidationException exp)
                            {
                                Logger.WriteWarning(exp.Message);
                                Logger.WriteLine(TraceEventType.Verbose, exp);

                                // Schema-invalid documents go to a separate directory.
                                outputDirectory = this.outputDirectoryNonValid;
                            }
                        }

                        SaveFictionBook(outputDirectory, outputFilename, document, encoding);

                        done = true;
                    }
                    catch (EncoderFallbackException exp)
                    {
                        Logger.WriteLineIf(false, TraceEventType.Warning, filename);
                        Logger.WriteWarning("Invalid document encoding ({0}) detected, utf-8 is used instead.", encoding.WebName);
                        Logger.WriteLine(TraceEventType.Verbose, exp);

                        encoding = Encoding.UTF8;
                    }
                }
                while (!done);
            }
            catch (IOException exp)
            {
                // An I/O failure is treated as fatal: log, flush and terminate the process.
                Logger.WriteLine(TraceEventType.Critical, exp.Message);
                Logger.WriteLine(TraceEventType.Verbose, exp);
                Logger.Flush();

                Environment.Exit(1);
            }
            catch (UnauthorizedAccessException exp)
            {
                Logger.WriteError(exp.Message);
                Logger.WriteLine(TraceEventType.Verbose, exp);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Reads an fb2 document from <paramref name="stream"/> and saves it into the
        /// <c>"Temp"</c> directory, falling back to UTF-8 when the chosen encoding cannot
        /// represent the document.
        /// </summary>
        /// <param name="stream">Stream the document is read from.</param>
        /// <param name="filename">Source file name; used in log messages and passed to <c>GetFilename</c> to derive the output file name.</param>
        /// <param name="lastModifiedTime">Assigned to <c>ContainerDateTime</c> when the document's modification type is <c>ModificationType.None</c>.</param>
        /// <exception cref="Exception">
        /// Reading the document raised <see cref="InvalidOperationException"/> or
        /// <see cref="XmlException"/> (kept as the inner exception), or no character encoding
        /// was detected.
        /// </exception>
        private void ProcessDocument(Stream stream, string filename, DateTime lastModifiedTime)
        {
            Encoding encoding = null;
            FictionBook document = null;

            ApplicationLogger.WriteStringToLog(string.Format("Processing fb2 document '{0}'.", filename));

            try
            {
                using (HtmlStream htmlStream = new HtmlStream(stream, Encoding.Default))
                {
                    encoding = htmlStream.Encoding;
                    document = ReadFictionBook(htmlStream);

                    ChangeDocumentVersion(document);

                    if (document.ModificationType == ModificationType.None)
                    {
                        document.ContainerDateTime = lastModifiedTime;
                    }
                }
            }
            catch (InvalidOperationException exp)
            {
                // Surface the real failure text and keep the cause as the inner exception;
                // the thrown type stays Exception so existing callers are unaffected.
                throw new Exception(exp.Message, exp);
            }
            catch (XmlException exp)
            {
                throw new Exception(exp.Message, exp);
            }

            try
            {
                if (encoding == null)
                {
                    throw new Exception("Can't detect a character encoding.");
                }

                // Passed to the encoder fallback; a quarter of the document's text length.
                long threshold = (long)(document.Document.InnerText.Length * 0.25);

                if (this.preferedCodepage != null)
                {
                    encoding = Encoding.GetEncoding((int)this.preferedCodepage, new EncoderCharEntityFallback(threshold), new DecoderExceptionFallback());
                }
                else if (encoding.IsSingleByte)
                {
                    encoding = Encoding.GetEncoding(encoding.CodePage, new EncoderCharEntityFallback(threshold), new DecoderExceptionFallback());
                }

                // First attempt uses the selected encoding, the second one UTF-8.
                const int MaxSaveAttempts = 2;
                bool done = false;
                int retryCount = 0;

                do
                {
                    try
                    {
                        if (++retryCount > MaxSaveAttempts)
                        {
                            // Previously the document was dropped here without any trace.
                            ApplicationLogger.WriteStringToError(string.Format("Unable to save document '{0}': encoding failed after {1} attempts.", filename, MaxSaveAttempts));
                            break;
                        }

                        // Only the file name part of the generated path is used; the
                        // target directory is fixed to "Temp".
                        string outputFullPath = GetFilename(this.outputDirectoryGood, filename, document);
                        string outputDirectory = "Temp";
                        string outputFilename = Path.GetFileName(outputFullPath).Trim();

                        SaveFictionBook(outputDirectory, outputFilename, document, encoding);

                        done = true;
                    }
                    catch (EncoderFallbackException)
                    {
                        ApplicationLogger.WriteStringToError(string.Format("Invalid document encoding ({0}) detected, utf-8 is used instead.", encoding.WebName));

                        encoding = Encoding.UTF8;
                    }
                }
                while (!done);
            }
            catch (IOException exp)
            {
                // An I/O failure is treated as fatal: log and terminate the process.
                ApplicationLogger.WriteStringToError(exp.Message);
                Environment.Exit(1);
            }
            catch (UnauthorizedAccessException exp)
            {
                ApplicationLogger.WriteStringToError(exp.Message);
            }
        }