/// <summary>
/// Opens this entity for reading and assigns the resulting reader to <c>stm</c>.
/// </summary>
/// <remarks>
/// Internal entities are read from <c>Literal</c> through a <see cref="StringReader"/>.
/// External entities are resolved against <paramref name="baseUri"/> and read either
/// from the local file system ("file" scheme) or through an HTTP request (any other
/// scheme). For HTTP responses the Content-Type header is inspected: a "text/html"
/// media type sets <c>Html</c>, and a "charset" parameter selects the initial encoding
/// handed to <c>HtmlStream</c>.
/// </remarks>
/// <param name="parent">Entity that references this one, or null. When non-null its <c>Html</c> flag is inherited.</param>
/// <param name="baseUri">Base URI used to resolve a relative <c>Uri</c>, or null to treat <c>Uri</c> as absolute.</param>
public void Open(Entity parent, Uri baseUri)
{
    Parent = parent;
    if (parent != null)
    {
        this.Html = parent.Html;
    }
    this.Line = 1;

    if (Internal)
    {
        if (this.Literal != null)
        {
            this.stm = new StringReader(this.Literal);
        }
    }
    else if (this.Uri == null)
    {
        // NOTE(review): Error presumably throws; its behavior is not visible here.
        this.Error("Unresolvable entity '{0}'", this.Name);
    }
    else
    {
        if (baseUri != null)
        {
            this.resolvedUri = new Uri(baseUri, this.Uri);
        }
        else
        {
            this.resolvedUri = new Uri(this.Uri);
        }

        Stream stream;
        Encoding e = Encoding.Default;
        switch (this.resolvedUri.Scheme)
        {
            case "file":
                {
                    string path = this.resolvedUri.LocalPath;
                    stream = new FileStream(path, FileMode.Open, FileAccess.Read);
                }
                break;

            default:
                HttpWebRequest wr = (HttpWebRequest) WebRequest.Create(ResolvedUri);
                wr.UserAgent = "Mozilla/4.0 (compatible;);";
                wr.Timeout = 10000; // in case this is running in an ASPX page.
                if (Proxy != null)
                {
                    wr.Proxy = new WebProxy(Proxy);
                }
                wr.PreAuthenticate = false;
                // Pass the credentials of the process.
                wr.Credentials = CredentialCache.DefaultCredentials;

                WebResponse resp = wr.GetResponse();
                try
                {
                    // Follow redirects: remember the URI the content actually came from.
                    Uri actual = resp.ResponseUri;
                    if (actual.AbsoluteUri != this.resolvedUri.AbsoluteUri)
                    {
                        this.resolvedUri = actual;
                    }

                    // Lower-case with the invariant culture so that header matching does
                    // not depend on the current thread culture (e.g. Turkish dotless i).
                    string contentType = resp.ContentType;
                    if (contentType == null)
                    {
                        contentType = string.Empty;
                    }
                    contentType = contentType.ToLowerInvariant();

                    string mimeType = contentType;
                    int separator = contentType.IndexOf(';');
                    if (separator >= 0)
                    {
                        mimeType = contentType.Substring(0, separator);
                    }
                    if (StringUtilities.EqualsIgnoreCase(mimeType.Trim(), "text/html"))
                    {
                        this.Html = true;
                    }

                    e = GetCharsetEncoding(contentType, Encoding.Default);
                    stream = resp.GetResponseStream();
                }
                catch
                {
                    // Do not leak the connection when header processing fails.
                    resp.Close();
                    throw;
                }
                break;
        }

        this.weOwnTheStream = true;
        HtmlStream html;
        try
        {
            html = new HtmlStream(stream, e);
        }
        catch
        {
            // The stream was never handed over to a reader, so release it here.
            stream.Dispose();
            throw;
        }
        this.encoding = html.Encoding;
        this.stm = html;
    }
}

/// <summary>
/// Returns the encoding named by the "charset" parameter of a lower-cased
/// Content-Type header value, or <paramref name="fallback"/> when the parameter is
/// missing, malformed, or names an encoding that cannot be created.
/// </summary>
/// <param name="contentType">Lower-cased Content-Type header value; must not be null.</param>
/// <param name="fallback">Encoding to return when no usable charset is found.</param>
private static Encoding GetCharsetEncoding(string contentType, Encoding fallback)
{
    int charsetIndex = contentType.IndexOf("charset", StringComparison.Ordinal);
    if (charsetIndex < 0)
    {
        return fallback;
    }

    // A "charset" token without '=' carries no value; searching from index -1
    // would throw ArgumentOutOfRangeException, so bail out first.
    int valueStart = contentType.IndexOf('=', charsetIndex);
    if (valueStart < 0)
    {
        return fallback;
    }
    valueStart++;

    int valueEnd = contentType.IndexOf(';', valueStart);
    if (valueEnd < 0)
    {
        valueEnd = contentType.Length;
    }

    // Parameter values may be quoted, e.g. charset="utf-8".
    string charset = contentType.Substring(valueStart, valueEnd - valueStart).Trim().Trim('"', '\'');
    if (charset.Length == 0)
    {
        return fallback;
    }

    try
    {
        return Encoding.GetEncoding(charset);
    }
    catch (Exception)
    {
        // Deliberate best effort: an unknown charset name falls back to the default
        // encoding instead of failing the whole fetch.
        return fallback;
    }
}
/// <summary>
/// Reads an fb2 document from <paramref name="stream"/>, optionally validates it
/// against the configured schema set, and saves it to an output directory.
/// </summary>
/// <remarks>
/// The document is saved with the detected encoding (or the preferred code page when
/// one is configured). If saving raises <see cref="EncoderFallbackException"/> the save
/// is retried once with UTF-8. When validation is enabled and the document fails schema
/// validation, it is written to <c>outputDirectoryNonValid</c> instead of the directory
/// derived from <c>outputDirectoryGood</c>.
/// An <see cref="IOException"/> during the save phase is logged and terminates the
/// process with exit code 1; an <see cref="UnauthorizedAccessException"/> is logged and
/// swallowed.
/// </remarks>
/// <param name="stream">Source stream containing the fb2 document. It is wrapped in an HtmlStream that is disposed here.</param>
/// <param name="filename">Name of the source file; used for logging and to build the output file name.</param>
/// <param name="lastModifiedTime">Timestamp stored as the container date when the document's modification type is None.</param>
/// <exception cref="InvalidFictionBookFormatException">
/// The document could not be read (wraps InvalidOperationException / XmlException), or
/// no character encoding could be detected.
/// </exception>
private void ProcessDocument(Stream stream, string filename, DateTime lastModifiedTime)
{
    Encoding encoding = null;
    FictionBook document = null;

    Logger.WriteInformation("Processing fb2 document '{0}'.", filename);

    try
    {
        using (HtmlStream htmlStream = new HtmlStream(stream, Encoding.Default))
        {
            encoding = htmlStream.Encoding;
            document = ReadFictionBook(htmlStream);

            ChangeDocumentVersion(document);

            if (document.ModificationType == ModificationType.None)
            {
                document.ContainerDateTime = lastModifiedTime;
            }
        }
    }
    catch (InvalidOperationException exp)
    {
        throw new InvalidFictionBookFormatException(exp.Message, exp);
    }
    catch (XmlException exp)
    {
        throw new InvalidFictionBookFormatException(exp.Message, exp);
    }

    try
    {
        if (encoding == null)
        {
            throw new InvalidFictionBookFormatException("Can't detect a character encoding.");
        }

        // A quarter of the document's text length is handed to the encoder fallback.
        // NOTE(review): presumably the number of unencodable characters tolerated
        // before EncoderCharEntityFallback gives up - confirm against that class.
        long threshold = (long) (document.Document.InnerText.Length * 0.25);

        if (this.preferedCodepage != null)
        {
            encoding = Encoding.GetEncoding((int) this.preferedCodepage, new EncoderCharEntityFallback(threshold), new DecoderExceptionFallback());
        }
        else if (encoding.IsSingleByte)
        {
            encoding = Encoding.GetEncoding(encoding.CodePage, new EncoderCharEntityFallback(threshold), new DecoderExceptionFallback());
        }

        bool done = false;
        int retryCount = 0;

        do
        {
            try
            {
                // At most two attempts: the chosen encoding, then UTF-8.
                if (++retryCount > 2)
                {
                    break;
                }

                if (encoding != null && document != null)
                {
                    string outputFullPath = GetFilename(this.outputDirectoryGood, filename, document);
                    string outputDirectory = Path.GetDirectoryName(outputFullPath).Trim();
                    string outputFilename = Path.GetFileName(outputFullPath).Trim();

                    if (options.validate)
                    {
                        try
                        {
                            XmlParserContext context = new XmlParserContext(null, null, "", XmlSpace.None);

                            XmlReaderSettings settings = new XmlReaderSettings();
                            settings.ValidationType = ValidationType.Schema;
                            settings.Schemas = this.xsdSchema;

                            // Dispose both readers even when validation throws; the
                            // validating reader does not close its input by default.
                            using (XmlTextReader nodeReader = new XmlTextReader(document.Document.InnerXml, XmlNodeType.Document, context))
                            using (XmlReader reader = XmlReader.Create(nodeReader, settings))
                            {
                                // Parse the XML file; reading it to the end performs the validation.
                                while (reader.Read())
                                {
                                }
                            }
                        }
                        catch (XmlSchemaValidationException exp)
                        {
                            Logger.WriteWarning(exp.Message);
                            Logger.WriteLine(TraceEventType.Verbose, exp);

                            // Invalid documents are still saved, but to a separate directory.
                            outputDirectory = this.outputDirectoryNonValid;
                        }
                    }

                    SaveFictionBook(outputDirectory, outputFilename, document, encoding);
                }

                done = true;
            }
            catch (EncoderFallbackException exp)
            {
                if (encoding != null)
                {
                    Logger.WriteLineIf(false, TraceEventType.Warning, filename);
                    Logger.WriteWarning("Invalid document encoding ({0}) detected, utf-8 is used instead.", encoding.WebName);
                    Logger.WriteLine(TraceEventType.Verbose, exp);
                }

                encoding = Encoding.UTF8;
            }
        }
        while (!done);
    }
    catch (IOException exp)
    {
        // I/O failures are treated as fatal for the whole run.
        Logger.WriteLine(TraceEventType.Critical, exp.Message);
        Logger.WriteLine(TraceEventType.Verbose, exp);
        Logger.Flush();
        Environment.Exit(1);
    }
    catch (UnauthorizedAccessException exp)
    {
        Logger.WriteError(exp.Message);
        Logger.WriteLine(TraceEventType.Verbose, exp);
    }
}
/// <summary>
/// Reads an fb2 document from <paramref name="stream"/> and saves it to the "Temp"
/// output directory.
/// </summary>
/// <remarks>
/// The document is saved with the detected encoding (or the preferred code page when
/// one is configured). If saving raises <see cref="EncoderFallbackException"/> the save
/// is retried once with UTF-8.
/// An <see cref="IOException"/> during the save phase is logged and terminates the
/// process with exit code 1; an <see cref="UnauthorizedAccessException"/> is logged and
/// swallowed.
/// </remarks>
/// <param name="stream">Source stream containing the fb2 document. It is wrapped in an HtmlStream that is disposed here.</param>
/// <param name="filename">Name of the source file; used for logging and to build the output file name.</param>
/// <param name="lastModifiedTime">Timestamp stored as the container date when the document's modification type is None.</param>
/// <exception cref="Exception">
/// The document could not be read (the original InvalidOperationException / XmlException
/// is available as InnerException), or no character encoding could be detected.
/// </exception>
private void ProcessDocument(Stream stream, string filename, DateTime lastModifiedTime)
{
    Encoding encoding = null;
    FictionBook document = null;

    ApplicationLogger.WriteStringToLog(string.Format("Processing fb2 document '{0}'.", filename));

    try
    {
        using (HtmlStream htmlStream = new HtmlStream(stream, Encoding.Default))
        {
            encoding = htmlStream.Encoding;
            document = ReadFictionBook(htmlStream);

            ChangeDocumentVersion(document);

            if (document.ModificationType == ModificationType.None)
            {
                document.ContainerDateTime = lastModifiedTime;
            }
        }
    }
    catch (InvalidOperationException exp)
    {
        // Keep the real message and the original exception; the thrown type stays
        // Exception so existing callers keep working.
        throw new Exception(exp.Message, exp);
    }
    catch (XmlException exp)
    {
        throw new Exception(exp.Message, exp);
    }

    try
    {
        if (encoding == null)
        {
            throw new Exception("Can't detect a character encoding.");
        }

        // A quarter of the document's text length is handed to the encoder fallback.
        // NOTE(review): presumably the number of unencodable characters tolerated
        // before EncoderCharEntityFallback gives up - confirm against that class.
        long threshold = (long)(document.Document.InnerText.Length * 0.25);

        if (this.preferedCodepage != null)
        {
            encoding = Encoding.GetEncoding((int)this.preferedCodepage, new EncoderCharEntityFallback(threshold), new DecoderExceptionFallback());
        }
        else if (encoding.IsSingleByte)
        {
            encoding = Encoding.GetEncoding(encoding.CodePage, new EncoderCharEntityFallback(threshold), new DecoderExceptionFallback());
        }

        bool done = false;
        int retryCount = 0;

        do
        {
            try
            {
                // At most two attempts: the chosen encoding, then UTF-8.
                if (++retryCount > 2)
                {
                    break;
                }

                if (encoding != null && document != null)
                {
                    string outputFullPath = GetFilename(this.outputDirectoryGood, filename, document);

                    // NOTE(review): the directory part of outputFullPath is ignored and
                    // "Temp" is always used - confirm this is intended.
                    string outputDirectory = "Temp";
                    string outputFilename = Path.GetFileName(outputFullPath).Trim();

                    SaveFictionBook(outputDirectory, outputFilename, document, encoding);
                }

                done = true;
            }
            catch (EncoderFallbackException)
            {
                if (encoding != null)
                {
                    ApplicationLogger.WriteStringToError(string.Format("Invalid document encoding ({0}) detected, utf-8 is used instead.", encoding.WebName));
                }

                encoding = Encoding.UTF8;
            }
        }
        while (!done);
    }
    catch (IOException exp)
    {
        // I/O failures are treated as fatal for the whole run.
        ApplicationLogger.WriteStringToError(exp.Message);
        Environment.Exit(1);
    }
    catch (UnauthorizedAccessException exp)
    {
        ApplicationLogger.WriteStringToError(exp.Message);
    }
}