/// <summary>
/// Builds a logging-enabled managed connection configured from <paramref name="config"/>.
/// </summary>
/// <param name="route">Route the connection is requested for; it is not read by this method.</param>
/// <param name="config">Connection settings. When <c>null</c>, <c>ConnectionConfig.Default</c> is used.</param>
/// <returns>
/// A new <c>LoggingManagedHttpClientConnection</c> whose id is "http-outgoing-" followed by
/// the next value of <c>Counter</c>.
/// </returns>
public virtual ManagedHttpClientConnection Create(HttpRoute route, ConnectionConfig config)
{
    ConnectionConfig settings = config ?? ConnectionConfig.Default;
    Encoding charset = settings.GetCharset();

    // Unset error actions fall back to Report.
    CodingErrorAction onMalformed = settings.GetMalformedInputAction() ?? CodingErrorAction.Report;
    CodingErrorAction onUnmappable = settings.GetUnmappableInputAction() ?? CodingErrorAction.Report;

    // Without a configured charset both coders stay null and are passed on as such.
    CharsetDecoder decoder = null;
    CharsetEncoder encoder = null;
    if (charset != null)
    {
        decoder = charset.NewDecoder();
        decoder.OnMalformedInput(onMalformed);
        decoder.OnUnmappableCharacter(onUnmappable);

        encoder = charset.NewEncoder();
        encoder.OnMalformedInput(onMalformed);
        encoder.OnUnmappableCharacter(onUnmappable);
    }

    string connectionId = "http-outgoing-" + System.Convert.ToString(Counter.GetAndIncrement());

    // The two content-length strategies are left null on purpose.
    return new LoggingManagedHttpClientConnection(
        connectionId,
        log,
        headerlog,
        wirelog,
        settings.GetBufferSize(),
        settings.GetFragmentSizeHint(),
        decoder,
        encoder,
        settings.GetMessageConstraints(),
        null,
        null,
        requestWriterFactory,
        responseParserFactory);
}
/// <summary>
/// Creates a decoder wrapper for the given charset.
/// </summary>
/// <param name="cs">Charset that supplies the underlying decoder.</param>
/// <param name="rcn">Charset name as originally requested; stored as-is.</param>
internal StringDecoder(Charset cs, String rcn)
{
    this.RequestedCharsetName_Renamed = rcn;
    this.Cs = cs;

    // Both error actions are set to REPLACE on the freshly created decoder.
    var replacingDecoder = cs.NewDecoder()
        .OnMalformedInput(CodingErrorAction.REPLACE)
        .OnUnmappableCharacter(CodingErrorAction.REPLACE);
    this.Cd = replacingDecoder;

    // The charset is flagged as trusted when its type reports no class loader.
    bool hasNoClassLoader = cs.GetType().ClassLoader0 == null;
    this.IsTrusted = hasNoClassLoader;
}
/// <summary>
/// Reads one or more snowball-format word files into a single <c>CharArraySet</c>.
/// Each file is opened through <paramref name="loader"/> and decoded as UTF-8 with
/// both coding-error actions set to REPORT.
/// </summary>
/// <param name="loader">Resource loader used to open each file.</param>
/// <param name="wordFiles">File name list, split by <c>splitFileNames</c>.</param>
/// <param name="ignoreCase">Passed to the <c>CharArraySet</c> constructor.</param>
/// <returns>The populated set, or <c>null</c> when no file names were supplied.</returns>
protected internal CharArraySet getSnowballWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
{
    assureMatchVersion();

    IList<string> fileNames = splitFileNames(wordFiles);
    if (fileNames.Count <= 0)
    {
        return null;
    }

    // default stopwords list has 35 or so words, but maybe don't make it that
    // big to start
    CharArraySet collected = new CharArraySet(luceneMatchVersion, fileNames.Count * 10, ignoreCase);

    foreach (string fileName in fileNames)
    {
        InputStream input = null;
        TextReader wordReader = null;
        try
        {
            input = loader.openResource(fileName.Trim());
            CharsetDecoder strictUtf8 = StandardCharsets.UTF_8.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
            wordReader = new InputStreamReader(input, strictUtf8);
            WordlistLoader.getSnowballWordSet(wordReader, collected);
        }
        finally
        {
            // Reader and stream are both handed to the closing helper, whether or not loading succeeded.
            IOUtils.closeWhileHandlingException(wordReader, input);
        }
    }

    return collected;
}
/// <summary>
/// Loads synonyms using the <c>SynonymMap.Parser</c> implementation named by
/// <paramref name="cname"/>.
/// </summary>
/// <param name="loader">Resolves the parser class and opens the synonym resources.</param>
/// <param name="cname">Name of the parser class to instantiate.</param>
/// <param name="dedup">First constructor argument of the parser.</param>
/// <param name="analyzer">Third constructor argument of the parser.</param>
/// <returns>The map produced by <c>parser.Build()</c> after all resources were parsed.</returns>
/// <exception cref="Exception">
/// The parser could not be constructed; the original failure is attached as the inner exception.
/// </exception>
private SynonymMap LoadSynonyms(ResourceLoader loader, string cname, bool dedup, Analyzer analyzer)
{
    // One UTF-8 decoder with both error actions set to REPORT is reused for every resource
    // and reset before each use.
    CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);

    SynonymMap.Parser parser;
    Type clazz = loader.findClass(cname, typeof(SynonymMap.Parser));
    try
    {
        parser = clazz.getConstructor(typeof(bool), typeof(bool), typeof(Analyzer)).newInstance(dedup, expand, analyzer);
    }
    catch (Exception e)
    {
        // Keep the root cause attached instead of passing the exception where a message is expected.
        throw new Exception("Unable to instantiate synonym parser '" + cname + "'", e);
    }

    // NOTE(review): the readers created below are never closed here; confirm whether
    // parser.Parse takes ownership of them.
    if (File.Exists(synonyms))
    {
        // The configured value names a single existing file.
        decoder.reset();
        parser.Parse(new InputStreamReader(loader.openResource(synonyms), decoder));
    }
    else
    {
        // Otherwise treat the configured value as a list of resource names.
        IList<string> files = splitFileNames(synonyms);
        foreach (string file in files)
        {
            decoder.reset();
            parser.Parse(new InputStreamReader(loader.openResource(file), decoder));
        }
    }

    return parser.Build();
}
/// <summary>
/// Creates an InputStreamReader that uses the given charset decoder.
/// </summary>
/// <remarks>@since 1.4, @spec JSR-51</remarks>
/// <param name="in">An InputStream.</param>
/// <param name="dec">A charset decoder; must not be <c>null</c>.</param>
public InputStreamReader(InputStream @in, CharsetDecoder dec) : base(@in)
{
    if (dec != null)
    {
        Sd = StreamDecoder.forInputStreamReader(@in, this, dec);
        return;
    }

    // Reached only when no decoder was supplied.
    throw new NullPointerException("charset decoder");
}
/// <summary>
/// Returns the cached decoder, creating it on first use with both coding-error
/// actions set to REPORT.
/// </summary>
private CharsetDecoder Decoder()
{
    if (Dec != null)
    {
        return Dec;
    }

    Dec = Cs.NewDecoder()
        .OnMalformedInput(CodingErrorAction.REPORT)
        .OnUnmappableCharacter(CodingErrorAction.REPORT);
    return Dec;
}
/// <summary>
/// Decodes the buffer with a new decoder of the given charset whose coding-error
/// actions are both set to REPORT.
/// </summary>
/// <param name="b">Bytes to decode.</param>
/// <param name="charset">Charset that supplies the decoder.</param>
/// <returns>The decoded text.</returns>
/// <exception cref="Sharpen.CharacterCodingException"></exception>
private static string Decode(ByteBuffer b, System.Text.Encoding charset)
{
    CharsetDecoder strictDecoder = charset.NewDecoder();
    strictDecoder.OnMalformedInput(CodingErrorAction.REPORT);
    strictDecoder.OnUnmappableCharacter(CodingErrorAction.REPORT);

    var decodedChars = strictDecoder.Decode(b);
    return decodedChars.ToString();
}
/// <summary>
/// Creates a decoder for the named charset.
/// </summary>
/// <param name="charsetName">
/// Charset name; forwarded to the base constructor and resolved via <c>Charset.forName</c>.
/// </param>
public GenericDecoder(String charsetName)
    : base(charsetName) // C# chains to the base constructor here rather than with a Java-style super(...) call
{
    _charset = Charset.forName(charsetName);
    _decoder = _charset.newDecoder();
}
/// <summary>
/// Creates a managed client connection with the given id and an empty attribute map.
/// Every argument other than <paramref name="id"/> is forwarded unchanged to the base constructor.
/// </summary>
/// <param name="id">Identifier stored on this connection.</param>
public DefaultManagedHttpClientConnection(
    string id,
    int buffersize,
    int fragmentSizeHint,
    CharsetDecoder chardecoder,
    CharsetEncoder charencoder,
    MessageConstraints constraints,
    ContentLengthStrategy incomingContentStrategy,
    ContentLengthStrategy outgoingContentStrategy,
    HttpMessageWriterFactory<IHttpRequest> requestWriterFactory,
    HttpMessageParserFactory<HttpResponse> responseParserFactory)
    : base(
        buffersize,
        fragmentSizeHint,
        chardecoder,
        charencoder,
        constraints,
        incomingContentStrategy,
        outgoingContentStrategy,
        requestWriterFactory,
        responseParserFactory)
{
    this.attributes = new ConcurrentHashMap<string, object>();
    this.id = id;
}
/// <summary>Creates new instance of DefaultBHttpClientConnection.</summary>
/// <param name="buffersize">buffer size. Must be a positive number.</param>
/// <param name="fragmentSizeHint">fragment size hint.</param>
/// <param name="chardecoder">
/// decoder to be used for decoding HTTP protocol elements.
/// If <c>null</c> simple type cast will be used for byte to char conversion.
/// </param>
/// <param name="charencoder">
/// encoder to be used for encoding HTTP protocol elements.
/// If <c>null</c> simple type cast will be used for char to byte conversion.
/// </param>
/// <param name="constraints">
/// Message constraints. If <c>null</c>
/// <see cref="Org.Apache.Http.Config.MessageConstraints.Default"/> will be used.
/// </param>
/// <param name="incomingContentStrategy">
/// incoming content length strategy. If <c>null</c>
/// <see cref="Org.Apache.Http.Impl.Entity.LaxContentLengthStrategy.Instance"/> will be used.
/// </param>
/// <param name="outgoingContentStrategy">
/// outgoing content length strategy. If <c>null</c>
/// <see cref="Org.Apache.Http.Impl.Entity.StrictContentLengthStrategy.Instance"/> will be used.
/// </param>
/// <param name="requestWriterFactory">
/// request writer factory. If <c>null</c>
/// <see cref="Org.Apache.Http.Impl.IO.DefaultHttpRequestWriterFactory.Instance"/> will be used.
/// </param>
/// <param name="responseParserFactory">
/// response parser factory. If <c>null</c>
/// <see cref="Org.Apache.Http.Impl.IO.DefaultHttpResponseParserFactory.Instance"/> will be used.
/// </param>
internal DefaultBHttpClientConnection(
    int buffersize,
    int fragmentSizeHint,
    CharsetDecoder chardecoder,
    CharsetEncoder charencoder,
    MessageConstraints constraints,
    ContentLengthStrategy incomingContentStrategy,
    ContentLengthStrategy outgoingContentStrategy,
    HttpMessageWriterFactory<IHttpRequest> requestWriterFactory,
    HttpMessageParserFactory<HttpResponse> responseParserFactory)
    : base(buffersize, fragmentSizeHint, chardecoder, charencoder, constraints, incomingContentStrategy, outgoingContentStrategy)
{
    // The writer is bound to this connection's session output buffer.
    HttpMessageWriterFactory<IHttpRequest> writerFactory =
        requestWriterFactory ?? DefaultHttpRequestWriterFactory.Instance;
    this.requestWriter = writerFactory.Create(GetSessionOutputBuffer());

    // The parser is bound to the session input buffer; constraints are passed through as received.
    HttpMessageParserFactory<HttpResponse> parserFactory =
        responseParserFactory ?? DefaultHttpResponseParserFactory.Instance;
    this.responseParser = parserFactory.Create(GetSessionInputBuffer(), constraints);
}
/// <summary>Creates new instance of SessionInputBufferImpl.</summary>
/// <param name="metrics">HTTP transport metrics. Must not be <c>null</c>.</param>
/// <param name="buffersize">buffer size. Must be a positive number.</param>
/// <param name="minChunkLimit">
/// size limit below which data chunks should be buffered in memory
/// in order to minimize native method invocations on the underlying network socket.
/// The optimal value of this parameter can be platform specific and defines a trade-off
/// between performance of memory copy operations and that of native method invocation.
/// If negative, the default chunk limit of 512 will be used.
/// </param>
/// <param name="constraints">
/// Message constraints. If <c>null</c>
/// <see cref="Org.Apache.Http.Config.MessageConstraints.Default"/> will be used.
/// </param>
/// <param name="chardecoder">
/// chardecoder to be used for decoding HTTP protocol elements.
/// If <c>null</c> simple type cast will be used for byte to char conversion.
/// </param>
internal SessionInputBufferImpl(
    HttpTransportMetricsImpl metrics,
    int buffersize,
    int minChunkLimit,
    MessageConstraints constraints,
    CharsetDecoder chardecoder)
{
    Args.NotNull(metrics, "HTTP transport metrcis");
    Args.Positive(buffersize, "Buffer size");

    this.metrics = metrics;
    this.decoder = chardecoder;
    this.constraints = constraints ?? MessageConstraints.Default;

    if (minChunkLimit < 0)
    {
        this.minChunkLimit = 512;
    }
    else
    {
        this.minChunkLimit = minChunkLimit;
    }

    // The read buffer starts out empty; the line buffer gets the same initial capacity.
    this.buffer = new byte[buffersize];
    this.bufferpos = 0;
    this.bufferlen = 0;
    this.linebuffer = new ByteArrayBuffer(buffersize);
}
/// <summary>
/// Decodes <paramref name="utf8"/> with the decoder supplied by <c>DecoderFactory.Get()</c>.
/// </summary>
/// <param name="utf8">Bytes to decode.</param>
/// <param name="replace">
/// When <c>true</c>, both coding-error actions are switched to Replace for this call and
/// switched back to Report afterwards; when <c>false</c>, the decoder is used as obtained.
/// </param>
/// <returns>The decoded text.</returns>
/// <exception cref="CharacterCodingException"/>
private static string Decode(ByteBuffer utf8, bool replace)
{
    CharsetDecoder decoder = DecoderFactory.Get();
    if (!replace)
    {
        return decoder.Decode(utf8).ToString();
    }

    try
    {
        decoder.OnMalformedInput(CodingErrorAction.Replace);
        decoder.OnUnmappableCharacter(CodingErrorAction.Replace);
        return decoder.Decode(utf8).ToString();
    }
    finally
    {
        // set decoder back to its default value: REPORT. Doing this in finally means a
        // failed decode cannot leave the factory-supplied decoder in Replace mode.
        decoder.OnMalformedInput(CodingErrorAction.Report);
        decoder.OnUnmappableCharacter(CodingErrorAction.Report);
    }
}
/// <summary>
/// Tries a fixed list of candidate encodings (<c>Utf8</c>, the "file.encoding" runtime
/// property, <c>Iso88591</c>) in order and returns the first whose decoder decodes
/// <paramref name="bytes"/> without raising <c>CharacterCodingException</c>.
/// </summary>
/// <param name="bytes">Bytes to probe.</param>
/// <returns>The name of the first candidate that decodes cleanly, or <c>null</c> if none does.</returns>
internal static string GuessEncoding([NotNull] sbyte[] bytes)
{
    string[] candidates = new string[] { Utf8, Runtime.GetProperty("file.encoding"), Iso88591 };
    foreach (string candidate in candidates)
    {
        // The runtime property may be unset; skip it rather than asking for an unnamed encoding.
        if (string.IsNullOrEmpty(candidate))
        {
            continue;
        }

        CharsetDecoder decoder = Sharpen.Extensions.GetEncoding(candidate).NewDecoder();
        try
        {
            decoder.Decode(ByteBuffer.Wrap(bytes));
            return candidate;
        }
        catch (CharacterCodingException)
        {
            // Deliberately ignored: this candidate does not fit, fall through to the next one.
        }
    }

    // No encodings succeeded. Return null.
    return null;
}
/// <summary>Creates new instance of BHttpConnectionBase.</summary>
/// <param name="buffersize">buffer size. Must be a positive number.</param>
/// <param name="fragmentSizeHint">fragment size hint.</param>
/// <param name="chardecoder">
/// decoder to be used for decoding HTTP protocol elements.
/// If <c>null</c> simple type cast will be used for byte to char conversion.
/// </param>
/// <param name="charencoder">
/// encoder to be used for encoding HTTP protocol elements.
/// If <c>null</c> simple type cast will be used for char to byte conversion.
/// </param>
/// <param name="constraints">
/// Message constraints. If <c>null</c>
/// <see cref="Org.Apache.Http.Config.MessageConstraints.Default"/> will be used.
/// </param>
/// <param name="incomingContentStrategy">
/// incoming content length strategy. If <c>null</c>
/// <see cref="Org.Apache.Http.Impl.Entity.LaxContentLengthStrategy.Instance"/> will be used.
/// </param>
/// <param name="outgoingContentStrategy">
/// outgoing content length strategy. If <c>null</c>
/// <see cref="Org.Apache.Http.Impl.Entity.StrictContentLengthStrategy.Instance"/> will be used.
/// </param>
internal BHttpConnectionBase(
    int buffersize,
    int fragmentSizeHint,
    CharsetDecoder chardecoder,
    CharsetEncoder charencoder,
    MessageConstraints constraints,
    ContentLengthStrategy incomingContentStrategy,
    ContentLengthStrategy outgoingContentStrategy)
    : base()
{
    Args.Positive(buffersize, "Buffer size");

    // Each direction gets its own transport metrics; the connection metrics wrap both.
    HttpTransportMetricsImpl inboundMetrics = new HttpTransportMetricsImpl();
    HttpTransportMetricsImpl outboundMetrics = new HttpTransportMetricsImpl();

    // -1 is passed as the minimum chunk limit of the input buffer.
    this.inbuffer = new SessionInputBufferImpl(
        inboundMetrics,
        buffersize,
        -1,
        constraints ?? MessageConstraints.Default,
        chardecoder);
    this.outbuffer = new SessionOutputBufferImpl(outboundMetrics, buffersize, fragmentSizeHint, charencoder);
    this.connMetrics = new HttpConnectionMetricsImpl(inboundMetrics, outboundMetrics);

    this.incomingContentStrategy = incomingContentStrategy ?? LaxContentLengthStrategy.Instance;
    this.outgoingContentStrategy = outgoingContentStrategy ?? StrictContentLengthStrategy.Instance;
}
/// <summary>
/// Creates a reader whose decoder is obtained from the named charset, with both
/// coding-error actions set to REPLACE.
/// </summary>
/// <param name="charsetName">Name of the charset to decode with.</param>
/// <exception cref="ArgumentException">
/// The charset lookup raised <c>IllegalCharsetNameException</c> or
/// <c>UnsupportedCharsetException</c>; that exception is attached as the inner exception.
/// </exception>
public TextReader(String charsetName)
{
    try
    {
        Charset cs = Charset.forName(charsetName);
        this.decoder = cs.newDecoder();
        this.decoder.onMalformedInput(CodingErrorAction.REPLACE);
        this.decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    catch (IllegalCharsetNameException e)
    {
        throw CreateInvalidCharsetNameException(charsetName, e);
    }
    catch (UnsupportedCharsetException e)
    {
        throw CreateInvalidCharsetNameException(charsetName, e);
    }
}

// Logs the "generic.InvalidCharsetName" message at severe level and wraps the cause in an ArgumentException.
private static ArgumentException CreateInvalidCharsetNameException(String charsetName, Exception cause)
{
    String message = Logging.getMessage("generic.InvalidCharsetName", charsetName);
    Logging.logger().severe(message);
    return new ArgumentException(message, cause);
}
/// <summary>
/// Decodes the first <paramref name="length"/> bytes of <paramref name="ba"/> into a string
/// using this coder's decoder, after resetting it.
/// </summary>
/// <param name="ba">Source bytes.</param>
/// <param name="length">Number of bytes to decode, starting at index 0.</param>
/// <returns>The decoded string.</returns>
/// <exception cref="IllegalArgumentException">
/// The array decoder reported -1, or decoding/flushing ended in a state other than underflow.
/// </exception>
internal String ToString(sbyte[] ba, int length)
{
    CharsetDecoder decoder = Decoder().Reset();

    // Size the output from the decoder's maximum chars-per-byte ratio.
    int capacity = (int)(length * decoder.MaxCharsPerByte());
    char[] chars = new char[capacity];
    if (capacity == 0)
    {
        return new String(chars);
    }

    // UTF-8 only for now. Other ArrayDecoder only handles
    // CodingErrorAction.REPLACE mode. ZipCoder uses
    // REPORT mode.
    if (IsUTF8 && decoder is ArrayDecoder)
    {
        int decodedCount = ((ArrayDecoder)decoder).decode(ba, 0, length, chars);
        if (decodedCount == -1) // malformed
        {
            throw new IllegalArgumentException("MALFORMED");
        }
        return new String(chars, 0, decodedCount);
    }

    ByteBuffer input = ByteBuffer.Wrap(ba, 0, length);
    CharBuffer output = CharBuffer.Wrap(chars);

    CoderResult outcome = decoder.Decode(input, output, true);
    if (!outcome.Underflow)
    {
        throw new IllegalArgumentException(outcome.ToString());
    }

    outcome = decoder.Flush(output);
    if (!outcome.Underflow)
    {
        throw new IllegalArgumentException(outcome.ToString());
    }

    return new String(chars, 0, output.Position());
}
/// <summary>
/// Replaces the current decoder with a new one obtained from <paramref name="newEnc"/>
/// and then runs <c>initDecoder()</c>.
/// </summary>
/// <param name="newEnc">Encoding that supplies the new decoder.</param>
public void switchEncoding(Encoding newEnc)
{
    var replacement = newEnc.newDecoder();
    this.decoder = replacement;
    initDecoder();
}
// -- Character streams from channels --

/// <summary>
/// Constructs a reader that decodes bytes from the given channel using the
/// given decoder.
///
/// <para> The resulting stream will contain an internal input buffer of at
/// least <c>minBufferCap</c> bytes. The stream's <c>read</c> methods
/// will, as needed, fill the buffer by reading bytes from the underlying
/// channel; if the channel is in non-blocking mode when bytes are to be
/// read then an <c>IllegalBlockingModeException</c> will be thrown. The
/// resulting stream will not otherwise be buffered, and it will not support
/// the <c>mark</c> or <c>reset</c> methods.
/// Closing the stream will in turn cause the channel to be closed. </para>
/// </summary>
/// <param name="ch">
///         The channel from which bytes will be read
/// </param>
/// <param name="dec">
///         The charset decoder to be used; it is reset before use
/// </param>
/// <param name="minBufferCap">
///         The minimum capacity of the internal byte buffer,
///         or <c>-1</c> if an implementation-dependent
///         default capacity is to be used
/// </param>
/// <returns> A new reader </returns>
public static Reader NewReader(ReadableByteChannel ch, CharsetDecoder dec, int minBufferCap)
{
    CheckNotNull(ch, "ch");

    CharsetDecoder resetDecoder = dec.Reset();
    return StreamDecoder.forDecoder(ch, resetDecoder, minBufferCap);
}
/// <summary>
/// Decodes <paramref name="len"/> bytes of <paramref name="ba"/> starting at
/// <paramref name="off"/> with a new decoder of <paramref name="cs"/>, using the
/// REPLACE action for both malformed and unmappable input.
/// </summary>
/// <param name="cs">Charset that supplies the decoder.</param>
/// <param name="ba">Source bytes.</param>
/// <param name="off">Index of the first byte to decode.</param>
/// <param name="len">Number of bytes to decode.</param>
/// <returns>The decoded characters, passed through <c>SafeTrim</c> (except for empty input).</returns>
internal static char[] Decode(Charset cs, sbyte[] ba, int off, int len)
{
    // (1)We never cache the "external" cs, the only benefit of creating
    // an additional StringDe/Encoder object to wrap it is to share the
    // de/encode() method. These SD/E objects are short-lived, the young-gen
    // gc should be able to take care of them well. But the best approach
    // is still not to generate them if not really necessary.
    // (2)The defensive copy of the input byte/char[] has a big performance
    // impact, as well as the outgoing result byte/char[]. Need to do the
    // optimization check of (sm==null && classLoader0==null) for both.
    // (3)getClass().getClassLoader0() is expensive
    // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
    // is only checked (and then isTrusted gets set) when (SM==null). It is
    // possible that the SM==null for now but then SM is NOT null later
    // when safeTrim() is invoked...the "safe" way to do is to redundant
    // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
    // but it then can be argued that the SM is null when the operation
    // is started...
    CharsetDecoder decoder = cs.NewDecoder();
    int capacity = Scale(len, decoder.MaxCharsPerByte());
    char[] chars = new char[capacity];
    if (len == 0)
    {
        return chars;
    }

    bool isTrusted = false;
    if (System.SecurityManager != null)
    {
        isTrusted = cs.GetType().ClassLoader0 == null;
        if (!isTrusted)
        {
            // Hand the decoder a private copy of the requested range.
            ba = Arrays.CopyOfRange(ba, off, off + len);
            off = 0;
        }
    }

    decoder.OnMalformedInput(CodingErrorAction.REPLACE).OnUnmappableCharacter(CodingErrorAction.REPLACE).Reset();

    if (decoder is ArrayDecoder)
    {
        int decodedCount = ((ArrayDecoder)decoder).decode(ba, off, len, chars);
        return SafeTrim(chars, decodedCount, cs, isTrusted);
    }

    ByteBuffer input = ByteBuffer.Wrap(ba, off, len);
    CharBuffer output = CharBuffer.Wrap(chars);
    try
    {
        CoderResult outcome = decoder.Decode(input, output, true);
        if (!outcome.Underflow)
        {
            outcome.ThrowException();
        }

        outcome = decoder.Flush(output);
        if (!outcome.Underflow)
        {
            outcome.ThrowException();
        }
    }
    catch (CharacterCodingException x)
    {
        // Substitution is always enabled,
        // so this shouldn't happen
        throw new Error(x);
    }

    return SafeTrim(chars, output.Position(), cs, isTrusted);
}
/// <summary>
/// Creates a reader that determines the encoding of <paramref name="inputStream"/> by
/// sniffing before it creates the decoder.
/// </summary>
/// <remarks>
/// Order of attempts: BOM sniffer, then meta sniffer, then (depending on
/// <paramref name="heuristics"/>) the chardet and ICU sniffers, and finally
/// <c>Encoding.WINDOWS1252</c> as the fallback. The statement order matters: the sniffers
/// are handed <c>this</c>, and <c>position</c> / <c>sniffing</c> are adjusted between attempts.
/// </remarks>
/// <param name="inputStream">Stream to read from; stored on the instance.</param>
/// <param name="errorHandler">Stored on the instance and also passed to the meta sniffer.</param>
/// <param name="tokenizer">Stored on the instance.</param>
/// <param name="driver">If not <c>null</c>, it is told the chosen encoding and a confidence level.</param>
/// <param name="heuristics">Selects which optional sniffers (CHARDET, ICU, or ALL) may run.</param>
public HtmlInputStreamReader(Stream inputStream, ErrorHandler errorHandler, Tokenizer tokenizer, Driver driver, Heuristics heuristics)
{
    this.inputStream = inputStream;
    this.errorHandler = errorHandler;
    this.tokenizer = tokenizer;
    this.driver = driver;
    // Stays true while the sniffers below run.
    this.sniffing = true;
    Encoding encoding = (new BomSniffer(this)).sniff();
    if (encoding == null)
    {
        // The BOM sniffer found nothing: reset position and try the meta sniffer.
        position = 0;
        encoding = (new MetaSniffer(errorHandler, this)).sniff(this);
        if (encoding == null && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL))
        {
            encoding = (new ChardetSniffer(byteArray, limit)).sniff();
        }
        if (encoding == null && (heuristics == Heuristics.ICU || heuristics == Heuristics.ALL))
        {
            position = 0;
            encoding = (new IcuDetectorSniffer(this)).sniff();
        }
        sniffing = false;
        if (encoding == null)
        {
            // Nothing matched: fall back to WINDOWS1252.
            encoding = Encoding.WINDOWS1252;
        }
        if (driver != null)
        {
            // Any result on this path is reported as TENTATIVE.
            driver.setEncoding(encoding, Confidence.TENTATIVE);
        }
    }
    else
    {
        // The BOM sniffer returned an encoding: report it as CERTAIN.
        if (encoding == Encoding.UTF8)
        {
            if (driver != null)
            {
                driver.setEncoding(Encoding.UTF8, Confidence.CERTAIN);
            }
        }
        else
        {
            // Any non-UTF8 BOM result is reported to the driver as UTF16.
            // NOTE(review): the decoder below is still created from `encoding` itself — confirm this is intended.
            if (driver != null)
            {
                driver.setEncoding(Encoding.UTF16, Confidence.CERTAIN);
            }
        }
    }
    this.decoder = encoding.newDecoder();
    sniffing = false;
    // Reset the read state and apply position and limit to byteBuffer before initialising the decoder.
    position = 0;
    bytesRead = 0;
    byteBuffer.position(position);
    byteBuffer.limit(limit);
    initDecoder();
}
/// <summary>
/// Creates a managed client connection that keeps the given logs and a <c>Wire</c>
/// built from <paramref name="wirelog"/> and <paramref name="id"/>. The remaining
/// arguments are forwarded unchanged to the base constructor.
/// </summary>
/// <param name="id">Connection id; passed to the base constructor and to the <c>Wire</c>.</param>
/// <param name="log">Stored as the connection log.</param>
/// <param name="headerlog">Stored as the header log.</param>
/// <param name="wirelog">Log wrapped by the <c>Wire</c> instance.</param>
public LoggingManagedHttpClientConnection(
    string id,
    Log log,
    Log headerlog,
    Log wirelog,
    int buffersize,
    int fragmentSizeHint,
    CharsetDecoder chardecoder,
    CharsetEncoder charencoder,
    MessageConstraints constraints,
    ContentLengthStrategy incomingContentStrategy,
    ContentLengthStrategy outgoingContentStrategy,
    HttpMessageWriterFactory<IHttpRequest> requestWriterFactory,
    HttpMessageParserFactory<HttpResponse> responseParserFactory)
    : base(
        id,
        buffersize,
        fragmentSizeHint,
        chardecoder,
        charencoder,
        constraints,
        incomingContentStrategy,
        outgoingContentStrategy,
        requestWriterFactory,
        responseParserFactory)
{
    this.log = log;
    this.headerlog = headerlog;

    Wire wireLogger = new Wire(wirelog, id);
    this.wire = wireLogger;
}
/// <summary>
/// Creates a reader for a stream whose encoding is already known, so no sniffing is done.
/// </summary>
/// <param name="inputStream">Stream to read from; stored on the instance.</param>
/// <param name="errorHandler">Stored on the instance.</param>
/// <param name="tokenizer">Stored on the instance.</param>
/// <param name="driver">Stored on the instance.</param>
/// <param name="encoding">Encoding that supplies the decoder.</param>
public HtmlInputStreamReader(
    Stream inputStream,
    ErrorHandler errorHandler,
    Tokenizer tokenizer,
    Driver driver,
    Encoding encoding)
{
    // Collaborators.
    this.inputStream = inputStream;
    this.errorHandler = errorHandler;
    this.tokenizer = tokenizer;
    this.driver = driver;

    // Decoder comes straight from the supplied encoding; sniffing is off.
    var knownDecoder = encoding.newDecoder();
    this.decoder = knownDecoder;
    this.sniffing = false;

    // Start with zeroed counters and a byte buffer whose position and limit are both 0.
    position = 0;
    bytesRead = 0;
    byteBuffer.position(0);
    byteBuffer.limit(0);
    shouldReadBytes = true;

    initDecoder();
}
/// <summary>
/// Convenience constructor that delegates to the full constructor, passing
/// <paramref name="buffersize"/> for both the buffer size and the fragment size hint,
/// and <c>null</c> for the four trailing arguments.
/// </summary>
/// <param name="buffersize">Used as both buffer size and fragment size hint.</param>
/// <param name="chardecoder">Forwarded unchanged.</param>
/// <param name="charencoder">Forwarded unchanged.</param>
/// <param name="constraints">Forwarded unchanged.</param>
internal DefaultBHttpServerConnection(
    int buffersize,
    CharsetDecoder chardecoder,
    CharsetEncoder charencoder,
    MessageConstraints constraints)
    : this(
        buffersize,
        buffersize,
        chardecoder,
        charencoder,
        constraints,
        null,
        null,
        null,
        null)
{
}