/// <summary>
/// Runs every recognizer in <c>fCSRecognizers</c> against this detector and
/// returns a match for each one that reports a positive confidence, with the
/// best quality match first.
/// <p/>
/// The confidence is taken from the low eight bits of the recognizer's
/// <c>Match</c> result. This method does not itself throw when nothing
/// matches; in that case the returned array is built from an empty list.
/// </summary>
///
/// <returns>An array of CharsetMatch objects representing possibly matching
/// charsets, ordered from highest to lowest confidence.</returns>
/// @stable ICU 3.4
public CharsetMatch[] DetectAll()
{
    ArrayList candidates = new ArrayList();

    // Visit each recognizer once and keep only those with confidence > 0.
    for (int index = 0; index < fCSRecognizers.Count; index++)
    {
        CharsetRecognizer recognizer = (CharsetRecognizer)fCSRecognizers[index];
        int quality = recognizer.Match(this) & 0x000000ff;
        if (quality <= 0)
        {
            continue;
        }

        ILOG.J2CsMapping.Collections.Generics.Collections.Add(candidates, new CharsetMatch(this, recognizer, quality));
    }

    // CharsetMatch compares on confidence, so sort first and then reverse
    // to put the best match at index 0.
    ILOG.J2CsMapping.Collections.Collections.Sort(candidates);
    ILOG.J2CsMapping.Collections.Generics.Collections.Reverse(candidates);

    CharsetMatch[] buffer = new CharsetMatch[candidates.Count];
    return (CharsetMatch[])ILOG.J2CsMapping.Collections.Generics.Collections.ToArray(candidates, buffer);
}
/// <summary>
/// Orders this CharsetMatch relative to another one using the match
/// confidence value, which allows CharsetDetector.DetectAll() to order its
/// results.
/// </summary>
///
/// <param name="o">the CharsetMatch object to compare against.</param>
/// <returns>1 when this match's confidence is greater than the argument's,
/// -1 when it is smaller, and 0 when both are equal.</returns>
/// <exception cref="InvalidCastException">if the argument is not a CharsetMatch.</exception>
/// @stable ICU 3.4
public virtual int CompareTo(Object o)
{
    // A direct cast (not "as") so a wrong argument type fails loudly.
    CharsetMatch that = (CharsetMatch)o;

    if (fConfidence > that.fConfidence)
    {
        return 1;
    }

    return (fConfidence < that.fConfidence) ? -1 : 0;
}
/// <summary>
/// Autodetect the charset of a byte array and return a String produced from
/// the detected match.
/// <p/>
/// This is a convenience method that stores the declared encoding, calls
/// <c>SetText</c> with the supplied bytes, runs <c>Detect</c>, and returns
/// the result of the match's <c>GetString(-1)</c>.
/// <p/>
/// No exception is raised when detection yields no match or when an
/// IOException occurs; null is returned in both cases.
/// </summary>
///
/// <param name="ins0">The source of the byte data in the unknown charset.</param>
/// <param name="declaredEncoding">A declared encoding for the data, if available, or null or an empty string if none is available.</param>
/// <returns>The string obtained from the detected match, or null when there
/// is no match or an IOException was caught.</returns>
/// @stable ICU 3.4
public String GetString(byte[] ins0, String declaredEncoding)
{
    fDeclaredEncoding = declaredEncoding;

    try
    {
        SetText(ins0);
        CharsetMatch best = Detect();

        // NOTE(review): -1 presumably means "no length limit" - confirm against CharsetMatch.GetString.
        return (best == null) ? null : best.GetString(-1);
    }
    catch (IOException)
    {
        // I/O failures are reported to the caller as "no result".
        return null;
    }
}
/// <summary>
/// Autodetect the charset of an input stream and return a TextReader
/// obtained from the detected match.
/// <p/>
/// This is a convenience method that stores the declared encoding, calls
/// <c>SetText</c> with the supplied stream, runs <c>Detect</c>, and returns
/// the result of the match's <c>GetReader()</c>.
/// <p/>
/// No exception is raised when detection yields no match or when an
/// IOException occurs; null is returned in both cases.
/// </summary>
///
/// <param name="ins0">The source of the byte data in the unknown charset.</param>
/// <param name="declaredEncoding">A declared encoding for the data, if available, or null or an empty string if none is available.</param>
/// <returns>The reader obtained from the detected match, or null when there
/// is no match or an IOException was caught.</returns>
/// @stable ICU 3.4
public TextReader GetReader(Stream ins0, String declaredEncoding)
{
    fDeclaredEncoding = declaredEncoding;

    try
    {
        SetText(ins0);
        CharsetMatch best = Detect();

        return (best == null) ? null : best.GetReader();
    }
    catch (IOException)
    {
        // I/O failures are reported to the caller as "no result".
        return null;
    }
}