示例#1
0
        /// <summary>
        /// Run every registered charset recognizer against this detector and
        /// collect each one that reports a confidence greater than zero.
        /// <p/>
        /// The collected matches are sorted by confidence and then reversed, so
        /// the strongest candidate ends up first in the returned array.
        /// <p/>
        /// Nothing in this method throws when no recognizer matches; in that case
        /// the result is built from an empty list.
        /// </summary>
        ///
        /// <returns>An array of CharsetMatch objects for the plausible charsets,
        /// best match first.</returns>
        /// @stable ICU 3.4
        public CharsetMatch[] DetectAll()
        {
            ArrayList candidates = new ArrayList();

            // Ask each recognizer in turn and keep only the ones that report
            // a positive confidence.
            for (int index = 0; index < fCSRecognizers.Count; index++)
            {
                CharsetRecognizer recognizer = (CharsetRecognizer)fCSRecognizers[index];

                // The confidence is carried in the low-order byte of the match result.
                int confidence = recognizer.Match(this) & 0x000000ff;
                if (confidence <= 0)
                {
                    continue;
                }

                CharsetMatch candidate = new CharsetMatch(this, recognizer, confidence);
                ILOG.J2CsMapping.Collections.Generics.Collections.Add(candidates, candidate);
            }

            // Sort ascending (CharsetMatch compares on confidence), then flip the
            // list so the highest confidence comes first.
            ILOG.J2CsMapping.Collections.Collections.Sort(candidates);
            ILOG.J2CsMapping.Collections.Generics.Collections.Reverse(candidates);

            return (CharsetMatch[])ILOG.J2CsMapping.Collections.Generics.Collections.ToArray(
                candidates, new CharsetMatch[candidates.Count]);
        }
示例#2
0
        /// <summary>
        /// Order this CharsetMatch relative to another one by confidence. This is
        /// the ordering CharsetDetector.DetectAll() relies on when it sorts its
        /// results.
        /// </summary>
        ///
        /// <param name="o">the CharsetMatch object to compare against. It is cast
        /// directly and not checked for null.</param>
        /// <returns>1 when this match has the higher confidence, -1 when it has the
        /// lower confidence, and 0 when both confidences are equal.</returns>
        /// <exception cref="InvalidCastException">if the argument is not a CharsetMatch.</exception>
        /// @stable ICU 3.4
        public virtual int CompareTo(Object o)
        {
            CharsetMatch that = (CharsetMatch)o;

            if (this.fConfidence > that.fConfidence)
            {
                return 1;
            }

            if (this.fConfidence < that.fConfidence)
            {
                return -1;
            }

            // Neither side is larger: the two matches rank equally.
            return 0;
        }
示例#3
0
        /// <summary>
        /// Autodetect the charset of a byte array and return a String containing
        /// the converted input data.
        /// <p/>
        /// This is a convenience method: it stores the declared encoding, passes
        /// the bytes to SetText, runs Detect, and asks the resulting match for its
        /// string.
        /// <p/>
        /// The declared encoding is stored on this detector before detection
        /// starts, so it remains set even when null is returned.
        /// </summary>
        ///
        /// <param name="ins0">The source of the byte data in the unknown charset.</param>
        /// <param name="declaredEncoding">A declared encoding for the data, if available, or null or an empty string if none is available.</param>
        /// <returns>The converted text, or null when Detect produces no match or an
        /// IOException is raised along the way.</returns>
        /// @stable ICU 3.4
        public String GetString(byte[] ins0, String declaredEncoding)
        {
            fDeclaredEncoding = declaredEncoding;

            try
            {
                SetText(ins0);

                CharsetMatch best = Detect();
                if (best != null)
                {
                    // NOTE(review): -1 is presumably "no length limit" - confirm
                    // against CharsetMatch.GetString.
                    return best.GetString(-1);
                }

                return null;
            }
            catch (IOException)
            {
                // Best effort: an I/O failure is reported to the caller as "no result".
                return null;
            }
        }
示例#4
0
        /// <summary>
        /// Autodetect the charset of an input stream and return a TextReader to
        /// access the converted input data.
        /// <p/>
        /// This is a convenience method: it stores the declared encoding, passes
        /// the stream to SetText, runs Detect, and asks the resulting match for a
        /// reader.
        /// <p/>
        /// NOTE(review): the documentation this was ported from requires a stream
        /// that can be returned to its original position after detection has read
        /// a small amount of data. Whether SetText(Stream) has the same
        /// requirement is not visible here - confirm before passing a
        /// non-seekable stream.
        /// <p/>
        /// The declared encoding is stored on this detector before detection
        /// starts, so it remains set even when null is returned.
        /// </summary>
        ///
        /// <param name="ins0">The source of the byte data in the unknown charset.</param>
        /// <param name="declaredEncoding">A declared encoding for the data, if available, or null or an empty string if none is available.</param>
        /// <returns>A reader over the converted text, or null when Detect produces
        /// no match or an IOException is raised along the way.</returns>
        /// @stable ICU 3.4
        public TextReader GetReader(Stream ins0, String declaredEncoding)
        {
            fDeclaredEncoding = declaredEncoding;

            try
            {
                SetText(ins0);

                CharsetMatch best = Detect();
                if (best != null)
                {
                    return best.GetReader();
                }

                return null;
            }
            catch (IOException)
            {
                // Best effort: an I/O failure is reported to the caller as "no result".
                return null;
            }
        }