예제 #1
0
        /// <summary>
        /// Builds the probe set: fills <c>probers</c> slots 0 through 12 with the Cyrillic,
        /// Greek, Bulgarian and Hebrew probers, then calls <c>InitialiseProbes</c>.
        /// </summary>
        public SingleByteCharsetProbeSet()
        {
            // Running slot index: each registration takes the next free slot, so the
            // resulting layout is the consecutive range 0..12.
            var slot = 0;

            // Cyrillic
            this.probers[slot++] = new SingleByteCharSetProber(new Win1251CyrillicModel());
            this.probers[slot++] = new SingleByteCharSetProber(new Koi8rCyrillicModel());
            this.probers[slot++] = new SingleByteCharSetProber(new Latin5CyrillicModel());
            this.probers[slot++] = new SingleByteCharSetProber(new MacCyrillicModel());
            this.probers[slot++] = new SingleByteCharSetProber(new Ibm866CyrillicModel());
            this.probers[slot++] = new SingleByteCharSetProber(new Ibm855CyrillicModel());

            // Greek
            this.probers[slot++] = new SingleByteCharSetProber(new Latin7GreekModel());
            this.probers[slot++] = new SingleByteCharSetProber(new Win1253GreekModel());

            // Bulgarian
            this.probers[slot++] = new SingleByteCharSetProber(new Latin5BulgarianModel());
            this.probers[slot++] = new SingleByteCharSetProber(new Win1251BulgarianModel());

            // Hebrew: one helper prober followed by two Win1255 model probers that both
            // receive the helper as their third constructor argument.
            var hebrewProber = new HebrewProber();
            this.probers[slot++] = hebrewProber;

            // Logical (second constructor argument is false)
            var logicalHebrew = new SingleByteCharSetProber(new Win1255HebrewModel(), false, hebrewProber);
            this.probers[slot++] = logicalHebrew;

            // Visual (second constructor argument is true)
            var visualHebrew = new SingleByteCharSetProber(new Win1255HebrewModel(), true, hebrewProber);
            this.probers[slot++] = visualHebrew;

            // Hand the helper the same two instances that were stored in slots 11 and 12.
            hebrewProber.SetModelProbers(logicalHebrew, visualHebrew);

            // Slots 13 and 14 (Latin2HungarianModel / Win1250HungarianModel) are deliberately
            // not registered: latin2 stays disabled until latin1 is available, otherwise all
            // latin1 would be detected as latin2 because of their similarity.
            this.InitialiseProbes();
        }
        /// <summary>
        /// Registers the single-byte probers in <c>probers</c> slots 0 through 12
        /// (Cyrillic, Greek, Bulgarian, Hebrew) and then calls <c>InitialiseProbes</c>.
        /// </summary>
        public SingleByteCharsetProbeSet()
        {
            // Next free slot; registrations are consecutive, starting at 0.
            var next = 0;

            // Cyrillic models occupy slots 0-5.
            this.probers[next++] = new SingleByteCharSetProber(new Win1251CyrillicModel());
            this.probers[next++] = new SingleByteCharSetProber(new Koi8rCyrillicModel());
            this.probers[next++] = new SingleByteCharSetProber(new Latin5CyrillicModel());
            this.probers[next++] = new SingleByteCharSetProber(new MacCyrillicModel());
            this.probers[next++] = new SingleByteCharSetProber(new Ibm866CyrillicModel());
            this.probers[next++] = new SingleByteCharSetProber(new Ibm855CyrillicModel());

            // Greek models occupy slots 6-7.
            this.probers[next++] = new SingleByteCharSetProber(new Latin7GreekModel());
            this.probers[next++] = new SingleByteCharSetProber(new Win1253GreekModel());

            // Bulgarian models occupy slots 8-9.
            this.probers[next++] = new SingleByteCharSetProber(new Latin5BulgarianModel());
            this.probers[next++] = new SingleByteCharSetProber(new Win1251BulgarianModel());

            // Hebrew helper prober in slot 10; it is also passed to both model probers below.
            var hebrew = new HebrewProber();
            this.probers[next++] = hebrew;

            // Slot 11: logical variant (flag argument false).
            var logicalProber = new SingleByteCharSetProber(new Win1255HebrewModel(), false, hebrew);
            this.probers[next++] = logicalProber;

            // Slot 12: visual variant (flag argument true).
            var visualProber = new SingleByteCharSetProber(new Win1255HebrewModel(), true, hebrew);
            this.probers[next++] = visualProber;

            // Link the helper to the two model probers just registered.
            hebrew.SetModelProbers(logicalProber, visualProber);

            // The Hungarian probers (Latin2HungarianModel, Win1250HungarianModel; would-be
            // slots 13 and 14) are intentionally left out: latin2 is disabled before latin1
            // is available, otherwise all latin1 will be detected as latin2 because of
            // their similarity.
            this.InitialiseProbes();
        }
    /// <summary>
    /// Creates the group prober: fills <c>probers</c> slots 0 through 99 with one
    /// prober per language/encoding model (slot 10 holds the Hebrew helper prober)
    /// and then calls <c>Reset</c>.
    /// </summary>
    public SBCSGroupProber()
    {
        // Running slot index: every registration below takes the next free slot, so the
        // final layout is the consecutive range 0..99 in the order written here.
        var next = 0;

        // Russian (slots 0-5)
        probers[next++] = new SingleByteCharSetProber(new Windows_1251_RussianModel());
        probers[next++] = new SingleByteCharSetProber(new Koi8r_Model());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_5_RussianModel());
        probers[next++] = new SingleByteCharSetProber(new X_Mac_Cyrillic_RussianModel());
        probers[next++] = new SingleByteCharSetProber(new Ibm866_RussianModel());
        probers[next++] = new SingleByteCharSetProber(new Ibm855_RussianModel());

        // Greek (slots 6-7)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_7_GreekModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1253_GreekModel());

        // Bulgarian (slots 8-9)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_5_BulgarianModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1251_BulgarianModel());

        // Hebrew (slots 10-12): the helper prober first, then the two Windows-1255 model
        // probers, each of which receives the helper as its third constructor argument.
        var hebrewProber = new HebrewProber();
        probers[next++] = hebrewProber;

        // Logical (flag argument false)
        var logicalHebrew = new SingleByteCharSetProber(new Windows_1255_HebrewModel(), false, hebrewProber);
        probers[next++] = logicalHebrew;

        // Visual (flag argument true)
        var visualHebrew = new SingleByteCharSetProber(new Windows_1255_HebrewModel(), true, hebrewProber);
        probers[next++] = visualHebrew;

        // Same two instances that now sit in slots 11 and 12.
        hebrewProber.SetModelProbers(logicalHebrew, visualHebrew);

        // Thai (slots 13-14)
        probers[next++] = new SingleByteCharSetProber(new Tis_620_ThaiModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_11_ThaiModel());

        // French (slots 15-17)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_1_FrenchModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_15_FrenchModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1252_FrenchModel());

        // Spanish (slots 18-20)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_1_SpanishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_15_SpanishModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1252_SpanishModel());

        // Hungarian (slots 21-22)
        // NOTE(review): an earlier remark here said to disable latin2 before latin1 is
        // available, otherwise all latin1 will be detected as latin2 because of their
        // similarity. The Hungarian probers are registered regardless; whether that
        // remark still applies is an open question carried over from the original code.
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_2_HungarianModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1250_HungarianModel());

        // German (slots 23-24)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_1_GermanModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1252_GermanModel());

        // Esperanto (slot 25)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_3_EsperantoModel());

        // Turkish (slots 26-27)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_3_TurkishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_9_TurkishModel());

        // Arabic (slots 28-29)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_6_ArabicModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1256_ArabicModel());

        // Vietnamese (slots 30-31)
        probers[next++] = new SingleByteCharSetProber(new Viscii_VietnameseModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1258_VietnameseModel());

        // Danish (slots 32-34)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_15_DanishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_1_DanishModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1252_DanishModel());

        // Lithuanian (slots 35-37)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_13_LithuanianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_10_LithuanianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_4_LithuanianModel());

        // Latvian (slots 38-40)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_13_LatvianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_10_LatvianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_4_LatvianModel());

        // Portuguese (slots 41-44)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_1_PortugueseModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_9_PortugueseModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_15_PortugueseModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1252_PortugueseModel());

        // Maltese (slot 45)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_3_MalteseModel());

        // Czech (slots 46-49)
        probers[next++] = new SingleByteCharSetProber(new Windows_1250_CzechModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_2_CzechModel());
        probers[next++] = new SingleByteCharSetProber(new Mac_Centraleurope_CzechModel());
        probers[next++] = new SingleByteCharSetProber(new Ibm852_CzechModel());

        // Slovak (slots 50-53)
        probers[next++] = new SingleByteCharSetProber(new Windows_1250_SlovakModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_2_SlovakModel());
        probers[next++] = new SingleByteCharSetProber(new Mac_Centraleurope_SlovakModel());
        probers[next++] = new SingleByteCharSetProber(new Ibm852_SlovakModel());

        // Polish (slots 54-59)
        probers[next++] = new SingleByteCharSetProber(new Windows_1250_PolishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_2_PolishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_13_PolishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_16_PolishModel());
        probers[next++] = new SingleByteCharSetProber(new Mac_Centraleurope_PolishModel());
        probers[next++] = new SingleByteCharSetProber(new Ibm852_PolishModel());

        // Finnish (slots 60-65)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_1_FinnishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_4_FinnishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_9_FinnishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_13_FinnishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_15_FinnishModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1252_FinnishModel());

        // Italian (slots 66-70)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_1_ItalianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_3_ItalianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_9_ItalianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_15_ItalianModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1252_ItalianModel());

        // Croatian (slots 71-76)
        probers[next++] = new SingleByteCharSetProber(new Windows_1250_CroatianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_2_CroatianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_13_CroatianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_16_CroatianModel());
        probers[next++] = new SingleByteCharSetProber(new Mac_Centraleurope_CroatianModel());
        probers[next++] = new SingleByteCharSetProber(new Ibm852_CroatianModel());

        // Estonian (slots 77-81)
        probers[next++] = new SingleByteCharSetProber(new Windows_1252_EstonianModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1257_EstonianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_4_EstonianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_13_EstonianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_15_EstonianModel());

        // Irish (slots 82-85)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_1_IrishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_9_IrishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_15_IrishModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1252_IrishModel());

        // Romanian (slots 86-89)
        probers[next++] = new SingleByteCharSetProber(new Windows_1250_RomanianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_2_RomanianModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_16_RomanianModel());
        probers[next++] = new SingleByteCharSetProber(new Ibm852_RomanianModel());

        // Slovene (slots 90-94)
        probers[next++] = new SingleByteCharSetProber(new Windows_1250_SloveneModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_2_SloveneModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_16_SloveneModel());
        probers[next++] = new SingleByteCharSetProber(new Mac_Centraleurope_SloveneModel());
        probers[next++] = new SingleByteCharSetProber(new Ibm852_SloveneModel());

        // Swedish (slots 95-99)
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_1_SwedishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_4_SwedishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_9_SwedishModel());
        probers[next++] = new SingleByteCharSetProber(new Iso_8859_15_SwedishModel());
        probers[next++] = new SingleByteCharSetProber(new Windows_1252_SwedishModel());

        Reset();
    }