Exemple #1
0
        /// <summary>
        /// Convenience entry point that constructs a one-off <see cref="LanguageIdentifier"/> and
        /// uses it to classify <paramref name="text"/>.
        /// Yields the possible languages of the text, or an empty sequence if the result is too uncertain.
        /// </summary>
        /// <remarks>
        /// A new <see cref="LanguageIdentifier"/> is created on every call.
        /// NOTE(review): if constructing the identifier is expensive (e.g. it loads models from disk),
        /// callers classifying many inputs may prefer to hold on to a single instance — confirm.
        /// </remarks>
        /// <param name="text">Text whose language should be identified.</param>
        /// <param name="languageModelsDirectory">
        /// Forwarded as the first argument of the <see cref="LanguageIdentifier"/> constructor;
        /// defaults to <c>LanguageModelsDirectoryDefault</c>.
        /// </param>
        /// <param name="maximumSizeOfDistribution">
        /// Forwarded as the second argument of the <see cref="LanguageIdentifier"/> constructor;
        /// defaults to <c>MaximumSizeOfDistributionDefault</c>.
        /// </param>
        /// <param name="settings">Classification settings; pass null for default settings.</param>
        /// <returns>
        /// Whatever the instance-level <c>ClassifyText</c> returns: a sequence of tuples pairing a
        /// <c>LanguageInfo</c> with a <see cref="double"/> (presumably a score for that language — confirm).
        /// </returns>
        public static IEnumerable<Tuple<LanguageInfo, double>> ClassifyText(
            string text,
            string languageModelsDirectory = LanguageModelsDirectoryDefault,
            int maximumSizeOfDistribution = MaximumSizeOfDistributionDefault,
            LanguageIdentifierSettings settings = null)
        {
            // Build a throwaway identifier and delegate straight to its instance method.
            return new LanguageIdentifier(languageModelsDirectory, maximumSizeOfDistribution)
                .ClassifyText(text, settings);
        }
Exemple #2
0
        /// <summary>
        /// Convenience entry point that constructs a one-off <see cref="LanguageIdentifier"/> and
        /// uses it to classify the text contained in <paramref name="input"/>.
        /// Yields the possible languages of that text, or an empty sequence if the result is too uncertain.
        /// </summary>
        /// <remarks>
        /// A new <see cref="LanguageIdentifier"/> is created on every call.
        /// NOTE(review): if constructing the identifier is expensive (e.g. it loads models from disk),
        /// callers classifying many inputs may prefer to hold on to a single instance — confirm.
        /// </remarks>
        /// <param name="input">Stream holding the bytes of the text to classify.</param>
        /// <param name="encoding">
        /// Encoding of the text contained in the stream, or null if the encoding is not known beforehand.
        /// <para>
        /// When an encoding is supplied, for performance and quality reasons make sure that
        /// <paramref name="languageModelsDirectory"/> points to models built from UTF8 encoded files
        /// (Wikipedia-Experimental-UTF8Only).
        /// </para>
        /// </param>
        /// <param name="languageModelsDirectory">
        /// Forwarded as the first argument of the <see cref="LanguageIdentifier"/> constructor;
        /// defaults to <c>LanguageModelsDirectoryDefault</c>.
        /// </param>
        /// <param name="maximumSizeOfDistribution">
        /// Forwarded as the second argument of the <see cref="LanguageIdentifier"/> constructor;
        /// defaults to <c>MaximumSizeOfDistributionDefault</c>.
        /// </param>
        /// <param name="settings">Classification settings; pass null for default settings.</param>
        /// <returns>
        /// Whatever the instance-level <c>ClassifyBytes</c> returns: a sequence of tuples pairing a
        /// <c>LanguageInfo</c> with a <see cref="double"/> (presumably a score for that language — confirm).
        /// </returns>
        public static IEnumerable<Tuple<LanguageInfo, double>> ClassifyBytes(
            Stream input,
            Encoding encoding = null,
            string languageModelsDirectory = LanguageModelsDirectoryDefault,
            int maximumSizeOfDistribution = MaximumSizeOfDistributionDefault,
            LanguageIdentifierSettings settings = null)
        {
            // Build a throwaway identifier and delegate straight to its instance method.
            return new LanguageIdentifier(languageModelsDirectory, maximumSizeOfDistribution)
                .ClassifyBytes(input, encoding, settings);
        }