/// <summary>
/// Identifies the possible languages of <paramref name="text"/>.
/// Yields an empty sequence when the identification is too uncertain.
/// </summary>
/// <remarks>
/// Convenience wrapper: a new <see cref="LanguageIdentifier"/> is constructed on every call from
/// <paramref name="languageModelsDirectory"/> and <paramref name="maximumSizeOfDistribution"/>,
/// and the work is delegated to its instance-level <c>ClassifyText</c> method.
/// </remarks>
/// <param name="text">Text whose language should be identified.</param>
/// <param name="languageModelsDirectory">
/// Passed as the first constructor argument of <see cref="LanguageIdentifier"/>
/// (presumably the directory holding the language model files; confirm against the constructor).
/// </param>
/// <param name="maximumSizeOfDistribution">
/// Passed as the second constructor argument of <see cref="LanguageIdentifier"/>.
/// </param>
/// <param name="settings">Identification settings, or null to use the default settings.</param>
/// <returns>
/// The sequence returned by the instance-level <c>ClassifyText</c>: candidate languages, each paired
/// with a <see cref="double"/> value (presumably a score; confirm against the instance method).
/// </returns>
public static IEnumerable<Tuple<LanguageInfo, double>> ClassifyText(
    string text,
    string languageModelsDirectory = LanguageModelsDirectoryDefault,
    int maximumSizeOfDistribution = MaximumSizeOfDistributionDefault,
    LanguageIdentifierSettings settings = null)
{
    var identifier = new LanguageIdentifier(languageModelsDirectory, maximumSizeOfDistribution);
    var candidates = identifier.ClassifyText(text, settings);

    return candidates;
}
/// <summary>
/// Identifies the possible languages of the text contained in <paramref name="input"/>.
/// Yields an empty sequence when the identification is too uncertain.
/// </summary>
/// <remarks>
/// Convenience wrapper: a new <see cref="LanguageIdentifier"/> is constructed on every call from
/// <paramref name="languageModelsDirectory"/> and <paramref name="maximumSizeOfDistribution"/>,
/// and the work is delegated to its instance-level <c>ClassifyBytes</c> method.
/// </remarks>
/// <param name="input">Stream containing the text to classify.</param>
/// <param name="encoding">
/// Encoding of the text contained in the stream, or null if the encoding is not known beforehand.
/// <para>
/// When the encoding is not null, for performance and quality reasons make sure that
/// <paramref name="languageModelsDirectory"/> points to models built from UTF8 encoded files
/// (Wikipedia-Experimental-UTF8Only).
/// </para>
/// </param>
/// <param name="languageModelsDirectory">
/// Passed as the first constructor argument of <see cref="LanguageIdentifier"/>
/// (presumably the directory holding the language model files; confirm against the constructor).
/// </param>
/// <param name="maximumSizeOfDistribution">
/// Passed as the second constructor argument of <see cref="LanguageIdentifier"/>.
/// </param>
/// <param name="settings">Identification settings, or null to use the default settings.</param>
/// <returns>
/// The sequence returned by the instance-level <c>ClassifyBytes</c>: candidate languages, each paired
/// with a <see cref="double"/> value (presumably a score; confirm against the instance method).
/// </returns>
public static IEnumerable<Tuple<LanguageInfo, double>> ClassifyBytes(
    Stream input,
    Encoding encoding = null,
    string languageModelsDirectory = LanguageModelsDirectoryDefault,
    int maximumSizeOfDistribution = MaximumSizeOfDistributionDefault,
    LanguageIdentifierSettings settings = null)
{
    var identifier = new LanguageIdentifier(languageModelsDirectory, maximumSizeOfDistribution);
    var candidates = identifier.ClassifyBytes(input, encoding, settings);

    return candidates;
}