/// <summary>
/// Builds a <see cref="LanguageModel{T}"/> for <paramref name="languageInfo"/> from the features
/// that a <see cref="CharacterNGramExtractor"/> yields for <paramref name="text"/>.
/// </summary>
/// <param name="languageInfo">Language descriptor passed to the resulting model.</param>
/// <param name="text">Reader supplying the text the features are extracted from.</param>
/// <returns>A model constructed from the created distribution and <paramref name="languageInfo"/>.</returns>
private LanguageModel<string> TrainModel(LanguageInfo languageInfo, TextReader text)
{
    var extractor = new CharacterNGramExtractor(MaxNGramLength, OnlyReadFirstNLines);
    IEnumerable<string> features = extractor.GetFeatures(text);

    // "CreateLangaugeModel" / "OccuranceNumberThreshold" are the spellings of the existing members.
    IDistribution<string> featureDistribution = LanguageModelCreator.CreateLangaugeModel(
        features,
        OccuranceNumberThreshold,
        MaximumSizeOfDistribution);

    return new LanguageModel<string>(featureDistribution, languageInfo);
}
/// <summary>
/// Classifies <paramref name="text"/>: extracts features with a
/// <see cref="CharacterNGramExtractor"/>, builds a model from them, and hands that model
/// to the classifier.
/// </summary>
/// <param name="text">The text to identify.</param>
/// <returns>
/// The classifier's result for the model built from <paramref name="text"/>, as
/// (<see cref="LanguageInfo"/>, <see cref="double"/>) pairs. The meaning and ordering of the
/// values are defined by the classifier and are not visible here.
/// </returns>
public IEnumerable<Tuple<LanguageInfo, double>> Identify(string text)
{
    // Same thresholds and extractor settings as used elsewhere in this class for model creation.
    var candidateModel = LanguageModelCreator.CreateLangaugeModel(
        new CharacterNGramExtractor(MaxNGramLength, OnlyReadFirstNLines).GetFeatures(text),
        OccuranceNumberThreshold,
        MaximumSizeOfDistribution);

    return _classifier.Classify(candidateModel);
}