/// <summary>
/// Picks the entry of <c>_availableLanguages</c> whose n-gram profile is
/// closest to the n-grams built from <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to classify.</param>
/// <returns>
/// The key of the language with the smallest accumulated distance, or
/// <c>null</c> when the n-gram builder returns <c>null</c> or no language
/// scores below <see cref="int.MaxValue"/>.
/// </returns>
public string Detect(string text)
{
    var ngramBuilder = new NgramBuilder(MaxLength, true);
    var ngrams = ngramBuilder.Get(text);
    if (ngrams == null)
    {
        return null;
    }

    var shortestDistance = int.MaxValue;
    string lowestScoringLanguage = null;

    foreach (var availableLanguage in _availableLanguages)
    {
        var profile = availableLanguage.Value;
        var distance = 0;

        foreach (var ngram in ngrams)
        {
            int profileValue;
            if (profile.TryGetValue(ngram.Key, out profileValue))
            {
                // Use the magnitude of the difference: a signed difference lets
                // matches *reduce* the total, which both skews the ranking and
                // invalidates the early exit below (it relies on the running
                // total never decreasing).
                distance += System.Math.Abs(ngram.Value - profileValue);
            }
            else
            {
                // N-gram unknown to this language: charge the fixed penalty.
                distance += MaxPenalty;
            }

            // This language already scores worse than the best one so far;
            // no need to look at the remaining n-grams.
            if (distance > shortestDistance)
            {
                break;
            }
        }

        // Strict comparison: on a tie the language seen first is kept.
        if (distance < shortestDistance)
        {
            shortestDistance = distance;
            lowestScoringLanguage = availableLanguage.Key;
        }
    }

    return lowestScoringLanguage;
}
/// <summary>
/// Builds the n-gram dictionary for the contents of <paramref name="sourcePath"/>
/// and, when a target path is supplied, passes it to <c>Save</c>.
/// </summary>
/// <param name="languageCode">Language code forwarded to <c>Save</c>.</param>
/// <param name="sourcePath">Path handed to <c>Helper.GetFileContents</c>.</param>
/// <param name="targetPath">
/// Destination forwarded to <c>Save</c>; when <c>null</c> (the default) nothing is saved.
/// </param>
/// <returns>The n-gram dictionary produced by <c>NgramBuilder.Get</c>.</returns>
public Dictionary<string, int> Learn(string languageCode, string sourcePath, string targetPath = null)
{
    var contents = Helper.GetFileContents(sourcePath);
    var profile = new NgramBuilder().Get(contents);

    // Saving is optional; the dictionary is returned either way.
    if (targetPath != null)
    {
        Save(languageCode, profile, targetPath);
    }

    return profile;
}
/// <summary>
/// Reads language models from the text file at <paramref name="path"/>, one
/// model per line in the form <c>key:item_item_...</c>.
/// </summary>
/// <param name="path">Path of the model file to read.</param>
/// <returns>
/// A dictionary keyed by the text before the first ':' on each line, whose
/// value is what <c>NgramBuilder.Load</c> returns for the '_'-separated items
/// that follow it.
/// </returns>
/// <remarks>
/// Blank and whitespace-only lines are skipped. A duplicate key still makes
/// <c>Dictionary.Add</c> throw <see cref="System.ArgumentException"/>, and a
/// non-blank line without ':' still fails on the missing second field.
/// </remarks>
public Dictionary<string, Dictionary<string, int>> Remember(string path)
{
    var result = new Dictionary<string, Dictionary<string, int>>();

    using (var reader = new StreamReader(path))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // A blank line (e.g. stray trailing newlines) has no ':' field and
            // would otherwise crash on model[1]; it carries no model, so skip it.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var model = line.Split(':');

            // A fresh builder per line is kept on purpose: Load may hold state
            // on the instance (not visible from here), so it is not shared.
            var ngramBuilder = new NgramBuilder();
            result.Add(model[0], ngramBuilder.Load(model[1].Split('_')));
        }
    }

    return result;
}