/// <summary>
/// Builds the n-gram table for the text stored at <paramref name="sourcePath"/>
/// and, when a target path is supplied, hands the table to <c>Save</c>.
/// </summary>
/// <param name="languageCode">Language code forwarded to <c>Save</c> together with the table.</param>
/// <param name="sourcePath">Path of the file whose contents are read through <c>Helper.GetFileContents</c>.</param>
/// <param name="targetPath">
/// Destination forwarded to <c>Save</c>; when <c>null</c> (the default) nothing is saved.
/// </param>
/// <returns>The n-gram table produced by a default-constructed <c>NgramBuilder</c>.</returns>
public Dictionary<string, int> Learn(string languageCode, string sourcePath, string targetPath = null)
{
    var contents = Helper.GetFileContents(sourcePath);
    var table = new NgramBuilder().Get(contents);

    // Saving is optional: callers that only want the table omit targetPath.
    if (targetPath != null)
    {
        Save(languageCode, table, targetPath);
    }

    return table;
}
/// <summary>
/// Picks, among <c>_availableLanguages</c>, the language whose n-gram table is
/// closest to the n-grams of the supplied text.
/// </summary>
/// <param name="textOrPath">
/// Either the text to classify or the path of a file containing it, depending on
/// <paramref name="choice"/>.
/// </param>
/// <param name="choice">
/// <c>"F"</c> (case-insensitive) means <paramref name="textOrPath"/> is a file path whose
/// contents are read through <c>Helper.GetFileContents</c>; any other value, including
/// <c>null</c>, means <paramref name="textOrPath"/> is the text itself.
/// </param>
/// <returns>
/// The key of the closest language, or <c>null</c> when the n-gram builder returns
/// <c>null</c> or no language scores below <see cref="int.MaxValue"/>.
/// </returns>
public string Detect(string textOrPath, string choice)
{
    // Static Equals tolerates a null choice instead of throwing NullReferenceException.
    var isFile = string.Equals(choice, "F", StringComparison.OrdinalIgnoreCase);
    var text = isFile ? Helper.GetFileContents(textOrPath) : textOrPath;

    var ngrams = new NgramBuilder(MaxLength, true).Get(text);
    if (ngrams == null)
    {
        return null;
    }

    var shortestDistance = int.MaxValue;
    string lowestScoringLanguage = null;

    foreach (var availableLanguage in _availableLanguages)
    {
        var distance = 0;

        foreach (var ngram in ngrams)
        {
            if (availableLanguage.Value.TryGetValue(ngram.Key, out var knownValue))
            {
                // Use the magnitude of the difference: signed terms would cancel each
                // other out and would make the early exit below unsound, because the
                // running total could shrink again after exceeding the current best.
                distance += Math.Abs(ngram.Value - knownValue);
            }
            else
            {
                // N-gram unknown to this language: charge the fixed penalty.
                distance += MaxPenalty;
            }

            // The total only grows, so this language can no longer beat the best one.
            if (distance > shortestDistance)
            {
                break;
            }
        }

        // Strict comparison: on a tie the language enumerated first is kept.
        if (distance < shortestDistance)
        {
            shortestDistance = distance;
            lowestScoringLanguage = availableLanguage.Key;
        }
    }

    return lowestScoringLanguage;
}