// Methods -----------------------

/// <summary>
/// Records <paramref name="frequency"/> occurrences of <paramref name="token"/> in either
/// <c>WordFrequencies</c> or <c>ExcludedWordFrequencies</c>, depending on the characters
/// found in <c>token.Word</c>.
/// </summary>
/// <remarks>
/// A word is "valid" when every character is an ASCII letter (A-Z, a-z), an apostrophe ('),
/// a hyphen (-) or a period (.). Scanning stops at the first character outside that set.
/// <list type="bullet">
/// <item>Valid and containing at least one letter: counted in <c>WordFrequencies</c>.</item>
/// <item>Invalid with no letter seen before the first invalid character: ignored entirely.</item>
/// <item>Everything else (invalid after a letter was seen, or valid but without any letter,
/// including an empty word): counted in <c>ExcludedWordFrequencies</c>.</item>
/// </list>
/// </remarks>
/// <param name="token">The word occurrence to count; it is used as the dictionary key.</param>
/// <param name="frequency">Amount added to the token's running count (defaults to 1).</param>
public void AddOccurence(WordOccurrence token, long frequency = 1)
{
    var isTokenValid = true;
    var hasEnglishLetter = false;

    // Only A-Z, a-z, apostrophe, hyphen and period are allowed.
    foreach (var c in token.Word)
    {
        if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
        {
            hasEnglishLetter = true;
        }
        else if (c == '\'' || c == '-' || c == '.')
        {
            // Permitted punctuation: keeps the token valid but does not count as a letter.
        }
        else
        {
            // NOTE(review): because scanning stops here, hasEnglishLetter only reflects the
            // characters BEFORE the first invalid one ("abc1" is excluded-recorded, "1abc"
            // is dropped). Confirm whether this order dependence is intended.
            isTokenValid = false;
            break;
        }
    }

    if (!isTokenValid && !hasEnglishLetter)
    {
        return;
    }

    var relevantDictionary = isTokenValid && hasEnglishLetter
        ? WordFrequencies
        : ExcludedWordFrequencies;

    // Single lookup instead of ContainsKey followed by the indexer.
    long existing;
    if (relevantDictionary.TryGetValue(token, out existing))
    {
        relevantDictionary[token] = existing + frequency;
    }
    else
    {
        relevantDictionary.Add(token, frequency);
    }
}
/// <summary>
/// Reads a frequency file and feeds each qualifying entry to <c>AddOccurence</c>.
/// </summary>
/// <remarks>
/// Each line is split on <c>Utilities.CsvSeparator</c>. Only lines that yield exactly three
/// parts are processed: the word, a boolean parsed into <c>IsFirstTokenInSentence</c>, and
/// the frequency parsed as a <c>long</c>. Lines with any other number of parts are skipped
/// silently. Failures raised by <c>bool.Parse</c> / <c>long.Parse</c> on a three-part line
/// are not caught here and propagate to the caller.
/// </remarks>
/// <param name="filePath">Path of the file to read.</param>
/// <param name="minimumFrequency">
/// Entries whose frequency is below this value are not added (defaults to 0).
/// </param>
public void LoadFrequencyDictionary(string filePath, int minimumFrequency = 0)
{
    var lines = File.ReadAllLines(filePath);

    // Number of entries actually handed to AddOccurence. The line count of the file is not
    // used for reporting because malformed lines and low-frequency entries are skipped.
    var forwardedCount = 0;

    foreach (var line in lines)
    {
        var parts = line.Split(Utilities.CsvSeparator);
        if (parts.Length != 3)
        {
            continue;
        }

        var wordOccurrence = new WordOccurrence()
        {
            Word = parts[0],
            IsFirstTokenInSentence = bool.Parse(parts[1])
        };
        var freq = long.Parse(parts[2]);

        if (minimumFrequency <= freq)
        {
            this.AddOccurence(wordOccurrence, freq);
            forwardedCount++;
        }
    }

    Console.WriteLine("Loaded {0} word occurrences", forwardedCount);
}