Exemple #1
0
        // Methods -----------------------

        /// <summary>
        /// Adds <paramref name="frequency"/> to the count stored for <paramref name="token"/>,
        /// choosing the target dictionary from the characters of <c>token.Word</c>.
        /// </summary>
        /// <remarks>
        /// Allowed characters are ASCII letters, apostrophe, hyphen and period.
        /// <list type="bullet">
        /// <item>Only allowed characters and at least one letter: counted in <c>WordFrequencies</c>.</item>
        /// <item>Only allowed characters but no letter (including an empty word): counted in <c>ExcludedWordFrequencies</c>.</item>
        /// <item>A disallowed character found after at least one letter: counted in <c>ExcludedWordFrequencies</c>.</item>
        /// <item>A disallowed character found before any letter: the token is ignored.</item>
        /// </list>
        /// </remarks>
        /// <param name="token">The word occurrence to count; its <c>Word</c> is scanned character by character.</param>
        /// <param name="frequency">Amount added to the token's current count (defaults to 1).</param>
        public void AddOccurence(WordOccurrence token, long frequency = 1)
        {
            var isTokenValid     = true;
            var hasEnglishLetter = false;

            // Allowed characters: a-z, A-Z, apostrophe ('), hyphen (-) and period (.)
            foreach (var c in token.Word)
            {
                if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'))
                {
                    hasEnglishLetter = true;
                }
                else if (c == '\'' || c == '-' || c == '.')
                {
                    // character is valid but it's not a letter
                }
                else
                {
                    // The scan stops at the first disallowed character, so hasEnglishLetter
                    // only reflects letters seen BEFORE that character.
                    // NOTE(review): this makes "abc1" excluded but "1abc" dropped - confirm intended.
                    isTokenValid = false;
                    break;
                }
            }

            if (!isTokenValid && !hasEnglishLetter)
            {
                return;
            }

            var relevantDictionary = isTokenValid && hasEnglishLetter ? WordFrequencies : ExcludedWordFrequencies;

            // TryGetValue leaves existingFrequency at 0 when the token is not present yet,
            // so one lookup plus one indexer write covers both the insert and the update case.
            long existingFrequency;
            relevantDictionary.TryGetValue(token, out existingFrequency);
            relevantDictionary[token] = existingFrequency + frequency;
        }
Exemple #2
0
        /// <summary>
        /// Reads a frequency file and feeds every sufficiently frequent entry to <c>AddOccurence</c>.
        /// </summary>
        /// <remarks>
        /// Each line is split on <c>Utilities.CsvSeparator</c>; lines that do not yield exactly three
        /// fields (word, first-token-in-sentence flag, frequency) are skipped silently.
        /// A three-field line whose flag or frequency cannot be parsed makes <c>bool.Parse</c> or
        /// <c>long.Parse</c> throw.
        /// </remarks>
        /// <param name="filePath">Path of the frequency file to read.</param>
        /// <param name="minimumFrequency">Entries with a frequency below this value are not added.</param>
        public void LoadFrequencyDictionary(string filePath, int minimumFrequency = 0)
        {
            // The file is machine-readable data, so numbers are parsed independently of the current culture.
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;
            var loadedCount = 0;

            // ReadLines streams the file instead of materializing every line up front.
            foreach (var line in File.ReadLines(filePath))
            {
                var parts = line.Split(Utilities.CsvSeparator);
                if (parts.Length != 3)
                {
                    continue;
                }

                var wordOccurrence = new WordOccurrence()
                {
                    Word = parts[0],
                    IsFirstTokenInSentence = bool.Parse(parts[1])
                };
                var freq = long.Parse(parts[2], invariantCulture);
                if (freq < minimumFrequency)
                {
                    continue;
                }

                this.AddOccurence(wordOccurrence, freq);
                loadedCount++;
            }

            // Report the entries actually handed to AddOccurence rather than the raw line count,
            // which also included malformed lines and entries below minimumFrequency.
            Console.WriteLine("Loaded {0} word occurrences", loadedCount);
        }