/// <summary>
/// Collects every distinct token produced by tokenizing the given documents,
/// in the order each token is first encountered.
/// </summary>
/// <param name="docs">Documents to tokenize; each entry is passed to <c>Tokening.Partition</c>.</param>
/// <returns>An <see cref="ArrayList"/> holding each distinct token exactly once.</returns>
/// <remarks>
/// As a side effect this resets the <c>ngramDoc</c> field to a fresh jagged array with
/// <c>numDocs</c> rows (the rows themselves are not populated here).
/// NOTE(review): the array is sized by <c>numDocs</c>, not <c>docs.Length</c> - presumably
/// the two agree; confirm against the caller.
/// </remarks>
private ArrayList GenerateTerms(string[] docs)
{
    ArrayList uniques = new ArrayList();

    // Membership index kept in step with 'uniques'. ArrayList.Contains is a linear scan,
    // which made the original loop quadratic in the number of distinct terms.
    Hashtable seen = new Hashtable();

    // Hashtable rejects null keys, whereas ArrayList.Contains accepts null, so a null
    // token (should the tokenizer ever produce one) is tracked separately.
    bool nullSeen = false;

    ngramDoc = new string[numDocs][];

    for (int i = 0; i < docs.Length; i++)
    {
        // A tokenizer is created per document, as before, in case Tokening carries state.
        Tokening tokenizer = new Tokening();
        string[] words = tokenizer.Partition(docs[i]);

        for (int j = 0; j < words.Length; j++)
        {
            string word = words[j];

            if (word == null)
            {
                if (!nullSeen)
                {
                    nullSeen = true;
                    uniques.Add(null);
                }
                continue;
            }

            if (!seen.ContainsKey(word))
            {
                seen.Add(word, null);
                uniques.Add(word);
            }
        }
    }

    return uniques;
}
/// <summary>
/// Builds a lookup table for the words of <paramref name="input"/>: the text is lower-cased,
/// tokenized with <c>Tokening.Partition</c>, and sorted; each entry returned by
/// <c>GetDistinctWords</c> is then mapped to the value <c>CntWrd</c> yields for it
/// against the sorted token array.
/// </summary>
/// <param name="input">Text to analyse.</param>
/// <returns>
/// A <see cref="Hashtable"/> keyed by distinct word, whose values are the results of
/// <c>CntWrd</c> (presumably occurrence counts - verify against <c>CntWrd</c>).
/// </returns>
private IDictionary GetWordFrequency(string input)
{
    // NOTE(review): ToLower() and Array.Sort use the current culture; left untouched because
    // GetDistinctWords / CntWrd may rely on this exact casing and ordering.
    string lowered = input.ToLower();

    Tokening splitter = new Tokening();
    String[] tokens = splitter.Partition(lowered);
    Array.Sort(tokens);

    String[] vocabulary = GetDistinctWords(tokens);
    IDictionary frequencies = new Hashtable();

    foreach (String word in vocabulary)
    {
        object count = CntWrd(word, tokens);
        frequencies[word] = count;
    }

    return frequencies;
}