/// <summary>
/// Builds the list of distinct tokens found across all supplied documents.
/// </summary>
/// <remarks>
/// As a side effect, <c>ngramDoc</c> is replaced with a fresh jagged array
/// holding <c>numDocs</c> (not <c>docs.Length</c>) empty rows; this method
/// does not populate those rows.
/// </remarks>
/// <param name="docs">Raw document texts; each is split with <c>Tokening.Partition</c>.</param>
/// <returns>
/// An <see cref="ArrayList"/> of tokens in first-seen order, each appearing once.
/// </returns>
private ArrayList GenerateTerms(string[] docs)
{
    ArrayList uniques = new ArrayList();

    // Hash-based membership test: ArrayList.Contains is a linear scan, which
    // made the original loop quadratic in the vocabulary size.
    Hashtable seen = new Hashtable();

    // Hashtable rejects null keys, so a null token is tracked separately to
    // keep the "added once" behaviour ArrayList.Contains(null) provided.
    bool nullSeen = false;

    ngramDoc = new string[numDocs][];

    for (int i = 0; i < docs.Length; i++)
    {
        Tokening tokenizer = new Tokening();
        string[] words = tokenizer.Partition(docs[i]);

        for (int j = 0; j < words.Length; j++)
        {
            string word = words[j];

            if (word == null)
            {
                if (!nullSeen)
                {
                    nullSeen = true;
                    uniques.Add(null);
                }

                continue;
            }

            if (!seen.ContainsKey(word))
            {
                seen.Add(word, null);
                uniques.Add(word);
            }
        }
    }

    return(uniques);
}
/// <summary>
/// Returns the distinct entries of <paramref name="ip"/>, preserving the order
/// in which each entry first appears.
/// </summary>
/// <param name="ip">Words to de-duplicate; may be <c>null</c>.</param>
/// <returns>
/// An empty array when <paramref name="ip"/> is <c>null</c>; otherwise the
/// result of <c>Tokening.ArrLstToArr</c> applied to the de-duplicated list.
/// </returns>
private string[] GetDistinctWords(String[] ip)
{
    if (ip == null)
    {
        return(new string[0]);
    }

    ArrayList list = new ArrayList();

    // Hash-based membership test: ArrayList.Contains is a linear scan, which
    // made the original loop quadratic in the number of distinct words.
    Hashtable seen = new Hashtable();

    // Hashtable rejects null keys, so a null entry is tracked separately to
    // keep the "added once" behaviour ArrayList.Contains(null) provided.
    bool nullSeen = false;

    for (int i = 0; i < ip.Length; i++)
    {
        string word = ip[i];

        if (word == null)
        {
            if (!nullSeen)
            {
                nullSeen = true;
                list.Add(null);
            }

            continue;
        }

        // N-GRAM SIMILARITY?
        // (open question left by the original author: matching is exact equality)
        if (!seen.ContainsKey(word))
        {
            seen.Add(word, null);
            list.Add(word);
        }
    }

    return(Tokening.ArrLstToArr(list));
}
/// <summary>
/// Maps every distinct token of <paramref name="input"/> to the value
/// <c>CntWrd</c> reports for it against the full token list.
/// </summary>
/// <remarks>
/// The input is lower-cased with <see cref="string.ToLower()"/>, split with
/// <c>Tokening.Partition</c>, and the tokens are sorted in place before
/// de-duplication via <c>GetDistinctWords</c>.
/// </remarks>
/// <param name="input">Text to analyse.</param>
/// <returns>
/// A <see cref="Hashtable"/> keyed by distinct token; each value is whatever
/// <c>CntWrd(token, allTokens)</c> returns.
/// </returns>
private IDictionary GetWordFrequency(string input)
{
    string lowered = input.ToLower();
    string[] tokens = new Tokening().Partition(lowered);

    // Sort first so the distinct list (and the lookups below) see ordered tokens.
    Array.Sort(tokens);

    IDictionary frequencies = new Hashtable();
    foreach (string token in GetDistinctWords(tokens))
    {
        frequencies[token] = CntWrd(token, tokens);
    }

    return frequencies;
}