Пример #1
0
        /// <summary>
        /// Collects the distinct tokens found across all given documents,
        /// in order of first appearance.
        /// </summary>
        /// <param name="docs">Documents to tokenize; each one is passed to <c>Tokening.Partition</c>.</param>
        /// <returns>
        /// An <see cref="ArrayList"/> holding each distinct token string exactly once,
        /// ordered by first occurrence.
        /// </returns>
        /// <remarks>
        /// Side effect: re-allocates the <c>ngramDoc</c> field as a jagged array with
        /// <c>numDocs</c> rows; the rows themselves are not populated here.
        /// </remarks>
        private ArrayList GenerateTerms(string[] docs)
        {
            ArrayList uniques = new ArrayList();

            // Hash-based membership check: ArrayList.Contains is a linear scan, which made
            // the original de-duplication quadratic in the number of distinct tokens.
            // The ArrayList is still what gets returned, so ordering and type are unchanged.
            System.Collections.Generic.HashSet<string> seen =
                new System.Collections.Generic.HashSet<string>();

            ngramDoc = new string[numDocs][];
            for (int i = 0; i < docs.Length; i++)
            {
                // A fresh tokenizer per document is kept on purpose: whether Tokening
                // carries state between calls is not visible from this method.
                Tokening tokenizer = new Tokening();
                string[] words     = tokenizer.Partition(docs[i]);

                for (int j = 0; j < words.Length; j++)
                {
                    // HashSet.Add returns false when the token was already seen.
                    if (seen.Add(words[j]))
                    {
                        uniques.Add(words[j]);
                    }
                }
            }
            return uniques;
        }
Пример #2
0
        /// <summary>
        /// Builds a table keyed by each distinct word of the lower-cased input.
        /// </summary>
        /// <param name="input">Text to analyse; it is lower-cased with <c>ToLower()</c> before tokenizing.</param>
        /// <returns>
        /// A <see cref="Hashtable"/> mapping every entry of <c>GetDistinctWords</c> to the value
        /// <c>CntWrd</c> returns for that word against the sorted token array
        /// (presumably its occurrence count; confirm against <c>CntWrd</c>).
        /// </returns>
        private IDictionary GetWordFrequency(string input)
        {
            // Tokenize the lower-cased text, then sort in place before the helpers see it.
            String[] tokens = new Tokening().Partition(input.ToLower());
            Array.Sort(tokens);

            IDictionary frequencies = new Hashtable();

            foreach (String word in GetDistinctWords(tokens))
            {
                frequencies[word] = CntWrd(word, tokens);
            }

            return frequencies;
        }