Example #1
0
        /// <summary>
        /// Collects every distinct token found across <paramref name="docs"/>, in order of
        /// first appearance.
        /// </summary>
        /// <remarks>
        /// As a side effect, re-allocates the <c>ngramDoc</c> field as a jagged array with
        /// <c>numDocs</c> rows; the rows themselves are not populated by this method.
        /// </remarks>
        /// <param name="docs">Documents to tokenize; each one is split with <c>Tokening.Partition</c>.</param>
        /// <returns>An <see cref="ArrayList"/> holding each unique token once, ordered by first occurrence.</returns>
        private ArrayList GenerateTerms(string[] docs)
        {
            ArrayList uniques = new ArrayList();

            // Membership is tracked in a hash set: ArrayList.Contains is a linear scan, which made
            // the de-duplication quadratic in the number of distinct tokens. Default string
            // equality is used, matching what ArrayList.Contains compared with.
            System.Collections.Generic.HashSet<string> seen =
                new System.Collections.Generic.HashSet<string>();

            ngramDoc = new string[numDocs][];
            for (int i = 0; i < docs.Length; i++)
            {
                // A fresh tokenizer per document, as before (Tokening may carry state).
                Tokening tokenizer = new Tokening();
                string[] words     = tokenizer.Partition(docs[i]);

                for (int j = 0; j < words.Length; j++)
                {
                    // HashSet.Add returns false when the token was already recorded.
                    if (seen.Add(words[j]))
                    {
                        uniques.Add(words[j]);
                    }
                }
            }
            return(uniques);
        }
Example #2
0
        /// <summary>
        /// Returns the distinct entries of <paramref name="ip"/>, keeping the order in which
        /// each value first appears.
        /// </summary>
        /// <param name="ip">Words to de-duplicate; may be <c>null</c>.</param>
        /// <returns>
        /// An empty array when <paramref name="ip"/> is <c>null</c>; otherwise the result of
        /// <c>Tokening.ArrLstToArr</c> applied to the list of unique words.
        /// </returns>
        private string[] GetDistinctWords(String[] ip)
        {
            if (ip == null)
            {
                return(new string[0]);
            }

            ArrayList list = new ArrayList();

            // Hash-based membership replaces the linear ArrayList.Contains scan that made this
            // loop quadratic. Comparison stays an exact string match; the original code carried
            // an open question here about using n-gram similarity instead.
            System.Collections.Generic.HashSet<string> seen =
                new System.Collections.Generic.HashSet<string>();

            for (int i = 0; i < ip.Length; i++)
            {
                // HashSet.Add returns false when the word has already been collected.
                if (seen.Add(ip[i]))
                {
                    list.Add(ip[i]);
                }
            }

            return(Tokening.ArrLstToArr(list));
        }
Example #3
0
        /// <summary>
        /// Builds a table keyed by each distinct lower-cased word of <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Text to analyse; it is lower-cased before being tokenized.</param>
        /// <returns>
        /// A <see cref="Hashtable"/> mapping every distinct word to the value <c>CntWrd</c>
        /// returns for it (presumably its occurrence count; confirm against <c>CntWrd</c>).
        /// </returns>
        private IDictionary GetWordFrequency(string input)
        {
            // Lower-case first so that words differing only in case share one key.
            string lowered = input.ToLower();

            Tokening splitter = new Tokening();
            String[] tokens = splitter.Partition(lowered);

            // Sorted in place; both the distinct pass and CntWrd receive the sorted array.
            Array.Sort(tokens);

            IDictionary frequencies = new Hashtable();

            foreach (String word in GetDistinctWords(tokens))
            {
                frequencies[word] = CntWrd(word, tokens);
            }

            return frequencies;
        }