/// <summary>
        /// Builds the vocabulary for a set of documents: every distinct token
        /// produced by <c>Tokeniser.Partition</c>, in order of first appearance.
        /// </summary>
        /// <remarks>
        /// As a side effect, <c>_ngramDoc</c> is replaced with a new jagged array of
        /// <c>_numDocs</c> rows; this method does not fill those rows.
        /// Tokens are compared exactly as returned by the tokeniser (no case folding here).
        /// </remarks>
        /// <param name="docs">Raw document texts; each one is tokenised separately.</param>
        /// <returns>The distinct tokens, ordered by first occurrence across <paramref name="docs"/>.</returns>
        private ArrayList GenerateTerms(string[] docs)
        {
            ArrayList uniques = new ArrayList();

            // Membership index for tokens already emitted. ArrayList.Contains is a
            // linear scan, which made the original loop quadratic in vocabulary size;
            // the hash lookup keeps the same equality semantics for strings.
            Hashtable seen = new Hashtable();

            // Hashtable rejects null keys, so a null token (which ArrayList.Contains
            // tolerated) is tracked separately to keep the previous behaviour.
            bool nullSeen = false;

            _ngramDoc = new string[_numDocs][];
            for (int i = 0; i < docs.Length; i++)
            {
                Tokeniser tokenizer = new Tokeniser();
                string[] words = tokenizer.Partition(docs[i]);

                for (int j = 0; j < words.Length; j++)
                {
                    string word = words[j];

                    if (word == null)
                    {
                        if (!nullSeen)
                        {
                            nullSeen = true;
                            uniques.Add(null);
                        }
                        continue;
                    }

                    if (!seen.ContainsKey(word))
                    {
                        seen.Add(word, null);
                        uniques.Add(word);
                    }
                }
            }

            return uniques;
        }
        /// <summary>
        /// Lower-cases <paramref name="input"/>, tokenises it, and builds a lookup table
        /// keyed by each distinct token.
        /// </summary>
        /// <remarks>
        /// The value stored for a token is whatever <c>CountWords</c> returns for that token
        /// against the sorted token array (presumably its occurrence count; confirm against
        /// <c>CountWords</c>).
        /// NOTE(review): <c>ToLower()</c> uses the current culture; confirm that is intended.
        /// </remarks>
        /// <param name="input">Text to analyse.</param>
        /// <returns>A <see cref="Hashtable"/> mapping each distinct token to its <c>CountWords</c> result.</returns>
        private IDictionary GetWordFrequency(string input)
        {
            string lowered = input.ToLower();

            // Tokens are sorted before being handed to the helpers below.
            string[] tokens = new Tokeniser().Partition(lowered);
            Array.Sort(tokens);

            string[] vocabulary = GetDistinctWords(tokens);

            IDictionary frequencies = new Hashtable();
            foreach (string term in vocabulary)
            {
                frequencies[term] = CountWords(term, tokens);
            }

            return frequencies;
        }
Example #3
0
		/// <summary>
		/// Builds the vocabulary for a set of documents: every distinct token
		/// produced by <c>Tokeniser.Partition</c>, in order of first appearance.
		/// </summary>
		/// <remarks>
		/// As a side effect, <c>_ngramDoc</c> is replaced with a new jagged array of
		/// <c>_numDocs</c> rows; this method does not fill those rows.
		/// Tokens are compared exactly as returned by the tokeniser (no case folding here).
		/// </remarks>
		/// <param name="docs">Raw document texts; each one is tokenised separately.</param>
		/// <returns>The distinct tokens, ordered by first occurrence across <paramref name="docs"/>.</returns>
		private ArrayList GenerateTerms(string[] docs)
		{
			ArrayList uniques = new ArrayList();

			// Membership index for tokens already emitted. ArrayList.Contains is a
			// linear scan, which made the original loop quadratic in vocabulary size;
			// the hash lookup keeps the same equality semantics for strings.
			Hashtable seen = new Hashtable();

			// Hashtable rejects null keys, so a null token (which ArrayList.Contains
			// tolerated) is tracked separately to keep the previous behaviour.
			bool nullSeen = false;

			_ngramDoc = new string[_numDocs][];
			for (int i = 0; i < docs.Length; i++)
			{
				Tokeniser tokenizer = new Tokeniser();
				string[] words = tokenizer.Partition(docs[i]);

				for (int j = 0; j < words.Length; j++)
				{
					string word = words[j];

					if (word == null)
					{
						if (!nullSeen)
						{
							nullSeen = true;
							uniques.Add(null);
						}
						continue;
					}

					if (!seen.ContainsKey(word))
					{
						seen.Add(word, null);
						uniques.Add(word);
					}
				}
			}

			return uniques;
		}
Example #4
0
		/// <summary>
		/// Lower-cases <paramref name="input"/>, tokenises it, and builds a lookup table
		/// keyed by each distinct token.
		/// </summary>
		/// <remarks>
		/// The value stored for a token is whatever <c>CountWords</c> returns for that token
		/// against the sorted token array (presumably its occurrence count; confirm against
		/// <c>CountWords</c>).
		/// NOTE(review): <c>ToLower()</c> uses the current culture; confirm that is intended.
		/// </remarks>
		/// <param name="input">Text to analyse.</param>
		/// <returns>A <see cref="Hashtable"/> mapping each distinct token to its <c>CountWords</c> result.</returns>
		private IDictionary GetWordFrequency(string input)
		{
			string lowered = input.ToLower();

			// Tokens are sorted before being handed to the helpers below.
			string[] tokens = new Tokeniser().Partition(lowered);
			Array.Sort(tokens);

			string[] vocabulary = GetDistinctWords(tokens);

			IDictionary frequencies = new Hashtable();
			foreach (string term in vocabulary)
			{
				frequencies[term] = CountWords(term, tokens);
			}

			return frequencies;
		}