コード例 #1
0
        /// <summary>
        /// Builds one dense TF-IDF vector per document and passes the result through <c>Normalize</c>.
        /// Component <c>i</c> of every vector corresponds to entry <c>i</c> of
        /// <c>MegaDictionary.ReturnKeysList()</c>.
        /// </summary>
        /// <param name="documents">Documents to vectorize; their count is the numerator of the IDF ratio.</param>
        /// <returns>A jagged array with one row per document and one column per dictionary key.</returns>
        public static double[][] ReturnTFIDFVectors(List <Document> documents)
        {
            // generate list ordering of megadictionary
            List <string> keysList = MegaDictionary.ReturnKeysList();

            // IDF depends only on the term, so it is computed once per key instead of once per
            // (document, key) pair. The document count is widened to double first: with integral
            // operands the ratio would be truncated before the logarithm is taken.
            // NOTE(review): assumes every key has a non-zero term frequency; a zero would now
            // produce an infinite IDF rather than a DivideByZeroException - confirm against MegaDictionary.
            double   documentCount = documents.Count;
            double[] idfByKey      = new double[keysList.Count];
            for (int i = 0; i < keysList.Count; i++)
            {
                idfByKey[i] = Math.Log(documentCount / MegaDictionary.ReturnTermFrequency(keysList[i]));
            }

            double[][] vectors = new double[documents.Count][];

            for (int d = 0; d < documents.Count; d++)
            {
                Document document = documents[d];

                // Invariant for the whole document, so query it once.
                var uniqueWordsFreq = document.UniqueWordsFreq();

                // calculate TFIDF vector for document
                double[] documentVector = new double[keysList.Count];
                for (int i = 0; i < keysList.Count; i++)
                {
                    // if the document has 0 terms in it, tf is 0
                    double tf = uniqueWordsFreq == 0
                        ? 0
                        : (double)document.ReturnFrequency(keysList[i]) / uniqueWordsFreq;

                    documentVector[i] = tf * idfByKey[i];
                }

                vectors[d] = documentVector;
            }

            Normalize(vectors);
            return(vectors);
        }
コード例 #2
0
        /// <summary>
        /// Updates <c>_previousAK</c> according to the current value of <c>_k</c> and returns it.
        /// </summary>
        /// <remarks>
        /// For <c>_k</c> below 2 the sentinel -1.0 is stored, which trips the debug assertion.
        /// For <c>_k</c> equal to 2 the value is seeded from the number of dictionary keys.
        /// In every other case the previous value is moved one sixth of the way towards 1.
        /// </remarks>
        /// <returns>The value just stored in <c>_previousAK</c>.</returns>
        private double aK()
        {
            double updated;

            if (_k < 2)
            {
                // Sentinel: no valid value exists yet.
                updated = -1.0;
            }
            else if (_k == 2)
            {
                // Seed value, derived from the size of the key list.
                int keyCount = MegaDictionary.ReturnKeysList().Count;
                updated = 1 - (3.0 / (4 * keyCount));
            }
            else
            {
                // Recurrence on the previously stored value.
                double gapToOne = 1 - _previousAK;
                updated = _previousAK + (gapToOne / 6);
            }

            _previousAK = updated;

            Debug.Assert(!_previousAK.Equals(-1.0));
            return _previousAK;
        }
コード例 #3
0
        /// <summary>
        /// Builds one sparse TF-IDF vector per document and passes the result through
        /// <c>NormalizeDictionaryArray</c>. Each dictionary maps the index of a term in
        /// <c>MegaDictionary.ReturnKeysList()</c> to its TF-IDF weight; terms whose weight is
        /// exactly 0 are omitted.
        /// </summary>
        /// <param name="documents">Documents to vectorize; their count is the numerator of the IDF ratio.</param>
        /// <returns>An array with one dictionary per document, in the order of <paramref name="documents"/>.</returns>
        public static Dictionary <int, double>[] ReturnTFIDFDicts(List <Document> documents)
        {
            // generate list ordering of megadictionary
            List <string> keysList = MegaDictionary.ReturnKeysList();

            // IDF depends only on the term, so it is computed once per key instead of once per
            // (document, key) pair. The document count is widened to double first: with integral
            // operands the ratio would be truncated before the logarithm is taken.
            // NOTE(review): assumes every key has a non-zero term frequency; a zero would now
            // produce an infinite IDF rather than a DivideByZeroException - confirm against MegaDictionary.
            double   documentCount = documents.Count;
            double[] idfByKey      = new double[keysList.Count];
            for (int i = 0; i < keysList.Count; i++)
            {
                idfByKey[i] = Math.Log(documentCount / MegaDictionary.ReturnTermFrequency(keysList[i]));
            }

            Dictionary <int, double>[] listOfDictionaries = new Dictionary <int, double>[documents.Count];

            for (int d = 0; d < documents.Count; d++)
            {
                Document document = documents[d];

                // Documents are numbered from 1 in the trace output.
                Debug.WriteLine("TFIDF vector for document #: " + (d + 1));

                // Invariant for the whole document, so query it once.
                var uniqueWordsFreq = document.UniqueWordsFreq();

                Dictionary <int, double> TFIDFDict = new Dictionary <int, double>();

                // calculate TFIDF vector for document
                for (int i = 0; i < keysList.Count; i++)
                {
                    // if the document has 0 terms in it, tf is 0
                    double tf = uniqueWordsFreq == 0
                        ? 0
                        : (double)document.ReturnFrequency(keysList[i]) / uniqueWordsFreq;

                    double tfidf = tf * idfByKey[i];

                    // only add to dictionary if tfidf is not 0 (keeps the representation sparse)
                    if (tfidf != 0)
                    {
                        TFIDFDict.Add(i, tfidf);
                    }
                }

                listOfDictionaries[d] = TFIDFDict;
            }

            NormalizeDictionaryArray(listOfDictionaries);
            return(listOfDictionaries);
        }