/// <summary>
/// Builds one dense TF-IDF vector per document. Component <c>i</c> of every vector
/// corresponds to the <c>i</c>-th key returned by <c>MegaDictionary.ReturnKeysList()</c>.
/// The resulting array is passed to <c>Normalize</c> before it is returned.
/// </summary>
/// <param name="documents">Documents to vectorize; the list size is used as the corpus size in the IDF term.</param>
/// <returns>A jagged array with one row per document and one column per dictionary key.</returns>
public static double[][] ReturnTFIDFVectors(List<Document> documents)
{
    // Fixed term ordering shared by every vector.
    List<string> keysList = MegaDictionary.ReturnKeysList();
    int termCount = keysList.Count;

    // IDF depends only on the term and the corpus size, so it is computed once per
    // term instead of once per (document, term) pair.
    // NOTE(review): assumes MegaDictionary.ReturnTermFrequency is side-effect free - confirm.
    double[] idfByTerm = new double[termCount];
    for (int i = 0; i < termCount; i++)
    {
        // Widen to double BEFORE dividing: an integer/integer quotient would be
        // truncated prior to taking the logarithm.
        double termFrequency = MegaDictionary.ReturnTermFrequency(keysList[i]);

        // A non-positive count would otherwise divide by zero; give such terms zero weight.
        idfByTerm[i] = termFrequency > 0 ? Math.Log(documents.Count / termFrequency) : 0;
    }

    double[][] vectors = new double[documents.Count][];
    int row = 0;
    foreach (var document in documents)
    {
        // Read the per-document denominator once rather than twice per term.
        var uniqueWordsFreq = document.UniqueWordsFreq();

        double[] documentVector = new double[termCount];
        for (int i = 0; i < termCount; i++)
        {
            // When the denominator is 0 (original comment: the document has no terms) tf is 0.
            double tf = uniqueWordsFreq == 0
                ? 0
                : (double)document.ReturnFrequency(keysList[i]) / uniqueWordsFreq;

            documentVector[i] = tf * idfByTerm[i];
        }

        vectors[row++] = documentVector;
    }

    Normalize(vectors);
    return vectors;
}
/// <summary>
/// Advances the cached coefficient <c>_previousAK</c> according to the current value of
/// <c>_k</c> and returns the updated value.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item><description><c>_k &lt; 2</c>: the value is set to the sentinel -1.0 (which trips the debug assertion below).</description></item>
/// <item><description><c>_k == 2</c>: the value is seeded as 1 - 3 / (4 * number of dictionary keys).</description></item>
/// <item><description>otherwise: the value moves one sixth of the remaining distance towards 1.</description></item>
/// </list>
/// </remarks>
/// <returns>The updated <c>_previousAK</c>.</returns>
private double aK()
{
    if (_k < 2)
    {
        // No coefficient is defined here; store the sentinel.
        _previousAK = -1.0;
    }
    else if (_k != 2)
    {
        // Recurrence step: close one sixth of the gap between the previous value and 1.
        double gapToOne = 1 - _previousAK;
        _previousAK = _previousAK + (gapToOne / 6);
    }
    else
    {
        // Seed value derived from the number of keys in the dictionary.
        int keyCount = MegaDictionary.ReturnKeysList().Count;
        _previousAK = 1 - ((double)3 / (4 * keyCount));
    }

    // The sentinel must never be handed back to a caller in a debug build.
    Debug.Assert(!_previousAK.Equals(-1.0));
    return _previousAK;
}
/// <summary>
/// Builds one sparse TF-IDF representation per document. Each dictionary maps the index of a
/// term in <c>MegaDictionary.ReturnKeysList()</c> to its TF-IDF weight; terms whose weight is
/// exactly 0 are omitted. The resulting array is passed to <c>NormalizeDictionaryArray</c>
/// before it is returned.
/// </summary>
/// <param name="documents">Documents to vectorize; the list size is used as the corpus size in the IDF term.</param>
/// <returns>An array with one index-to-weight dictionary per document, in input order.</returns>
public static Dictionary<int, double>[] ReturnTFIDFDicts(List<Document> documents)
{
    // Fixed term ordering: dictionary key i refers to keysList[i].
    List<string> keysList = MegaDictionary.ReturnKeysList();
    int termCount = keysList.Count;

    // IDF depends only on the term and the corpus size, so it is computed once per
    // term instead of once per (document, term) pair.
    // NOTE(review): assumes MegaDictionary.ReturnTermFrequency is side-effect free - confirm.
    double[] idfByTerm = new double[termCount];
    for (int i = 0; i < termCount; i++)
    {
        // Widen to double BEFORE dividing: an integer/integer quotient would be
        // truncated prior to taking the logarithm.
        double termFrequency = MegaDictionary.ReturnTermFrequency(keysList[i]);

        // A non-positive count would otherwise divide by zero; give such terms zero weight.
        idfByTerm[i] = termFrequency > 0 ? Math.Log(documents.Count / termFrequency) : 0;
    }

    Dictionary<int, double>[] listOfDictionaries = new Dictionary<int, double>[documents.Count];
    int counter = 1;
    foreach (var document in documents)
    {
        Debug.WriteLine("TFIDF vector for document #: " + counter);

        // Read the per-document denominator once rather than twice per term.
        var uniqueWordsFreq = document.UniqueWordsFreq();

        Dictionary<int, double> TFIDFDict = new Dictionary<int, double>();
        for (int i = 0; i < termCount; i++)
        {
            // When the denominator is 0 (original comment: the document has no terms) tf is 0.
            double tf = uniqueWordsFreq == 0
                ? 0
                : (double)document.ReturnFrequency(keysList[i]) / uniqueWordsFreq;

            double tfidf = tf * idfByTerm[i];

            // Keep the representation sparse: only non-zero weights are stored.
            if (tfidf != 0)
            {
                TFIDFDict.Add(i, tfidf);
            }
        }

        listOfDictionaries[counter - 1] = TFIDFDict;
        counter++;
    }

    NormalizeDictionaryArray(listOfDictionaries);
    return listOfDictionaries;
}