/// <summary>
/// Randomly splits the vectors of <paramref name="bow"/> into a training set
/// (80 % of the vectors, rounded down) and a test set (all remaining vectors).
/// </summary>
/// <param name="bow">Bag of words whose vector list is split.</param>
/// <returns>
/// A <see cref="TestClass"/> built from the training vectors (in random order)
/// and the test vectors (in their original order).
/// </returns>
public static TestClass CreateVectorTest(BagOfWords bow)
{
    var allVectors = bow.GetVectorsList();
    int count = allVectors.Count;
    int trainingCount = (int)(0.8 * count);

    var random = new Random();

    // Work on positions instead of on the vectors themselves: the split then
    // never depends on how myVector implements equality, always terminates,
    // and costs O(n) instead of repeated linear Contains scans.
    var positions = new int[count];
    for (int i = 0; i < count; i++)
    {
        positions[i] = i;
    }

    var isTraining = new bool[count];
    var trainingVectors = new List<myVector>(trainingCount);

    // Partial Fisher-Yates shuffle: after step i, positions[0..i] hold a
    // uniformly random selection of distinct positions.
    for (int i = 0; i < trainingCount; i++)
    {
        int pick = random.Next(i, count); // upper bound is exclusive
        int chosen = positions[pick];
        positions[pick] = positions[i];
        positions[i] = chosen;

        isTraining[chosen] = true;
        trainingVectors.Add(allVectors[chosen]);
    }

    // Everything that was not drawn for training becomes test data,
    // kept in the order it appears in the bag of words.
    var testVectors = new List<myVector>(count - trainingCount);
    for (int i = 0; i < count; i++)
    {
        if (!isTraining[i])
        {
            testVectors.Add(allVectors[i]);
        }
    }

    return new TestClass(trainingVectors, testVectors);
}
/// <summary>
/// Computes a TF-IDF weight for every (document, word) pair of
/// <paramref name="bow"/> and removes from the bag every word whose weight
/// stays below a fixed threshold in all documents.
/// </summary>
/// <remarks>
/// Each vector entry is used as the term frequency. The inverse document
/// frequency is <c>ln(documentCount / documentsContainingTheWord)</c>,
/// computed in floating point. A word that occurs in no document gets a
/// weight of 0. When the bag holds no vectors the method returns without
/// touching it.
/// </remarks>
/// <param name="bow">Bag of words to prune; modified through <c>RemoveWords</c>.</param>
public static void CalculateTFIDF(BagOfWords bow)
{
    // Tuning value left by the original author (marked "change here"):
    // a word survives only if some document scores at least this much.
    const double threshold = 5.32;

    var vectors = bow.GetVectorsList();
    if (vectors.Count == 0)
    {
        return; // no documents, so there is nothing to weigh or remove
    }

    int documentCount = vectors.Count;

    // NOTE(review): assumes every vector has the same length as the first
    // one (one entry per word in the bag) - confirm against BagOfWords.
    int termCount = vectors[0].GetVector().Count;

    // Document frequency depends only on the word, so compute it once
    // instead of once per document.
    var documentFrequency = new int[termCount];
    foreach (var vector in vectors)
    {
        List<double> frequencies = vector.GetVector();
        for (int term = 0; term < termCount; term++)
        {
            if (frequencies[term] > 0)
            {
                documentFrequency[term]++;
            }
        }
    }

    // A word is kept as soon as one document reaches the threshold.
    var keep = new bool[termCount];
    foreach (var vector in vectors)
    {
        List<double> frequencies = vector.GetVector();
        for (int term = 0; term < termCount; term++)
        {
            if (keep[term] || documentFrequency[term] == 0)
            {
                // Already kept, or absent from every document (weight 0).
                continue;
            }

            // Cast before dividing: int / int would truncate the ratio.
            double idf = Math.Log((double)documentCount / documentFrequency[term]);
            if (idf * frequencies[term] >= threshold)
            {
                keep[term] = true;
            }
        }
    }

    // Translate the surviving "remove" positions into words, in ascending
    // position order.
    var words = bow.GetWordsList();
    List<string> wordsToRemove = new List<string>();
    for (int term = 0; term < termCount; term++)
    {
        if (!keep[term])
        {
            wordsToRemove.Add(words.ElementAt(term));
        }
    }

    bow.RemoveWords(wordsToRemove);
}
/// <summary>
/// Adds every vector of <paramref name="BoW"/> to each class in
/// <paramref name="Classes"/> whose name occurs inside the vector's name.
/// </summary>
/// <param name="Classes">Classes to fill; each matching class receives the vector via <c>AddVector</c>.</param>
/// <param name="BoW">Bag of words supplying the vectors.</param>
/// <returns>The same <paramref name="Classes"/> list instance that was passed in.</returns>
public static List<DataClass> CreateFullSet(List<DataClass> Classes, BagOfWords BoW)
{
    foreach (myVector vector in BoW.GetVectorsList())
    {
        string vectorName = vector.GetVectorName();

        foreach (DataClass dataClass in Classes)
        {
            // Substring match: a vector may land in more than one class
            // when one class name is contained in another.
            if (vectorName.Contains(dataClass.GetName()))
            {
                dataClass.AddVector(vector);
            }
        }
    }

    return Classes;
}
/// <summary>
/// Builds one <see cref="DataClass"/> per distinct class label found in the
/// vectors of <paramref name="BoW"/>. The label of a vector is the part of
/// its name before the first underscore.
/// </summary>
/// <param name="BoW">Bag of words whose vector names are inspected.</param>
/// <returns>The new classes, ordered by first appearance of their label.</returns>
public static List<DataClass> CreateDataClasses(BagOfWords BoW)
{
    var distinctLabels = new List<String>();

    foreach (myVector vector in BoW.GetVectorsList())
    {
        // Split always yields at least one element, so [0] is the whole
        // name when it contains no underscore.
        string label = vector.GetVectorName().Split('_')[0];

        if (distinctLabels.IndexOf(label) < 0)
        {
            distinctLabels.Add(label);
        }
    }

    // Classes are created only after all labels have been collected.
    return distinctLabels.ConvertAll(label => new DataClass(label));
}