Exemple #1
0
        /// <summary>
        /// Randomly splits the vectors of <paramref name="bow"/> into a training set
        /// (80% of the vectors, rounded down) and a test set (all remaining vectors).
        /// </summary>
        /// <param name="bow">Bag of words whose vector list is split.</param>
        /// <returns>
        /// A <see cref="TestClass"/> built from the training list (in random pick order)
        /// and the test list (in the original list order).
        /// </returns>
        public static TestClass CreateVectorTest(BagOfWords bow)
        {
            const double trainingFraction = 0.8;

            Random random     = new Random();
            var    allVectors = bow.GetVectorsList();

            int count         = allVectors.Count;
            int trainingCount = (int)(trainingFraction * count);

            // Select by position rather than by value: a value-based "already picked?"
            // check never finishes when the list contains equal vectors, and it costs
            // O(n^2) comparisons even when it does finish.
            int[] positions = new int[count];
            for (int i = 0; i < count; i++)
            {
                positions[i] = i;
            }

            bool[] isTraining      = new bool[count];
            var    trainingVectors = new List<myVector>(trainingCount);

            // Partial Fisher-Yates shuffle: each step draws one not-yet-picked position
            // uniformly at random, so exactly trainingCount distinct positions are chosen.
            for (int i = 0; i < trainingCount; i++)
            {
                int swapWith = random.Next(i, count);
                int picked   = positions[swapWith];
                positions[swapWith] = positions[i];
                positions[i]        = picked;

                isTraining[picked] = true;
                trainingVectors.Add(allVectors[picked]);
            }

            // Everything that was not drawn goes to the test set, keeping list order.
            var testVectors = new List<myVector>(count - trainingCount);
            for (int i = 0; i < count; i++)
            {
                if (!isTraining[i])
                {
                    testVectors.Add(allVectors[i]);
                }
            }

            return new TestClass(trainingVectors, testVectors);
        }
Exemple #2
0
        /// <summary>
        /// Computes a TF-IDF weight for every word in every vector of <paramref name="bow"/>
        /// and removes from the bag each word whose weight stays below a fixed threshold
        /// in all vectors.
        /// </summary>
        /// <remarks>
        /// The values returned by each vector's <c>GetVector()</c> are used as term
        /// frequencies. The number of words examined is taken from the first vector;
        /// every other vector is indexed with the same positions, so all vectors are
        /// assumed to be at least that long (TODO confirm against BagOfWords).
        /// </remarks>
        /// <param name="bow">Bag of words to prune; modified through <c>RemoveWords</c>.</param>
        public static void CalculateTFIDF(BagOfWords bow)
        {
            // Words whose TF-IDF never reaches this value are dropped.
            // (Original note, translated from Polish: "change here" - tune this cut-off.)
            const double threshold = 5.32;

            var vectors     = bow.GetVectorsList();
            int vectorCount = vectors.Count;
            if (vectorCount == 0)
            {
                // Nothing to weigh; indexing the first vector below would throw.
                return;
            }

            // Fetch each vector's term-frequency list once instead of once per word.
            var termFrequencies = new List<List<double>>(vectorCount);
            foreach (var vector in vectors)
            {
                termFrequencies.Add(vector.GetVector());
            }

            int wordCount     = termFrequencies[0].Count;
            var words         = bow.GetWordsList();
            var wordsToRemove = new List<string>();

            for (int wordIndex = 0; wordIndex < wordCount; wordIndex++)
            {
                // Document frequency: number of vectors in which the word occurs.
                // It depends only on the word, so it is computed once per word.
                int documentFrequency = 0;
                foreach (List<double> frequencies in termFrequencies)
                {
                    if (frequencies[wordIndex] > 0)
                    {
                        documentFrequency++;
                    }
                }

                // Floating-point division: integer division would truncate the ratio
                // before the logarithm. A word that occurs nowhere gets IDF 0 rather
                // than a division by zero.
                double idf = documentFrequency == 0
                    ? 0.0
                    : Math.Log((double)vectorCount / documentFrequency);

                // Keep the word as soon as one vector reaches the threshold.
                bool belowThresholdEverywhere = true;
                foreach (List<double> frequencies in termFrequencies)
                {
                    if (idf * frequencies[wordIndex] >= threshold)
                    {
                        belowThresholdEverywhere = false;
                        break;
                    }
                }

                if (belowThresholdEverywhere)
                {
                    wordsToRemove.Add(words.ElementAt(wordIndex));
                }
            }

            bow.RemoveWords(wordsToRemove);
        }
Exemple #3
0
        /// <summary>
        /// Adds every vector of <paramref name="BoW"/> to each class in
        /// <paramref name="Classes"/> whose name occurs in the vector's name.
        /// </summary>
        /// <param name="Classes">Classes to fill; their elements are modified via <c>AddVector</c>.</param>
        /// <param name="BoW">Bag of words supplying the vectors.</param>
        /// <returns>The same <paramref name="Classes"/> list instance that was passed in.</returns>
        public static List<DataClass> CreateFullSet(List<DataClass> Classes, BagOfWords BoW)
        {
            foreach (myVector vector in BoW.GetVectorsList())
            {
                string vectorName = vector.GetVectorName();

                foreach (DataClass dataClass in Classes)
                {
                    // NOTE(review): this is a substring match, so one vector can be added
                    // to several classes when a class name occurs inside another vector's
                    // name - confirm that this is intended.
                    if (vectorName.Contains(dataClass.GetName()))
                    {
                        dataClass.AddVector(vector);
                    }
                }
            }

            return Classes;
        }
Exemple #4
0
        /// <summary>
        /// Builds one <see cref="DataClass"/> per distinct class name found in
        /// <paramref name="BoW"/>. A vector's class name is the part of its name that
        /// precedes the first underscore.
        /// </summary>
        /// <param name="BoW">Bag of words whose vector names are scanned.</param>
        /// <returns>The new classes, ordered by first appearance of their name.</returns>
        public static List<DataClass> CreateDataClasses(BagOfWords BoW)
        {
            var seenNames    = new HashSet<string>();
            var orderedNames = new List<string>();

            foreach (myVector vector in BoW.GetVectorsList())
            {
                // Split always yields at least one element, so index 0 is safe.
                string prefix = vector.GetVectorName().Split('_')[0];

                // Add returns false for a name that was already recorded.
                if (seenNames.Add(prefix))
                {
                    orderedNames.Add(prefix);
                }
            }

            return orderedNames.ConvertAll(name => new DataClass(name));
        }