/// <summary>
        /// Fills <c>tfidfDocMatrix</c> with one row per unique word and one column per page, where each
        /// cell holds tf * idf rounded to two decimals, then factorises that matrix with an SVD and
        /// rebuilds it from the factors.
        /// </summary>
        /// <remarks>
        /// The rebuilt matrix is only held in a local variable: it is neither stored nor returned, so the
        /// sole lasting effect of this method is the assignment to <c>tfidfDocMatrix</c>.
        /// </remarks>
        /// <param name="UniqueWordList">
        /// Words to index. For each word, <c>PagenoWithFrequency</c> is looked up with 1-based page
        /// numbers and <c>DocFrequency</c> is used as the IDF denominator.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="UniqueWordList"/> is null.</exception>
        public void GenerateMatrix(List<UniqueWords> UniqueWordList)
        {
            if (UniqueWordList == null)
            {
                throw new ArgumentNullException("UniqueWordList");
            }

            int totalPages = DocumentInfo.TotalPages;
            tfidfDocMatrix = new DenseMatrix(UniqueWordList.Count, totalPages);

            // Build the tf-idf document matrix.
            for (int i = 0; i < UniqueWordList.Count; i++)
            {
                UniqueWords word = UniqueWordList[i];

                // The IDF depends only on the word, so it is computed once per row rather than per cell.
                double idf = Math.Log10(Convert.ToDouble(totalPages) / word.DocFrequency);

                for (int j = 0; j < totalPages; j++)
                {
                    // Column j corresponds to page j + 1 in the dictionary. Pages the word does not occur
                    // on are skipped, leaving the cell at the value the matrix was created with.
                    int tf;
                    if (word.PagenoWithFrequency.TryGetValue(j + 1, out tf))
                    {
                        tfidfDocMatrix[i, j] = Math.Round(tf * idf, 2);
                    }
                }
            }

            // Decompose the matrix and multiply the factors back together (U * W * VT).
            Svd svd = new DenseSvd(tfidfDocMatrix, true);
            Matrix<double> reconstructed = svd.U() * svd.W() * svd.VT();

            // Flush floating-point noise left by the reconstruction to exactly zero. The cut-off matches
            // the magnitude below which a double is printed in exponent notation, which is what the
            // previous string-based check ("contains E") was detecting for small values.
            const double noiseThreshold = 1e-4;
            for (int row = 0; row < reconstructed.RowCount; row++)
            {
                for (int col = 0; col < reconstructed.ColumnCount; col++)
                {
                    if (Math.Abs(reconstructed[row, col]) < noiseThreshold)
                    {
                        reconstructed[row, col] = 0;
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Procrustes statistic: a (dis)similarity measure between two sets of points, obtained after removing
        /// the translation, rotation and dilation (stretching) degrees of freedom.
        /// A result of zero means the two sets of points are basically the same once B has been translated,
        /// rotated and dilated with the fitted transformation.
        /// Reference: Modern Multidimensional Scaling, Theory and Applications, page 436, Procrustes Analysis
        /// </summary>
        /// <param name="A">
        /// Target configuration. Its coordinates are divided by the larger side of its bounding box before
        /// fitting (skipped when all points coincide, to avoid dividing by zero).
        /// </param>
        /// <param name="B">Configuration that is fitted onto <paramref name="A"/>; must have the same number of points.</param>
        /// <returns>
        /// A tuple of the label "ProcrustesStatistics" and the square root of the summed squared distances
        /// between the points of A and the transformed points of B.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="A"/> or <paramref name="B"/> is null.</exception>
        /// <exception cref="ArgumentException">The lists are empty or differ in length.</exception>
        public static Tuple<String, double> ProcrustesStatistics(List<Point> A, List<Point> B)
        {
            if (A == null)
            {
                throw new ArgumentNullException("A");
            }
            if (B == null)
            {
                throw new ArgumentNullException("B");
            }
            if (A.Count != B.Count)
            {
                // Points are paired by index; with unequal lengths the centroid of B would be taken over
                // points that never enter the matrices below.
                throw new ArgumentException("Both point sets must contain the same number of points.", "B");
            }
            if (A.Count == 0)
            {
                throw new ArgumentException("The point sets must not be empty.", "A");
            }

            int n = A.Count;

            // Make A unit length: scale by the larger side of its bounding box.
            double deltaX = A.Max(p => p.X) - A.Min(p => p.X);
            double deltaY = A.Max(p => p.Y) - A.Min(p => p.Y);
            double scale = Math.Max(deltaX, deltaY);
            if (scale > 0)
            {
                A = A.Select(p => new Point(p.X / scale, p.Y / scale)).ToList();
            }

            var centerA = new Point(A.Average(a => a.X), A.Average(a => a.Y));
            var centerB = new Point(B.Average(b => b.X), B.Average(b => b.Y));

            Matrix X = DenseMatrix.Create(n, 2, (i, j) => j == 0 ? A[i].X : A[i].Y);
            Matrix Y = DenseMatrix.Create(n, 2, (i, j) => j == 0 ? B[i].X : B[i].Y);

            // Column-centred copies of X and Y.
            Matrix Xc = DenseMatrix.Create(n, 2, (i, j) => j == 0 ? A[i].X - centerA.X : A[i].Y - centerA.Y);
            Matrix Yc = DenseMatrix.Create(n, 2, (i, j) => j == 0 ? B[i].X - centerB.X : B[i].Y - centerB.Y);

            // Reference: Modern Multidimensional Scaling, Theory and Applications, page 436, Procrustes Analysis
            DenseMatrix C = (DenseMatrix) (Xc.Transpose() * Y);

            Svd svd = new DenseSvd(C, true);

            // Rotation
            Matrix<double> T = (svd.VT().Transpose()) * (svd.U().Transpose());

            // Dilation. The denominator is zero when every point of B coincides; no dilation can be fitted
            // in that case, so fall back to 0 instead of producing NaN.
            double dilationDenominator = (Yc.Transpose() * Y).Trace();
            double s = dilationDenominator != 0.0 ? ((C * T).Trace()) / dilationDenominator : 0.0;

            // B after rotation and dilation; used for both the translation and the final residual.
            Matrix<double> rotatedScaledB = s * Y * T;

            // Column vector with n times 1
            Vector<double> vector1 = DenseVector.Create(n, i => 1.0);

            // Translation vector
            Vector<double> t = (1.0 / n) * (X - rotatedScaledB).Transpose() * vector1;

            // Repeat the translation vector on every row so it can be added to the n x 2 configuration.
            Matrix translationMatrix = DenseMatrix.Create(n, 2, (i, j) => t.At(j));

            Matrix<double> YPrime = rotatedScaledB + translationMatrix;
            Matrix<double> delta = X - YPrime;

            // Sum of squared residuals over all points (row dot row).
            double rSquare = 0;
            for (int i = 0; i < n; i++)
            {
                rSquare += delta.Row(i) * delta.Row(i);
            }

            return Tuple.Create("ProcrustesStatistics", Math.Sqrt(rSquare));
        }