Exemplo n.º 1
0
        /// <summary>
        /// Procrustes statistic: a dissimilarity measure between two point sets after removing the
        /// translation, rotation and dilation (uniform stretching) degrees of freedom.
        /// <paramref name="A"/> is first divided by the larger side of its bounding box, then
        /// <paramref name="B"/> is fitted onto it as <c>s * B * T + t</c>. The result is the square root
        /// of the summed squared residuals; zero means the two sets coincide after the fit.
        /// Reference: Modern Multidimensional Scaling, Theory and Applications, page 436, Procrustes Analysis
        /// </summary>
        /// <param name="A">Target configuration. Must contain at least one point.</param>
        /// <param name="B">Configuration fitted onto <paramref name="A"/>; point i is matched with A[i], so it must have the same number of points.</param>
        /// <returns>A tuple holding the label "ProcrustesStatistics" and the residual after the fit.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="A"/> or <paramref name="B"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="A"/> is empty, or the two lists differ in length.</exception>
        public static Tuple <String, double> ProcrustesStatistics(List <Point> A, List <Point> B)
        {
            if (A == null)
            {
                throw new ArgumentNullException("A");
            }
            if (B == null)
            {
                throw new ArgumentNullException("B");
            }
            if (A.Count == 0)
            {
                throw new ArgumentException("At least one point is required.", "A");
            }
            if (A.Count != B.Count)
            {
                // Points are matched by index; a length mismatch would otherwise either throw from the
                // list indexer or mix a centroid over all of B with a matrix built from its first n rows.
                throw new ArgumentException("Both point sets must contain the same number of points.", "B");
            }

            int n = A.Count;

            // Normalise A by the larger side of its bounding box.
            double deltaX = A.Max(p => p.X) - A.Min(p => p.X);
            double deltaY = A.Max(p => p.Y) - A.Min(p => p.Y);
            double scale  = Math.Max(deltaX, deltaY);

            // When every point of A coincides the extent is zero; skip the division instead of producing NaN.
            List <Point> target = scale > 0
                ? A.Select(p => new Point(p.X / scale, p.Y / scale)).ToList()
                : A;

            var centerA = new Point(target.Average(a => a.X), target.Average(a => a.Y));
            var centerB = new Point(B.Average(b => b.X), B.Average(b => b.Y));

            Matrix X = DenseMatrix.Create(n, 2, (i, j) => j == 0 ? target[i].X : target[i].Y);
            Matrix Y = DenseMatrix.Create(n, 2, (i, j) => j == 0 ? B[i].X : B[i].Y);

            // Column-centred copies of X and Y.
            Matrix Xc = DenseMatrix.Create(n, 2, (i, j) => j == 0 ? target[i].X - centerA.X : target[i].Y - centerA.Y);
            Matrix Yc = DenseMatrix.Create(n, 2, (i, j) => j == 0 ? B[i].X - centerB.X : B[i].Y - centerB.Y);

            //Reference: Modern Multidimensional Scaling, Theory and Applications, page 436, Procrustes Analysis
            DenseMatrix C = (DenseMatrix)(Xc.Transpose() * Y);

            Svd svd = new DenseSvd(C, true);
            //rotation
            Matrix <double> T = (svd.VT().Transpose()) * (svd.U().Transpose());
            //dilation; the denominator is zero when every point of B coincides, in which case any
            //dilation gives the same fit, so 0 is used rather than dividing by zero.
            double dilationDenominator = (Yc.Transpose() * Y).Trace();
            double s = dilationDenominator > 0 ? ((C * T).Trace()) / dilationDenominator : 0.0;
            //column Vector with n times 1
            Vector <double> vector1 = DenseVector.Create(n, i => 1);
            //translation vector
            Vector <double> t = (1.0 / n) * (X - s * Y * T).Transpose() * vector1;

            // Repeat the translation vector on every row so it can be added to the n x 2 matrix.
            Matrix translationMatrix = DenseMatrix.Create(n, 2, (i, j) => t.At(j));

            Matrix <double> YPrime = s * Y * T + translationMatrix;
            Matrix <double> delta  = X - YPrime;

            // Sum of squared residuals over all rows.
            double rSquare = 0;

            for (int i = 0; i < n; i++)
            {
                rSquare += delta.Row(i) * delta.Row(i);
            }
            return(Tuple.Create("ProcrustesStatistics", Math.Sqrt(rSquare)));
        }
        /// <summary>
        /// Fills <c>tfidfDocMatrix</c> with one row per unique word and one column per page, where each
        /// cell is tf * idf rounded to two decimals (cells for pages the word does not occur on stay 0).
        /// Afterwards the matrix is decomposed with an SVD and rebuilt from its factors.
        /// </summary>
        /// <param name="UniqueWordList">Words with their per-page frequencies (keyed by 1-based page number) and document frequency.</param>
        /// <exception cref="ArgumentNullException"><paramref name="UniqueWordList"/> is null.</exception>
        public void GenerateMatrix(List <UniqueWords> UniqueWordList)
        {
            if (UniqueWordList == null)
            {
                throw new ArgumentNullException("UniqueWordList");
            }

            int wordCount = UniqueWordList.Count;
            int pageCount = DocumentInfo.TotalPages;

            tfidfDocMatrix = new DenseMatrix(wordCount, pageCount);

            // Build the tf-idf document matrix. Values are raw tf * idf; they are not normalised by page length.
            for (int i = 0; i < wordCount; i++)
            {
                UniqueWords word = UniqueWordList[i];

                // IDF depends only on the word, so compute it once per row.
                double idf = Math.Log10(Convert.ToDouble(pageCount) / word.DocFrequency);

                for (int j = 0; j < pageCount; j++)
                {
                    // Page numbers in the dictionary are 1-based; absent pages keep the initial 0.
                    int tf;
                    if (!word.PagenoWithFrequency.TryGetValue(j + 1, out tf))
                    {
                        continue;
                    }

                    tfidfDocMatrix[i, j] = Math.Round(tf * idf, 2);
                }
            }

            // Decompose and rebuild the matrix as U * W * VT.
            // NOTE(review): the reconstructed matrix is local and nothing in this method consumes it;
            // confirm whether it should be stored or returned, or whether this step can be dropped.
            Svd svd = new DenseSvd(tfidfDocMatrix, true);

            Matrix <double> leftVectors      = svd.U();
            Matrix <double> singularValues   = svd.W();
            Matrix <double> rightVectorsT    = svd.VT();
            Matrix <double> reconstructed    = leftVectors * singularValues * rightVectorsT;

            // Flush numerical noise to exactly zero. Compared numerically instead of looking for an
            // "E" in the formatted value, which also matched very large numbers and allocated a string per cell.
            const double noiseThreshold = 1e-4;

            for (int i = 0; i < reconstructed.RowCount; i++)
            {
                for (int j = 0; j < reconstructed.ColumnCount; j++)
                {
                    if (Math.Abs(reconstructed[i, j]) < noiseThreshold)
                    {
                        reconstructed[i, j] = 0;
                    }
                }
            }
        }