Пример #1
0
        /// <summary>
        /// Partial least squares (PLS) decomposition of the matrices X and Y.
        /// The decomposition is done factor by factor with an iterative (NIPALS-style) scheme;
        /// after each factor the working copies of X and Y are replaced by their residuals.
        /// </summary>
        /// <param name="_X">The X ("spectrum") matrix, centered and preprocessed. Each spectrum is a row. Not modified.</param>
        /// <param name="_Y">The Y ("concentration") matrix (centered). Each mixture is a row. Not modified.</param>
        /// <param name="numFactors">
        /// On input: the number of factors to calculate; a value less than or equal to zero requests the maximum.
        /// On return: the number of factors actually calculated, which is limited to min(rows of X, columns of X).
        /// </param>
        /// <param name="xLoads">Receives one row of X loads per factor. Should be initially empty.</param>
        /// <param name="yLoads">Receives one row of (unit length) Y loads per factor. Should be initially empty.</param>
        /// <param name="W">Receives one row of (unit length) weighting values per factor. Should be initially empty.</param>
        /// <param name="V">Receives one column (a scalar cross product) per factor. Should be initially empty.</param>
        /// <param name="PRESS">
        /// If not null, the sum of squares of the Y residual is appended here: first for the undecomposed Y,
        /// then once after each factor.
        /// </param>
        public static void ExecuteAnalysis(
            IROMatrix _X,                   // matrix of spectra (a spectra is a row of this matrix)
            IROMatrix _Y,                   // matrix of concentrations (a mixture is a row of this matrix)
            ref int numFactors,
            IBottomExtensibleMatrix xLoads, // out: the loads of the X matrix
            IBottomExtensibleMatrix yLoads, // out: the loads of the Y matrix
            IBottomExtensibleMatrix W,      // matrix of weighting values
            IRightExtensibleMatrix V,       // matrix of cross products
            IExtensibleVector PRESS         //vector of Y PRESS values
            )
        {
            // used variables:
            // n: number of spectra (number of tests, number of experiments)
            // p: number of slots (frequencies, ..) in each spectrum
            // m: number of constituents (number of y values in each measurement)

            // X : n-p matrix of spectra (each spectrum is a horizontal row)
            // Y : n-m matrix of concentrations

            const int    maxIterations = 1500;  // max number of iterations in one factorization step
            const double accuracy      = 1E-12; // accuracy that should be reached between subsequent calculations of the u-vector

            // Work on private copies, because X and Y are overwritten with their residuals below.
            IMatrix X = new MatrixMath.BEMatrix(_X.Rows, _X.Columns);
            MatrixMath.Copy(_X, X);

            IMatrix Y = new MatrixMath.BEMatrix(_Y.Rows, _Y.Columns);
            MatrixMath.Copy(_Y, Y);

            IMatrix w      = new MatrixMath.HorizontalVector(X.Columns); // horizontal vector of X (spectral) weighting
            IMatrix t      = new MatrixMath.VerticalVector(X.Rows);      // vertical vector of X  scores
            IMatrix u      = new MatrixMath.VerticalVector(X.Rows);      // vertical vector of Y scores
            IMatrix u_prev = new MatrixMath.VerticalVector(X.Rows);      // u of the previous iteration (same factor only)
            IMatrix p      = new MatrixMath.HorizontalVector(X.Columns); // horizontal vector of X loads
            IMatrix q      = new MatrixMath.HorizontalVector(Y.Columns); // horizontal vector of Y loads

            int maxFactors = Math.Min(X.Columns, X.Rows);

            numFactors = numFactors <= 0 ? maxFactors : Math.Min(numFactors, maxFactors);

            if (PRESS != null)
            {
                PRESS.Append(new MatrixMath.Scalar(MatrixMath.SumOfSquares(Y))); // Press value for not decomposed Y
            }

            for (int nFactor = 0; nFactor < numFactors; nFactor++)
            {
                // 1. Use as start vector for the y score the first column of the
                // y-matrix
                MatrixMath.Submatrix(Y, u); // u is now a vertical vector of concentrations of the first constituents

                for (int iter = 0; iter < maxIterations; iter++)
                {
                    // 2. Calculate the X (spectrum) weighting vector
                    MatrixMath.MultiplyFirstTransposed(u, X, w); // w is a horizontal vector

                    // 3. Normalize w to unit length
                    MatrixMath.NormalizeRows(w); // w now has unit length

                    // 4. Calculate X (spectral) scores
                    MatrixMath.MultiplySecondTransposed(X, w, t); // t is a vertical vector of n numbers

                    // 5. Calculate the Y (concentration) loading vector
                    MatrixMath.MultiplyFirstTransposed(t, Y, q); // q is a horizontal vector of m (number of constituents)

                    // 5.1 Normalize q to unit length
                    MatrixMath.NormalizeRows(q);

                    // 6. Calculate the Y (concentration) score vector u
                    MatrixMath.MultiplySecondTransposed(Y, q, u); // u is a vertical vector of n numbers

                    // 6.1 Compare u with the one of the previous iteration.
                    // The comparison is skipped in the first iteration of every factor: at that point
                    // u_prev still holds the result of the previous factor (or nothing at all),
                    // which says nothing about the convergence of the current factor.
                    if (iter > 0 && MatrixMath.IsEqual(u_prev, u, accuracy))
                    {
                        break;
                    }
                    MatrixMath.Copy(u, u_prev); // stores the content of u in u_prev
                } // for all iterations

                // 7. Calculate the inner scalar (cross product)
                double            length_of_t = MatrixMath.LengthOf(t);
                MatrixMath.Scalar v           = new MatrixMath.Scalar(0);
                MatrixMath.MultiplyFirstTransposed(u, t, v);
                if (length_of_t != 0) // guard against division by zero for a vanishing score vector
                {
                    v = v / MatrixMath.Square(length_of_t);
                }

                // 8. Calculate the new loads for the X (spectral) matrix
                MatrixMath.MultiplyFirstTransposed(t, X, p); // p is a horizontal vector of loads

                // Normalize p by the squared length of the spectral scores
                if (length_of_t != 0)
                {
                    MatrixMath.MultiplyScalar(p, 1 / MatrixMath.Square(length_of_t), p);
                }

                // 9. Calculate the new residua for the X (spectral) and Y (concentration) matrix
                MatrixMath.SubtractProductFromSelf(t, p, X);

                // Note: t is overwritten here; it must not be used as the X score after this line.
                MatrixMath.MultiplyScalar(t, v, t);          // original t times the cross product
                MatrixMath.SubtractProductFromSelf(t, q, Y); // to calculate residual Y

                // Store the loads of X and Y in the output result matrix
                xLoads.AppendBottom(p);
                yLoads.AppendBottom(q);
                W.AppendBottom(w);
                V.AppendRight(v);

                if (PRESS != null)
                {
                    double pressValue = MatrixMath.SumOfSquares(Y);
                    PRESS.Append(new MatrixMath.Scalar(pressValue));
                }
                // The calculation is repeated with the residual matrices for the next factor.
            } // for all factors
        }
Пример #2
0
        /// <summary>
        /// Predicts y-values (and optionally spectral residuals) for unknown spectra,
        /// using the loads, weights and cross products of a PLS decomposition.
        /// </summary>
        /// <param name="XU">Unknown spectrum or spectra, horizontally oriented (one spectrum per row).</param>
        /// <param name="xLoads">X-loads matrix; row i is used for factor i.</param>
        /// <param name="yLoads">Y-loads matrix; row i is used for factor i.</param>
        /// <param name="W">Weighting matrix; row i is used for factor i.</param>
        /// <param name="V">Cross products; element [0, i] is used for factor i.</param>
        /// <param name="numFactors">Number of factors to use for prediction; limited to the number of rows of <paramref name="yLoads"/>.</param>
        /// <param name="predictedY">If not null, receives one row of predicted y-values per spectrum.</param>
        /// <param name="spectralResiduals">If not null, receives in column 0 the sum of squares of the residual spectrum, one row per spectrum.</param>
        public static void Predict(
            IROMatrix XU,
            IROMatrix xLoads,
            IROMatrix yLoads,
            IROMatrix W,
            IROMatrix V,
            int numFactors,
            IMatrix predictedY,
            IMatrix spectralResiduals
            )
        {
            // Scratch objects, allocated once and reused for every spectrum and factor.
            MatrixMath.Scalar           score         = new MatrixMath.Scalar(0);                       // spectral score of the current factor
            MatrixMath.HorizontalVector predicted     = new MatrixMath.HorizontalVector(yLoads.Columns); // accumulated y prediction
            MatrixMath.HorizontalVector weightRow     = new MatrixMath.HorizontalVector(XU.Columns);     // current row of W
            MatrixMath.HorizontalVector yContribution = new MatrixMath.HorizontalVector(yLoads.Columns); // current row of yLoads, scaled
            MatrixMath.HorizontalVector spectrum      = new MatrixMath.HorizontalVector(XU.Columns);     // current spectrum, deflated in place
            MatrixMath.HorizontalVector xLoadRow      = new MatrixMath.HorizontalVector(xLoads.Columns); // current row of xLoads

            int factorCount = Math.Min(yLoads.Rows, numFactors);

            for (int row = 0; row < XU.Rows; row++)
            {
                // Start with the raw spectrum and a zero prediction.
                MatrixMath.Submatrix(XU, spectrum, row, 0);
                MatrixMath.ZeroMatrix(predicted);

                for (int factor = 0; factor < factorCount; factor++)
                {
                    // Score of the (already deflated) spectrum for this factor's weighting vector.
                    MatrixMath.Submatrix(W, weightRow, factor, 0);
                    MatrixMath.MultiplySecondTransposed(weightRow, spectrum, score);

                    // Y loading vector, multiplied by the cross product and the score,
                    // is this factor's contribution to the prediction.
                    MatrixMath.Submatrix(yLoads, yContribution, factor, 0);
                    MatrixMath.MultiplyScalar(yContribution, score * V[0, factor], yContribution);
                    MatrixMath.Add(predicted, yContribution, predicted);

                    // Remove this factor's spectral contribution from the spectrum.
                    // TODO: extracting the x-load row on every pass is inefficient; find a shortcut.
                    MatrixMath.Submatrix(xLoads, xLoadRow, factor, 0);
                    MatrixMath.SubtractProductFromSelf(xLoadRow, (double)score, spectrum);
                }

                // At this point 'spectrum' holds the spectral residual and 'predicted' the predicted y-values.
                if (predictedY != null)
                {
                    MatrixMath.SetRow(predicted, 0, predictedY, row);
                }

                if (spectralResiduals != null)
                {
                    spectralResiduals[row, 0] = MatrixMath.SumOfSquares(spectrum);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Makes a PCA (a principal component analysis) of the table or the selected columns / rows and stores the results in a newly created table.
        /// </summary>
        /// <param name="mainDocument">The main document of the application. The result table is added to its table collection.</param>
        /// <param name="srctable">The table where the data come from.</param>
        /// <param name="selectedColumns">The selected columns. If null or empty, all columns of the table are considered.</param>
        /// <param name="selectedRows">The selected rows. If null or empty, all rows up to the longest numeric column are used.</param>
        /// <param name="bHorizontalOrientedSpectrum">True if a spectrum is a single row, False if a spectrum is a single column.</param>
        /// <param name="maxNumberOfFactors">The maximum number of factors to calculate.</param>
        /// <returns>Null if the analysis was carried out; otherwise a message describing why it could not be done.</returns>
        public static string PrincipalComponentAnalysis(
            Altaxo.AltaxoDocument mainDocument,
            Altaxo.Data.DataTable srctable,
            IAscendingIntegerCollection selectedColumns,
            IAscendingIntegerCollection selectedRows,
            bool bHorizontalOrientedSpectrum,
            int maxNumberOfFactors
            )
        {
            bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count);
            int  prenumcols          = bUseSelectedColumns ? selectedColumns.Count : srctable.DataColumns.ColumnCount;

            // check for the number of numeric columns
            // (the table index must be used here, not the position within the selection)
            int numcols = 0;

            for (int i = 0; i < prenumcols; i++)
            {
                int idx = bUseSelectedColumns ? selectedColumns[i] : i;
                if (srctable[idx] is Altaxo.Data.INumericColumn)
                {
                    numcols++;
                }
            }

            // check the number of rows
            bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count);

            int numrows;

            if (bUseSelectedRows)
            {
                numrows = selectedRows.Count;
            }
            else
            {
                // use the length of the longest numeric column; all candidate columns have to be
                // visited, since numeric and non-numeric columns may be mixed
                numrows = 0;
                for (int i = 0; i < prenumcols; i++)
                {
                    int idx = bUseSelectedColumns ? selectedColumns[i] : i;
                    if (srctable[idx] is Altaxo.Data.INumericColumn)
                    {
                        numrows = Math.Max(numrows, srctable[idx].Count);
                    }
                }
            }

            // check that both dimensions are at least 2 - otherwise PCA is not possible
            if (numrows < 2)
            {
                return("At least two rows are neccessary to do Principal Component Analysis!");
            }
            if (numcols < 2)
            {
                return("At least two numeric columns are neccessary to do Principal Component Analysis!");
            }

            // Create a matrix of appropriate dimensions and fill it.
            // Horizontal oriented spectrum: table rows become matrix rows.
            // Vertical oriented spectrum: table columns become matrix rows (the data are transposed).
            MatrixMath.BEMatrix matrixX = bHorizontalOrientedSpectrum
                ? new MatrixMath.BEMatrix(numrows, numcols)
                : new MatrixMath.BEMatrix(numcols, numrows);

            int ccol = 0; // index of the current numeric column (non-numeric columns are skipped)
            for (int i = 0; i < prenumcols; i++)
            {
                int colidx = bUseSelectedColumns ? selectedColumns[i] : i;
                Altaxo.Data.INumericColumn col = srctable[colidx] as Altaxo.Data.INumericColumn;
                if (null == col)
                {
                    continue;
                }

                for (int j = 0; j < numrows; j++)
                {
                    int rowidx = bUseSelectedRows ? selectedRows[j] : j;
                    if (bHorizontalOrientedSpectrum)
                    {
                        matrixX[j, ccol] = col[rowidx];
                    }
                    else
                    {
                        matrixX[ccol, j] = col[rowidx];
                    }
                }
                ++ccol;
            }

            // now do PCA with the matrix
            MatrixMath.REMatrix         factors           = new MatrixMath.REMatrix(0, 0);
            MatrixMath.BEMatrix         loads             = new MatrixMath.BEMatrix(0, 0);
            MatrixMath.BEMatrix         residualVariances = new MatrixMath.BEMatrix(0, 0);
            MatrixMath.HorizontalVector meanX             = new MatrixMath.HorizontalVector(matrixX.Columns);
            // first, center the matrix
            MatrixMath.ColumnsToZeroMean(matrixX, meanX);
            MatrixMath.NIPALS_HO(matrixX, maxNumberOfFactors, 1E-9, factors, loads, residualVariances);

            // now we have to create a new table where to place the calculated factors and loads
            // we will do that in a vertical oriented manner, i.e. even if the loads are
            // here in horizontal vectors: in our table they are stored in (vertical) columns
            Altaxo.Data.DataTable table = new Altaxo.Data.DataTable("PCA of " + srctable.Name);

            // Fill the Table
            table.Suspend();

            // first of all store the meanscore (the length of the mean vector, repeated for every factor row)
            {
                double meanScore = MatrixMath.LengthOf(meanX);
                MatrixMath.NormalizeRows(meanX); // meanX is stored below as "MeanLoad" in normalized form

                Altaxo.Data.DoubleColumn col = new Altaxo.Data.DoubleColumn();
                for (int i = 0; i < factors.Rows; i++)
                {
                    col[i] = meanScore;
                }
                table.DataColumns.Add(col, "MeanFactor", Altaxo.Data.ColumnKind.V, 0);
            }

            // then store the factors (one table column per matrix column)
            for (int i = 0; i < factors.Columns; i++)
            {
                Altaxo.Data.DoubleColumn col = new Altaxo.Data.DoubleColumn();
                for (int j = 0; j < factors.Rows; j++)
                {
                    col[j] = factors[j, i];
                }

                table.DataColumns.Add(col, "Factor" + i.ToString(), Altaxo.Data.ColumnKind.V, 1);
            }

            // now store the mean of the matrix
            {
                Altaxo.Data.DoubleColumn col = new Altaxo.Data.DoubleColumn();

                for (int j = 0; j < meanX.Columns; j++)
                {
                    col[j] = meanX[0, j];
                }
                table.DataColumns.Add(col, "MeanLoad", Altaxo.Data.ColumnKind.V, 2);
            }

            // now store the loads - careful - they are horizontal in the matrix
            for (int i = 0; i < loads.Rows; i++)
            {
                Altaxo.Data.DoubleColumn col = new Altaxo.Data.DoubleColumn();

                for (int j = 0; j < loads.Columns; j++)
                {
                    col[j] = loads[i, j];
                }

                table.DataColumns.Add(col, "Load" + i.ToString(), Altaxo.Data.ColumnKind.V, 3);
            }

            // now store the residual variances, they are vertical in the vector
            {
                Altaxo.Data.DoubleColumn col = new Altaxo.Data.DoubleColumn();

                for (int i = 0; i < residualVariances.Rows; i++)
                {
                    col[i] = residualVariances[i, 0];
                }
                table.DataColumns.Add(col, "ResidualVariance", Altaxo.Data.ColumnKind.V, 4);
            }

            table.Resume();
            mainDocument.DataTableCollection.Add(table);
            // create a new worksheet for the result table
            Current.ProjectService.CreateNewWorksheet(table);

            return(null);
        }