public static void Predict(
  IROMatrix XU,              // unknown spectrum or spectra, one spectrum per row
  IROMatrix xLoads,          // x-loads matrix, row i belongs to factor i
  IROMatrix yLoads,          // y-loads matrix, row i belongs to factor i
  IROMatrix W,               // weighting matrix, row i belongs to factor i
  IROMatrix V,               // cross product vector, element [0,i] belongs to factor i
  int numFactors,            // number of factors to use for prediction
  IMatrix predictedY,        // may be null; otherwise receives the predicted y values, one row per spectrum
  IMatrix spectralResiduals  // may be null; otherwise receives in column 0 one residual value per spectrum
  )
{
  // Predicts the y values of every spectrum (row) in XU from the PLS model given by
  // xLoads, yLoads, W and V. Both result matrices are optional: a null argument
  // simply suppresses that output.

  // never use more factors than there are rows in yLoads
  int factorCount = Math.Min(yLoads.Rows, numFactors);

  // Scratch objects, allocated once and reused for every spectrum and every factor.
  MatrixMath.Scalar score = new MatrixMath.Scalar(0);
  MatrixMath.HorizontalVector spectrum = new MatrixMath.HorizontalVector(XU.Columns);         // the spectrum currently processed
  MatrixMath.HorizontalVector weight = new MatrixMath.HorizontalVector(XU.Columns);           // one row of W
  MatrixMath.HorizontalVector xLoadRow = new MatrixMath.HorizontalVector(xLoads.Columns);     // one row of xLoads
  MatrixMath.HorizontalVector yContribution = new MatrixMath.HorizontalVector(yLoads.Columns); // one row of yLoads, later scaled
  MatrixMath.HorizontalVector ySum = new MatrixMath.HorizontalVector(yLoads.Columns);         // accumulated predicted y values

  for (int row = 0; row < XU.Rows; row++)
  {
    // start with the untouched spectrum and an empty prediction
    MatrixMath.Submatrix(XU, spectrum, row, 0);
    MatrixMath.ZeroMatrix(ySum);

    for (int factor = 0; factor < factorCount; factor++)
    {
      // fetch the rows of the model matrices that belong to this factor
      MatrixMath.Submatrix(W, weight, factor, 0);
      MatrixMath.Submatrix(yLoads, yContribution, factor, 0);
      MatrixMath.Submatrix(xLoads, xLoadRow, factor, 0);

      // score of the (remaining) spectrum for this factor's weighting vector
      MatrixMath.MultiplySecondTransposed(weight, spectrum, score);

      // y loading scaled by the score and by the cross product, added to the prediction
      MatrixMath.MultiplyScalar(yContribution, score*V[0,factor], yContribution);
      MatrixMath.Add(ySum, yContribution, ySum);

      // remove this factor's spectral contribution before the next factor is evaluated
      MatrixMath.SubtractProductFromSelf(xLoadRow, (double)score, spectrum);
    }

    // Here 'spectrum' holds the spectral residual and 'ySum' the predicted y values.
    if (predictedY != null)
    {
      MatrixMath.SetRow(ySum, 0, predictedY, row);
    }

    if (spectralResiduals != null)
    {
      spectralResiduals[row, 0] = MatrixMath.SumOfSquares(spectrum);
    }
  }
}
/// <summary>
/// Partial least squares (PLS) decomposition of the matrices X and Y.
/// </summary>
/// <param name="_X">The X ("spectrum") matrix, centered and preprocessed. Each spectrum is a row. The matrix is copied before it is worked on.</param>
/// <param name="_Y">The Y ("concentration") matrix (centered). Each mixture is a row. The matrix is copied before it is worked on.</param>
/// <param name="numFactors">On entry: number of factors to calculate; a value less than or equal to zero requests the maximum.
/// On return: the number of factors actually used, which is at most the smaller of the row and column count of X.</param>
/// <param name="xLoads">Receives one appended row of X loads per factor. Should be initially empty.</param>
/// <param name="yLoads">Receives one appended row of Y loads per factor. Should be initially empty.</param>
/// <param name="W">Receives one appended row of weighting values per factor. Should be initially empty.</param>
/// <param name="V">Receives one appended cross product per factor. Should be initially empty.</param>
/// <param name="PRESS">If not null, the sum of squares of Y before decomposition is appended first,
/// followed by the sum of squares of the residual Y after each factor.</param>
public static void ExecuteAnalysis(
  IROMatrix _X, // matrix of spectra (a spectrum is a row of this matrix)
  IROMatrix _Y, // matrix of concentrations (a mixture is a row of this matrix)
  ref int numFactors,
  IBottomExtensibleMatrix xLoads, // out: the loads of the X matrix
  IBottomExtensibleMatrix yLoads, // out: the loads of the Y matrix
  IBottomExtensibleMatrix W, // matrix of weighting values
  IRightExtensibleMatrix V, // matrix of cross products
  IExtensibleVector PRESS // vector of Y PRESS values
  )
{
  // used variables:
  // n: number of spectra (number of tests, number of experiments)
  // p: number of slots (frequencies, ..) in each spectrum
  // m: number of constituents (number of y values in each measurement)

  // X : n-p matrix of spectra (each spectrum is a horizontal row)
  // Y : n-m matrix of concentrations

  const int maxIterations = 1500; // max number of iterations in one factorization step
  const double accuracy = 1E-12; // accuracy that should be reached between subsequent calculations of the u-vector

  // Work on private copies, because X and Y are replaced by their residuals factor by factor.
  IMatrix X = new MatrixMath.BEMatrix(_X.Rows,_X.Columns);
  MatrixMath.Copy(_X,X);
  IMatrix Y = new MatrixMath.BEMatrix(_Y.Rows,_Y.Columns);
  MatrixMath.Copy(_Y,Y);

  IMatrix u_prev = null;
  IMatrix w = new MatrixMath.HorizontalVector(X.Columns); // horizontal vector of X (spectral) weighting
  IMatrix t = new MatrixMath.VerticalVector(X.Rows); // vertical vector of X scores
  IMatrix u = new MatrixMath.VerticalVector(X.Rows); // vertical vector of Y scores
  IMatrix p = new MatrixMath.HorizontalVector(X.Columns); // horizontal vector of X loads
  IMatrix q = new MatrixMath.HorizontalVector(Y.Columns); // horizontal vector of Y loads

  // Clamp the requested number of factors; the result is reported back through the ref parameter.
  int maxFactors = Math.Min(X.Columns,X.Rows);
  numFactors = numFactors<=0 ? maxFactors : Math.Min(numFactors,maxFactors);

  if(PRESS!=null)
  {
    PRESS.Append(new MatrixMath.Scalar(MatrixMath.SumOfSquares(Y))); // PRESS value for the not yet decomposed Y
  }

  for(int nFactor=0; nFactor<numFactors; nFactor++)
  {
    // 1. Use as start vector for the y score the first column of the y-matrix
    MatrixMath.Submatrix(Y,u); // u is now a vertical vector of concentrations of the first constituent

    for(int iter=0;iter<maxIterations;iter++)
    {
      // 2. Calculate the X (spectrum) weighting vector
      MatrixMath.MultiplyFirstTransposed(u,X,w); // w is a horizontal vector

      // 3. Normalize w to unit length
      MatrixMath.NormalizeRows(w); // w now has unit length

      // 4. Calculate X (spectral) scores
      MatrixMath.MultiplySecondTransposed(X,w,t); // t is a vertical vector of n numbers

      // 5. Calculate the Y (concentration) loading vector
      MatrixMath.MultiplyFirstTransposed(t,Y,q); // q is a horizontal vector of m (number of constituents)

      // 5.1 Normalize q to unit length
      MatrixMath.NormalizeRows(q);

      // 6. Calculate the Y (concentration) score vector u
      MatrixMath.MultiplySecondTransposed(Y,q,u); // u is a vertical vector of n numbers

      // 6.1 Compare this u with the previously stored one; stop iterating when they agree
      if(u_prev!=null && MatrixMath.IsEqual(u_prev,u,accuracy))
        break;
      if(u_prev==null)
        u_prev = new MatrixMath.VerticalVector(X.Rows);
      MatrixMath.Copy(u,u_prev); // stores the content of u in u_prev
    } // for all iterations

    // 7. Calculate the inner scalar (cross product)
    double length_of_t = MatrixMath.LengthOf(t);
    MatrixMath.Scalar v = new MatrixMath.Scalar(0);
    MatrixMath.MultiplyFirstTransposed(u,t,v);
    if(length_of_t!=0) // guards the division below
      v = v/MatrixMath.Square(length_of_t);

    // 8. Calculate the new loads for the X (spectral) matrix
    MatrixMath.MultiplyFirstTransposed(t,X,p); // p is a horizontal vector of loads
    // Normalize p by the spectral scores
    if(length_of_t!=0) // guards the division below
      MatrixMath.MultiplyScalar(p,1/MatrixMath.Square(length_of_t),p);

    // 9. Calculate the new residua for the X (spectral) and Y (concentration) matrix
    MatrixMath.SubtractProductFromSelf(t,p,X);
    MatrixMath.MultiplyScalar(t,v,t); // original t times the cross product
    MatrixMath.SubtractProductFromSelf(t,q,Y); // to calculate residual Y

    // Store the loads of X and Y, the weights and the cross product in the output matrices
    xLoads.AppendBottom(p);
    yLoads.AppendBottom(q);
    W.AppendBottom(w);
    V.AppendRight(v);

    if(PRESS!=null)
    {
      double pressValue=MatrixMath.SumOfSquares(Y);
      PRESS.Append(new MatrixMath.Scalar(pressValue));
    }
    // The next factor is calculated from the residual matrices X and Y.
  } // for all factors
}
/// <summary>
/// Prepares the y values for analysis by mean centering them. Scaling is currently not applied.
/// </summary>
/// <param name="matrixY">Matrix of y values. It is passed to the mean centering routine and holds the preprocessed y values on return.</param>
/// <param name="meanY">On return, a horizontal vector with one element per column of <paramref name="matrixY"/> that received the mean y value(s).</param>
/// <param name="scaleY">On return, a horizontal vector with one element per column of <paramref name="matrixY"/>, every element set to 1.</param>
public static void PreprocessYForAnalysis(IMatrix matrixY, out IVector meanY, out IVector scaleY)
{
  int columnCount = matrixY.Columns;
  meanY = new MatrixMath.HorizontalVector(columnCount);
  scaleY = new MatrixMath.HorizontalVector(columnCount);

  // center the columns of the y matrix, the means are reported through meanY
  MatrixMath.ColumnsToZeroMean(matrixY, meanY);

  // no scaling is done, so the scale is reported as 1 for every column
  VectorMath.Fill(scaleY,1);
}
/// <summary>
/// Makes a PCA (a principal component analysis) of the table or the selected columns / rows and stores the results in a newly created table.
/// </summary>
/// <param name="mainDocument">The main document of the application. The result table is added to its table collection.</param>
/// <param name="srctable">The table where the data come from.</param>
/// <param name="selectedColumns">The selected columns. If null or empty, all data columns of the table are considered. Only numeric columns are used.</param>
/// <param name="selectedRows">The selected rows. If null or empty, all rows up to the longest used numeric column are taken.</param>
/// <param name="bHorizontalOrientedSpectrum">True if a spectrum is a single row, False if a spectrum is a single column.</param>
/// <param name="maxNumberOfFactors">The maximum number of factors to calculate.</param>
/// <returns>Null if the analysis was carried out, otherwise a message describing why it could not be done.</returns>
public static string PrincipalComponentAnalysis(
  Altaxo.AltaxoDocument mainDocument,
  Altaxo.Data.DataTable srctable,
  IAscendingIntegerCollection selectedColumns,
  IAscendingIntegerCollection selectedRows,
  bool bHorizontalOrientedSpectrum,
  int maxNumberOfFactors
  )
{
  bool bUseSelectedColumns = (null!=selectedColumns && 0!=selectedColumns.Count);
  int prenumcols = bUseSelectedColumns ? selectedColumns.Count : srctable.DataColumns.ColumnCount;

  // count the numeric columns among the candidate columns
  int numcols = 0;
  for(int i=0;i<prenumcols;i++)
  {
    // translate the position in the candidate list into the column index in the table
    int idx = bUseSelectedColumns ? selectedColumns[i] : i;
    if(srctable[idx] is Altaxo.Data.INumericColumn)
      numcols++;
  }

  // determine the number of rows
  bool bUseSelectedRows = (null!=selectedRows && 0!=selectedRows.Count);

  int numrows;
  if(bUseSelectedRows)
  {
    numrows = selectedRows.Count;
  }
  else
  {
    // Without a row selection, take the length of the longest numeric candidate column.
    // All candidates have to be visited, since numeric and non-numeric columns may be mixed.
    numrows = 0;
    for(int i=0;i<prenumcols;i++)
    {
      int idx = bUseSelectedColumns ? selectedColumns[i] : i;
      if(srctable[idx] is Altaxo.Data.INumericColumn)
        numrows = Math.Max(numrows,srctable[idx].Count);
    }
  }

  // check that both dimensions are at least 2 - otherwise PCA is not possible
  if(numrows<2)
    return "At least two rows are neccessary to do Principal Component Analysis!";
  if(numcols<2)
    return "At least two numeric columns are neccessary to do Principal Component Analysis!";

  // Create a matrix of appropriate dimensions and fill it.
  // Horizontal orientation: table rows become matrix rows.
  // Vertical orientation: table columns become matrix rows (the data are transposed).
  MatrixMath.BEMatrix matrixX = bHorizontalOrientedSpectrum
    ? new MatrixMath.BEMatrix(numrows,numcols)
    : new MatrixMath.BEMatrix(numcols,numrows);

  int ccol = 0; // running index of the numeric columns already copied
  for(int i=0;i<prenumcols;i++)
  {
    int colidx = bUseSelectedColumns ? selectedColumns[i] : i;
    Altaxo.Data.INumericColumn col = srctable[colidx] as Altaxo.Data.INumericColumn;
    if(null==col)
      continue; // non-numeric columns do not take part in the analysis

    for(int j=0;j<numrows;j++)
    {
      int rowidx = bUseSelectedRows ? selectedRows[j] : j;
      if(bHorizontalOrientedSpectrum)
        matrixX[j,ccol] = col[rowidx];
      else
        matrixX[ccol,j] = col[rowidx];
    }
    ++ccol;
  }

  // now do PCA with the matrix
  MatrixMath.REMatrix factors = new MatrixMath.REMatrix(0,0);
  MatrixMath.BEMatrix loads = new MatrixMath.BEMatrix(0,0);
  MatrixMath.BEMatrix residualVariances = new MatrixMath.BEMatrix(0,0);
  MatrixMath.HorizontalVector meanX = new MatrixMath.HorizontalVector(matrixX.Columns);
  // first, center the matrix
  MatrixMath.ColumnsToZeroMean(matrixX,meanX);
  MatrixMath.NIPALS_HO(matrixX,maxNumberOfFactors,1E-9,factors,loads,residualVariances);

  // now we have to create a new table where to place the calculated factors and loads
  // we will do that in a vertical oriented manner, i.e. even if the loads are
  // here in horizontal vectors: in our table they are stored in (vertical) columns
  Altaxo.Data.DataTable table = new Altaxo.Data.DataTable("PCA of " + srctable.Name);

  // Fill the table
  table.Suspend();

  // first of all store the mean score: the length of the mean vector, repeated for every factor row
  {
    double meanScore = MatrixMath.LengthOf(meanX);
    MatrixMath.NormalizeRows(meanX); // the normalized mean is stored further below as "MeanLoad"

    Altaxo.Data.DoubleColumn col = new Altaxo.Data.DoubleColumn();
    for(int i=0;i<factors.Rows;i++)
      col[i] = meanScore;

    table.DataColumns.Add(col,"MeanFactor",Altaxo.Data.ColumnKind.V,0);
  }

  // then store the factors, one table column per matrix column
  for(int i=0;i<factors.Columns;i++)
  {
    Altaxo.Data.DoubleColumn col = new Altaxo.Data.DoubleColumn();
    for(int j=0;j<factors.Rows;j++)
      col[j] = factors[j,i];

    table.DataColumns.Add(col,"Factor"+i.ToString(),Altaxo.Data.ColumnKind.V,1);
  }

  // now store the (normalized) mean of the matrix
  {
    Altaxo.Data.DoubleColumn col = new Altaxo.Data.DoubleColumn();
    for(int j=0;j<meanX.Columns;j++)
      col[j] = meanX[0,j];
    table.DataColumns.Add(col,"MeanLoad",Altaxo.Data.ColumnKind.V,2);
  }

  // now store the loads - careful - they are horizontal in the matrix
  for(int i=0;i<loads.Rows;i++)
  {
    Altaxo.Data.DoubleColumn col = new Altaxo.Data.DoubleColumn();
    for(int j=0;j<loads.Columns;j++)
      col[j] = loads[i,j];

    table.DataColumns.Add(col,"Load"+i.ToString(),Altaxo.Data.ColumnKind.V,3);
  }

  // now store the residual variances, they are vertical in the vector
  {
    Altaxo.Data.DoubleColumn col = new Altaxo.Data.DoubleColumn();
    for(int i=0;i<residualVariances.Rows;i++)
      col[i] = residualVariances[i,0];
    table.DataColumns.Add(col,"ResidualVariance",Altaxo.Data.ColumnKind.V,4);
  }

  table.Resume();
  mainDocument.DataTableCollection.Add(table);
  // show the result table in a new worksheet
  Current.ProjectService.CreateNewWorksheet(table);

  return null;
}
/// <summary>
/// Preprocesses the spectra before they are analyzed in multivariate calibration.
/// The work itself is delegated to <paramref name="preprocessOptions"/>.
/// </summary>
/// <param name="preprocessOptions">Contains the information how to preprocess the spectra.</param>
/// <param name="xOfX">Handed to the preprocessing options to set up their regions. Presumably the x values (e.g. wavelengths) belonging to the spectral columns - confirm against SpectralPreprocessingOptions.</param>
/// <param name="matrixX">The matrix of spectra. Each spectrum is a row of the matrix.</param>
/// <param name="meanX">On return, a horizontal vector with one element per column of <paramref name="matrixX"/>, as filled by the preprocessing (presumably the column means).</param>
/// <param name="scaleX">On return, a horizontal vector with one element per column of <paramref name="matrixX"/>, as filled by the preprocessing (presumably the scale factors).</param>
public static void PreprocessSpectraForAnalysis(
  SpectralPreprocessingOptions preprocessOptions,
  IROVector xOfX,
  IMatrix matrixX,
  out IVector meanX,
  out IVector scaleX
  )
{
  // The result vectors need one slot per spectral column.
  int columnCount = matrixX.Columns;
  meanX = new MatrixMath.HorizontalVector(columnCount);
  scaleX = new MatrixMath.HorizontalVector(columnCount);

  // The regions have to be known to the options before the spectra are processed.
  preprocessOptions.SetRegionsByIdentification(xOfX);
  preprocessOptions.Process(matrixX, meanX, scaleX);
}