/// <summary>
/// Makes sure that for every selected column of the source <see cref="DataColumnCollection"/> a column with the
/// same name, type, kind and group number exists in the destination <see cref="DataColumnCollection"/>.
/// </summary>
/// <param name="sourceTable">The source table.</param>
/// <param name="selectedSourceDataColumns">Indices of the selected source data columns. If this parameter is null or empty, all source columns are selected.</param>
/// <param name="destinationTable">The destination table.</param>
public static void EnsureColumnsExistInDestinationCollection(this DataColumnCollection sourceTable, IAscendingIntegerCollection selectedSourceDataColumns, DataColumnCollection destinationTable)
{
  // Without an explicit selection, fall back to the full index range of the source table.
  IAscendingIntegerCollection columnIndices = selectedSourceDataColumns;
  if (null == columnIndices || 0 == columnIndices.Count)
  {
    columnIndices = Altaxo.Collections.ContiguousIntegerRange.FromStartAndCount(0, sourceTable.ColumnCount);
  }

  foreach (var columnIndex in columnIndices)
  {
    var sourceColumn = sourceTable[columnIndex];

    // Gather the properties that describe the source column ...
    var columnName = sourceTable.GetColumnName(sourceColumn);
    var columnType = sourceColumn.GetType();
    var columnKind = sourceTable.GetColumnKind(sourceColumn);
    var columnGroup = sourceTable.GetColumnGroup(sourceColumn);

    // ... and request a column with exactly these properties in the destination.
    destinationTable.EnsureExistence(columnName, columnType, columnKind, columnGroup);
  }
}
/// <summary>
/// Calculates statistics for each selected numeric column and appends one result row per evaluated column
/// to the destination table. Non-numeric columns are skipped, and NaN values are ignored.
/// </summary>
/// <param name="srctable">Source table.</param>
/// <param name="selectedColumns">Selected data columns in the source table. If the argument is null or empty, all columns will be used.</param>
/// <param name="selectedRows">Selected rows in the source table. If the argument is null or empty, all rows will be used.</param>
/// <param name="destinationTable">The table where the statistical results are written to.</param>
public static void DoStatisticsOnColumns(
  this DataColumnCollection srctable,
  IAscendingIntegerCollection selectedColumns,
  IAscendingIntegerCollection selectedRows,
  DataColumnCollection destinationTable
  )
{
  bool hasColumnSelection = (null != selectedColumns && 0 != selectedColumns.Count);
  bool hasRowSelection = (null != selectedRows && 0 != selectedRows.Count);
  int columnCount = hasColumnSelection ? selectedColumns.Count : srctable.ColumnCount;

  if (0 == columnCount)
  {
    return; // nothing selected
  }

  // Result columns: one text column for the source column name, all others hold numbers.
  // Only numeric source columns are evaluated, since values of different kinds
  // (e.g. numbers and dates) can not be mixed inside one result column.
  var resultName = new TextColumn();           // name of the evaluated source column
  var resultMean = new DoubleColumn();         // mean
  var resultSd = new DoubleColumn();           // standard deviation
  var resultSe = new DoubleColumn();           // standard error (sd / sqrt(N))
  var resultSum = new DoubleColumn();          // sum of the values
  var resultSumSqr = new DoubleColumn();       // sum of the squared values
  var resultN = new DoubleColumn();            // number of values that entered the statistics
  var resultFracOneSigma = new DoubleColumn(); // fraction of values inside mean +- 1 sd
  var resultFracTwoSigma = new DoubleColumn(); // fraction of values inside mean +- 2 sd
  var resultFracThreeSigma = new DoubleColumn(); // fraction of values inside mean +- 3 sd
  var resultMinimum = new DoubleColumn();      // minimum of the values
  var resultMaximum = new DoubleColumn();      // maximum of the values

  int resultRow = 0;
  for (int k = 0; k < columnCount; ++k)
  {
    Altaxo.Data.DataColumn sourceColumn = srctable[hasColumnSelection ? selectedColumns[k] : k];

    var numericColumn = sourceColumn as Altaxo.Data.INumericColumn;
    if (null == numericColumn)
    {
      continue; // not a numeric column
    }

    int rowCount = hasRowSelection ? selectedRows.Count : srctable.RowCount;
    if (0 == rowCount)
    {
      continue;
    }

    // First pass: accumulate sum, square sum, count and the extreme values.
    int validCount = 0;
    double total = 0;
    double totalOfSquares = 0;
    double smallest = double.PositiveInfinity;
    double largest = double.NegativeInfinity;

    for (int r = 0; r < rowCount; ++r)
    {
      double value = numericColumn[hasRowSelection ? selectedRows[r] : r];
      if (double.IsNaN(value))
      {
        continue;
      }

      ++validCount;
      total += value;
      totalOfSquares += (value * value);
      smallest = Math.Min(smallest, value);
      largest = Math.Max(largest, value);
    }

    double mean = total / validCount;
    double sumOfSquaredDeviations = totalOfSquares - total * total / validCount;
    if (sumOfSquaredDeviations < 0)
    {
      sumOfSquaredDeviations = 0; // a negative value can only be a rounding error
    }

    double sd = validCount > 1 ? Math.Sqrt(sumOfSquaredDeviations / (validCount - 1)) : 0;
    double se = sd / Math.Sqrt(validCount);

    // Second pass: count the values that lie inside the (closed) 1, 2 and 3 sigma intervals.
    double lowerOne = mean - sd, upperOne = mean + sd;
    double lowerTwo = mean - 2 * sd, upperTwo = mean + 2 * sd;
    double lowerThree = mean - 3 * sd, upperThree = mean + 3 * sd;
    int insideOne = 0, insideTwo = 0, insideThree = 0;

    for (int r = 0; r < rowCount; ++r)
    {
      double value = numericColumn[hasRowSelection ? selectedRows[r] : r];
      if (double.IsNaN(value))
      {
        continue;
      }

      if (Altaxo.Calc.RMath.IsInIntervalCC(value, lowerOne, upperOne))
      {
        ++insideOne;
      }
      if (Altaxo.Calc.RMath.IsInIntervalCC(value, lowerTwo, upperTwo))
      {
        ++insideTwo;
      }
      if (Altaxo.Calc.RMath.IsInIntervalCC(value, lowerThree, upperThree))
      {
        ++insideThree;
      }
    }

    // Store the results of this source column in the next free result row.
    resultName[resultRow] = sourceColumn.Name;
    resultN[resultRow] = validCount;
    resultSum[resultRow] = total;
    resultSumSqr[resultRow] = totalOfSquares;
    resultMean[resultRow] = mean;
    resultSd[resultRow] = sd;
    resultSe[resultRow] = se;
    resultFracOneSigma[resultRow] = insideOne / (double)validCount;
    resultFracTwoSigma[resultRow] = insideTwo / (double)validCount;
    resultFracThreeSigma[resultRow] = insideThree / (double)validCount;

    // Without any valid value there is no meaningful minimum or maximum.
    resultMinimum[resultRow] = (0 == validCount) ? double.NaN : smallest;
    resultMaximum[resultRow] = (0 == validCount) ? double.NaN : largest;

    ++resultRow; // for the next column
  } // for all selected columns

  if (0 == resultRow)
  {
    return; // no column was evaluated, thus nothing to append
  }

  destinationTable.EnsureExistence(DefaultColumnNameColumnName, typeof(TextColumn), ColumnKind.X, 0).Append(resultName);
  AppendStatisticalData(destinationTable, resultMean, resultSd, resultSe, resultSum, resultSumSqr, resultN, resultFracOneSigma, resultFracTwoSigma, resultFracThreeSigma, resultMinimum, resultMaximum);
}
/// <summary>
/// Appends the given statistical result columns to the corresponding columns of the destination table.
/// Each destination column is requested via EnsureExistence under its default name (group 0) before
/// the data are appended to it.
/// </summary>
/// <param name="destinationTable">The table that receives the statistical data.</param>
/// <param name="colMean">Column with the mean values.</param>
/// <param name="colSd">Column with the standard deviations.</param>
/// <param name="colSe">Column with the standard errors.</param>
/// <param name="colSum">Column with the sums.</param>
/// <param name="colSumSqr">Column with the square sums.</param>
/// <param name="colN">Column with the number of items.</param>
/// <param name="fracOneSigma">Column with the fraction of values inside the one sigma interval.</param>
/// <param name="fracTwoSigma">Column with the fraction of values inside the two sigma interval.</param>
/// <param name="fracThreeSigma">Column with the fraction of values inside the three sigma interval.</param>
/// <param name="minimum">Column with the minimum values.</param>
/// <param name="maximum">Column with the maximum values.</param>
private static void AppendStatisticalData(DataColumnCollection destinationTable, Data.DoubleColumn colMean, Data.DoubleColumn colSd, Data.DoubleColumn colSe, Data.DoubleColumn colSum, Data.DoubleColumn colSumSqr, Data.DoubleColumn colN, Data.DoubleColumn fracOneSigma, Data.DoubleColumn fracTwoSigma, Data.DoubleColumn fracThreeSigma, DoubleColumn minimum, DoubleColumn maximum)
{
  // Note: the destination columns are requested in the order below;
  // the standard error is requested before the standard deviation and is the only column of kind Err.
  var destMean = destinationTable.EnsureExistence(DefaultMeanColumnName, typeof(DoubleColumn), ColumnKind.V, 0);
  destMean.Append(colMean);

  var destStandardError = destinationTable.EnsureExistence(DefaultStandardErrorColumnName, typeof(DoubleColumn), ColumnKind.Err, 0);
  destStandardError.Append(colSe);

  var destStandardDeviation = destinationTable.EnsureExistence(DefaultStandardDeviationColumnName, typeof(DoubleColumn), ColumnKind.V, 0);
  destStandardDeviation.Append(colSd);

  var destSum = destinationTable.EnsureExistence(DefaultSumColumnName, typeof(DoubleColumn), ColumnKind.V, 0);
  destSum.Append(colSum);

  var destSumSqr = destinationTable.EnsureExistence(DefaultSumSqrColumnName, typeof(DoubleColumn), ColumnKind.V, 0);
  destSumSqr.Append(colSumSqr);

  var destNumberOfItems = destinationTable.EnsureExistence(DefaultNumberOfItemsColumnName, typeof(DoubleColumn), ColumnKind.V, 0);
  destNumberOfItems.Append(colN);

  var destFracOneSigma = destinationTable.EnsureExistence(DefaultFractionInOneSigmaColumnName, typeof(DoubleColumn), ColumnKind.V, 0);
  destFracOneSigma.Append(fracOneSigma);

  var destFracTwoSigma = destinationTable.EnsureExistence(DefaultFractionInTwoSigmaColumnName, typeof(DoubleColumn), ColumnKind.V, 0);
  destFracTwoSigma.Append(fracTwoSigma);

  var destFracThreeSigma = destinationTable.EnsureExistence(DefaultFractionInThreeSigmaColumnName, typeof(DoubleColumn), ColumnKind.V, 0);
  destFracThreeSigma.Append(fracThreeSigma);

  var destMinimum = destinationTable.EnsureExistence(DefaultMinimumColumnName, typeof(DoubleColumn), ColumnKind.V, 0);
  destMinimum.Append(minimum);

  var destMaximum = destinationTable.EnsureExistence(DefaultMaximumColumnName, typeof(DoubleColumn), ColumnKind.V, 0);
  destMaximum.Append(maximum);
}
/// <summary>
/// Calculates statistics across the selected numeric columns for each selected row, and appends one result row
/// per evaluated source row to the destination table. Non-numeric columns are skipped, and NaN values are ignored.
/// </summary>
/// <param name="srctable">Source table.</param>
/// <param name="selectedColumns">Selected data columns in the source table. If the argument is null or empty, all columns will be used.</param>
/// <param name="selectedRows">Selected rows in the source table. If the argument is null or empty, all rows will be used.</param>
/// <param name="destinationTable">The table where the statistical results are written to.</param>
/// <remarks>
/// For a row without any valid value, mean, standard deviation and standard error are not written,
/// and minimum and maximum are set to NaN.
/// </remarks>
public static void DoStatisticsOnRows(
  this DataColumnCollection srctable,
  IAscendingIntegerCollection selectedColumns,
  IAscendingIntegerCollection selectedRows,
  DataColumnCollection destinationTable
  )
{
  bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count);
  int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.ColumnCount;
  if (numcols == 0)
  {
    return; // nothing selected
  }

  bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count);
  int numrows = bUseSelectedRows ? selectedRows.Count : srctable.RowCount;
  if (numrows == 0)
  {
    return;
  }

  // Resolve the numeric source columns only once; both passes below iterate over this list
  // (in the original column order) instead of repeating the lookup and the type test for every row.
  var numericColumns = new System.Collections.Generic.List<Altaxo.Data.INumericColumn>(numcols);
  for (int si = 0; si < numcols; si++)
  {
    Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si];
    var ncol = col as Altaxo.Data.INumericColumn;
    if (null != ncol)
    {
      numericColumns.Add(ncol);
    }
  }

  var cRows = new DoubleColumn();             // number of the source row
  var colMean = new Data.DoubleColumn();      // mean
  var colSD = new Data.DoubleColumn();        // standard deviation
  var colSE = new Data.DoubleColumn();        // standard error (sd / sqrt(N))
  var colSum = new Data.DoubleColumn();       // sum of the values
  var colNN = new Data.DoubleColumn();        // number of values that entered the statistics
  var colSumSqr = new Data.DoubleColumn();    // sum of the squared values
  var colFracOneSigma = new Data.DoubleColumn();
  var colFracTwoSigma = new Data.DoubleColumn();
  var colFracThreeSigma = new Data.DoubleColumn();
  var colMinimum = new DoubleColumn();
  var colMaximum = new DoubleColumn();

  // Initialize the accumulators, and store the source row numbers.
  // The row numbers are written here (and not while accumulating), so that the row number column
  // is filled exactly once per row, even if the source contains no numeric column at all.
  for (int i = 0; i < numrows; i++)
  {
    cRows[i] = bUseSelectedRows ? selectedRows[i] : i;
    colSum[i] = 0;
    colSumSqr[i] = 0;
    colNN[i] = 0;
    colMinimum[i] = double.PositiveInfinity;
    colMaximum[i] = double.NegativeInfinity;
  }

  // Accumulate sum, square sum, count and extreme values, column by column.
  foreach (var ncol in numericColumns)
  {
    for (int i = 0; i < numrows; i++)
    {
      int row = bUseSelectedRows ? selectedRows[i] : i;
      double val = ncol[row];
      if (double.IsNaN(val))
      {
        continue;
      }

      colSum[i] += val;
      colSumSqr[i] += val * val;
      colNN[i] += 1;
      colMinimum[i] = Math.Min(colMinimum[i], val);
      colMaximum[i] = Math.Max(colMaximum[i], val);
    }
  } // for all numeric columns

  // now calculate the statistics
  for (int i = 0; i < numrows; i++)
  {
    double NN = colNN[i];
    double sum = colSum[i];
    double sumsqr = colSumSqr[i];
    if (NN > 0)
    {
      double mean = sum / NN;
      double ymy0sqr = sumsqr - sum * sum / NN;
      if (ymy0sqr < 0)
      {
        ymy0sqr = 0; // a negative value can only be a rounding error, so set it to zero
      }

      double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0;
      double se = sd / Math.Sqrt(NN);

      colMean[i] = mean;
      colSD[i] = sd;
      colSE[i] = se;
    }
    else
    {
      // no valid value in this row: there is no meaningful minimum or maximum
      colMinimum[i] = double.NaN;
      colMaximum[i] = double.NaN;
    }
  } // for all rows

  // calculate the fractions of values inside the (closed) 1, 2 and 3 sigma intervals
  for (int i = 0; i < numrows; i++)
  {
    int row = bUseSelectedRows ? selectedRows[i] : i;
    double mean = colMean[i];
    double sd = colSD[i];

    double oneSigmaLo = mean - 1 * sd, oneSigmaHi = mean + 1 * sd;
    double twoSigmaLo = mean - 2 * sd, twoSigmaHi = mean + 2 * sd;
    double threeSigmaLo = mean - 3 * sd, threeSigmaHi = mean + 3 * sd;

    int cntOneSigma = 0, cntTwoSigma = 0, cntThreeSigma = 0;

    foreach (var ncol in numericColumns)
    {
      double val = ncol[row];
      if (double.IsNaN(val))
      {
        continue;
      }

      if (Altaxo.Calc.RMath.IsInIntervalCC(val, oneSigmaLo, oneSigmaHi))
      {
        ++cntOneSigma;
      }
      if (Altaxo.Calc.RMath.IsInIntervalCC(val, twoSigmaLo, twoSigmaHi))
      {
        ++cntTwoSigma;
      }
      if (Altaxo.Calc.RMath.IsInIntervalCC(val, threeSigmaLo, threeSigmaHi))
      {
        ++cntThreeSigma;
      }
    }

    // colNN holds doubles, thus these are floating point divisions
    colFracOneSigma[i] = cntOneSigma / colNN[i];
    colFracTwoSigma[i] = cntTwoSigma / colNN[i];
    colFracThreeSigma[i] = cntThreeSigma / colNN[i];
  }

  destinationTable.EnsureExistence(DefaultRowNumberColumnName, typeof(DoubleColumn), ColumnKind.X, 0).Append(cRows);
  AppendStatisticalData(destinationTable, colMean, colSD, colSE, colSum, colSumSqr, colNN, colFracOneSigma, colFracTwoSigma, colFracThreeSigma, colMinimum, colMaximum);
}
/// <summary>
/// Collects the label information of the columns of an Origin worksheet (long name, units, comments,
/// and the parameter labels with index 0 to 11) into a new <see cref="DataColumnCollection"/>.
/// </summary>
/// <param name="wks">The Origin worksheet to read the column labels from.</param>
/// <returns>
/// A new collection. It contains a text column ("LongName", "Unit", "Comments", "Parameter0" ... "Parameter11")
/// only for those labels that are non-empty for at least one worksheet column. The label of the worksheet column
/// with index i is stored at row index i of the corresponding text column.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="wks"/> is null.</exception>
public static DataColumnCollection GetPropertyColumns(this Origin.Worksheet wks)
{
  if (null == wks)
  {
    throw new ArgumentNullException("wks");
  }

  // Parameter labels with index 0 .. 11 are probed (same range as before; the array that caches
  // the destination columns is now sized to exactly this range).
  const int numberOfParameterLabels = 12;

  var result = new DataColumnCollection();

  // There seems to be no way to ask whether a label row is used or not. Therefore all cells of the
  // long name, units, comments and parameter labels are tried, and a destination column is
  // created lazily as soon as the first non-empty cell is found.
  DataColumn destLongNameCol = null, destUnitCol = null, destCommentCol = null;
  var paraCol = new DataColumn[numberOfParameterLabels];

  var srcDataCols = wks.Cols;
  for (int i = 0; i < srcDataCols; ++i)
  {
    var srcCol = wks.Columns[i];

    // Each label is read only once per column, and then reused for the test and the assignment.
    var longName = srcCol.LongName;
    if (!string.IsNullOrEmpty(longName))
    {
      if (null == destLongNameCol)
      {
        destLongNameCol = result.EnsureExistence("LongName", typeof(TextColumn), ColumnKind.V, 0);
      }
      destLongNameCol[i] = longName;
    }

    var units = srcCol.Units;
    if (!string.IsNullOrEmpty(units))
    {
      if (null == destUnitCol)
      {
        destUnitCol = result.EnsureExistence("Unit", typeof(TextColumn), ColumnKind.V, 0);
      }
      destUnitCol[i] = units;
    }

    var comments = srcCol.Comments;
    if (!string.IsNullOrEmpty(comments))
    {
      if (null == destCommentCol)
      {
        destCommentCol = result.EnsureExistence("Comments", typeof(TextColumn), ColumnKind.V, 0);
      }
      destCommentCol[i] = comments;
    }

    for (int nPara = 0; nPara < numberOfParameterLabels; ++nPara)
    {
      var parameter = srcCol.Parameter[nPara];
      if (!string.IsNullOrEmpty(parameter))
      {
        if (null == paraCol[nPara])
        {
          paraCol[nPara] = result.EnsureExistence("Parameter" + nPara.ToString(), typeof(TextColumn), ColumnKind.V, 0);
        }
        paraCol[nPara][i] = parameter;
      }
    }
  }

  return result;
}