/// <summary> /// Calculates statistics of selected columns. Creates a new table where the statistical data will be written to. /// </summary> /// <param name="srctable">Source table.</param> /// <param name="selectedColumns">Selected data columns in the source table.</param> /// <param name="selectedRows">Selected rows in the source table.</param> /// <param name="destinationTable">The table where the statistical results are written to.</param> public static void DoStatisticsOnRows( this DataColumnCollection srctable, IAscendingIntegerCollection selectedColumns, IAscendingIntegerCollection selectedRows, DataColumnCollection destinationTable ) { bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count); int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.ColumnCount; if (numcols == 0) return; // nothing selected bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count); int numrows = bUseSelectedRows ? selectedRows.Count : srctable.RowCount; if (numrows == 0) return; Data.DoubleColumn cRows = new DoubleColumn(); // 1st column is the mean, and holds the sum during the calculation Data.DoubleColumn colMean = new Data.DoubleColumn(); // 2rd column is the standard deviation, and holds the square sum during calculation Data.DoubleColumn colSD = new Data.DoubleColumn(); // 3th column is the standard e (N) Data.DoubleColumn colSE = new Data.DoubleColumn(); // 4th column is the sum Data.DoubleColumn colSum = new Data.DoubleColumn(); // 5th column is the number of items for statistics Data.DoubleColumn colNN = new Data.DoubleColumn(); var colSumSqr = new Data.DoubleColumn(); var colFracOneSigma = new Data.DoubleColumn(); var colFracTwoSigma = new Data.DoubleColumn(); var colFracThreeSigma = new Data.DoubleColumn(); var colMinimum = new DoubleColumn(); var colMaximum = new DoubleColumn(); // first fill the cols c1, c2, c5 with zeros because we want to sum up for (int i = 0; i < numrows; i++) { colSum[i] = 0; colSumSqr[i] = 0; colNN[i] = 0; colMinimum[i] = double.PositiveInfinity; colMaximum[i] = double.NegativeInfinity; } for (int si = 0; si < numcols; si++) { Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si]; if (!(col is Altaxo.Data.INumericColumn)) continue; // now do the statistics Data.INumericColumn ncol = (Data.INumericColumn)col; for (int i = 0; i < numrows; i++) { int row = bUseSelectedRows ? selectedRows[i] : i; cRows[i] = row; double val = ncol[row]; if (Double.IsNaN(val)) continue; colSum[i] += val; colSumSqr[i] += val * val; colNN[i] += 1; colMinimum[i] = Math.Min(colMinimum[i], val); colMaximum[i] = Math.Max(colMaximum[i], val); } } // for all selected columns // now calculate the statistics for (int i = 0; i < numrows; i++) { // now fill a new row in the worksheet double NN = colNN[i]; double sum = colSum[i]; double sumsqr = colSumSqr[i]; if (NN > 0) { double mean = sum / NN; double ymy0sqr = sumsqr - sum * sum / NN; if (ymy0sqr < 0) ymy0sqr = 0; // if this is lesser zero, it is a rounding error, so set it to zero double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0; double se = sd / Math.Sqrt(NN); colMean[i] = mean; // mean colSD[i] = sd; colSE[i] = se; } else { colMinimum[i] = double.NaN; colMaximum[i] = double.NaN; } } // for all rows // calculate fractions for (int i = 0; i < numrows; i++) { int row = bUseSelectedRows ? selectedRows[i] : i; double mean = colMean[i]; double sd = colSD[i]; // calculate fractions double oneSigmaLo = mean - 1 * sd, oneSigmaHi = mean + 1 * sd; double twoSigmaLo = mean - 2 * sd, twoSigmaHi = mean + 2 * sd; double threeSigmaLo = mean - 3 * sd, threeSigmaHi = mean + 3 * sd; int cntOneSigma = 0, cntTwoSigma = 0, cntThreeSigma = 0; for (int si = 0; si < numcols; si++) { Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si]; if (!(col is Altaxo.Data.INumericColumn)) continue; // now do the statistics Data.INumericColumn ncol = (Data.INumericColumn)col; double val = ncol[row]; if (Double.IsNaN(val)) continue; if (Altaxo.Calc.RMath.IsInIntervalCC(val, oneSigmaLo, oneSigmaHi)) ++cntOneSigma; if (Altaxo.Calc.RMath.IsInIntervalCC(val, twoSigmaLo, twoSigmaHi)) ++cntTwoSigma; if (Altaxo.Calc.RMath.IsInIntervalCC(val, threeSigmaLo, threeSigmaHi)) ++cntThreeSigma; } colFracOneSigma[i] = cntOneSigma / (double)colNN[i]; colFracTwoSigma[i] = cntTwoSigma / (double)colNN[i]; colFracThreeSigma[i] = cntThreeSigma / (double)colNN[i]; } destinationTable.EnsureExistence(DefaultRowNumberColumnName, typeof(DoubleColumn), ColumnKind.X, 0).Append(cRows); AppendStatisticalData(destinationTable, colMean, colSD, colSE, colSum, colSumSqr, colNN, colFracOneSigma, colFracTwoSigma, colFracThreeSigma, colMinimum, colMaximum); }
/// <summary> /// Calculates statistics of selected columns. Creates a new table where the statistical data will be written to. /// </summary> /// <param name="srctable">Source table.</param> /// <param name="selectedColumns">Selected data columns in the source table. If the argument is null, all columns will be used.</param> /// <param name="selectedRows">Selected rows in the source table. If the argument is null, all rows will be used.</param> /// <param name="destinationTable">The table where the statistical results are written to.</param> public static void DoStatisticsOnColumns( this DataColumnCollection srctable, IAscendingIntegerCollection selectedColumns, IAscendingIntegerCollection selectedRows, DataColumnCollection destinationTable ) { bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count); int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.ColumnCount; bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count); if (numcols == 0) return; // nothing selected // add a text column and some double columns // note: statistics is only possible for numeric columns since // otherwise in one column doubles and i.e. dates are mixed, which is not possible // 1st column is the name of the column of which the statistics is made Data.TextColumn colCol = new Data.TextColumn(); // 2nd column is the mean Data.DoubleColumn colMean = new Data.DoubleColumn(); // 3rd column is the standard deviation Data.DoubleColumn colSd = new Data.DoubleColumn(); // 4th column is the standard e (N) Data.DoubleColumn colSe = new Data.DoubleColumn(); // 5th column is the sum Data.DoubleColumn colSum = new Data.DoubleColumn(); var colSumSqr = new Data.DoubleColumn(); // 6th column is the number of items for statistics Data.DoubleColumn colN = new Data.DoubleColumn(); var colFracOneSigma = new Data.DoubleColumn(); var colFracTwoSigma = new Data.DoubleColumn(); var colFracThreeSigma = new Data.DoubleColumn(); var colMinimum = new DoubleColumn(); // Minimum of the values var colMaximum = new DoubleColumn(); // Maximum of the values int currRow = 0; for (int si = 0; si < numcols; si++) { Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si]; if (!(col is Altaxo.Data.INumericColumn)) continue; int rows = bUseSelectedRows ? selectedRows.Count : srctable.RowCount; if (rows == 0) continue; // now do the statistics Data.INumericColumn ncol = (Data.INumericColumn)col; double sum = 0; double sumsqr = 0; int NN = 0; double minimum = double.PositiveInfinity; double maximum = double.NegativeInfinity; for (int i = 0; i < rows; i++) { double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i]; if (Double.IsNaN(val)) continue; NN++; sum += val; sumsqr += (val * val); minimum = Math.Min(minimum, val); maximum = Math.Max(maximum, val); } // now fill a new row in the worksheet double mean = sum / NN; double ymy0sqr = sumsqr - sum * sum / NN; if (ymy0sqr < 0) ymy0sqr = 0; // if this is lesser zero, it is a rounding error, so set it to zero double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0; double se = sd / Math.Sqrt(NN); // calculate fractions double oneSigmaLo = mean - 1 * sd, oneSigmaHi = mean + 1 * sd; double twoSigmaLo = mean - 2 * sd, twoSigmaHi = mean + 2 * sd; double threeSigmaLo = mean - 3 * sd, threeSigmaHi = mean + 3 * sd; int cntOneSigma = 0, cntTwoSigma = 0, cntThreeSigma = 0; for (int i = 0; i < rows; i++) { double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i]; if (Double.IsNaN(val)) continue; if (Altaxo.Calc.RMath.IsInIntervalCC(val, oneSigmaLo, oneSigmaHi)) ++cntOneSigma; if (Altaxo.Calc.RMath.IsInIntervalCC(val, twoSigmaLo, twoSigmaHi)) ++cntTwoSigma; if (Altaxo.Calc.RMath.IsInIntervalCC(val, threeSigmaLo, threeSigmaHi)) ++cntThreeSigma; } if (0 == NN) { minimum = maximum = double.NaN; } colCol[currRow] = col.Name; colMean[currRow] = mean; // mean colSd[currRow] = sd; colSe[currRow] = se; colSum[currRow] = sum; colSumSqr[currRow] = sumsqr; colN[currRow] = NN; colFracOneSigma[currRow] = cntOneSigma / (double)NN; colFracTwoSigma[currRow] = cntTwoSigma / (double)NN; colFracThreeSigma[currRow] = cntThreeSigma / (double)NN; colMinimum[currRow] = minimum; colMaximum[currRow] = maximum; currRow++; // for the next column } // for all selected columns if (currRow != 0) { destinationTable.EnsureExistence(DefaultColumnNameColumnName, typeof(TextColumn), ColumnKind.X, 0).Append(colCol); AppendStatisticalData(destinationTable, colMean, colSd, colSe, colSum, colSumSqr, colN, colFracOneSigma, colFracTwoSigma, colFracThreeSigma, colMinimum, colMaximum); } }
public static void StatisticsOnRows( Altaxo.AltaxoDocument mainDocument, Altaxo.Data.DataTable srctable, IAscendingIntegerCollection selectedColumns, IAscendingIntegerCollection selectedRows ) { bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count); int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.DataColumns.ColumnCount; if (numcols == 0) { return; // nothing selected } bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count); int numrows = bUseSelectedRows ? selectedRows.Count : srctable.DataColumns.RowCount; if (numrows == 0) { return; } Altaxo.Data.DataTable table = new Altaxo.Data.DataTable(); // add a text column and some double columns // note: statistics is only possible for numeric columns since // otherwise in one column doubles and i.e. dates are mixed, which is not possible // 1st column is the mean, and holds the sum during the calculation Data.DoubleColumn c1 = new Data.DoubleColumn(); // 2rd column is the standard deviation, and holds the square sum during calculation Data.DoubleColumn c2 = new Data.DoubleColumn(); // 3th column is the standard e (N) Data.DoubleColumn c3 = new Data.DoubleColumn(); // 4th column is the sum Data.DoubleColumn c4 = new Data.DoubleColumn(); // 5th column is the number of items for statistics Data.DoubleColumn c5 = new Data.DoubleColumn(); table.DataColumns.Add(c1, "Mean"); table.DataColumns.Add(c2, "sd"); table.DataColumns.Add(c3, "se"); table.DataColumns.Add(c4, "Sum"); table.DataColumns.Add(c5, "N"); table.Suspend(); // first fill the cols c1, c2, c5 with zeros because we want to sum up for (int i = 0; i < numrows; i++) { c1[i] = 0; c2[i] = 0; c5[i] = 0; } for (int si = 0; si < numcols; si++) { Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si]; if (!(col is Altaxo.Data.INumericColumn)) { continue; } // now do the statistics Data.INumericColumn ncol = (Data.INumericColumn)col; for (int i = 0; i < numrows; i++) { double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i]; if (Double.IsNaN(val)) { continue; } c1[i] += val; c2[i] += val * val; c5[i] += 1; } } // for all selected columns // now calculate the statistics for (int i = 0; i < numrows; i++) { // now fill a new row in the worksheet double NN = c5[i]; double sum = c1[i]; double sumsqr = c2[i]; if (NN > 0) { double mean = c1[i] / NN; double ymy0sqr = sumsqr - sum * sum / NN; if (ymy0sqr < 0) { ymy0sqr = 0; // if this is lesser zero, it is a rounding error, so set it to zero } double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0; double se = sd / Math.Sqrt(NN); c1[i] = mean; // mean c2[i] = sd; c3[i] = se; c4[i] = sum; c5[i] = NN; } } // for all rows // if a table was created, we add the table to the data set and // create a worksheet if (null != table) { table.Resume(); mainDocument.DataTableCollection.Add(table); // create a new worksheet without any columns Current.ProjectService.CreateNewWorksheet(table); } }
public static void StatisticsOnColumns( Altaxo.AltaxoDocument mainDocument, Altaxo.Data.DataTable srctable, IAscendingIntegerCollection selectedColumns, IAscendingIntegerCollection selectedRows ) { bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count); int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.DataColumns.ColumnCount; bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count); if (numcols == 0) { return; // nothing selected } Data.DataTable table = null; // the created table // add a text column and some double columns // note: statistics is only possible for numeric columns since // otherwise in one column doubles and i.e. dates are mixed, which is not possible // 1st column is the name of the column of which the statistics is made Data.TextColumn colCol = new Data.TextColumn(); // 2nd column is the mean Data.DoubleColumn colMean = new Data.DoubleColumn(); // 3rd column is the standard deviation Data.DoubleColumn colSd = new Data.DoubleColumn(); // 4th column is the standard e (N) Data.DoubleColumn colSe = new Data.DoubleColumn(); // 5th column is the sum Data.DoubleColumn colSum = new Data.DoubleColumn(); // 6th column is the number of items for statistics Data.DoubleColumn colN = new Data.DoubleColumn(); int currRow = 0; for (int si = 0; si < numcols; si++) { Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si]; if (!(col is Altaxo.Data.INumericColumn)) { continue; } int rows = bUseSelectedRows ? selectedRows.Count : srctable.DataColumns.RowCount; if (rows == 0) { continue; } // now do the statistics Data.INumericColumn ncol = (Data.INumericColumn)col; double sum = 0; double sumsqr = 0; int NN = 0; for (int i = 0; i < rows; i++) { double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i]; if (Double.IsNaN(val)) { continue; } NN++; sum += val; sumsqr += (val * val); } // now fill a new row in the worksheet if (NN > 0) { double mean = sum / NN; double ymy0sqr = sumsqr - sum * sum / NN; if (ymy0sqr < 0) { ymy0sqr = 0; // if this is lesser zero, it is a rounding error, so set it to zero } double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0; double se = sd / Math.Sqrt(NN); colCol[currRow] = col.Name; colMean[currRow] = mean; // mean colSd[currRow] = sd; colSe[currRow] = se; colSum[currRow] = sum; colN[currRow] = NN; currRow++; // for the next column } } // for all selected columns if (currRow != 0) { table = new Altaxo.Data.DataTable("Statistics of " + srctable.Name); table.DataColumns.Add(colCol, "Col", Altaxo.Data.ColumnKind.X); // new : add a copy of all property columns; can be usefull for (int i = 0; i < srctable.PropertyColumnCount; i++) { DataColumn originalColumn = srctable.PropertyColumns[i]; DataColumn clonedColumn = (DataColumn)originalColumn.Clone(); clonedColumn.Clear(); for (int si = 0; si < numcols; si++) { int idx = bUseSelectedColumns ? selectedColumns[si] : si; clonedColumn[si] = originalColumn[idx]; } table.DataColumns.Add(clonedColumn, srctable.PropertyColumns.GetColumnName(i), srctable.PropertyColumns.GetColumnKind(i), srctable.PropertyColumns.GetColumnGroup(i)); } table.DataColumns.Add(colMean, "Mean"); table.DataColumns.Add(colSd, "Sd"); table.DataColumns.Add(colSe, "Se"); table.DataColumns.Add(colSum, "Sum"); table.DataColumns.Add(colN, "N"); mainDocument.DataTableCollection.Add(table); // create a new worksheet without any columns Current.ProjectService.CreateNewWorksheet(table); } }
/// <summary> /// Calculates statistics of selected columns. Creates a new table where the statistical data will be written to. /// </summary> /// <param name="srctable">Source table.</param> /// <param name="selectedColumns">Selected data columns in the source table. If the argument is null, all columns will be used.</param> /// <param name="selectedRows">Selected rows in the source table. If the argument is null, all rows will be used.</param> /// <param name="destinationTable">The table where the statistical results are written to.</param> public static void DoStatisticsOnColumns( this DataColumnCollection srctable, IAscendingIntegerCollection selectedColumns, IAscendingIntegerCollection selectedRows, DataColumnCollection destinationTable ) { bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count); int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.ColumnCount; bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count); if (numcols == 0) { return; // nothing selected } // add a text column and some double columns // note: statistics is only possible for numeric columns since // otherwise in one column doubles and i.e. dates are mixed, which is not possible // 1st column is the name of the column of which the statistics is made var colCol = new Data.TextColumn(); // 2nd column is the mean var colMean = new Data.DoubleColumn(); // 3rd column is the standard deviation var colSd = new Data.DoubleColumn(); // 4th column is the standard e (N) var colSe = new Data.DoubleColumn(); // 5th column is the sum var colSum = new Data.DoubleColumn(); var colSumSqr = new Data.DoubleColumn(); // 6th column is the number of items for statistics var colN = new Data.DoubleColumn(); var colFracOneSigma = new Data.DoubleColumn(); var colFracTwoSigma = new Data.DoubleColumn(); var colFracThreeSigma = new Data.DoubleColumn(); var colMinimum = new DoubleColumn(); // Minimum of the values var colMaximum = new DoubleColumn(); // Maximum of the values int currRow = 0; for (int si = 0; si < numcols; si++) { Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si]; if (!(col is Altaxo.Data.INumericColumn)) { continue; } int rows = bUseSelectedRows ? selectedRows.Count : srctable.RowCount; if (rows == 0) { continue; } // now do the statistics var ncol = (Data.INumericColumn)col; double sum = 0; double sumsqr = 0; int NN = 0; double minimum = double.PositiveInfinity; double maximum = double.NegativeInfinity; for (int i = 0; i < rows; i++) { double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i]; if (double.IsNaN(val)) { continue; } NN++; sum += val; sumsqr += (val * val); minimum = Math.Min(minimum, val); maximum = Math.Max(maximum, val); } // now fill a new row in the worksheet double mean = sum / NN; double ymy0sqr = sumsqr - sum * sum / NN; if (ymy0sqr < 0) { ymy0sqr = 0; // if this is lesser zero, it is a rounding error, so set it to zero } double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0; double se = sd / Math.Sqrt(NN); // calculate fractions double oneSigmaLo = mean - 1 * sd, oneSigmaHi = mean + 1 * sd; double twoSigmaLo = mean - 2 * sd, twoSigmaHi = mean + 2 * sd; double threeSigmaLo = mean - 3 * sd, threeSigmaHi = mean + 3 * sd; int cntOneSigma = 0, cntTwoSigma = 0, cntThreeSigma = 0; for (int i = 0; i < rows; i++) { double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i]; if (double.IsNaN(val)) { continue; } if (Altaxo.Calc.RMath.IsInIntervalCC(val, oneSigmaLo, oneSigmaHi)) { ++cntOneSigma; } if (Altaxo.Calc.RMath.IsInIntervalCC(val, twoSigmaLo, twoSigmaHi)) { ++cntTwoSigma; } if (Altaxo.Calc.RMath.IsInIntervalCC(val, threeSigmaLo, threeSigmaHi)) { ++cntThreeSigma; } } if (0 == NN) { minimum = maximum = double.NaN; } colCol[currRow] = col.Name; colMean[currRow] = mean; // mean colSd[currRow] = sd; colSe[currRow] = se; colSum[currRow] = sum; colSumSqr[currRow] = sumsqr; colN[currRow] = NN; colFracOneSigma[currRow] = cntOneSigma / (double)NN; colFracTwoSigma[currRow] = cntTwoSigma / (double)NN; colFracThreeSigma[currRow] = cntThreeSigma / (double)NN; colMinimum[currRow] = minimum; colMaximum[currRow] = maximum; currRow++; // for the next column } // for all selected columns if (currRow != 0) { destinationTable.EnsureExistence(DefaultColumnNameColumnName, typeof(TextColumn), ColumnKind.X, 0).Append(colCol); AppendStatisticalData(destinationTable, colMean, colSd, colSe, colSum, colSumSqr, colN, colFracOneSigma, colFracTwoSigma, colFracThreeSigma, colMinimum, colMaximum); } }
private static void AppendStatisticalData(DataColumnCollection destinationTable, Data.DoubleColumn colMean, Data.DoubleColumn colSd, Data.DoubleColumn colSe, Data.DoubleColumn colSum, Data.DoubleColumn colSumSqr, Data.DoubleColumn colN, Data.DoubleColumn fracOneSigma, Data.DoubleColumn fracTwoSigma, Data.DoubleColumn fracThreeSigma, DoubleColumn minimum, DoubleColumn maximum) { destinationTable.EnsureExistence(DefaultMeanColumnName, typeof(DoubleColumn), ColumnKind.V, 0).Append(colMean); destinationTable.EnsureExistence(DefaultStandardErrorColumnName, typeof(DoubleColumn), ColumnKind.Err, 0).Append(colSe); destinationTable.EnsureExistence(DefaultStandardDeviationColumnName, typeof(DoubleColumn), ColumnKind.V, 0).Append(colSd); destinationTable.EnsureExistence(DefaultSumColumnName, typeof(DoubleColumn), ColumnKind.V, 0).Append(colSum); destinationTable.EnsureExistence(DefaultSumSqrColumnName, typeof(DoubleColumn), ColumnKind.V, 0).Append(colSumSqr); destinationTable.EnsureExistence(DefaultNumberOfItemsColumnName, typeof(DoubleColumn), ColumnKind.V, 0).Append(colN); destinationTable.EnsureExistence(DefaultFractionInOneSigmaColumnName, typeof(DoubleColumn), ColumnKind.V, 0).Append(fracOneSigma); destinationTable.EnsureExistence(DefaultFractionInTwoSigmaColumnName, typeof(DoubleColumn), ColumnKind.V, 0).Append(fracTwoSigma); destinationTable.EnsureExistence(DefaultFractionInThreeSigmaColumnName, typeof(DoubleColumn), ColumnKind.V, 0).Append(fracThreeSigma); destinationTable.EnsureExistence(DefaultMinimumColumnName, typeof(DoubleColumn), ColumnKind.V, 0).Append(minimum); destinationTable.EnsureExistence(DefaultMaximumColumnName, typeof(DoubleColumn), ColumnKind.V, 0).Append(maximum); }
/// <summary> /// Calculates statistics of selected columns. Creates a new table where the statistical data will be written to. /// </summary> /// <param name="srctable">Source table.</param> /// <param name="selectedColumns">Selected data columns in the source table.</param> /// <param name="selectedRows">Selected rows in the source table.</param> /// <param name="destinationTable">The table where the statistical results are written to.</param> public static void DoStatisticsOnRows( this DataColumnCollection srctable, IAscendingIntegerCollection selectedColumns, IAscendingIntegerCollection selectedRows, DataColumnCollection destinationTable ) { bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count); int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.ColumnCount; if (numcols == 0) { return; // nothing selected } bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count); int numrows = bUseSelectedRows ? selectedRows.Count : srctable.RowCount; if (numrows == 0) { return; } var cRows = new DoubleColumn(); // 1st column is the mean, and holds the sum during the calculation var colMean = new Data.DoubleColumn(); // 2rd column is the standard deviation, and holds the square sum during calculation var colSD = new Data.DoubleColumn(); // 3th column is the standard e (N) var colSE = new Data.DoubleColumn(); // 4th column is the sum var colSum = new Data.DoubleColumn(); // 5th column is the number of items for statistics var colNN = new Data.DoubleColumn(); var colSumSqr = new Data.DoubleColumn(); var colFracOneSigma = new Data.DoubleColumn(); var colFracTwoSigma = new Data.DoubleColumn(); var colFracThreeSigma = new Data.DoubleColumn(); var colMinimum = new DoubleColumn(); var colMaximum = new DoubleColumn(); // first fill the cols c1, c2, c5 with zeros because we want to sum up for (int i = 0; i < numrows; i++) { colSum[i] = 0; colSumSqr[i] = 0; colNN[i] = 0; colMinimum[i] = double.PositiveInfinity; colMaximum[i] = double.NegativeInfinity; } for (int si = 0; si < numcols; si++) { Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si]; if (!(col is Altaxo.Data.INumericColumn)) { continue; } // now do the statistics var ncol = (Data.INumericColumn)col; for (int i = 0; i < numrows; i++) { int row = bUseSelectedRows ? selectedRows[i] : i; cRows[i] = row; double val = ncol[row]; if (double.IsNaN(val)) { continue; } colSum[i] += val; colSumSqr[i] += val * val; colNN[i] += 1; colMinimum[i] = Math.Min(colMinimum[i], val); colMaximum[i] = Math.Max(colMaximum[i], val); } } // for all selected columns // now calculate the statistics for (int i = 0; i < numrows; i++) { // now fill a new row in the worksheet double NN = colNN[i]; double sum = colSum[i]; double sumsqr = colSumSqr[i]; if (NN > 0) { double mean = sum / NN; double ymy0sqr = sumsqr - sum * sum / NN; if (ymy0sqr < 0) { ymy0sqr = 0; // if this is lesser zero, it is a rounding error, so set it to zero } double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0; double se = sd / Math.Sqrt(NN); colMean[i] = mean; // mean colSD[i] = sd; colSE[i] = se; } else { colMinimum[i] = double.NaN; colMaximum[i] = double.NaN; } } // for all rows // calculate fractions for (int i = 0; i < numrows; i++) { int row = bUseSelectedRows ? selectedRows[i] : i; double mean = colMean[i]; double sd = colSD[i]; // calculate fractions double oneSigmaLo = mean - 1 * sd, oneSigmaHi = mean + 1 * sd; double twoSigmaLo = mean - 2 * sd, twoSigmaHi = mean + 2 * sd; double threeSigmaLo = mean - 3 * sd, threeSigmaHi = mean + 3 * sd; int cntOneSigma = 0, cntTwoSigma = 0, cntThreeSigma = 0; for (int si = 0; si < numcols; si++) { Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si]; if (!(col is Altaxo.Data.INumericColumn)) { continue; } // now do the statistics var ncol = (Data.INumericColumn)col; double val = ncol[row]; if (double.IsNaN(val)) { continue; } if (Altaxo.Calc.RMath.IsInIntervalCC(val, oneSigmaLo, oneSigmaHi)) { ++cntOneSigma; } if (Altaxo.Calc.RMath.IsInIntervalCC(val, twoSigmaLo, twoSigmaHi)) { ++cntTwoSigma; } if (Altaxo.Calc.RMath.IsInIntervalCC(val, threeSigmaLo, threeSigmaHi)) { ++cntThreeSigma; } } colFracOneSigma[i] = cntOneSigma / colNN[i]; colFracTwoSigma[i] = cntTwoSigma / colNN[i]; colFracThreeSigma[i] = cntThreeSigma / colNN[i]; } destinationTable.EnsureExistence(DefaultRowNumberColumnName, typeof(DoubleColumn), ColumnKind.X, 0).Append(cRows); AppendStatisticalData(destinationTable, colMean, colSD, colSE, colSum, colSumSqr, colNN, colFracOneSigma, colFracTwoSigma, colFracThreeSigma, colMinimum, colMaximum); }
public static void StatisticsOnColumns( Altaxo.AltaxoDocument mainDocument, Altaxo.Data.DataTable srctable, IAscendingIntegerCollection selectedColumns, IAscendingIntegerCollection selectedRows ) { bool bUseSelectedColumns = (null!=selectedColumns && 0!=selectedColumns.Count); int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.DataColumns.ColumnCount; bool bUseSelectedRows = (null!=selectedRows && 0!=selectedRows.Count); if(numcols==0) return; // nothing selected Data.DataTable table = null; // the created table // add a text column and some double columns // note: statistics is only possible for numeric columns since // otherwise in one column doubles and i.e. dates are mixed, which is not possible // 1st column is the name of the column of which the statistics is made Data.TextColumn colCol = new Data.TextColumn(); // 2nd column is the mean Data.DoubleColumn colMean = new Data.DoubleColumn(); // 3rd column is the standard deviation Data.DoubleColumn colSd = new Data.DoubleColumn(); // 4th column is the standard e (N) Data.DoubleColumn colSe = new Data.DoubleColumn(); // 5th column is the sum Data.DoubleColumn colSum = new Data.DoubleColumn(); // 6th column is the number of items for statistics Data.DoubleColumn colN = new Data.DoubleColumn(); int currRow=0; for(int si=0;si<numcols;si++) { Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si]; if(!(col is Altaxo.Data.INumericColumn)) continue; int rows = bUseSelectedRows ? selectedRows.Count : srctable.DataColumns.RowCount; if(rows==0) continue; // now do the statistics Data.INumericColumn ncol = (Data.INumericColumn)col; double sum=0; double sumsqr=0; int NN=0; for(int i=0;i<rows;i++) { double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i]; if(Double.IsNaN(val)) continue; NN++; sum+=val; sumsqr+=(val*val); } // now fill a new row in the worksheet if(NN>0) { double mean = sum/NN; double ymy0sqr = sumsqr - sum*sum/NN; if(ymy0sqr<0) ymy0sqr=0; // if this is lesser zero, it is a rounding error, so set it to zero double sd = NN>1 ? Math.Sqrt(ymy0sqr/(NN-1)) : 0; double se = sd/Math.Sqrt(NN); colCol[currRow] = col.Name; colMean[currRow] = mean; // mean colSd[currRow] = sd; colSe[currRow] = se; colSum[currRow] = sum; colN[currRow] = NN; currRow++; // for the next column } } // for all selected columns if(currRow!=0) { table = new Altaxo.Data.DataTable("Statistics of " + srctable.Name); table.DataColumns.Add(colCol,"Col",Altaxo.Data.ColumnKind.X); // new : add a copy of all property columns; can be usefull for (int i = 0; i < srctable.PropertyColumnCount; i++) { DataColumn originalColumn = srctable.PropertyColumns[i]; DataColumn clonedColumn = (DataColumn)originalColumn.Clone(); clonedColumn.Clear(); for (int si = 0; si < numcols; si++) { int idx = bUseSelectedColumns ? selectedColumns[si] : si; clonedColumn[si] = originalColumn[idx]; } table.DataColumns.Add(clonedColumn, srctable.PropertyColumns.GetColumnName(i), srctable.PropertyColumns.GetColumnKind(i), srctable.PropertyColumns.GetColumnGroup(i)); } table.DataColumns.Add(colMean,"Mean"); table.DataColumns.Add(colSd,"Sd"); table.DataColumns.Add(colSe,"Se"); table.DataColumns.Add(colSum,"Sum"); table.DataColumns.Add(colN,"N"); mainDocument.DataTableCollection.Add(table); // create a new worksheet without any columns Current.ProjectService.CreateNewWorksheet(table); } }
public static void StatisticsOnRows( Altaxo.AltaxoDocument mainDocument, Altaxo.Data.DataTable srctable, IAscendingIntegerCollection selectedColumns, IAscendingIntegerCollection selectedRows ) { bool bUseSelectedColumns = (null!=selectedColumns && 0!=selectedColumns.Count); int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.DataColumns.ColumnCount; if(numcols==0) return; // nothing selected bool bUseSelectedRows = (null!=selectedRows && 0!=selectedRows.Count); int numrows = bUseSelectedRows ? selectedRows.Count : srctable.DataColumns.RowCount; if(numrows==0) return; Altaxo.Data.DataTable table = new Altaxo.Data.DataTable(); // add a text column and some double columns // note: statistics is only possible for numeric columns since // otherwise in one column doubles and i.e. dates are mixed, which is not possible // 1st column is the mean, and holds the sum during the calculation Data.DoubleColumn c1 = new Data.DoubleColumn(); // 2rd column is the standard deviation, and holds the square sum during calculation Data.DoubleColumn c2 = new Data.DoubleColumn(); // 3th column is the standard e (N) Data.DoubleColumn c3 = new Data.DoubleColumn(); // 4th column is the sum Data.DoubleColumn c4 = new Data.DoubleColumn(); // 5th column is the number of items for statistics Data.DoubleColumn c5 = new Data.DoubleColumn(); table.DataColumns.Add(c1,"Mean"); table.DataColumns.Add(c2,"sd"); table.DataColumns.Add(c3,"se"); table.DataColumns.Add(c4,"Sum"); table.DataColumns.Add(c5,"N"); table.Suspend(); // first fill the cols c1, c2, c5 with zeros because we want to sum up for(int i=0;i<numrows;i++) { c1[i]=0; c2[i]=0; c5[i]=0; } for(int si=0;si<numcols;si++) { Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si]; if(!(col is Altaxo.Data.INumericColumn)) continue; // now do the statistics Data.INumericColumn ncol = (Data.INumericColumn)col; for(int i=0;i<numrows;i++) { double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i]; if(Double.IsNaN(val)) continue; c1[i] += val; c2[i] += val*val; c5[i] += 1; } } // for all selected columns // now calculate the statistics for(int i=0;i<numrows;i++) { // now fill a new row in the worksheet double NN=c5[i]; double sum=c1[i]; double sumsqr=c2[i]; if(NN>0) { double mean = c1[i]/NN; double ymy0sqr = sumsqr - sum*sum/NN; if(ymy0sqr<0) ymy0sqr=0; // if this is lesser zero, it is a rounding error, so set it to zero double sd = NN>1 ? Math.Sqrt(ymy0sqr/(NN-1)) : 0; double se = sd/Math.Sqrt(NN); c1[i] = mean; // mean c2[i] = sd; c3[i] = se; c4[i] = sum; c5[i] = NN; } } // for all rows // if a table was created, we add the table to the data set and // create a worksheet if(null!=table) { table.Resume(); mainDocument.DataTableCollection.Add(table); // create a new worksheet without any columns Current.ProjectService.CreateNewWorksheet(table); } }