Ejemplo n.º 1
0
		/// <summary>
		/// Calculates statistics across the selected columns, separately for every selected row, and appends the
		/// results to the destination table. Every processed source row yields one result row.
		/// </summary>
		/// <param name="srctable">Source table.</param>
		/// <param name="selectedColumns">Selected data columns in the source table. If null or empty, all columns are used.</param>
		/// <param name="selectedRows">Selected rows in the source table. If null or empty, all rows are used.</param>
		/// <param name="destinationTable">The table where the statistical results are written to.</param>
		/// <remarks>
		/// Columns that are not numeric are skipped, and NaN values do not contribute to the statistics.
		/// The standard deviation uses the divisor N-1 and is reported as 0 when only one value is present.
		/// For rows without any valid value, minimum and maximum are set to NaN, and mean, standard deviation
		/// and standard error are not written.
		/// </remarks>
		public static void DoStatisticsOnRows(
			this DataColumnCollection srctable,
			IAscendingIntegerCollection selectedColumns,
			IAscendingIntegerCollection selectedRows,
			DataColumnCollection destinationTable
			)
		{
			bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count);
			int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.ColumnCount;
			if (numcols == 0)
				return; // nothing selected

			bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count);
			int numrows = bUseSelectedRows ? selectedRows.Count : srctable.RowCount;
			if (numrows == 0)
				return;

			// Resolve the numeric source columns once, so that both passes below (accumulation and
			// sigma fractions) do not have to repeat the lookup and type test for every row.
			var numericColumns = new Data.INumericColumn[numcols];
			int numNumericColumns = 0;
			for (int si = 0; si < numcols; si++)
			{
				Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si];
				var ncol = col as Altaxo.Data.INumericColumn;
				if (null != ncol)
					numericColumns[numNumericColumns++] = ncol;
			}

			// Source row number of each result row
			var cRows = new DoubleColumn();

			var colMean = new Data.DoubleColumn(); // mean
			var colSD = new Data.DoubleColumn(); // standard deviation
			var colSE = new Data.DoubleColumn(); // standard error of the mean
			var colSum = new Data.DoubleColumn(); // sum of the values
			var colNN = new Data.DoubleColumn(); // number of values that entered the statistics
			var colSumSqr = new Data.DoubleColumn(); // sum of the squared values
			var colFracOneSigma = new Data.DoubleColumn();
			var colFracTwoSigma = new Data.DoubleColumn();
			var colFracThreeSigma = new Data.DoubleColumn();
			var colMinimum = new DoubleColumn();
			var colMaximum = new DoubleColumn();

			// Initialize the accumulators. The row numbers are written here (and not while scanning the
			// source columns) so that they are present even if none of the selected columns is numeric.
			for (int i = 0; i < numrows; i++)
			{
				cRows[i] = bUseSelectedRows ? selectedRows[i] : i;
				colSum[i] = 0;
				colSumSqr[i] = 0;
				colNN[i] = 0;
				colMinimum[i] = double.PositiveInfinity;
				colMaximum[i] = double.NegativeInfinity;
			}

			// First pass: accumulate sum, square sum, count, minimum and maximum for every row
			for (int ci = 0; ci < numNumericColumns; ci++)
			{
				Data.INumericColumn ncol = numericColumns[ci];
				for (int i = 0; i < numrows; i++)
				{
					int row = bUseSelectedRows ? selectedRows[i] : i;

					double val = ncol[row];
					if (Double.IsNaN(val))
						continue; // missing values do not enter the statistics

					colSum[i] += val;
					colSumSqr[i] += val * val;
					colNN[i] += 1;
					colMinimum[i] = Math.Min(colMinimum[i], val);
					colMaximum[i] = Math.Max(colMaximum[i], val);
				}
			}

			// Derive mean, standard deviation and standard error from the accumulated values
			for (int i = 0; i < numrows; i++)
			{
				double NN = colNN[i];
				double sum = colSum[i];
				double sumsqr = colSumSqr[i];
				if (NN > 0)
				{
					double mean = sum / NN;
					double ymy0sqr = sumsqr - sum * sum / NN;
					if (ymy0sqr < 0) ymy0sqr = 0; // a negative value can only be a rounding error, so clamp it to zero
					double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0;
					double se = sd / Math.Sqrt(NN);

					colMean[i] = mean;
					colSD[i] = sd;
					colSE[i] = se;
				}
				else
				{
					// no valid value in this row: replace the infinite start values
					colMinimum[i] = double.NaN;
					colMaximum[i] = double.NaN;
				}
			}

			// Second pass: fraction of the values of each row that lie within 1, 2 and 3 standard
			// deviations around the mean (interval test done by RMath.IsInIntervalCC)
			for (int i = 0; i < numrows; i++)
			{
				int row = bUseSelectedRows ? selectedRows[i] : i;

				double mean = colMean[i];
				double sd = colSD[i];

				double oneSigmaLo = mean - 1 * sd, oneSigmaHi = mean + 1 * sd;
				double twoSigmaLo = mean - 2 * sd, twoSigmaHi = mean + 2 * sd;
				double threeSigmaLo = mean - 3 * sd, threeSigmaHi = mean + 3 * sd;
				int cntOneSigma = 0, cntTwoSigma = 0, cntThreeSigma = 0;

				for (int ci = 0; ci < numNumericColumns; ci++)
				{
					double val = numericColumns[ci][row];
					if (Double.IsNaN(val))
						continue;

					if (Altaxo.Calc.RMath.IsInIntervalCC(val, oneSigmaLo, oneSigmaHi)) ++cntOneSigma;
					if (Altaxo.Calc.RMath.IsInIntervalCC(val, twoSigmaLo, twoSigmaHi)) ++cntTwoSigma;
					if (Altaxo.Calc.RMath.IsInIntervalCC(val, threeSigmaLo, threeSigmaHi)) ++cntThreeSigma;
				}

				// For rows without valid values this is 0/0, i.e. NaN
				double count = colNN[i];
				colFracOneSigma[i] = cntOneSigma / count;
				colFracTwoSigma[i] = cntTwoSigma / count;
				colFracThreeSigma[i] = cntThreeSigma / count;
			}

			destinationTable.EnsureExistence(DefaultRowNumberColumnName, typeof(DoubleColumn), ColumnKind.X, 0).Append(cRows);
			AppendStatisticalData(destinationTable, colMean, colSD, colSE, colSum, colSumSqr, colNN, colFracOneSigma, colFracTwoSigma, colFracThreeSigma, colMinimum, colMaximum);
		}
Ejemplo n.º 2
0
		/// <summary>
		/// Computes descriptive statistics for each selected numeric column (over the selected rows) and appends
		/// one result row per evaluated column to the destination table.
		/// </summary>
		/// <param name="srctable">Source table.</param>
		/// <param name="selectedColumns">Selected data columns in the source table. If null or empty, all columns are used.</param>
		/// <param name="selectedRows">Selected rows in the source table. If null or empty, all rows are used.</param>
		/// <param name="destinationTable">The table where the statistical results are written to.</param>
		public static void DoStatisticsOnColumns(
			this DataColumnCollection srctable,
			IAscendingIntegerCollection selectedColumns,
			IAscendingIntegerCollection selectedRows,
			DataColumnCollection destinationTable
			)
		{
			bool hasColumnSelection = selectedColumns != null && selectedColumns.Count != 0;
			int columnTotal = hasColumnSelection ? selectedColumns.Count : srctable.ColumnCount;

			bool hasRowSelection = selectedRows != null && selectedRows.Count != 0;

			if (0 == columnTotal)
				return; // nothing to evaluate

			// Result columns. Only numeric source columns are evaluated, since mixing e.g. doubles
			// and dates in one result column is not possible.
			var names = new Data.TextColumn(); // name of the evaluated source column
			var means = new Data.DoubleColumn();
			var deviations = new Data.DoubleColumn(); // standard deviation
			var errors = new Data.DoubleColumn(); // standard error
			var sums = new Data.DoubleColumn();
			var squareSums = new Data.DoubleColumn();
			var counts = new Data.DoubleColumn(); // number of values that entered the statistics
			var fractionsOneSigma = new Data.DoubleColumn();
			var fractionsTwoSigma = new Data.DoubleColumn();
			var fractionsThreeSigma = new Data.DoubleColumn();
			var minima = new DoubleColumn();
			var maxima = new DoubleColumn();

			int resultRow = 0;
			for (int si = 0; si < columnTotal; ++si)
			{
				Altaxo.Data.DataColumn source = hasColumnSelection ? srctable[selectedColumns[si]] : srctable[si];
				var values = source as Altaxo.Data.INumericColumn;
				if (null == values)
					continue; // not a numeric column

				int rowTotal = hasRowSelection ? selectedRows.Count : srctable.RowCount;
				if (0 == rowTotal)
					continue;

				// Pass 1: count, sum, square sum and extrema of all values that are not NaN
				int count = 0;
				double total = 0;
				double totalOfSquares = 0;
				double smallest = double.PositiveInfinity;
				double largest = double.NegativeInfinity;

				for (int i = 0; i < rowTotal; ++i)
				{
					int row = hasRowSelection ? selectedRows[i] : i;
					double v = values[row];
					if (!Double.IsNaN(v))
					{
						++count;
						total += v;
						totalOfSquares += (v * v);
						smallest = Math.Min(smallest, v);
						largest = Math.Max(largest, v);
					}
				}

				// Without any valid value the extrema are reported as NaN instead of +/- infinity
				if (count == 0)
				{
					smallest = double.NaN;
					largest = double.NaN;
				}

				double mean = total / count;
				double squaredDeviationSum = totalOfSquares - total * total / count;
				if (squaredDeviationSum < 0)
					squaredDeviationSum = 0; // can only be negative due to rounding

				double sd = 0;
				if (count > 1)
					sd = Math.Sqrt(squaredDeviationSum / (count - 1));
				double se = sd / Math.Sqrt(count);

				// Pass 2: how many values lie within 1, 2 and 3 standard deviations around the mean
				double lo1 = mean - 1 * sd, hi1 = mean + 1 * sd;
				double lo2 = mean - 2 * sd, hi2 = mean + 2 * sd;
				double lo3 = mean - 3 * sd, hi3 = mean + 3 * sd;
				int inOneSigma = 0;
				int inTwoSigma = 0;
				int inThreeSigma = 0;

				for (int i = 0; i < rowTotal; ++i)
				{
					int row = hasRowSelection ? selectedRows[i] : i;
					double v = values[row];
					if (Double.IsNaN(v))
						continue;

					if (Altaxo.Calc.RMath.IsInIntervalCC(v, lo1, hi1))
						++inOneSigma;
					if (Altaxo.Calc.RMath.IsInIntervalCC(v, lo2, hi2))
						++inTwoSigma;
					if (Altaxo.Calc.RMath.IsInIntervalCC(v, lo3, hi3))
						++inThreeSigma;
				}

				// Store the result row for this source column
				names[resultRow] = source.Name;
				means[resultRow] = mean;
				deviations[resultRow] = sd;
				errors[resultRow] = se;
				sums[resultRow] = total;
				squareSums[resultRow] = totalOfSquares;
				counts[resultRow] = count;
				fractionsOneSigma[resultRow] = inOneSigma / (double)count;
				fractionsTwoSigma[resultRow] = inTwoSigma / (double)count;
				fractionsThreeSigma[resultRow] = inThreeSigma / (double)count;
				minima[resultRow] = smallest;
				maxima[resultRow] = largest;
				++resultRow;
			}

			if (0 == resultRow)
				return; // no column was evaluated, so nothing is appended

			destinationTable.EnsureExistence(DefaultColumnNameColumnName, typeof(TextColumn), ColumnKind.X, 0).Append(names);
			AppendStatisticalData(destinationTable, means, deviations, errors, sums, squareSums, counts, fractionsOneSigma, fractionsTwoSigma, fractionsThreeSigma, minima, maxima);
		}
Ejemplo n.º 3
0
        /// <summary>
        /// Calculates, for every selected row, mean, standard deviation, standard error, sum and number of values
        /// over the selected numeric columns. The results are stored in a newly created table, which is added
        /// to the document and shown in a new worksheet.
        /// </summary>
        /// <param name="mainDocument">The document to whose table collection the result table is added.</param>
        /// <param name="srctable">Source table.</param>
        /// <param name="selectedColumns">Selected data columns in the source table. If null or empty, all data columns are used.</param>
        /// <param name="selectedRows">Selected rows in the source table. If null or empty, all rows are used.</param>
        /// <remarks>
        /// Non-numeric columns and NaN values are ignored. Rows without any valid value get NaN as mean and
        /// standard deviation (instead of a misleading 0), a sum of 0 and a count of 0.
        /// </remarks>
        public static void StatisticsOnRows(
            Altaxo.AltaxoDocument mainDocument,
            Altaxo.Data.DataTable srctable,
            IAscendingIntegerCollection selectedColumns,
            IAscendingIntegerCollection selectedRows
            )
        {
            bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count);
            int  numcols             = bUseSelectedColumns ? selectedColumns.Count : srctable.DataColumns.ColumnCount;

            if (numcols == 0)
            {
                return; // nothing selected
            }
            bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count);
            int  numrows          = bUseSelectedRows ? selectedRows.Count : srctable.DataColumns.RowCount;

            if (numrows == 0)
            {
                return;
            }

            Altaxo.Data.DataTable table = new Altaxo.Data.DataTable();
            // note: statistics is only possible for numeric columns since
            // otherwise in one column doubles and i.e. dates are mixed, which is not possible

            // 1st column is the mean, and holds the sum during the calculation
            Data.DoubleColumn c1 = new Data.DoubleColumn();

            // 2nd column is the standard deviation, and holds the square sum during the calculation
            Data.DoubleColumn c2 = new Data.DoubleColumn();

            // 3rd column is the standard error
            Data.DoubleColumn c3 = new Data.DoubleColumn();

            // 4th column is the sum
            Data.DoubleColumn c4 = new Data.DoubleColumn();

            // 5th column is the number of items for statistics
            Data.DoubleColumn c5 = new Data.DoubleColumn();

            table.DataColumns.Add(c1, "Mean");
            table.DataColumns.Add(c2, "sd");
            table.DataColumns.Add(c3, "se");
            table.DataColumns.Add(c4, "Sum");
            table.DataColumns.Add(c5, "N");

            table.Suspend();

            // first fill the accumulator columns c1, c2, c5 with zeros because we want to sum up
            for (int i = 0; i < numrows; i++)
            {
                c1[i] = 0;
                c2[i] = 0;
                c5[i] = 0;
            }

            for (int si = 0; si < numcols; si++)
            {
                Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si];
                if (!(col is Altaxo.Data.INumericColumn))
                {
                    continue;
                }

                // accumulate sum, square sum and count for every row
                Data.INumericColumn ncol = (Data.INumericColumn)col;
                for (int i = 0; i < numrows; i++)
                {
                    double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i];
                    if (Double.IsNaN(val))
                    {
                        continue; // missing values do not enter the statistics
                    }

                    c1[i] += val;
                    c2[i] += val * val;
                    c5[i] += 1;
                }
            } // for all selected columns

            // now replace the accumulated values by the final statistics
            for (int i = 0; i < numrows; i++)
            {
                double NN     = c5[i];
                double sum    = c1[i];
                double sumsqr = c2[i];
                if (NN > 0)
                {
                    double mean    = sum / NN;
                    double ymy0sqr = sumsqr - sum * sum / NN;
                    if (ymy0sqr < 0)
                    {
                        ymy0sqr = 0; // a negative value can only be a rounding error, so clamp it to zero
                    }
                    double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0;
                    double se = sd / Math.Sqrt(NN);

                    c1[i] = mean;
                    c2[i] = sd;
                    c3[i] = se;
                }
                else
                {
                    // No valid value in this row: the accumulators still hold 0, which must not be
                    // reported as mean and standard deviation.
                    c1[i] = double.NaN;
                    c2[i] = double.NaN;
                }

                // the sum is defined for every row (0 if there were no values); c5 already holds N
                c4[i] = sum;
            } // for all rows

            table.Resume();
            mainDocument.DataTableCollection.Add(table);
            // create a new worksheet for the result table
            Current.ProjectService.CreateNewWorksheet(table);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Calculates, for every selected numeric column, mean, standard deviation, standard error, sum and number
        /// of values over the selected rows. If at least one column yields a result, a new table named
        /// "Statistics of " followed by the source table name is created, added to the document and shown
        /// in a new worksheet.
        /// </summary>
        /// <param name="mainDocument">The document to whose table collection the result table is added.</param>
        /// <param name="srctable">Source table.</param>
        /// <param name="selectedColumns">Selected data columns in the source table. If null or empty, all data columns are used.</param>
        /// <param name="selectedRows">Selected rows in the source table. If null or empty, all rows are used.</param>
        /// <remarks>
        /// Non-numeric columns and columns without any valid (non-NaN) value produce no result row.
        /// The property columns of the source table are copied into the result table, so that every result
        /// row carries the property values of the source column it was calculated from.
        /// </remarks>
        public static void StatisticsOnColumns(
            Altaxo.AltaxoDocument mainDocument,
            Altaxo.Data.DataTable srctable,
            IAscendingIntegerCollection selectedColumns,
            IAscendingIntegerCollection selectedRows
            )
        {
            bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count);
            int  numcols             = bUseSelectedColumns ? selectedColumns.Count : srctable.DataColumns.ColumnCount;

            bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count);

            if (numcols == 0)
            {
                return; // nothing selected
            }

            // the number of rows to evaluate is the same for all columns
            int rows = bUseSelectedRows ? selectedRows.Count : srctable.DataColumns.RowCount;

            // note: statistics is only possible for numeric columns since
            // otherwise in one column doubles and i.e. dates are mixed, which is not possible

            // 1st column is the name of the column of which the statistics is made
            Data.TextColumn colCol = new Data.TextColumn();

            // 2nd column is the mean
            Data.DoubleColumn colMean = new Data.DoubleColumn();

            // 3rd column is the standard deviation
            Data.DoubleColumn colSd = new Data.DoubleColumn();

            // 4th column is the standard error
            Data.DoubleColumn colSe = new Data.DoubleColumn();

            // 5th column is the sum
            Data.DoubleColumn colSum = new Data.DoubleColumn();

            // 6th column is the number of items for statistics
            Data.DoubleColumn colN = new Data.DoubleColumn();

            // Index of the source column each result row was calculated from. Needed because columns
            // can be skipped, so the result row index is not the same as the selection index.
            int[] sourceColumnIndices = new int[numcols];

            int currRow = 0;

            for (int si = 0; si < numcols; si++)
            {
                int columnIndex = bUseSelectedColumns ? selectedColumns[si] : si;
                Altaxo.Data.DataColumn col = srctable[columnIndex];
                if (!(col is Altaxo.Data.INumericColumn))
                {
                    continue;
                }

                if (rows == 0)
                {
                    continue;
                }

                // now do the statistics
                Data.INumericColumn ncol = (Data.INumericColumn)col;
                double sum    = 0;
                double sumsqr = 0;
                int    NN     = 0;
                for (int i = 0; i < rows; i++)
                {
                    double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i];
                    if (Double.IsNaN(val))
                    {
                        continue; // missing values do not enter the statistics
                    }

                    NN++;
                    sum    += val;
                    sumsqr += (val * val);
                }

                // now fill a new result row, but only if the column contained valid values
                if (NN > 0)
                {
                    double mean    = sum / NN;
                    double ymy0sqr = sumsqr - sum * sum / NN;
                    if (ymy0sqr < 0)
                    {
                        ymy0sqr = 0; // a negative value can only be a rounding error, so clamp it to zero
                    }
                    double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0;
                    double se = sd / Math.Sqrt(NN);

                    colCol[currRow]  = col.Name;
                    colMean[currRow] = mean;
                    colSd[currRow]   = sd;
                    colSe[currRow]   = se;
                    colSum[currRow]  = sum;
                    colN[currRow]    = NN;
                    sourceColumnIndices[currRow] = columnIndex;
                    currRow++; // for the next column
                }
            } // for all selected columns

            if (currRow != 0)
            {
                Data.DataTable table = new Altaxo.Data.DataTable("Statistics of " + srctable.Name);
                table.DataColumns.Add(colCol, "Col", Altaxo.Data.ColumnKind.X);

                // add a copy of all property columns; can be useful
                for (int i = 0; i < srctable.PropertyColumnCount; i++)
                {
                    DataColumn originalColumn = srctable.PropertyColumns[i];
                    DataColumn clonedColumn   = (DataColumn)originalColumn.Clone();
                    clonedColumn.Clear();

                    // copy per result row, so that the property values stay aligned with the
                    // statistics even if some of the selected columns produced no result row
                    for (int resultRow = 0; resultRow < currRow; resultRow++)
                    {
                        clonedColumn[resultRow] = originalColumn[sourceColumnIndices[resultRow]];
                    }
                    table.DataColumns.Add(clonedColumn, srctable.PropertyColumns.GetColumnName(i), srctable.PropertyColumns.GetColumnKind(i), srctable.PropertyColumns.GetColumnGroup(i));
                }

                table.DataColumns.Add(colMean, "Mean");
                table.DataColumns.Add(colSd, "Sd");
                table.DataColumns.Add(colSe, "Se");
                table.DataColumns.Add(colSum, "Sum");
                table.DataColumns.Add(colN, "N");

                mainDocument.DataTableCollection.Add(table);
                // create a new worksheet for the result table
                Current.ProjectService.CreateNewWorksheet(table);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Evaluates every selected numeric column over the selected rows and appends one row of
        /// statistical results per evaluated column to the destination table.
        /// </summary>
        /// <param name="srctable">Source table.</param>
        /// <param name="selectedColumns">Selected data columns in the source table. If null or empty, all columns are used.</param>
        /// <param name="selectedRows">Selected rows in the source table. If null or empty, all rows are used.</param>
        /// <param name="destinationTable">The table where the statistical results are written to.</param>
        public static void DoStatisticsOnColumns(
            this DataColumnCollection srctable,
            IAscendingIntegerCollection selectedColumns,
            IAscendingIntegerCollection selectedRows,
            DataColumnCollection destinationTable
            )
        {
            bool useColumnSelection = !(null == selectedColumns || 0 == selectedColumns.Count);
            int  columnCount        = useColumnSelection ? selectedColumns.Count : srctable.ColumnCount;

            bool useRowSelection = !(null == selectedRows || 0 == selectedRows.Count);

            if (columnCount == 0)
            {
                return; // nothing selected
            }

            // Result columns; only numeric source columns are evaluated because values of
            // different kinds (e.g. doubles and dates) can not share one result column.
            var resultName      = new Data.TextColumn();   // name of the evaluated column
            var resultMean      = new Data.DoubleColumn();
            var resultSd        = new Data.DoubleColumn(); // standard deviation
            var resultSe        = new Data.DoubleColumn(); // standard error
            var resultSum       = new Data.DoubleColumn();
            var resultSumSqr    = new Data.DoubleColumn();
            var resultN         = new Data.DoubleColumn(); // number of values used
            var resultFracOne   = new Data.DoubleColumn();
            var resultFracTwo   = new Data.DoubleColumn();
            var resultFracThree = new Data.DoubleColumn();
            var resultMinimum   = new DoubleColumn();
            var resultMaximum   = new DoubleColumn();

            int outRow = 0;

            for (int si = 0; si < columnCount; si++)
            {
                Altaxo.Data.DataColumn current = useColumnSelection ? srctable[selectedColumns[si]] : srctable[si];
                var numeric = current as Altaxo.Data.INumericColumn;
                if (numeric == null)
                {
                    continue; // skip columns that are not numeric
                }

                int rowCount = useRowSelection ? selectedRows.Count : srctable.RowCount;
                if (rowCount == 0)
                {
                    continue;
                }

                // first sweep: gather count, sum, square sum, minimum and maximum
                int    n      = 0;
                double sum    = 0;
                double sumSqr = 0;
                double lowest  = double.PositiveInfinity;
                double highest = double.NegativeInfinity;

                for (int i = 0; i < rowCount; i++)
                {
                    double value = numeric[useRowSelection ? selectedRows[i] : i];
                    if (double.IsNaN(value))
                    {
                        continue;
                    }

                    n++;
                    sum    += value;
                    sumSqr += (value * value);
                    lowest  = Math.Min(lowest, value);
                    highest = Math.Max(highest, value);
                }

                double mean     = sum / n;
                double devSqSum = sumSqr - sum * sum / n;
                if (devSqSum < 0)
                {
                    devSqSum = 0; // negative only because of rounding errors
                }

                double sd;
                if (n > 1)
                {
                    sd = Math.Sqrt(devSqSum / (n - 1));
                }
                else
                {
                    sd = 0;
                }
                double se = sd / Math.Sqrt(n);

                // second sweep: count the values inside the 1-, 2- and 3-sigma intervals around the mean
                double loOne = mean - 1 * sd, hiOne = mean + 1 * sd;
                double loTwo = mean - 2 * sd, hiTwo = mean + 2 * sd;
                double loThree = mean - 3 * sd, hiThree = mean + 3 * sd;
                int    hitsOne = 0, hitsTwo = 0, hitsThree = 0;

                for (int i = 0; i < rowCount; i++)
                {
                    double value = numeric[useRowSelection ? selectedRows[i] : i];
                    if (double.IsNaN(value))
                    {
                        continue;
                    }

                    if (Altaxo.Calc.RMath.IsInIntervalCC(value, loOne, hiOne))
                    {
                        hitsOne++;
                    }
                    if (Altaxo.Calc.RMath.IsInIntervalCC(value, loTwo, hiTwo))
                    {
                        hitsTwo++;
                    }
                    if (Altaxo.Calc.RMath.IsInIntervalCC(value, loThree, hiThree))
                    {
                        hitsThree++;
                    }
                }

                // a column without valid values has no extrema
                if (n == 0)
                {
                    lowest  = double.NaN;
                    highest = double.NaN;
                }

                // write the result row
                resultName[outRow]      = current.Name;
                resultMean[outRow]      = mean;
                resultSd[outRow]        = sd;
                resultSe[outRow]        = se;
                resultSum[outRow]       = sum;
                resultSumSqr[outRow]    = sumSqr;
                resultN[outRow]         = n;
                resultFracOne[outRow]   = hitsOne / (double)n;
                resultFracTwo[outRow]   = hitsTwo / (double)n;
                resultFracThree[outRow] = hitsThree / (double)n;
                resultMinimum[outRow]   = lowest;
                resultMaximum[outRow]   = highest;
                outRow++;
            }

            if (outRow == 0)
            {
                return; // no column was evaluated, nothing to append
            }

            destinationTable.EnsureExistence(DefaultColumnNameColumnName, typeof(TextColumn), ColumnKind.X, 0).Append(resultName);
            AppendStatisticalData(destinationTable, resultMean, resultSd, resultSe, resultSum, resultSumSqr, resultN, resultFracOne, resultFracTwo, resultFracThree, resultMinimum, resultMaximum);
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Appends the given result columns to the corresponding columns of the destination table. For each
 /// result column, the destination column is requested via EnsureExistence under its default name
 /// and the values are then appended to it.
 /// </summary>
 private static void AppendStatisticalData(DataColumnCollection destinationTable, Data.DoubleColumn colMean, Data.DoubleColumn colSd, Data.DoubleColumn colSe, Data.DoubleColumn colSum, Data.DoubleColumn colSumSqr, Data.DoubleColumn colN, Data.DoubleColumn fracOneSigma, Data.DoubleColumn fracTwoSigma, Data.DoubleColumn fracThreeSigma, DoubleColumn minimum, DoubleColumn maximum)
 {
     // The three arrays are parallel: entry k describes the k-th destination column.
     // Note the order: the standard error comes before the standard deviation.
     var targetNames = new[]
     {
         DefaultMeanColumnName,
         DefaultStandardErrorColumnName,
         DefaultStandardDeviationColumnName,
         DefaultSumColumnName,
         DefaultSumSqrColumnName,
         DefaultNumberOfItemsColumnName,
         DefaultFractionInOneSigmaColumnName,
         DefaultFractionInTwoSigmaColumnName,
         DefaultFractionInThreeSigmaColumnName,
         DefaultMinimumColumnName,
         DefaultMaximumColumnName
     };

     // Only the standard error is registered as an error column; everything else is a value column
     var targetKinds = new[]
     {
         ColumnKind.V,
         ColumnKind.Err,
         ColumnKind.V,
         ColumnKind.V,
         ColumnKind.V,
         ColumnKind.V,
         ColumnKind.V,
         ColumnKind.V,
         ColumnKind.V,
         ColumnKind.V,
         ColumnKind.V
     };

     var sources = new[]
     {
         colMean,
         colSe,
         colSd,
         colSum,
         colSumSqr,
         colN,
         fracOneSigma,
         fracTwoSigma,
         fracThreeSigma,
         minimum,
         maximum
     };

     for (int k = 0; k < sources.Length; ++k)
     {
         var target = destinationTable.EnsureExistence(targetNames[k], typeof(DoubleColumn), targetKinds[k], 0);
         target.Append(sources[k]);
     }
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Calculates statistics of selected columns. Creates a new table where the statistical data will be written to.
        /// </summary>
        /// <param name="srctable">Source table.</param>
        /// <param name="selectedColumns">Selected data columns in the source table.</param>
        /// <param name="selectedRows">Selected rows in the source table.</param>
        /// <param name="destinationTable">The table where the statistical results are written to.</param>
        public static void DoStatisticsOnRows(
            this DataColumnCollection srctable,
            IAscendingIntegerCollection selectedColumns,
            IAscendingIntegerCollection selectedRows,
            DataColumnCollection destinationTable
            )
        {
            bool bUseSelectedColumns = (null != selectedColumns && 0 != selectedColumns.Count);
            int  numcols             = bUseSelectedColumns ? selectedColumns.Count : srctable.ColumnCount;

            if (numcols == 0)
            {
                return; // nothing selected
            }
            bool bUseSelectedRows = (null != selectedRows && 0 != selectedRows.Count);
            int  numrows          = bUseSelectedRows ? selectedRows.Count : srctable.RowCount;

            if (numrows == 0)
            {
                return;
            }

            var cRows = new DoubleColumn();

            // 1st column is the mean, and holds the sum during the calculation
            var colMean = new Data.DoubleColumn();

            // 2rd column is the standard deviation, and holds the square sum during calculation
            var colSD = new Data.DoubleColumn();

            // 3th column is the standard e (N)
            var colSE = new Data.DoubleColumn();

            // 4th column is the sum
            var colSum = new Data.DoubleColumn();

            // 5th column is the number of items for statistics
            var colNN = new Data.DoubleColumn();

            var colSumSqr         = new Data.DoubleColumn();
            var colFracOneSigma   = new Data.DoubleColumn();
            var colFracTwoSigma   = new Data.DoubleColumn();
            var colFracThreeSigma = new Data.DoubleColumn();
            var colMinimum        = new DoubleColumn();
            var colMaximum        = new DoubleColumn();

            // first fill the cols c1, c2, c5 with zeros because we want to sum up
            for (int i = 0; i < numrows; i++)
            {
                colSum[i]     = 0;
                colSumSqr[i]  = 0;
                colNN[i]      = 0;
                colMinimum[i] = double.PositiveInfinity;
                colMaximum[i] = double.NegativeInfinity;
            }

            for (int si = 0; si < numcols; si++)
            {
                Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si];
                if (!(col is Altaxo.Data.INumericColumn))
                {
                    continue;
                }

                // now do the statistics
                var ncol = (Data.INumericColumn)col;
                for (int i = 0; i < numrows; i++)
                {
                    int row = bUseSelectedRows ? selectedRows[i] : i;
                    cRows[i] = row;

                    double val = ncol[row];
                    if (double.IsNaN(val))
                    {
                        continue;
                    }

                    colSum[i]    += val;
                    colSumSqr[i] += val * val;
                    colNN[i]     += 1;
                    colMinimum[i] = Math.Min(colMinimum[i], val);
                    colMaximum[i] = Math.Max(colMaximum[i], val);
                }
            } // for all selected columns

            // now calculate the statistics
            for (int i = 0; i < numrows; i++)
            {
                // now fill a new row in the worksheet
                double NN     = colNN[i];
                double sum    = colSum[i];
                double sumsqr = colSumSqr[i];
                if (NN > 0)
                {
                    double mean    = sum / NN;
                    double ymy0sqr = sumsqr - sum * sum / NN;
                    if (ymy0sqr < 0)
                    {
                        ymy0sqr = 0; // if this is lesser zero, it is a rounding error, so set it to zero
                    }
                    double sd = NN > 1 ? Math.Sqrt(ymy0sqr / (NN - 1)) : 0;
                    double se = sd / Math.Sqrt(NN);

                    colMean[i] = mean; // mean
                    colSD[i]   = sd;
                    colSE[i]   = se;
                }
                else
                {
                    colMinimum[i] = double.NaN;
                    colMaximum[i] = double.NaN;
                }
            } // for all rows

            // calculate fractions

            for (int i = 0; i < numrows; i++)
            {
                int row = bUseSelectedRows ? selectedRows[i] : i;

                double mean = colMean[i];
                double sd   = colSD[i];

                // calculate fractions
                double oneSigmaLo = mean - 1 * sd, oneSigmaHi = mean + 1 * sd;
                double twoSigmaLo = mean - 2 * sd, twoSigmaHi = mean + 2 * sd;
                double threeSigmaLo = mean - 3 * sd, threeSigmaHi = mean + 3 * sd;
                int    cntOneSigma = 0, cntTwoSigma = 0, cntThreeSigma = 0;

                for (int si = 0; si < numcols; si++)
                {
                    Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si];
                    if (!(col is Altaxo.Data.INumericColumn))
                    {
                        continue;
                    }

                    // now do the statistics
                    var    ncol = (Data.INumericColumn)col;
                    double val  = ncol[row];
                    if (double.IsNaN(val))
                    {
                        continue;
                    }

                    if (Altaxo.Calc.RMath.IsInIntervalCC(val, oneSigmaLo, oneSigmaHi))
                    {
                        ++cntOneSigma;
                    }
                    if (Altaxo.Calc.RMath.IsInIntervalCC(val, twoSigmaLo, twoSigmaHi))
                    {
                        ++cntTwoSigma;
                    }
                    if (Altaxo.Calc.RMath.IsInIntervalCC(val, threeSigmaLo, threeSigmaHi))
                    {
                        ++cntThreeSigma;
                    }
                }

                colFracOneSigma[i]   = cntOneSigma / colNN[i];
                colFracTwoSigma[i]   = cntTwoSigma / colNN[i];
                colFracThreeSigma[i] = cntThreeSigma / colNN[i];
            }

            destinationTable.EnsureExistence(DefaultRowNumberColumnName, typeof(DoubleColumn), ColumnKind.X, 0).Append(cRows);
            AppendStatisticalData(destinationTable, colMean, colSD, colSE, colSum, colSumSqr, colNN, colFracOneSigma, colFracTwoSigma, colFracThreeSigma, colMinimum, colMaximum);
        }
Ejemplo n.º 8
0
    /// <summary>
    /// Calculates mean, standard deviation, standard error, sum and number of values for every
    /// numeric column of the source table. Each column that contains at least one value that is
    /// not NaN produces one row in the result. If at least one row was produced, a new table is
    /// created, added to the document, and shown in a new worksheet.
    /// </summary>
    /// <param name="mainDocument">The document whose table collection receives the result table.</param>
    /// <param name="srctable">Source table.</param>
    /// <param name="selectedColumns">Selected data columns of the source table. If null or empty, all data columns are used.</param>
    /// <param name="selectedRows">Selected rows of the source table. If null or empty, all rows are used.</param>
    public static void StatisticsOnColumns(
      Altaxo.AltaxoDocument mainDocument,
      Altaxo.Data.DataTable srctable,
      IAscendingIntegerCollection selectedColumns,
      IAscendingIntegerCollection selectedRows
      )
    {
      bool bUseSelectedColumns = (null!=selectedColumns && 0!=selectedColumns.Count);
      int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.DataColumns.ColumnCount;

      bool bUseSelectedRows = (null!=selectedRows && 0!=selectedRows.Count);

      if(numcols==0)
        return; // nothing selected

      // the number of rows is the same for every column, so it is determined only once
      int rows = bUseSelectedRows ? selectedRows.Count : srctable.DataColumns.RowCount;
      if(rows==0)
        return; // no rows, thus no column can produce a result

      // add a text column and some double columns
      // note: statistics is only possible for numeric columns since
      // otherwise in one column doubles and i.e. dates are mixed, which is not possible

      // 1st column is the name of the column of which the statistics is made
      Data.TextColumn colCol = new Data.TextColumn();

      // 2nd column is the mean
      Data.DoubleColumn colMean = new Data.DoubleColumn();

      // 3rd column is the standard deviation
      Data.DoubleColumn colSd = new Data.DoubleColumn();

      // 4th column is the standard error (sd / sqrt(N))
      Data.DoubleColumn colSe = new Data.DoubleColumn();

      // 5th column is the sum
      Data.DoubleColumn colSum = new Data.DoubleColumn();

      // 6th column is the number of items for statistics
      Data.DoubleColumn colN = new Data.DoubleColumn();

      // For every result row: the index of the source data column it was calculated from.
      // Needed because non-numeric and empty columns are skipped, so the result row number
      // is in general not equal to the position of the column in the selection.
      int[] sourceColumnIndices = new int[numcols];

      int currRow=0;
      for(int si=0;si<numcols;si++)
      {
        int srcIdx = bUseSelectedColumns ? selectedColumns[si] : si;
        Altaxo.Data.DataColumn col = srctable[srcIdx];
        if(!(col is Altaxo.Data.INumericColumn))
          continue;

        // now do the statistics
        Data.INumericColumn ncol = (Data.INumericColumn)col;
        double sum=0;
        double sumsqr=0;
        int NN=0;
        for(int i=0;i<rows;i++)
        {
          double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i];
          if(Double.IsNaN(val))
            continue; // empty cells do not contribute

          NN++;
          sum+=val;
          sumsqr+=(val*val);
        }

        // now fill a new row in the worksheet
        if(NN>0)
        {
          double mean = sum/NN;
          double ymy0sqr = sumsqr - sum*sum/NN;
          if(ymy0sqr<0) ymy0sqr=0; // if this is less than zero, it is a rounding error, so set it to zero
          double sd = NN>1 ? Math.Sqrt(ymy0sqr/(NN-1)) : 0;
          double se = sd/Math.Sqrt(NN);

          colCol[currRow] = col.Name;
          colMean[currRow] = mean;
          colSd[currRow] = sd;
          colSe[currRow] = se;
          colSum[currRow] = sum;
          colN[currRow] = NN;
          sourceColumnIndices[currRow] = srcIdx;
          currRow++; // for the next column
        }
      } // for all selected columns

      if(currRow!=0)
      {
        Data.DataTable table = new Altaxo.Data.DataTable("Statistics of " + srctable.Name);
        table.DataColumns.Add(colCol,"Col",Altaxo.Data.ColumnKind.X);

        // add a copy of all property columns; can be useful
        for (int i = 0; i < srctable.PropertyColumnCount; i++)
        {
          DataColumn originalColumn = srctable.PropertyColumns[i];
          DataColumn clonedColumn = (DataColumn)originalColumn.Clone();
          clonedColumn.Clear();

          // copy the property value of exactly that source column the result row belongs to
          for (int resultRow = 0; resultRow < currRow; resultRow++)
          {
            clonedColumn[resultRow] = originalColumn[sourceColumnIndices[resultRow]];
          }
          table.DataColumns.Add(clonedColumn, srctable.PropertyColumns.GetColumnName(i), srctable.PropertyColumns.GetColumnKind(i), srctable.PropertyColumns.GetColumnGroup(i));
        }

        table.DataColumns.Add(colMean,"Mean");
        table.DataColumns.Add(colSd,"Sd");
        table.DataColumns.Add(colSe,"Se");
        table.DataColumns.Add(colSum,"Sum");
        table.DataColumns.Add(colN,"N");

        mainDocument.DataTableCollection.Add(table);
        // show the result table in a new worksheet
        Current.ProjectService.CreateNewWorksheet(table);
      }
    }
Ejemplo n.º 9
0
    /// <summary>
    /// Calculates, for every row, mean, standard deviation, standard error, sum and number of
    /// values over the numeric columns of the source table. The results are written to a new
    /// table with the columns "Mean", "sd", "se", "Sum" and "N", which is added to the document
    /// and shown in a new worksheet.
    /// </summary>
    /// <remarks>
    /// Result row i belongs to the i-th selected row (or to row i if no rows are selected).
    /// For rows that contain no numeric value, "Sum" and "N" are zero and "Mean", "sd" and "se"
    /// are left empty.
    /// </remarks>
    /// <param name="mainDocument">The document whose table collection receives the result table.</param>
    /// <param name="srctable">Source table.</param>
    /// <param name="selectedColumns">Selected data columns of the source table. If null or empty, all data columns are used.</param>
    /// <param name="selectedRows">Selected rows of the source table. If null or empty, all rows are used.</param>
    public static void StatisticsOnRows(
      Altaxo.AltaxoDocument mainDocument,
      Altaxo.Data.DataTable srctable,
      IAscendingIntegerCollection selectedColumns,
      IAscendingIntegerCollection selectedRows
      )
    {
      bool bUseSelectedColumns = (null!=selectedColumns && 0!=selectedColumns.Count);
      int numcols = bUseSelectedColumns ? selectedColumns.Count : srctable.DataColumns.ColumnCount;
      if(numcols==0)
        return; // nothing selected

      bool bUseSelectedRows = (null!=selectedRows && 0!=selectedRows.Count);
      int numrows = bUseSelectedRows ? selectedRows.Count : srctable.DataColumns.RowCount;
      if(numrows==0)
        return;

      // Accumulators per result row. They are kept separate from the result columns, so that
      // no intermediate value can remain in a result column.
      double[] sums = new double[numrows];
      double[] squareSums = new double[numrows];
      int[] counts = new int[numrows];

      // note: statistics is only possible for numeric columns since
      // otherwise in one column doubles and i.e. dates are mixed, which is not possible
      for(int si=0;si<numcols;si++)
      {
        Altaxo.Data.DataColumn col = bUseSelectedColumns ? srctable[selectedColumns[si]] : srctable[si];
        if(!(col is Altaxo.Data.INumericColumn))
          continue;

        Data.INumericColumn ncol = (Data.INumericColumn)col;
        for(int i=0;i<numrows;i++)
        {
          double val = bUseSelectedRows ? ncol[selectedRows[i]] : ncol[i];
          if(Double.IsNaN(val))
            continue; // empty cells do not contribute

          sums[i] += val;
          squareSums[i] += val*val;
          counts[i] += 1;
        }
      } // for all selected columns

      // 1st column is the mean
      Data.DoubleColumn colMean = new Data.DoubleColumn();

      // 2nd column is the standard deviation
      Data.DoubleColumn colSd = new Data.DoubleColumn();

      // 3rd column is the standard error (sd / sqrt(N))
      Data.DoubleColumn colSe = new Data.DoubleColumn();

      // 4th column is the sum
      Data.DoubleColumn colSum = new Data.DoubleColumn();

      // 5th column is the number of items for statistics
      Data.DoubleColumn colN = new Data.DoubleColumn();

      Altaxo.Data.DataTable table = new Altaxo.Data.DataTable();
      table.DataColumns.Add(colMean,"Mean");
      table.DataColumns.Add(colSd,"sd");
      table.DataColumns.Add(colSe,"se");
      table.DataColumns.Add(colSum,"Sum");
      table.DataColumns.Add(colN,"N");

      table.Suspend();

      // now calculate the statistics and fill the rows of the result table
      for(int i=0;i<numrows;i++)
      {
        int NN = counts[i];
        double sum = sums[i];
        double sumsqr = squareSums[i];

        // sum and count are well defined even if the row contains no value
        colSum[i] = sum;
        colN[i] = NN;

        if(NN>0)
        {
          double mean = sum/NN;
          double ymy0sqr = sumsqr - sum*sum/NN;
          if(ymy0sqr<0) ymy0sqr=0; // if this is less than zero, it is a rounding error, so set it to zero
          double sd = NN>1 ? Math.Sqrt(ymy0sqr/(NN-1)) : 0;
          double se = sd/Math.Sqrt(NN);

          colMean[i] = mean;
          colSd[i] = sd;
          colSe[i] = se;
        }
        // else: mean, sd and se are undefined without data and therefore are not written
      } // for all rows

      table.Resume();

      mainDocument.DataTableCollection.Add(table);
      // show the result table in a new worksheet
      Current.ProjectService.CreateNewWorksheet(table);
    }