Beispiel #1
0
 /// <summary>
 /// Replaces the main columns of <paramref name="mdata"/> with one aggregated column per group.
 /// Groups are the unique entries (first-occurrence order) of the category row at
 /// <paramref name="groupColInd"/>.
 /// </summary>
 /// <param name="groupColInd">Index of the category row that defines the grouping; this row is removed afterwards.</param>
 /// <param name="validVals">Minimum number of usable (non-NaN, non-infinite) values a group needs in a row;
 /// below that the aggregated cell is NaN and its imputed flag is false.</param>
 /// <param name="mdata">Matrix that is read and then overwritten in place.</param>
 /// <param name="func">Aggregation applied to the usable values of one group in one row.</param>
 private static void FillMatrixDontKeep(int groupColInd, int validVals, IMatrixData mdata,
                                        Func<IList<double>, double> func)
 {
     string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
     string[] groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
     int[][] colInds = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
     int rowCount = mdata.RowCount;
     int groupCount = groupNames.Length;
     double[,] aggregated = new double[rowCount, groupCount];
     double[,] quality = new double[rowCount, groupCount];
     bool[,] imputed = new bool[rowCount, groupCount];
     for (int row = 0; row < rowCount; row++)
     {
         for (int g = 0; g < groupCount; g++)
         {
             // Gather the usable values of this group together with their imputation flags.
             List<double> usable = new List<double>();
             List<bool> flags = new List<bool>();
             foreach (int col in colInds[g])
             {
                 double value = mdata.Values.Get(row, col);
                 if (double.IsNaN(value) || double.IsInfinity(value))
                 {
                     continue;
                 }
                 usable.Add(value);
                 flags.Add(mdata.IsImputed[row, col]);
             }
             if (usable.Count < validVals)
             {
                 // Too few usable values: the cell stays empty.
                 aggregated[row, g] = double.NaN;
                 imputed[row, g] = false;
             }
             else
             {
                 aggregated[row, g] = func(usable);
                 imputed[row, g] = ArrayUtils.Or(flags);
             }
             // No quality value is derived for aggregated cells.
             quality[row, g] = double.NaN;
         }
     }
     // Swap the matrix content for the aggregated version: one column per group.
     mdata.ColumnNames = new List<string>(groupNames);
     mdata.ColumnDescriptions = GetEmpty(groupNames);
     mdata.Values.Set(aggregated);
     mdata.Quality.Set(quality);
     mdata.IsImputed.Set(imputed);
     mdata.RemoveCategoryRowAt(groupColInd);
     // Remaining annotation rows are condensed to the new column layout by the Average* helpers.
     for (int r = 0; r < mdata.CategoryRowCount; r++)
     {
         string[][] current = mdata.GetCategoryRowAt(r);
         mdata.SetCategoryRowAt(AverageCategoryRow(current, colInds), r);
     }
     for (int r = 0; r < mdata.NumericRows.Count; r++)
     {
         mdata.NumericRows[r] = AverageNumericRow(mdata.NumericRows[r], colInds);
     }
 }
Beispiel #2
0
 /// <summary>
 /// Adds one numeric column per group to <paramref name="mdata"/> holding, for every row, the
 /// spread of that group's usable (non-NaN, non-infinite) main-column values.
 /// Groups are the unique entries of the category row at <paramref name="groupColInd"/>.
 /// </summary>
 /// <param name="groupColInd">Index of the category row that defines the grouping.</param>
 /// <param name="validVals">Minimum number of usable values required; otherwise the cell is NaN.</param>
 /// <param name="mdata">Matrix that is read and that receives the new numeric columns.</param>
 /// <param name="varInd">0 stores the value of ArrayUtils.StandardDeviation; any other value stores
 /// that value divided by the square root of the number of usable values.</param>
 private static void AddStandardDeviation(int groupColInd, int validVals, IMatrixData mdata, int varInd)
 {
     string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
     string[] groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
     int[][] colInds = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
     int groupCount = groupNames.Length;
     double[][] spreadCols = new double[groupCount][];
     for (int g = 0; g < groupCount; g++)
     {
         spreadCols[g] = new double[mdata.RowCount];
     }
     for (int row = 0; row < mdata.RowCount; row++)
     {
         for (int g = 0; g < groupCount; g++)
         {
             List<double> usable = new List<double>();
             foreach (int col in colInds[g])
             {
                 double value = mdata.Values.Get(row, col);
                 if (double.IsNaN(value) || double.IsInfinity(value))
                 {
                     continue;
                 }
                 usable.Add(value);
             }
             if (usable.Count < validVals)
             {
                 // Too few usable values in this group for this row.
                 spreadCols[g][row] = double.NaN;
                 continue;
             }
             double spread = ArrayUtils.StandardDeviation(usable);
             if (varInd != 0)
             {
                 // Scale by sqrt(n) when the caller did not ask for the plain standard deviation.
                 spread = spread / Math.Sqrt(usable.Count);
             }
             spreadCols[g][row] = spread;
         }
     }
     for (int g = 0; g < groupCount; g++)
     {
         // The same text is passed for both string arguments of AddNumericColumn.
         string label = "stddev " + groupNames[g];
         mdata.AddNumericColumn(label, label, spreadCols[g]);
     }
 }
Beispiel #3
0
 /// <summary>
 /// Adds one numeric column per group to <paramref name="mdata"/> containing, for every row,
 /// <paramref name="func"/> applied to that group's usable (non-NaN, non-infinite) main-column values.
 /// The existing main columns are not modified here.
 /// </summary>
 /// <param name="groupColInd">Index of the category row that defines the grouping.</param>
 /// <param name="validVals">Minimum number of usable values required; otherwise the cell is NaN.</param>
 /// <param name="mdata">Matrix that is read and that receives the new numeric columns.</param>
 /// <param name="func">Aggregation applied to the usable values of one group in one row.</param>
 private static void FillMatrixKeep(int groupColInd, int validVals, IMatrixData mdata, Func<IList<double>, double> func)
 {
     string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
     string[] groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
     int[][] colInds = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
     double[][] resultCols = new double[groupNames.Length][];
     for (int g = 0; g < resultCols.Length; g++)
     {
         resultCols[g] = new double[mdata.RowCount];
     }
     for (int row = 0; row < mdata.RowCount; row++)
     {
         for (int g = 0; g < groupNames.Length; g++)
         {
             List<double> usable = new List<double>();
             foreach (int col in colInds[g])
             {
                 double value = mdata.Values.Get(row, col);
                 bool isUsable = !(double.IsNaN(value) || double.IsInfinity(value));
                 if (isUsable)
                 {
                     usable.Add(value);
                 }
             }
             // Aggregate only when enough usable values exist; otherwise leave the cell empty.
             resultCols[g][row] = usable.Count >= validVals ? func(usable) : double.NaN;
         }
     }
     for (int g = 0; g < groupNames.Length; g++)
     {
         // The group name is passed for both string arguments of AddNumericColumn.
         string label = groupNames[g];
         mdata.AddNumericColumn(label, label, resultCols[g]);
     }
 }
Beispiel #4
0
        /// <summary>
        /// Computes the row-wise summaries selected in the "Calculate" parameter and appends each as a
        /// numeric column to <paramref name="mdata"/>. Each summary is an entry of <c>procs</c>:
        /// Item1 is used as the column name, Item2 as the function over the row's usable values, and
        /// Item3 as the second string argument of AddNumericColumn.
        /// When "Expression column selection" is 2, every summary is additionally computed per group
        /// (unique entries of the category row chosen by the "Group" sub-parameter) and added as
        /// "&lt;Item1&gt; &lt;group name&gt;".
        /// </summary>
        /// <param name="mdata">Matrix that is read and that receives the new numeric columns.</param>
        /// <param name="param">Parameters; reads "Expression column selection" (with sub-parameters
        /// "Group" or "Columns") and "Calculate".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams<int> selection = param.GetParamWithSubParams<int>("Expression column selection");
            bool perGroup = selection.Value == 2;

            string[] groupNames = null;
            int[][] groupColumns = null;
            if (perGroup)
            {
                int groupRowInd = selection.GetSubParameters().GetParam<int>("Group").Value;
                string[][] grouping = mdata.GetCategoryRowAt(groupRowInd);
                groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
                groupColumns = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
            }

            // Selection value 1 uses an explicit column list; every other value uses all main columns.
            int[] selectedCols;
            if (selection.Value == 1)
            {
                selectedCols = selection.GetSubParameters().GetParam<int[]>("Columns").Value;
            }
            else
            {
                selectedCols = ArrayUtils.ConsecutiveInts(mdata.ColumnCount);
            }
            HashSet<int> requested = ArrayUtils.ToHashSet(param.GetParam<int[]>("Calculate").Value);

            // Allocate result storage: one column per requested summary, plus one per (summary, group).
            bool[] include = new bool[procs.Length];
            double[][] overall = new double[procs.Length][];
            double[][][] byGroup = perGroup ? new double[procs.Length][][] : null;
            if (perGroup)
            {
                for (int p = 0; p < byGroup.Length; p++)
                {
                    byGroup[p] = new double[groupNames.Length][];
                }
            }
            for (int p = 0; p < include.Length; p++)
            {
                include[p] = requested.Contains(p);
                if (!include[p])
                {
                    continue;
                }
                overall[p] = new double[mdata.RowCount];
                if (perGroup)
                {
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        byGroup[p][g] = new double[mdata.RowCount];
                    }
                }
            }

            for (int row = 0; row < mdata.RowCount; row++)
            {
                // Usable (non-NaN, non-infinite) values of this row over the selected columns.
                List<double> rowValues = new List<double>();
                foreach (int col in selectedCols)
                {
                    double value = mdata.Values.Get(row, col);
                    if (double.IsNaN(value) || double.IsInfinity(value))
                    {
                        continue;
                    }
                    rowValues.Add(value);
                }
                for (int p = 0; p < include.Length; p++)
                {
                    if (include[p])
                    {
                        overall[p][row] = procs[p].Item2(rowValues);
                    }
                }
                if (!perGroup)
                {
                    continue;
                }

                // Same filtering, but separately for the columns of each group.
                List<double>[] groupValues = new List<double>[groupNames.Length];
                for (int g = 0; g < groupColumns.Length; g++)
                {
                    groupValues[g] = new List<double>();
                    foreach (int col in groupColumns[g])
                    {
                        double value = mdata.Values.Get(row, col);
                        if (double.IsNaN(value) || double.IsInfinity(value))
                        {
                            continue;
                        }
                        groupValues[g].Add(value);
                    }
                }
                for (int p = 0; p < include.Length; p++)
                {
                    if (!include[p])
                    {
                        continue;
                    }
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        byGroup[p][g][row] = procs[p].Item2(groupValues[g]);
                    }
                }
            }

            // Append the results: the overall column first, then its per-group variants.
            for (int p = 0; p < include.Length; p++)
            {
                if (!include[p])
                {
                    continue;
                }
                mdata.AddNumericColumn(procs[p].Item1, procs[p].Item3, overall[p]);
                if (perGroup)
                {
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        mdata.AddNumericColumn(procs[p].Item1 + " " + groupNames[g], procs[p].Item3, byGroup[p][g]);
                    }
                }
            }
        }