public static void CombineRows(this IMatrixData mdata, List <int> rowIdxs, Func <double[], double> combineNumeric,
                                       Func <string[], string> combineString, Func <string[][], string[]> combineCategory,
                                       Func <double[][], double[]> combineMultiNumeric)
        {
            if (!rowIdxs.Any())
            {
                return;
            }
            int resultRow = rowIdxs[0];

            for (int i = 0; i < mdata.Values.ColumnCount; i++)
            {
                BaseVector column = mdata.Values.GetColumn(i);
                BaseVector values = column.SubArray(rowIdxs);
                mdata.Values[resultRow, i] = combineNumeric(ArrayUtils.ToDoubles(values));
            }
            for (int i = 0; i < mdata.NumericColumnCount; i++)
            {
                double[] column = mdata.NumericColumns[i];
                double[] values = ArrayUtils.SubArray(column, rowIdxs);
                column[resultRow] = combineNumeric(values);
            }
            for (int i = 0; i < mdata.StringColumnCount; i++)
            {
                string[] column = mdata.StringColumns[i];
                string[] values = ArrayUtils.SubArray(column, rowIdxs);
                column[resultRow] = combineString(values);
            }
            for (int i = 0; i < mdata.CategoryColumnCount; i++)
            {
                string[][] column = mdata.GetCategoryColumnAt(i);
                string[][] values = ArrayUtils.SubArray(column, rowIdxs);
                column[resultRow] = combineCategory(values);
                mdata.SetCategoryColumnAt(column, i);
            }
            for (int i = 0; i < mdata.MultiNumericColumnCount; i++)
            {
                double[][] column = mdata.MultiNumericColumns[i];
                double[][] values = ArrayUtils.SubArray(column, rowIdxs);
                column[resultRow] = combineMultiNumeric(values);
            }
        }
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            bool keepEmpty = param.GetBoolParam("Keep rows without ID").Value;
            AverageType atype = GetAverageType(param.GetSingleChoiceParam("Average type for expression columns").Value);
            string[] ids2 = mdata.StringColumns[param.GetSingleChoiceParam("ID column").Value];
            string[][] ids = SplitIds(ids2);
            int[] present;
            int[] absent;
            GetPresentAbsentIndices(ids, out present, out absent);
            ids = ArrayUtils.SubArray(ids, present);
            int[][] rowInds = new int[present.Length][];
            for (int i = 0; i < rowInds.Length; i++){
                rowInds[i] = new[]{present[i]};
            }
            ClusterRows(ref rowInds, ref ids);
            if (keepEmpty){
                rowInds = ProlongRowInds(rowInds, absent);
            }
            int nrows = rowInds.Length;
            int ncols = mdata.ExpressionColumnCount;
            float[,] expVals = new float[nrows,ncols];
            for (int j = 0; j < ncols; j++){
                float[] c = mdata.GetExpressionColumn(j);
                for (int i = 0; i < nrows; i++){
                    float[] d = ArrayUtils.SubArray(c, rowInds[i]);
                    expVals[i, j] = Average(d, atype);
                }
            }
            mdata.ExpressionValues = expVals;
            for (int i = 0; i < mdata.NumericColumnCount; i++){
                string name = mdata.NumericColumnNames[i];
                AverageType atype1 = GetAverageType(param.GetSingleChoiceParam("Average type for " + name).Value);
                double[] c = mdata.NumericColumns[i];
                double[] newCol = new double[nrows];
                for (int k = 0; k < nrows; k++){
                    double[] d = ArrayUtils.SubArray(c, rowInds[k]);
                    newCol[k] = Average(d, atype1);
                }
                mdata.NumericColumns[i] = newCol;
            }
            for (int i = 0; i < mdata.CategoryColumnCount; i++){
                string[][] c = mdata.GetCategoryColumnAt(i);
                string[][] newCol = new string[nrows][];
                for (int k = 0; k < nrows; k++){
                    string[][] d = ArrayUtils.SubArray(c, rowInds[k]);
                    newCol[k] = Average(d);
                }
                mdata.SetCategoryColumnAt(newCol,i);
            }
            for (int i = 0; i < mdata.StringColumnCount; i++){
                string[] c = mdata.StringColumns[i];
                string[] newCol = new string[nrows];
                for (int k = 0; k < nrows; k++){
                    string[] d = ArrayUtils.SubArray(c, rowInds[k]);
                    newCol[k] = Average(d);
                }
                mdata.StringColumns[i] = newCol;
            }
            for (int i = 0; i < mdata.MultiNumericColumnCount; i++){
                double[][] c = mdata.MultiNumericColumns[i];
                double[][] newCol = new double[nrows][];
                for (int k = 0; k < nrows; k++){
                    double[][] d = ArrayUtils.SubArray(c, rowInds[k]);
                    newCol[k] = Average(d);
                }
                mdata.MultiNumericColumns[i] = newCol;
            }
        }
Example #3
0
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            bool        keepEmpty = param.GetParam <bool>("Keep rows without ID").Value;
            AverageType atype     = GetAverageType(param.GetParam <int>("Average type for expression columns").Value);

            string[]   ids2 = mdata.StringColumns[param.GetParam <int>("ID column").Value];
            string[][] ids  = SplitIds(ids2);
            int[]      present;
            int[]      absent;
            GetPresentAbsentIndices(ids, out present, out absent);
            ids = ArrayUtils.SubArray(ids, present);
            int[][] rowInds = new int[present.Length][];
            for (int i = 0; i < rowInds.Length; i++)
            {
                rowInds[i] = new[] { present[i] };
            }
            ClusterRows(ref rowInds, ref ids);
            if (keepEmpty)
            {
                rowInds = ProlongRowInds(rowInds, absent);
            }
            int nrows = rowInds.Length;
            int ncols = mdata.ColumnCount;

            float[,] expVals = new float[nrows, ncols];
            for (int j = 0; j < ncols; j++)
            {
                double[] c = ArrayUtils.ToDoubles(mdata.Values.GetColumn(j));
                for (int i = 0; i < nrows; i++)
                {
                    double[] d = ArrayUtils.SubArray(c, rowInds[i]);
                    expVals[i, j] = (float)Average(d, atype);
                }
            }
            mdata.Values.Set(expVals);
            for (int i = 0; i < mdata.NumericColumnCount; i++)
            {
                string      name   = mdata.NumericColumnNames[i];
                AverageType atype1 = GetAverageType(param.GetParam <int>("Average type for " + name).Value);
                double[]    c      = mdata.NumericColumns[i];
                double[]    newCol = new double[nrows];
                for (int k = 0; k < nrows; k++)
                {
                    double[] d = ArrayUtils.SubArray(c, rowInds[k]);
                    newCol[k] = Average(d, atype1);
                }
                mdata.NumericColumns[i] = newCol;
            }
            for (int i = 0; i < mdata.CategoryColumnCount; i++)
            {
                string[][] c      = mdata.GetCategoryColumnAt(i);
                string[][] newCol = new string[nrows][];
                for (int k = 0; k < nrows; k++)
                {
                    string[][] d = ArrayUtils.SubArray(c, rowInds[k]);
                    newCol[k] = Average(d);
                }
                mdata.SetCategoryColumnAt(newCol, i);
            }
            for (int i = 0; i < mdata.StringColumnCount; i++)
            {
                string[] c      = mdata.StringColumns[i];
                string[] newCol = new string[nrows];
                for (int k = 0; k < nrows; k++)
                {
                    string[] d = ArrayUtils.SubArray(c, rowInds[k]);
                    newCol[k] = Average(d);
                }
                mdata.StringColumns[i] = newCol;
            }
            for (int i = 0; i < mdata.MultiNumericColumnCount; i++)
            {
                double[][] c      = mdata.MultiNumericColumns[i];
                double[][] newCol = new double[nrows][];
                for (int k = 0; k < nrows; k++)
                {
                    double[][] d = ArrayUtils.SubArray(c, rowInds[k]);
                    newCol[k] = Average(d);
                }
                mdata.MultiNumericColumns[i] = newCol;
            }
        }