/// <summary>
/// Merges the selected terms of one categorical row into a single new term.
/// </summary>
/// <remarks>
/// The row index is read from the "Row" parameter, the indices of the terms to merge from
/// its "Values" sub-parameter, and the replacement term from "New term". Validation
/// failures are reported through <c>processInfo.ErrString</c> and leave the data untouched.
/// </remarks>
/// <param name="mdata">Matrix whose categorical row is rewritten.</param>
/// <param name="param">Parameter set holding "Row" (with "Values") and "New term".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error message when validation fails.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams <int> p = param.GetParamWithSubParams <int>("Row");
            int colInd = p.Value;

            if (colInd < 0)
            {
                processInfo.ErrString = "No categorical rows available.";
                return;
            }
            Parameter <int[]> mcp = p.GetSubParameters().GetParam <int[]>("Values");

            int[] inds = mcp.Value;
            // NOTE(review): a single selected term passes this check although the message
            // asks for two — confirm which of the two is intended before changing either.
            if (inds.Length < 1)
            {
                processInfo.ErrString = "Please select at least two terms for merging.";
                return;
            }
            string newTerm = param.GetParam <string>("New term").Value;

            if (newTerm.Length == 0)
            {
                processInfo.ErrString = "Please specify a new term.";
                return;
            }

            // Fetch the list of terms once instead of once per selected index.
            string[] allTerms = mdata.GetCategoryRowValuesAt(colInd);
            HashSet <string> toMerge = new HashSet <string>();
            foreach (int ind in inds)
            {
                toMerge.Add(allTerms[ind]);
            }

            string[][] cats   = mdata.GetCategoryRowAt(colInd);
            string[][] newCat = new string[cats.Length][];
            for (int i = 0; i < cats.Length; i++)
            {
                string[] w       = cats[i];
                bool     changed = false;
                foreach (string term in w)
                {
                    if (toMerge.Contains(term))
                    {
                        changed = true;
                        break;
                    }
                }
                if (!changed)
                {
                    // Cells without any merged term are passed through untouched.
                    newCat[i] = w;
                    continue;
                }

                // Build a fresh array so the arrays handed out by GetCategoryRowAt are not
                // mutated, and emit the new term only once even when several merged terms
                // (or the new term itself) occur in the same cell.
                List <string> merged       = new List <string>(w.Length);
                bool          newTermAdded = false;
                foreach (string term in w)
                {
                    if (toMerge.Contains(term) || term == newTerm)
                    {
                        if (!newTermAdded)
                        {
                            merged.Add(newTerm);
                            newTermAdded = true;
                        }
                    }
                    else
                    {
                        merged.Add(term);
                    }
                }
                string[] result = merged.ToArray();
                Array.Sort(result);
                newCat[i] = result;
            }
            mdata.SetCategoryRowAt(newCat, colInd);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Converts the string columns at <paramref name="colInds"/> into main columns.
 /// </summary>
 /// <remarks>
 /// Each cell is parsed with <c>Parser.TryDouble</c>; cells that fail to parse become NaN.
 /// The new main columns are appended after the existing ones with NaN quality and an
 /// imputation flag of false. The converted columns are then removed from the string
 /// columns, and every category, numeric and string row is extended by the number of
 /// added columns through the corresponding Extend* helper.
 /// </remarks>
 /// <param name="colInds">Indices of the string columns to convert.</param>
 /// <param name="mdata">Matrix that is modified in place.</param>
 private static void StringToExpression(IList <int> colInds, IMatrixData mdata)
 {
     // Indices of the string columns that stay string columns.
     int[]      inds         = ArrayUtils.Complement(colInds, mdata.StringColumnCount);
     string[]   names        = ArrayUtils.SubArray(mdata.StringColumnNames, colInds);
     string[]   descriptions = ArrayUtils.SubArray(mdata.StringColumnDescriptions, colInds);
     string[][] str          = ArrayUtils.SubArray(mdata.StringColumns, colInds);
     double[][] newEx        = new double[str.Length][];
     for (int j = 0; j < str.Length; j++)
     {
         newEx[j] = new double[str[j].Length];
         for (int i = 0; i < newEx[j].Length; i++)
         {
             bool success = Parser.TryDouble(str[j][i], out double f);
             newEx[j][i] = success ? f : double.NaN;
         }
     }
     double[,] newExp     = new double[mdata.RowCount, mdata.ColumnCount + str.Length];
     double[,] newQual    = new double[mdata.RowCount, mdata.ColumnCount + str.Length];
     bool[,] newIsImputed = new bool[mdata.RowCount, mdata.ColumnCount + str.Length];
     for (int i = 0; i < mdata.RowCount; i++)
     {
         // Existing main columns are copied over unchanged.
         for (int j = 0; j < mdata.ColumnCount; j++)
         {
             newExp[i, j]       = mdata.Values.Get(i, j);
             newQual[i, j]      = mdata.Quality.Get(i, j);
             newIsImputed[i, j] = mdata.IsImputed[i, j];
         }
         // Converted columns are appended to the right.
         for (int j = 0; j < newEx.Length; j++)
         {
             newExp[i, j + mdata.ColumnCount]       = newEx[j][i];
             newQual[i, j + mdata.ColumnCount]      = double.NaN;
             newIsImputed[i, j + mdata.ColumnCount] = false;
         }
     }
     mdata.Values.Set(newExp);
     mdata.Quality.Set(newQual);
     mdata.IsImputed.Set(newIsImputed);
     mdata.ColumnNames.AddRange(names);
     mdata.ColumnDescriptions.AddRange(descriptions);
     mdata.StringColumns      = ArrayUtils.SubList(mdata.StringColumns, inds);
     mdata.StringColumnNames  = ArrayUtils.SubList(mdata.StringColumnNames, inds);
     // The filtered descriptions belong to the string columns. Assigning them to
     // ColumnDescriptions would discard the main column descriptions extended above and
     // leave the string column descriptions out of sync with the string columns.
     mdata.StringColumnDescriptions = ArrayUtils.SubList(mdata.StringColumnDescriptions, inds);
     for (int i = 0; i < mdata.CategoryRowCount; i++)
     {
         mdata.SetCategoryRowAt(ExtendCategoryRow(mdata.GetCategoryRowAt(i), str.Length), i);
     }
     for (int i = 0; i < mdata.NumericRows.Count; i++)
     {
         mdata.NumericRows[i] = ExtendNumericRow(mdata.NumericRows[i], str.Length);
     }
     for (int i = 0; i < mdata.StringRows.Count; i++)
     {
         mdata.StringRows[i] = ExtendStringRow(mdata.StringRows[i], str.Length);
     }
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Replaces the main columns by one aggregated column per group of the categorical row
 /// at <paramref name="groupColInd"/>, then removes that categorical row.
 /// </summary>
 /// <param name="groupColInd">Index of the categorical row that defines the groups.</param>
 /// <param name="validVals">Minimum number of finite values a group needs in a row;
 /// otherwise the aggregated cell is NaN and not flagged as imputed.</param>
 /// <param name="mdata">Matrix that is modified in place.</param>
 /// <param name="func">Aggregation applied to the finite values of a group.</param>
 private static void FillMatrixDontKeep(int groupColInd, int validVals, IMatrixData mdata,
                                        Func <IList <double>, double> func)
 {
     string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
     string[]   groups   = ArrayUtils.UniqueValuesPreserveOrder(grouping);
     int[][]    members  = PerseusPluginUtils.GetMainColIndices(grouping, groups);

     int rowCount   = mdata.RowCount;
     int groupCount = groups.Length;
     double[,] aggregated = new double[rowCount, groupCount];
     double[,] quality    = new double[rowCount, groupCount];
     bool[,]   imputed    = new bool[rowCount, groupCount];

     for (int row = 0; row < rowCount; row++)
     {
         for (int g = 0; g < groupCount; g++)
         {
             // Collect only the finite values of this group, with their imputation flags.
             List <double> finite = new List <double>();
             List <bool>   flags  = new List <bool>();
             foreach (int col in members[g])
             {
                 double v = mdata.Values.Get(row, col);
                 if (double.IsNaN(v) || double.IsInfinity(v))
                 {
                     continue;
                 }
                 finite.Add(v);
                 flags.Add(mdata.IsImputed[row, col]);
             }

             bool enough = finite.Count >= validVals;
             aggregated[row, g] = enough ? func(finite) : double.NaN;
             quality[row, g]    = double.NaN;
             imputed[row, g]    = enough && ArrayUtils.Or(flags);
         }
     }

     mdata.ColumnNames        = new List <string>(groups);
     mdata.ColumnDescriptions = GetEmpty(groups);
     mdata.Values.Set(aggregated);
     mdata.Quality.Set(quality);
     mdata.IsImputed.Set(imputed);
     mdata.RemoveCategoryRowAt(groupColInd);

     // Collapse the remaining annotation rows onto the new group columns.
     for (int r = 0; r < mdata.CategoryRowCount; r++)
     {
         string[][] collapsed = AverageCategoryRow(mdata.GetCategoryRowAt(r), members);
         mdata.SetCategoryRowAt(collapsed, r);
     }
     for (int r = 0; r < mdata.NumericRows.Count; r++)
     {
         mdata.NumericRows[r] = AverageNumericRow(mdata.NumericRows[r], members);
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Moves the numeric columns at <paramref name="colInds"/> into the main matrix.
        /// </summary>
        /// <remarks>
        /// Values are narrowed to float and appended after the existing main columns with
        /// NaN quality and an imputation flag of false. The moved columns are removed from
        /// the numeric columns, and category and numeric rows are extended accordingly.
        /// </remarks>
        /// <param name="colInds">Indices of the numeric columns to move.</param>
        /// <param name="mdata">Matrix that is modified in place.</param>
        private static void NumericToExpression(IList <int> colInds, IMatrixData mdata)
        {
            int[]      remaining         = ArrayUtils.Complement(colInds, mdata.NumericColumnCount);
            string[]   movedNames        = ArrayUtils.SubArray(mdata.NumericColumnNames, colInds);
            string[]   movedDescriptions = ArrayUtils.SubArray(mdata.NumericColumnDescriptions, colInds);
            double[][] moved             = ArrayUtils.SubArray(mdata.NumericColumns, colInds);
            int        added             = moved.Length;

            // Narrow every moved column from double to float.
            float[][] narrowed = new float[added][];
            for (int c = 0; c < added; c++)
            {
                double[] source = moved[c];
                float[]  target = new float[source.Length];
                for (int r = 0; r < target.Length; r++)
                {
                    target[r] = (float)source[r];
                }
                narrowed[c] = target;
            }

            int rowCount = mdata.RowCount;
            int oldCols  = mdata.ColumnCount;
            int allCols  = oldCols + added;
            float[,] values  = new float[rowCount, allCols];
            float[,] quality = new float[rowCount, allCols];
            bool[,]  imputed = new bool[rowCount, allCols];
            for (int r = 0; r < rowCount; r++)
            {
                for (int c = 0; c < oldCols; c++)
                {
                    values[r, c]  = mdata.Values[r, c];
                    quality[r, c] = mdata.Quality[r, c];
                    imputed[r, c] = mdata.IsImputed[r, c];
                }
                for (int c = 0; c < added; c++)
                {
                    int dest = oldCols + c;
                    values[r, dest]  = narrowed[c][r];
                    quality[r, dest] = float.NaN;
                    imputed[r, dest] = false;
                }
            }

            mdata.Values.Set(values);
            mdata.Quality.Set(quality);
            mdata.IsImputed.Set(imputed);
            mdata.ColumnNames.AddRange(movedNames);
            mdata.ColumnDescriptions.AddRange(movedDescriptions);
            mdata.NumericColumns            = ArrayUtils.SubList(mdata.NumericColumns, remaining);
            mdata.NumericColumnNames        = ArrayUtils.SubList(mdata.NumericColumnNames, remaining);
            mdata.NumericColumnDescriptions = ArrayUtils.SubList(mdata.NumericColumnDescriptions, remaining);

            for (int r = 0; r < mdata.CategoryRowCount; r++)
            {
                string[][] extended = ExtendCategoryRow(mdata.GetCategoryRowAt(r), added);
                mdata.SetCategoryRowAt(extended, r);
            }
            for (int r = 0; r < mdata.NumericRows.Count; r++)
            {
                mdata.NumericRows[r] = ExtendNumericRow(mdata.NumericRows[r], added);
            }
        }
 /// <summary>
 /// Overwrites the categorical row chosen in "Category row" with values entered per
 /// expression column.
 /// </summary>
 /// <remarks>
 /// For each expression column the sub-parameter named after the column is read; a
 /// non-empty string is split on ';' into terms, an empty string yields no terms.
 /// </remarks>
 /// <param name="mdata">Matrix whose categorical row is replaced.</param>
 /// <param name="param">Parameters holding the row choice and the per-column strings.</param>
 private static void ProcessDataEdit(IMatrixData mdata, Parameters param)
 {
     SingleChoiceWithSubParams choice = param.GetSingleChoiceWithSubParams("Category row");
     int rowIndex = choice.Value;
     Parameters perColumn = choice.GetSubParameters();

     string[][] edited = new string[mdata.ExpressionColumnCount][];
     for (int col = 0; col < mdata.ExpressionColumnCount; col++)
     {
         string columnName = mdata.ExpressionColumnNames[col];
         string text = perColumn.GetStringParam(columnName).Value;
         if (text.Length > 0)
         {
             edited[col] = text.Split(';');
         }
         else
         {
             edited[col] = new string[0];
         }
     }
     mdata.SetCategoryRowAt(edited, rowIndex);
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Divides every expression column by the column at <paramref name="index"/> and then
 /// removes that column from the matrix.
 /// </summary>
 /// <remarks>
 /// Infinite quotients are stored as NaN. A result cell is flagged as imputed when
 /// either the numerator or the divisor cell was imputed. The divisor column is also
 /// removed from the column names, descriptions, category rows and numeric rows.
 /// </remarks>
 /// <param name="data">Matrix that is modified in place.</param>
 /// <param name="index">Index of the expression column used as divisor.</param>
 public static void DivideByColumn(IMatrixData data, int index)
 {
     int rowCount = data.RowCount;
     int colCount = data.ExpressionColumnCount;
     float[,] quotients = new float[rowCount, colCount - 1];
     bool[,] imputed = new bool[rowCount, colCount - 1];

     for (int row = 0; row < rowCount; row++)
     {
         for (int col = 0; col < colCount; col++)
         {
             if (col == index)
             {
                 continue;
             }
             // Columns to the right of the divisor shift one position to the left.
             int dest = col < index ? col : col - 1;

             quotients[row, dest] = data[row, col] / data[row, index];
             if (float.IsInfinity(quotients[row, dest]))
             {
                 quotients[row, dest] = float.NaN;
             }
             imputed[row, dest] = data.IsImputed[row, col] || data.IsImputed[row, index];
         }
     }

     data.ExpressionValues = quotients;
     data.IsImputed = imputed;
     data.ExpressionColumnNames.RemoveAt(index);
     data.ExpressionColumnDescriptions.RemoveAt(index);

     for (int r = 0; r < data.CategoryRowCount; r++)
     {
         string[][] shortened = ArrayUtils.RemoveAtIndex(data.GetCategoryRowAt(r), index);
         data.SetCategoryRowAt(shortened, r);
     }
     for (int r = 0; r < data.NumericRowCount; r++)
     {
         data.NumericRows[r] = ArrayUtils.RemoveAtIndex(data.NumericRows[r], index);
     }
 }
        /// <summary>
        /// Merges the selected terms of one categorical row into a single new term.
        /// </summary>
        /// <remarks>
        /// The row index is read from the "Row" parameter, the indices of the terms to merge
        /// from its "Values" sub-parameter, and the replacement term from "New term".
        /// Validation failures are reported through <c>processInfo.ErrString</c> and leave
        /// the data untouched.
        /// </remarks>
        /// <param name="mdata">Matrix whose categorical row is rewritten.</param>
        /// <param name="param">Parameter set holding "Row" (with "Values") and "New term".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives the error message when validation fails.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            SingleChoiceWithSubParams p = param.GetSingleChoiceWithSubParams("Row");
            int colInd = p.Value;
            if (colInd < 0) {
                processInfo.ErrString = "No categorical rows available.";
                return;
            }
            MultiChoiceParam mcp = p.GetSubParameters().GetMultiChoiceParam("Values");
            int[] inds = mcp.Value;
            // NOTE(review): a single selected term passes this check although the message
            // asks for two — confirm which of the two is intended before changing either.
            if (inds.Length < 1) {
                processInfo.ErrString = "Please select at least two terms for merging.";
                return;
            }
            string newTerm = param.GetStringParam("New term").Value;
            if (newTerm.Length == 0){
                processInfo.ErrString = "Please specify a new term.";
                return;
            }

            // Fetch the list of terms once instead of once per selected index.
            string[] allTerms = mdata.GetCategoryRowValuesAt(colInd);
            HashSet<string> toMerge = new HashSet<string>();
            foreach (int ind in inds){
                toMerge.Add(allTerms[ind]);
            }
            string[][] cats = mdata.GetCategoryRowAt(colInd);
            string[][] newCat = new string[cats.Length][];
            for (int i = 0; i < cats.Length; i++){
                string[] w = cats[i];
                bool changed = false;
                foreach (string term in w){
                    if (toMerge.Contains(term)){
                        changed = true;
                        break;
                    }
                }
                if (!changed){
                    // Cells without any merged term are passed through untouched.
                    newCat[i] = w;
                    continue;
                }
                // Build a fresh array so the arrays handed out by GetCategoryRowAt are not
                // mutated, and emit the new term only once even when several merged terms
                // (or the new term itself) occur in the same cell.
                List<string> merged = new List<string>(w.Length);
                bool newTermAdded = false;
                foreach (string term in w){
                    if (toMerge.Contains(term) || term == newTerm){
                        if (!newTermAdded){
                            merged.Add(newTerm);
                            newTermAdded = true;
                        }
                    } else{
                        merged.Add(term);
                    }
                }
                string[] result = merged.ToArray();
                Array.Sort(result);
                newCat[i] = result;
            }
            mdata.SetCategoryRowAt(newCat, colInd);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Replaces the expression columns by one aggregated column per group of the
        /// categorical row at <paramref name="groupColInd"/>, then removes that row.
        /// </summary>
        /// <param name="groupColInd">Index of the categorical row that defines the groups.</param>
        /// <param name="validVals">Minimum number of finite values a group needs in a row;
        /// otherwise the aggregated cell is NaN and not flagged as imputed.</param>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="func">Aggregation applied to the finite values of a group; its
        /// result is narrowed to float.</param>
        private static void FillMatrixDontKeep(int groupColInd, int validVals, IMatrixData mdata,
			Func<IList<double>, double> func)
        {
            string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
            string[] groups = ArrayUtils.UniqueValuesPreserveOrder(grouping);
            int[][] members = PerseusPluginUtils.GetExpressionColIndices(grouping, groups);

            int rowCount = mdata.RowCount;
            int groupCount = groups.Length;
            float[,] aggregated = new float[rowCount, groupCount];
            float[,] quality = new float[rowCount, groupCount];
            bool[,] imputed = new bool[rowCount, groupCount];

            for (int row = 0; row < rowCount; row++){
                for (int g = 0; g < groupCount; g++){
                    // Collect only the finite values of this group, with their imputation flags.
                    List<double> finite = new List<double>();
                    List<bool> flags = new List<bool>();
                    foreach (int col in members[g]){
                        double v = mdata[row, col];
                        if (double.IsNaN(v) || double.IsInfinity(v)){
                            continue;
                        }
                        finite.Add(v);
                        flags.Add(mdata.IsImputed[row, col]);
                    }

                    bool enough = finite.Count >= validVals;
                    aggregated[row, g] = enough ? (float) func(finite) : float.NaN;
                    quality[row, g] = float.NaN;
                    imputed[row, g] = enough && ArrayUtils.Or(flags);
                }
            }

            mdata.ExpressionColumnNames = new List<string>(groups);
            mdata.ExpressionColumnDescriptions = GetEmpty(groups);
            mdata.ExpressionValues = aggregated;
            mdata.QualityValues = quality;
            mdata.IsImputed = imputed;
            mdata.RemoveCategoryRowAt(groupColInd);

            // Collapse the remaining annotation rows onto the new group columns.
            for (int r = 0; r < mdata.CategoryRowCount; r++){
                string[][] collapsed = AverageCategoryRow(mdata.GetCategoryRowAt(r), members);
                mdata.SetCategoryRowAt(collapsed, r);
            }
            for (int r = 0; r < mdata.NumericRows.Count; r++){
                mdata.NumericRows[r] = AverageNumericRow(mdata.NumericRows[r], members);
            }
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Converts the string columns at <paramref name="colInds"/> into expression columns.
 /// </summary>
 /// <remarks>
 /// Each cell is parsed as a float using the invariant culture, so the result does not
 /// depend on the regional settings of the machine; cells that fail to parse become NaN.
 /// The new columns are appended after the existing expression columns with NaN quality
 /// and an imputation flag of false. The converted columns are removed from the string
 /// columns, and category and numeric rows are extended by the number of added columns.
 /// </remarks>
 /// <param name="colInds">Indices of the string columns to convert.</param>
 /// <param name="mdata">Matrix that is modified in place.</param>
 private static void StringToExpression(IList<int> colInds, IMatrixData mdata)
 {
     // Indices of the string columns that stay string columns.
     int[] inds = ArrayUtils.Complement(colInds, mdata.StringColumnCount);
     string[] names = ArrayUtils.SubArray(mdata.StringColumnNames, colInds);
     string[] descriptions = ArrayUtils.SubArray(mdata.StringColumnDescriptions, colInds);
     string[][] str = ArrayUtils.SubArray(mdata.StringColumns, colInds);
     float[][] newEx = new float[str.Length][];
     for (int j = 0; j < str.Length; j++){
         newEx[j] = new float[str[j].Length];
         for (int i = 0; i < newEx[j].Length; i++){
             float f;
             // Same number styles as the two-argument overload, but with a fixed culture:
             // the culture-sensitive overload would read "1.5" differently on machines
             // whose decimal separator is a comma.
             bool success = float.TryParse(str[j][i],
                 System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                 System.Globalization.CultureInfo.InvariantCulture, out f);
             newEx[j][i] = success ? f : float.NaN;
         }
     }
     float[,] newExp = new float[mdata.RowCount,mdata.ExpressionColumnCount + str.Length];
     float[,] newQual = new float[mdata.RowCount,mdata.ExpressionColumnCount + str.Length];
     bool[,] newIsImputed = new bool[mdata.RowCount,mdata.ExpressionColumnCount + str.Length];
     for (int i = 0; i < mdata.RowCount; i++){
         // Existing expression columns are copied over unchanged.
         for (int j = 0; j < mdata.ExpressionColumnCount; j++){
             newExp[i, j] = mdata[i, j];
             newQual[i, j] = mdata.QualityValues[i, j];
             newIsImputed[i, j] = mdata.IsImputed[i, j];
         }
         // Converted columns are appended to the right.
         for (int j = 0; j < newEx.Length; j++){
             newExp[i, j + mdata.ExpressionColumnCount] = newEx[j][i];
             newQual[i, j + mdata.ExpressionColumnCount] = float.NaN;
             newIsImputed[i, j + mdata.ExpressionColumnCount] = false;
         }
     }
     mdata.ExpressionValues = newExp;
     mdata.QualityValues = newQual;
     mdata.IsImputed = newIsImputed;
     mdata.ExpressionColumnNames.AddRange(names);
     mdata.ExpressionColumnDescriptions.AddRange(descriptions);
     mdata.StringColumns = ArrayUtils.SubList(mdata.StringColumns, inds);
     mdata.StringColumnNames = ArrayUtils.SubList(mdata.StringColumnNames, inds);
     mdata.StringColumnDescriptions = ArrayUtils.SubList(mdata.StringColumnDescriptions, inds);
     for (int i = 0; i < mdata.CategoryRowCount; i++){
         mdata.SetCategoryRowAt(ExtendCategoryRow(mdata.GetCategoryRowAt(i), str.Length),i);
     }
     for (int i = 0; i < mdata.NumericRows.Count; i++){
         mdata.NumericRows[i] = ExtendNumericRow(mdata.NumericRows[i], str.Length);
     }
 }