/// <summary>
/// Replaces the content of <paramref name="mdata"/> with a table named "Summary".
/// Every statistic from <c>SummaryStatisticsRows.procs</c> whose index is selected in the
/// "Calculate" parameter is evaluated on each column selected in the "Columns" parameter.
/// The names of the processed columns are stored in a string column called "Columns".
/// </summary>
/// <param name="mdata">Matrix that is read and then overwritten through <c>SetData</c>.</param>
/// <param name="param">Supplies the "Columns" and "Calculate" multi-choice selections.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] cols = param.GetMultiChoiceParam("Columns").Value;
            HashSet<int> selected = ArrayUtils.ToHashSet(param.GetMultiChoiceParam("Calculate").Value);
            // Indices of the chosen statistics, in ascending order of their position in procs.
            List<int> chosen = new List<int>();
            for (int p = 0; p < SummaryStatisticsRows.procs.Length; p++){
                if (selected.Contains(p)){
                    chosen.Add(p);
                }
            }
            // One result vector (one value per processed column) and one name per chosen statistic.
            List<double[]> statValues = new List<double[]>();
            List<string> statNames = new List<string>();
            foreach (int p in chosen){
                statValues.Add(new double[cols.Length]);
                statNames.Add(SummaryStatisticsRows.procs[p].Item1);
            }
            for (int c = 0; c < cols.Length; c++){
                // Each column is fetched once and then handed to every chosen statistic.
                double[] column = GetColumn(cols[c], mdata);
                for (int k = 0; k < chosen.Count; k++){
                    statValues[k][c] = SummaryStatisticsRows.procs[chosen[k]].Item2(column);
                }
            }
            float[,] exVals = GetExVals(statValues);
            string[] colNames = GetColNames(mdata, cols);
            mdata.SetData("Summary", new List<string>(statNames), exVals, new List<string>{"Columns"},
                new List<string[]>{colNames}, mdata.CategoryRowNames,
                TransformCategories(mdata, cols, mdata.ExpressionColumnCount), mdata.NumericRowNames,
                TransformNumeric(mdata.NumericRows, cols, mdata.ExpressionColumnCount), new List<string>(),
                new List<double[][]>());
        }
        /// <summary>
        /// Replaces the content of <paramref name="data"/> with curve columns derived from an indicator
        /// column and one or more score columns. For every selected score column the rows are ordered via
        /// <c>GetOrder</c>, the indicator flags are rearranged accordingly and passed to <c>CalcCurve</c>,
        /// which appends its output to the collected columns and column names.
        /// </summary>
        /// <param name="data">Matrix that is read and then overwritten through <c>SetData</c>.</param>
        /// <param name="param">Supplies "Indicated are", "In column", "Indicator", "Scores",
        /// "Large values are good" and "Display quantity".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error string when no score column or no display
        /// quantity is selected; in that case the matrix is left untouched.</param>
        public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            bool falseAreIndicated = param.GetSingleChoiceParam("Indicated are").Value == 0;
            int indicatorColumn = param.GetSingleChoiceParam("In column").Value;
            string indicatorWord = param.GetStringParam("Indicator").Value;
            int[] scoreColumns = param.GetMultiChoiceParam("Scores").Value;
            if (scoreColumns.Length == 0){
                processInfo.ErrString = "Please specify at least one column with scores.";
                return;
            }
            bool largeIsGood = param.GetBoolParam("Large values are good").Value;
            int[] quantities = param.GetMultiChoiceParam("Display quantity").Value;
            if (quantities.Length == 0){
                processInfo.ErrString = "Please select at least one quantity to display";
                return;
            }
            bool[] indicated = GetIndicatorColumn(falseAreIndicated, indicatorColumn, indicatorWord, data);
            List<string> curveNames = new List<string>();
            List<float[]> curves = new List<float[]>();
            foreach (int scoreColumn in scoreColumns){
                double[] scores;
                string scoreName;
                // Indices below NumericColumnCount address numeric columns; the remaining indices
                // address expression columns, shifted by the number of numeric columns.
                if (scoreColumn < data.NumericColumnCount){
                    scores = data.NumericColumns[scoreColumn];
                    scoreName = data.NumericColumnNames[scoreColumn];
                } else{
                    scores = ArrayUtils.ToDoubles(data.GetExpressionColumn(scoreColumn - data.NumericColumnCount));
                    scoreName = data.ExpressionColumnNames[scoreColumn - data.NumericColumnCount];
                }
                int[] order = GetOrder(scores, largeIsGood);
                bool[] orderedIndicator = ArrayUtils.SubArray(indicated, order);
                CalcCurve(orderedIndicator, quantities, scoreName, curves, curveNames);
            }
            float[,] curveMatrix = ToMatrix(curves);
            // Only the curve columns survive; all annotation columns are replaced by empty lists.
            data.SetData(data.Name, curveNames, curveMatrix, new List<string>(), new List<string[]>(), new List<string>(),
                new List<string[][]>(), new List<string>(), new List<double[]>(), new List<string>(), new List<double[][]>());
        }
Esempio n. 3
0
 /// <summary>
 /// Transposes the expression matrix of <paramref name="mdata"/>. The former expression column
 /// names are kept in a string column called "Name". The new column names are taken from the string
 /// column selected by the "New column names" parameter.
 /// </summary>
 /// <param name="mdata">Matrix that is read and then overwritten through <c>SetData</c>.</param>
 /// <param name="param">Supplies the "New column names" single-choice selection.</param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo)
 {
     int nameCol = param.GetSingleChoiceParam("New column names").Value;
     float[,] x = ArrayUtils.Transpose(mdata.ExpressionValues);
     List<string> colNames;
     if (nameCol >= 0){
         colNames = new List<string>(mdata.StringColumns[nameCol]);
     } else{
         // A negative selection would make the StringColumns lookup throw, so generic names
         // "Column1", "Column2", ... are generated instead, one per original row.
         colNames = new List<string>();
         for (int i = 0; i < mdata.RowCount; i++){
             colNames.Add("Column" + (i + 1));
         }
     }
     List<string> rowNames = mdata.ExpressionColumnNames;
     mdata.SetData(mdata.Name, colNames, x, new List<string>(new[]{"Name"}), new List<string[]>(new[]{rowNames.ToArray()}),
         new List<string>(), new List<string[][]>(), new List<string>(), new List<double[]>(), new List<string>(),
         new List<double[][]>());
 }
Esempio n. 4
0
        /// <summary>
        /// Transposes <paramref name="mdata"/>: expression values, imputation flags and quality values
        /// are transposed, and the former expression column names are stored in a string column called
        /// "Name". New column names come from the string column selected by "New column names", made
        /// unique via <c>GetNextAvailableName</c>. With a negative selection the names are
        /// "Column1", "Column2", ... (one per original row).
        /// </summary>
        /// <param name="mdata">Matrix that is read and then overwritten through <c>SetData</c>.</param>
        /// <param name="param">Supplies the "New column names" single-choice selection.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int nameCol = param.GetSingleChoiceParam("New column names").Value;
            List<string> colNames = new List<string>();
            if (nameCol < 0){
                // No name column chosen: fall back to numbered names.
                for (int number = 1; number <= mdata.RowCount; number++){
                    colNames.Add("Column" + number);
                }
            } else{
                // Names already handed out; passed to GetNextAvailableName for every entry.
                HashSet<string> used = new HashSet<string>();
                foreach (string candidate in mdata.StringColumns[nameCol]){
                    string unique = GetNextAvailableName(candidate, used);
                    used.Add(unique);
                    colNames.Add(unique);
                }
            }
            List<string> rowNames = mdata.ExpressionColumnNames;
            // The same list serves as column names and column descriptions; likewise "Name" is used
            // both as the name and as the description of the single string column.
            List<string> stringColNames = new List<string>{"Name"};
            List<string> stringColDescriptions = new List<string>{"Name"};
            mdata.SetData(mdata.Name, mdata.Description, colNames, colNames, ArrayUtils.Transpose(mdata.ExpressionValues),
                ArrayUtils.Transpose(mdata.IsImputed), ArrayUtils.Transpose(mdata.QualityValues), mdata.QualityName,
                mdata.QualityBiggerIsBetter, stringColNames, stringColDescriptions,
                new List<string[]>{rowNames.ToArray()}, mdata.CategoryRowNames, mdata.CategoryRowDescriptions,
                GetCategoryRows(mdata), mdata.NumericRowNames, mdata.NumericRowDescriptions, mdata.NumericRows, new List<string>(),
                new List<string>(), new List<double[][]>(), mdata.CategoryColumnNames, mdata.CategoryColumnDescriptions,
                GetCategoryColumns(mdata), mdata.NumericColumnNames, mdata.NumericColumnDescriptions, mdata.NumericColumns);
        }
        /// <summary>
        /// Reads a tab-separated file into <paramref name="matrixData"/>. The first line of the file is
        /// read and discarded, comment lines are skipped, and every remaining line is split on tabs and
        /// distributed over expression, multi-numeric, category, numeric and text columns according to
        /// the supplied column index lists. Fields that are missing or cannot be parsed as numbers
        /// become NaN (numeric), an empty array (category, multi-numeric) or an empty string (text).
        /// Afterwards column descriptions and the annotation rows are attached.
        /// </summary>
        /// <param name="colNames">Names of all file columns; addressed by the index lists.</param>
        /// <param name="colDescriptions">Descriptions of all file columns, or null if there are none.</param>
        /// <param name="expressionColIndices">File column indices loaded as expression columns.</param>
        /// <param name="catColIndices">File column indices loaded as category columns (';'-separated terms).</param>
        /// <param name="numColIndices">File column indices loaded as numeric columns.</param>
        /// <param name="textColIndices">File column indices loaded as string columns.</param>
        /// <param name="multiNumColIndices">File column indices loaded as multi-numeric columns (';'-separated numbers).</param>
        /// <param name="filename">Path of the file to read; also used as matrix name and origin.</param>
        /// <param name="matrixData">Matrix that receives the loaded data.</param>
        /// <param name="annotationRows">Annotation rows, split into category and numeric rows by <c>SplitAnnotRows</c>.</param>
        /// <param name="progress">Callback receiving a percentage derived from the number of lines processed.</param>
        /// <param name="status">Callback receiving status text ("Reading data" at the start, "" at the end).</param>
        private static void LoadData(IList<string> colNames, IList<string> colDescriptions, IList<int> expressionColIndices,
			IList<int> catColIndices, IList<int> numColIndices, IList<int> textColIndices, IList<int> multiNumColIndices,
			string filename, IMatrixData matrixData, IDictionary<string, string[]> annotationRows, Action<int> progress,
			Action<string> status)
        {
            Dictionary<string, string[]> catAnnotatRows;
            Dictionary<string, string[]> numAnnotatRows;
            status("Reading data");
            SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);
            int nrows = TabSep.GetRowCount(filename, 0, commentPrefix, commentPrefixExceptions);
            float[,] expressionValues = new float[nrows,expressionColIndices.Count];
            // Pre-allocate one container of nrows entries for every annotation column.
            List<string[][]> categoryAnnotation = new List<string[][]>();
            for (int i = 0; i < catColIndices.Count; i++){
                categoryAnnotation.Add(new string[nrows][]);
            }
            List<double[]> numericAnnotation = new List<double[]>();
            for (int i = 0; i < numColIndices.Count; i++){
                numericAnnotation.Add(new double[nrows]);
            }
            List<double[][]> multiNumericAnnotation = new List<double[][]>();
            for (int i = 0; i < multiNumColIndices.Count; i++){
                multiNumericAnnotation.Add(new double[nrows][]);
            }
            List<string[]> stringAnnotation = new List<string[]>();
            for (int i = 0; i < textColIndices.Count; i++){
                stringAnnotation.Add(new string[nrows]);
            }
            // The using block releases the file handle even when parsing throws.
            using (StreamReader reader = new StreamReader(filename)){
                // The first line is discarded; presumably it is the header line.
                reader.ReadLine();
                int count = 0;
                string line;
                while ((line = reader.ReadLine()) != null){
                    // Guard against integer division by zero when no data rows were counted.
                    if (nrows > 0){
                        progress((100*(count + 1))/nrows);
                    }
                    if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions)){
                        continue;
                    }
                    string[] w = line.Split('\t');
                    for (int i = 0; i < expressionColIndices.Count; i++){
                        if (expressionColIndices[i] >= w.Length){
                            expressionValues[count, i] = float.NaN;
                        } else{
                            string s = StringUtils.RemoveWhitespace(w[expressionColIndices[i]]);
                            bool success = float.TryParse(s, out expressionValues[count, i]);
                            if (!success){
                                expressionValues[count, i] = float.NaN;
                            }
                        }
                    }
                    for (int i = 0; i < multiNumColIndices.Count; i++){
                        if (multiNumColIndices[i] >= w.Length){
                            multiNumericAnnotation[i][count] = new double[0];
                        } else{
                            string[] ww = SplitQuotedSemicolonField(w[multiNumColIndices[i]]);
                            multiNumericAnnotation[i][count] = new double[ww.Length];
                            for (int j = 0; j < ww.Length; j++){
                                double q1;
                                bool success = double.TryParse(ww[j], out q1);
                                multiNumericAnnotation[i][count][j] = success ? q1 : double.NaN;
                            }
                        }
                    }
                    for (int i = 0; i < catColIndices.Count; i++){
                        if (catColIndices[i] >= w.Length){
                            categoryAnnotation[i][count] = new string[0];
                        } else{
                            string[] ww = SplitQuotedSemicolonField(w[catColIndices[i]]);
                            // Category terms are stored sorted.
                            Array.Sort(ww);
                            categoryAnnotation[i][count] = ww;
                        }
                    }
                    for (int i = 0; i < numColIndices.Count; i++){
                        if (numColIndices[i] >= w.Length){
                            numericAnnotation[i][count] = double.NaN;
                        } else{
                            double q;
                            bool success = double.TryParse(w[numColIndices[i]].Trim(), out q);
                            numericAnnotation[i][count] = success ? q : double.NaN;
                        }
                    }
                    for (int i = 0; i < textColIndices.Count; i++){
                        if (textColIndices[i] >= w.Length){
                            stringAnnotation[i][count] = "";
                        } else{
                            string q = w[textColIndices[i]].Trim();
                            stringAnnotation[i][count] = RemoveSplitWhitespace(RemoveQuotes(q));
                        }
                    }
                    count++;
                }
            }
            string[] columnNames = ArrayUtils.SubArray(colNames, expressionColIndices);
            string[] catColnames = ArrayUtils.SubArray(colNames, catColIndices);
            string[] numColnames = ArrayUtils.SubArray(colNames, numColIndices);
            string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
            string[] textColnames = ArrayUtils.SubArray(colNames, textColIndices);
            matrixData.SetData(filename, RemoveQuotes(columnNames), expressionValues, RemoveQuotes(textColnames),
                stringAnnotation, RemoveQuotes(catColnames), categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation,
                RemoveQuotes(multiNumColnames), multiNumericAnnotation);
            if (colDescriptions != null){
                string[] columnDesc = ArrayUtils.SubArray(colDescriptions, expressionColIndices);
                string[] catColDesc = ArrayUtils.SubArray(colDescriptions, catColIndices);
                string[] numColDesc = ArrayUtils.SubArray(colDescriptions, numColIndices);
                string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
                string[] textColDesc = ArrayUtils.SubArray(colDescriptions, textColIndices);
                matrixData.ExpressionColumnDescriptions = new List<string>(columnDesc);
                matrixData.NumericColumnDescriptions = new List<string>(numColDesc);
                matrixData.CategoryColumnDescriptions = new List<string>(catColDesc);
                matrixData.StringColumnDescriptions = new List<string>(textColDesc);
                matrixData.MultiNumericColumnDescriptions = new List<string>(multiNumColDesc);
            }
            foreach (string key in ArrayUtils.GetKeys(catAnnotatRows)){
                string name = key;
                string[] svals = ArrayUtils.SubArray(catAnnotatRows[key], expressionColIndices);
                string[][] cat = new string[svals.Length][];
                for (int i = 0; i < cat.Length; i++){
                    string s = svals[i].Trim();
                    cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
                }
                matrixData.AddCategoryRow(name, name, cat);
            }
            foreach (string key in ArrayUtils.GetKeys(numAnnotatRows)){
                string name = key;
                string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], expressionColIndices);
                double[] num = new double[svals.Length];
                for (int i = 0; i < num.Length; i++){
                    string s = svals[i].Trim();
                    // TryParse writes 0 into its out argument on failure, so NaN has to be
                    // assigned after a failed parse rather than before the call.
                    if (!double.TryParse(s, out num[i])){
                        num[i] = double.NaN;
                    }
                }
                matrixData.AddNumericRow(name, name, num);
            }
            matrixData.Origin = filename;
            status("");
        }

        /// <summary>
        /// Trims <paramref name="field"/>, removes one pair of enclosing double quotes and then one pair
        /// of enclosing single quotes if present, and splits the remainder on ';'.
        /// </summary>
        /// <param name="field">Raw field text taken from one tab-separated line.</param>
        /// <returns>The ';'-separated parts, or an empty array when nothing remains after stripping.</returns>
        private static string[] SplitQuotedSemicolonField(string field)
        {
            string q = field.Trim();
            if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"'){
                q = q.Substring(1, q.Length - 2);
            }
            if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\''){
                q = q.Substring(1, q.Length - 2);
            }
            return q.Length == 0 ? new string[0] : q.Split(';');
        }
 /// <summary>
 /// Expands every row of <paramref name="mdata"/> into one output row per entry of the selected
 /// multi-numeric and ';'-separated string columns. All other column values are repeated on each
 /// output row. A row with zero entries still yields one output row, with an empty array or empty
 /// string in the selected columns. The selected multi-numeric columns are finally converted via
 /// <c>Transform</c> and appended to the numeric columns.
 /// </summary>
 /// <param name="mdata">Matrix that is read and then overwritten through <c>SetData</c>.</param>
 /// <param name="param1">Supplies the "Multi-numeric columns" and "String columns" selections.</param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="processInfo">Receives an error string when nothing is selected or when
 /// <c>GetEntryCount</c> reports an error; in both cases the matrix is left untouched.</param>
 public void ProcessData(IMatrixData mdata, Parameters param1, ref IMatrixData[] supplTables, ProcessInfo processInfo)
 {
     int[] multiNumCols = param1.GetMultiChoiceParam("Multi-numeric columns").Value;
     Array.Sort(multiNumCols);
     int[] stringCols = param1.GetMultiChoiceParam("String columns").Value;
     Array.Sort(stringCols);
     if (multiNumCols.Length == 0 && stringCols.Length == 0){
         processInfo.ErrString = "Please select some columns.";
         return;
     }
     // Sets for quick membership tests while walking over all columns.
     HashSet<int> expandMultiNum = new HashSet<int>(multiNumCols);
     HashSet<int> expandString = new HashSet<int>(stringCols);
     int newRowCount = GetNewRowCount(mdata, multiNumCols, stringCols);
     float[,] expVals = new float[newRowCount,mdata.ExpressionColumnCount];
     List<string[]> stringC = new List<string[]>();
     for (int c = 0; c < mdata.StringColumnCount; c++){
         stringC.Add(new string[newRowCount]);
     }
     List<double[]> numC = new List<double[]>();
     for (int c = 0; c < mdata.NumericColumnCount; c++){
         numC.Add(new double[newRowCount]);
     }
     List<string[][]> catC = new List<string[][]>();
     for (int c = 0; c < mdata.CategoryColumnCount; c++){
         catC.Add(new string[newRowCount][]);
     }
     List<double[][]> multiNumC = new List<double[][]>();
     for (int c = 0; c < mdata.MultiNumericColumnCount; c++){
         multiNumC.Add(new double[newRowCount][]);
     }
     // Index of the first output row belonging to the current input row.
     int outRow = 0;
     for (int row = 0; row < mdata.RowCount; row++){
         string err;
         int entries = GetEntryCount(row, mdata, multiNumCols, stringCols, out err);
         if (err != null){
             processInfo.ErrString = err;
             return;
         }
         bool empty = entries == 0;
         // Even a row without entries occupies one output row.
         int copies = Math.Max(entries, 1);
         for (int j = 0; j < copies; j++){
             int target = outRow + j;
             for (int k = 0; k < mdata.ExpressionColumnCount; k++){
                 expVals[target, k] = mdata[row, k];
             }
             for (int k = 0; k < mdata.NumericColumnCount; k++){
                 numC[k][target] = mdata.NumericColumns[k][row];
             }
             for (int k = 0; k < mdata.CategoryColumnCount; k++){
                 catC[k][target] = mdata.CategoryColumns[k][row];
             }
         }
         for (int k = 0; k < mdata.MultiNumericColumnCount; k++){
             if (!expandMultiNum.Contains(k)){
                 // Unselected column: the whole array is repeated on every copy.
                 for (int j = 0; j < copies; j++){
                     multiNumC[k][outRow + j] = mdata.MultiNumericColumns[k][row];
                 }
             } else if (empty){
                 multiNumC[k][outRow] = new double[0];
             } else{
                 // Selected column: the j-th entry goes to the j-th copy as a one-element array.
                 double[] source = mdata.MultiNumericColumns[k][row];
                 for (int j = 0; j < copies; j++){
                     multiNumC[k][outRow + j] = new[]{source[j]};
                 }
             }
         }
         for (int k = 0; k < mdata.StringColumnCount; k++){
             if (!expandString.Contains(k)){
                 for (int j = 0; j < copies; j++){
                     stringC[k][outRow + j] = mdata.StringColumns[k][row];
                 }
             } else if (empty){
                 stringC[k][outRow] = "";
             } else{
                 string[] parts = mdata.StringColumns[k][row].Split(';');
                 for (int j = 0; j < copies; j++){
                     stringC[k][outRow + j] = parts[j];
                 }
             }
         }
         outRow += copies;
     }
     // Selected multi-numeric columns move to the numeric columns; the rest stay multi-numeric.
     int[] keptMultiNum = ArrayUtils.Complement(multiNumCols, mdata.MultiNumericColumnCount);
     List<double[][]> converted = ArrayUtils.SubList(multiNumC, multiNumCols);
     multiNumC = ArrayUtils.SubList(multiNumC, keptMultiNum);
     foreach (double[][] column in converted){
         numC.Add(Transform(column));
     }
     List<string> numericNames = new List<string>(ArrayUtils.Concat(mdata.NumericColumnNames,
         ArrayUtils.SubList(mdata.MultiNumericColumnNames, multiNumCols)));
     List<string> multiNumericNames = new List<string>(ArrayUtils.SubArray(mdata.MultiNumericColumnNames, keptMultiNum));
     mdata.SetData(mdata.Name, mdata.ExpressionColumnNames, expVals, mdata.StringColumnNames, stringC,
         mdata.CategoryColumnNames, catC, numericNames, numC, multiNumericNames, multiNumC);
 }
 /// <summary>
 /// Fills <paramref name="data"/> with a table named "Count" built from <paramref name="result"/>.
 /// Entries whose total count is below <paramref name="minCount"/> are skipped. Each remaining entry
 /// contributes the category columns "Type" and "Name" and the numeric columns "Count" and
 /// "Percentage of total" (relative to <c>data.RowCount</c>, rounded to two decimals). When
 /// <paramref name="selection"/> is not null, "Selection count" and "Selection percentage"
 /// (relative to the entry's total count, rounded to one decimal) are added as well.
 /// </summary>
 /// <param name="result">Source of the type, name, total count and selection count per entry.</param>
 /// <param name="data">Matrix whose RowCount is read and whose content is then overwritten.</param>
 /// <param name="minCount">Minimum total count an entry needs to be included.</param>
 /// <param name="selection">Only tested against null to decide whether selection columns are produced.</param>
 private static void CreateMatrixData(CountingResult result, IMatrixData data, int minCount, IEnumerable selection)
 {
     bool withSelection = selection != null;
     List<string[]> types = new List<string[]>();
     List<string[]> names = new List<string[]>();
     List<double> totals = new List<double>();
     List<double> totalPercents = new List<double>();
     List<double> selectedCounts = new List<double>();
     List<double> selectedPercents = new List<double>();
     for (int i = 0; i < result.Count; i++){
         int total = result.GetTotalCountAt(i);
         if (total >= minCount){
             // Category cells are term arrays, hence the single-element wrappers.
             types.Add(new[]{result.GetType1At(i)});
             names.Add(new[]{result.GetName1At(i)});
             totals.Add(total);
             totalPercents.Add(Math.Round(10000.0*total/data.RowCount)/100.0);
             if (withSelection){
                 int selected = result.GetSelectCountAt(i);
                 selectedCounts.Add(selected);
                 selectedPercents.Add(Math.Round(1000.0*selected/total)/10.0);
             }
         }
     }
     // The resulting table has no expression columns, only annotation columns.
     float[,] noExpression = new float[types.Count,0];
     List<string[][]> catCols = new List<string[][]>();
     catCols.Add(types.ToArray());
     catCols.Add(names.ToArray());
     List<string> catColNames = new List<string>{"Type", "Name"};
     List<double[]> numCols = new List<double[]>();
     numCols.Add(totals.ToArray());
     numCols.Add(totalPercents.ToArray());
     List<string> numColNames = new List<string>{"Count", "Percentage of total"};
     if (withSelection){
         numCols.Add(selectedCounts.ToArray());
         numCols.Add(selectedPercents.ToArray());
         numColNames.Add("Selection count");
         numColNames.Add("Selection percentage");
     }
     data.SetData("Count", new List<string>(), noExpression, new List<string>(), new List<string[]>(), catColNames, catCols,
         numColNames, numCols, new List<string>(), new List<double[][]>());
 }