/// <summary>
/// Creates a copy of <paramref name="matrixData1"/> and appends to it columns taken from
/// <paramref name="matrixData2"/>. Rows are linked through the string columns selected by
/// "Matching column 1" and "Matching column 2": every cell is trimmed and split on ';', and a
/// row of matrix 1 is linked to each row of matrix 2 that carries at least one of its identifiers.
/// Values of all linked rows are then combined per column type.
/// </summary>
/// <param name="matrixData1">Base matrix; the result has one row per row of this matrix.</param>
/// <param name="matrixData2">Matrix whose selected columns are merged into the result.</param>
/// <param name="parameters">Supplies the matching columns, the column selections, the two
/// averaging choices and the "Indicator" flag.</param>
/// <param name="processInfo">Not used by this method.</param>
/// <returns>The copy of matrix 1 with the merged columns added and Origin set to "Combination".</returns>
public IMatrixData CombineData(IMatrixData matrixData1, IMatrixData matrixData2, Parameters parameters,
			ProcessInfo processInfo)
        {
            bool indicator = parameters.GetBoolParam("Indicator").Value;
            int otherCol = parameters.GetSingleChoiceParam("Matching column 2").Value;
            Average avExpression = GetAveraging(parameters.GetSingleChoiceParam("Combine expression values").Value);
            Average avNumerical = GetAveraging(parameters.GetSingleChoiceParam("Combine numerical values").Value);
            string[][] otherIds = SplitMatchingIds(matrixData2.StringColumns[otherCol]);
            // Identifier -> indices of the rows of matrixData2 that carry it.
            Dictionary<string, List<int>> idToOtherRows = new Dictionary<string, List<int>>();
            for (int i = 0; i < otherIds.Length; i++){
                foreach (string id in otherIds[i]){
                    List<int> rows;
                    if (!idToOtherRows.TryGetValue(id, out rows)){
                        rows = new List<int>();
                        idToOtherRows.Add(id, rows);
                    }
                    rows.Add(i);
                }
            }
            int pgCol = parameters.GetSingleChoiceParam("Matching column 1").Value;
            string[][] baseIds = SplitMatchingIds(matrixData1.StringColumns[pgCol]);
            // indexMap[i] holds the rows of matrixData2 linked to row i of matrixData1.
            int[][] indexMap = new int[baseIds.Length][];
            string[][] indicatorCol = new string[baseIds.Length][];
            for (int i = 0; i < indexMap.Length; i++){
                List<int> matched = new List<int>();
                foreach (string id in baseIds[i]){
                    List<int> rows;
                    if (idToOtherRows.TryGetValue(id, out rows)){
                        matched.AddRange(rows);
                    }
                }
                // Several identifiers may point at the same row of matrixData2.
                indexMap[i] = ArrayUtils.UniqueValues(matched.ToArray());
                indicatorCol[i] = indexMap[i].Length > 0 ? new[]{"+"} : new string[0];
            }
            IMatrixData result = matrixData1.Copy();
            SetAnnotationRows(result, matrixData1, matrixData2);
            if (indicator){
                // Category column named after matrix 2 that marks rows with at least one match.
                result.AddCategoryColumn(matrixData2.Name, "", indicatorCol);
            }
                {
                    // Expression columns: combine value, quality and imputation flag of linked rows.
                    int[] exCols = parameters.GetMultiChoiceParam("Expression columns").Value;
                    float[,] newExColumns = new float[matrixData1.RowCount, exCols.Length];
                    float[,] newQuality = new float[matrixData1.RowCount, exCols.Length];
                    bool[,] newIsImputed = new bool[matrixData1.RowCount, exCols.Length];
                    string[] newExColNames = new string[exCols.Length];
                    float[,] oldEx = matrixData2.ExpressionValues;
                    float[,] oldQual = matrixData2.QualityValues;
                    bool[,] oldImp = matrixData2.IsImputed;
                    for (int i = 0; i < exCols.Length; i++) {
                        int srcCol = exCols[i];
                        newExColNames[i] = matrixData2.ExpressionColumnNames[srcCol];
                        for (int j = 0; j < matrixData1.RowCount; j++){
                            List<double> values = new List<double>();
                            List<double> qual = new List<double>();
                            List<bool> imp = new List<bool>();
                            foreach (int ind in indexMap[j]) {
                                double v = oldEx[ind, srcCol];
                                // Quality and imputation are only considered for finite expression values.
                                if (!double.IsNaN(v) && !double.IsInfinity(v)){
                                    values.Add(v);
                                    double qx = oldQual[ind, srcCol];
                                    if (!double.IsNaN(qx) && !double.IsInfinity(qx)){
                                        qual.Add(qx);
                                    }
                                    imp.Add(oldImp[ind, srcCol]);
                                }
                            }
                            newExColumns[j, i] = values.Count == 0 ? float.NaN : (float)avExpression(values.ToArray());
                            // Quality values are combined with the expression averaging choice as well.
                            newQuality[j, i] = qual.Count == 0 ? float.NaN : (float)avExpression(qual.ToArray());
                            newIsImputed[j, i] = imp.Count != 0 && AvImp(imp.ToArray());
                        }
                    }
                    // NOTE(review): MakeNewNames is defined elsewhere; presumably it adjusts the new
                    // names so they do not clash with the existing expression column names.
                    MakeNewNames(newExColNames, result.ExpressionColumnNames);
                    AddExpressionColumns(result, newExColNames, newExColumns, newQuality, newIsImputed);
                }
                {
                    // Numerical columns: combine the non-NaN values of linked rows.
                    int[] numCols = parameters.GetMultiChoiceParam("Numerical columns").Value;
                    double[][] newNumericalColumns = new double[numCols.Length][];
                    string[] newNumColNames = new string[numCols.Length];
                    for (int i = 0; i < numCols.Length; i++){
                        double[] oldCol = matrixData2.NumericColumns[numCols[i]];
                        newNumColNames[i] = matrixData2.NumericColumnNames[numCols[i]];
                        newNumericalColumns[i] = new double[matrixData1.RowCount];
                        for (int j = 0; j < matrixData1.RowCount; j++){
                            List<double> values = new List<double>();
                            foreach (int ind in indexMap[j]){
                                double v = oldCol[ind];
                                if (!double.IsNaN(v)){
                                    values.Add(v);
                                }
                            }
                            newNumericalColumns[i][j] = values.Count == 0 ? double.NaN : avNumerical(values.ToArray());
                        }
                    }
                    for (int i = 0; i < numCols.Length; i++){
                        result.AddNumericColumn(newNumColNames[i], "", newNumericalColumns[i]);
                    }
                }
                {
                    // Categorical columns: concatenate the non-empty term lists of linked rows.
                    int[] catCols = parameters.GetMultiChoiceParam("Categorical columns").Value;
                    string[][][] newCatColumns = new string[catCols.Length][][];
                    string[] newCatColNames = new string[catCols.Length];
                    for (int i = 0; i < catCols.Length; i++){
                        string[][] oldCol = matrixData2.CategoryColumns[catCols[i]];
                        newCatColNames[i] = matrixData2.CategoryColumnNames[catCols[i]];
                        newCatColumns[i] = new string[matrixData1.RowCount][];
                        for (int j = 0; j < matrixData1.RowCount; j++){
                            List<string[]> values = new List<string[]>();
                            foreach (int ind in indexMap[j]){
                                string[] v = oldCol[ind];
                                if (v.Length > 0){
                                    values.Add(v);
                                }
                            }
                            newCatColumns[i][j] = values.Count == 0
                                ? new string[0] : ArrayUtils.UniqueValues(ArrayUtils.Concat(values.ToArray()));
                        }
                    }
                    for (int i = 0; i < catCols.Length; i++){
                        result.AddCategoryColumn(newCatColNames[i], "", newCatColumns[i]);
                    }
                }
                {
                    // String columns: join the non-empty strings of linked rows with ';'.
                    int[] stringCols = parameters.GetMultiChoiceParam("String columns").Value;
                    string[][] newStringColumns = new string[stringCols.Length][];
                    string[] newStringColNames = new string[stringCols.Length];
                    for (int i = 0; i < stringCols.Length; i++){
                        string[] oldCol = matrixData2.StringColumns[stringCols[i]];
                        newStringColNames[i] = matrixData2.StringColumnNames[stringCols[i]];
                        newStringColumns[i] = new string[matrixData1.RowCount];
                        for (int j = 0; j < matrixData1.RowCount; j++){
                            List<string> values = new List<string>();
                            foreach (int ind in indexMap[j]){
                                string v = oldCol[ind];
                                if (v.Length > 0){
                                    values.Add(v);
                                }
                            }
                            newStringColumns[i][j] = values.Count == 0 ? "" : StringUtils.Concat(";", values.ToArray());
                        }
                    }
                    for (int i = 0; i < stringCols.Length; i++){
                        result.AddStringColumn(newStringColNames[i], "", newStringColumns[i]);
                    }
                }
            result.Origin = "Combination";
            return result;
        }

        /// <summary>
        /// Turns each cell of a matching column into its list of identifiers: the cell is trimmed,
        /// split on ';' (an empty cell yields an empty array) and passed through
        /// ArrayUtils.UniqueValues.
        /// </summary>
        /// <param name="cells">Raw cell contents of the matching column.</param>
        /// <returns>One identifier array per input cell, in the same order.</returns>
        private static string[][] SplitMatchingIds(string[] cells){
            string[][] ids = new string[cells.Length][];
            for (int i = 0; i < cells.Length; i++){
                string trimmed = cells[i].Trim();
                string[] parts = trimmed.Length == 0 ? new string[0] : trimmed.Split(';');
                ids[i] = ArrayUtils.UniqueValues(parts);
            }
            return ids;
        }
        /// <summary>
        /// Collapses groups of rows into single rows. Rows are grouped through the identifiers in
        /// the string column chosen by "ID column" (grouping itself is done by ClusterRows, defined
        /// elsewhere), and every column type is reduced per group with the matching Average overload.
        /// </summary>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="param">Supplies "Keep rows without ID", "ID column" and the average types.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            bool retainRowsWithoutId = param.GetBoolParam("Keep rows without ID").Value;
            AverageType expressionAverage =
                GetAverageType(param.GetSingleChoiceParam("Average type for expression columns").Value);
            string[] rawIds = mdata.StringColumns[param.GetSingleChoiceParam("ID column").Value];
            string[][] idLists = SplitIds(rawIds);
            int[] rowsWithIds;
            int[] rowsWithoutIds;
            GetPresentAbsentIndices(idLists, out rowsWithIds, out rowsWithoutIds);
            idLists = ArrayUtils.SubArray(idLists, rowsWithIds);

            // Start with one singleton group per row that has an identifier.
            int[][] groups = new int[rowsWithIds.Length][];
            int next = 0;
            foreach (int rowIndex in rowsWithIds){
                groups[next] = new[]{rowIndex};
                next++;
            }
            ClusterRows(ref groups, ref idLists);
            if (retainRowsWithoutId){
                groups = ProlongRowInds(groups, rowsWithoutIds);
            }

            int groupCount = groups.Length;
            int expressionColumnCount = mdata.ExpressionColumnCount;

            // Expression values: one averaged value per group and column.
            float[,] collapsed = new float[groupCount, expressionColumnCount];
            for (int col = 0; col < expressionColumnCount; col++){
                float[] column = mdata.GetExpressionColumn(col);
                for (int grp = 0; grp < groupCount; grp++){
                    float[] members = ArrayUtils.SubArray(column, groups[grp]);
                    collapsed[grp, col] = Average(members, expressionAverage);
                }
            }
            mdata.ExpressionValues = collapsed;

            // Numeric columns: each column has its own average type parameter.
            for (int col = 0; col < mdata.NumericColumnCount; col++){
                string columnName = mdata.NumericColumnNames[col];
                AverageType columnAverage =
                    GetAverageType(param.GetSingleChoiceParam("Average type for " + columnName).Value);
                double[] column = mdata.NumericColumns[col];
                double[] reduced = new double[groupCount];
                for (int grp = 0; grp < groupCount; grp++){
                    double[] members = ArrayUtils.SubArray(column, groups[grp]);
                    reduced[grp] = Average(members, columnAverage);
                }
                mdata.NumericColumns[col] = reduced;
            }

            // Category columns.
            for (int col = 0; col < mdata.CategoryColumnCount; col++){
                string[][] column = mdata.GetCategoryColumnAt(col);
                string[][] reduced = new string[groupCount][];
                for (int grp = 0; grp < groupCount; grp++){
                    string[][] members = ArrayUtils.SubArray(column, groups[grp]);
                    reduced[grp] = Average(members);
                }
                mdata.SetCategoryColumnAt(reduced, col);
            }

            // String columns.
            for (int col = 0; col < mdata.StringColumnCount; col++){
                string[] column = mdata.StringColumns[col];
                string[] reduced = new string[groupCount];
                for (int grp = 0; grp < groupCount; grp++){
                    string[] members = ArrayUtils.SubArray(column, groups[grp]);
                    reduced[grp] = Average(members);
                }
                mdata.StringColumns[col] = reduced;
            }

            // Multi-numeric columns.
            for (int col = 0; col < mdata.MultiNumericColumnCount; col++){
                double[][] column = mdata.MultiNumericColumns[col];
                double[][] reduced = new double[groupCount][];
                for (int grp = 0; grp < groupCount; grp++){
                    double[][] members = ArrayUtils.SubArray(column, groups[grp]);
                    reduced[grp] = Average(members);
                }
                mdata.MultiNumericColumns[col] = reduced;
            }
        }
        /// <summary>
        /// Replaces the content of <paramref name="data"/> with curves computed by CalcCurve
        /// (defined elsewhere). For every selected score column the rows are ordered by score and
        /// the reordered indicator flags are handed to CalcCurve together with the selected
        /// display quantities.
        /// </summary>
        /// <param name="data">Matrix that is read and then overwritten through SetData.</param>
        /// <param name="param">Supplies the indicator definition, the score columns, the
        /// sort direction and the quantities to display.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error string when no score column or no display
        /// quantity is selected.</param>
        public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int indicatedChoice = param.GetSingleChoiceParam("Indicated are").Value;
            bool falseAreIndicated = indicatedChoice == 0;
            int catCol = param.GetSingleChoiceParam("In column").Value;
            string word = param.GetStringParam("Indicator").Value;
            int[] scoreColumns = param.GetMultiChoiceParam("Scores").Value;
            if (scoreColumns.Length == 0){
                processInfo.ErrString = "Please specify at least one column with scores.";
                return;
            }
            bool largeIsGood = param.GetBoolParam("Large values are good").Value;
            int[] showColumns = param.GetMultiChoiceParam("Display quantity").Value;
            if (showColumns.Length == 0){
                processInfo.ErrString = "Please select at least one quantity to display";
                return;
            }
            bool[] indCol = GetIndicatorColumn(falseAreIndicated, catCol, word, data);
            List<string> curveNames = new List<string>();
            List<float[]> curves = new List<float[]>();
            for (int k = 0; k < scoreColumns.Length; k++){
                int scoreColumn = scoreColumns[k];
                // Indices below NumericColumnCount address numeric columns, the rest address
                // expression columns (offset by the numeric column count).
                double[] scores;
                string scoreName;
                if (scoreColumn < data.NumericColumnCount){
                    scores = data.NumericColumns[scoreColumn];
                    scoreName = data.NumericColumnNames[scoreColumn];
                } else{
                    int expressionIndex = scoreColumn - data.NumericColumnCount;
                    scores = ArrayUtils.ToDoubles(data.GetExpressionColumn(expressionIndex));
                    scoreName = data.ExpressionColumnNames[expressionIndex];
                }
                int[] order = GetOrder(scores, largeIsGood);
                bool[] orderedIndicator = ArrayUtils.SubArray(indCol, order);
                CalcCurve(orderedIndicator, showColumns, scoreName, curves, curveNames);
            }
            float[,] expData = ToMatrix(curves);
            // Only the curve columns are kept; all annotation column lists are passed empty.
            data.SetData(data.Name, curveNames, expData, new List<string>(), new List<string[]>(), new List<string>(),
                new List<string[][]>(), new List<string>(), new List<double[]>(), new List<string>(), new List<double[][]>());
        }
 /// <summary>
 /// Filters the rows of <paramref name="mdata"/> by comparing one numeric or expression column
 /// against a fixed value. The rows that pass are handed to PerseusPluginUtils.FilterRows.
 /// </summary>
 /// <param name="mdata">Matrix whose rows are filtered.</param>
 /// <param name="param">Supplies "Column", "Value", "Remove if" and "Keep NaN"; it is also
 /// forwarded to FilterRows.</param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ProcessInfo processInfo)
 {
     int colInd = param.GetSingleChoiceParam("Column").Value;
     double threshold = param.GetDoubleParam("Value").Value;
     int ruleInd = param.GetSingleChoiceParam("Remove if").Value;
     bool keepNan = param.GetBoolParam("Keep NaN").Value;
     // Indices below NumericColumnCount address numeric columns, the rest expression columns.
     double[] vals;
     if (colInd < mdata.NumericColumnCount){
         vals = mdata.NumericColumns[colInd];
     } else{
         vals = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(colInd - mdata.NumericColumnCount));
     }
     List<int> keptRows = new List<int>();
     for (int row = 0; row < vals.Length; row++){
         double current = vals[row];
         bool keepRow;
         if (!double.IsNaN(current)){
             // Each rule index states the condition under which a row is KEPT.
             if (ruleInd == 0){
                 keepRow = current > threshold;
             } else if (ruleInd == 1){
                 keepRow = current >= threshold;
             } else if (ruleInd == 2){
                 keepRow = current != threshold;
             } else if (ruleInd == 3){
                 keepRow = current == threshold;
             } else if (ruleInd == 4){
                 keepRow = current <= threshold;
             } else if (ruleInd == 5){
                 keepRow = current < threshold;
             } else{
                 throw new Exception("Never get here.");
             }
         } else{
             // NaN cannot be compared; its fate is decided by the "Keep NaN" flag alone.
             keepRow = keepNan;
         }
         if (keepRow){
             keptRows.Add(row);
         }
     }
     PerseusPluginUtils.FilterRows(mdata, param, keptRows.ToArray());
 }
        /// <summary>
        /// Filters the rows of <paramref name="mdata"/> by testing one string column against a
        /// search string with Matches (defined elsewhere). With "Mode" equal to 0 the matching rows
        /// are dropped, otherwise only the matching rows are kept.
        /// </summary>
        /// <param name="mdata">Matrix whose rows are filtered.</param>
        /// <param name="param">Supplies "Column", "Search string", "Mode", "Match case" and
        /// "Match whole word"; it is also forwarded to FilterRows.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error string when the search string is empty.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int colInd = param.GetSingleChoiceParam("Column").Value;
            string searchString = param.GetStringParam("Search string").Value;
            if (string.IsNullOrEmpty(searchString)){
                processInfo.ErrString = "Please provide a search string";
                return;
            }
            bool remove = param.GetSingleChoiceParam("Mode").Value == 0;
            bool matchCase = param.GetBoolParam("Match case").Value;
            bool matchWholeWord = param.GetBoolParam("Match whole word").Value;
            string[] cells = mdata.StringColumns[colInd];
            List<int> keptRows = new List<int>();
            int row = 0;
            foreach (string cell in cells){
                bool isMatch = Matches(cell, searchString, matchCase, matchWholeWord);
                // Keep matches in keep-mode and non-matches in remove-mode.
                if (isMatch != remove){
                    keptRows.Add(row);
                }
                row++;
            }
            PerseusPluginUtils.FilterRows(mdata, param, keptRows.ToArray());
        }
示例#6
0
        /// <summary>
        /// Reorders the rows of <paramref name="mdata"/> according to the values of one column.
        /// Indices below ExpressionColumnCount address expression columns, higher indices address
        /// numeric columns (offset by the expression column count).
        /// </summary>
        /// <param name="mdata">Matrix whose rows are reordered in place.</param>
        /// <param name="param">Supplies "Column" and "Descending".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int ind = param.GetSingleChoiceParam("Column").Value;
            bool descending = param.GetBoolParam("Descending").Value;
            int[] rowOrder;
            if (ind >= mdata.ExpressionColumnCount){
                double[] numericValues = mdata.NumericColumns[ind - mdata.ExpressionColumnCount];
                rowOrder = ArrayUtils.Order(numericValues);
            } else{
                float[] expressionValues = mdata.GetExpressionColumn(ind);
                rowOrder = ArrayUtils.Order(expressionValues);
            }
            if (descending){
                // The order is reversed in place for a descending sort.
                ArrayUtils.Revert(rowOrder);
            }
            mdata.ExtractExpressionRows(rowOrder);
        }
        /// <summary>
        /// Summarises the matrix per group of a category row. The "Average type" choice selects
        /// the summary function (0 median, 1 mean, 2 sum, 3 geometric mean), which is passed to
        /// FillMatrixKeep or FillMatrixDontKeep depending on "Keep original data". When
        /// "Add standard deviation" is set, AddStandardDeviation is called first. All three
        /// helpers are defined elsewhere.
        /// </summary>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="param">Supplies the average type, the grouping, the minimum number of
        /// valid values per group and the two flags.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error string when the matrix has no category rows.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int avType = param.GetSingleChoiceParam("Average type").Value;
            if (mdata.CategoryRowCount == 0){
                processInfo.ErrString = "No category rows were loaded.";
                return;
            }
            int groupColInd = param.GetSingleChoiceParam("Grouping").Value;
            int validVals = param.GetIntParam("Min. valid values per group").Value;
            bool keep = param.GetBoolParam("Keep original data").Value;
            bool sdev = param.GetBoolParam("Add standard deviation").Value;
            // Summary functions indexed by the "Average type" choice.
            Func<IList<double>, double>[] summaries = new Func<IList<double>, double>[]{
                ArrayUtils.Median, ArrayUtils.Mean, ArrayUtils.Sum, ArrayUtils.GeometricMean
            };
            if (avType < 0 || avType >= summaries.Length){
                throw new Exception("Never get here.");
            }
            Func<IList<double>, double> summary = summaries[avType];
            if (sdev) {
                AddStandardDeviation(groupColInd, validVals, mdata);
            }
            if (!keep) {
                FillMatrixDontKeep(groupColInd, validVals, mdata, summary);
            } else{
                FillMatrixKeep(groupColInd, validVals, mdata, summary);
            }
        }
        /// <summary>
        /// Adds a category column that marks with "+" every row in which one of the selected
        /// category or string columns contains at least one of the search terms as a
        /// case-insensitive substring. With "Inverse" set, the rows that do NOT match are marked.
        /// Case folding uses the invariant culture so results do not depend on the machine's
        /// regional settings.
        /// </summary>
        /// <param name="mdata">Matrix that is searched and that receives the new column.</param>
        /// <param name="param">Supplies "Name of new column", "Categories", "Inverse" and
        /// "Search terms".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error string when no search term is given.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            string colName = param.GetStringParam("Name of new column").Value;
            int[] columns = param.GetMultiChoiceParam("Categories").Value;
            bool inverse = param.GetBoolParam("Inverse").Value;
            int[] catCols;
            int[] stringCols;
            // Split (defined elsewhere) separates the selection into category and string column indices.
            Split(columns, out catCols, out stringCols, mdata.CategoryColumnCount);
            string[] word1 = param.GetMultiStringParam("Search terms").Value;
            if (word1.Length == 0){
                processInfo.ErrString = "Please specify one or more search terms.";
                return;
            }
            if (string.IsNullOrEmpty(colName)){
                // Without an explicit name the column is named after the first search term.
                colName = word1[0];
            }
            string[] terms = new string[word1.Length];
            for (int i = 0; i < terms.Length; i++){
                terms[i] = word1[i].ToLowerInvariant().Trim();
            }
            bool[] indicator = new bool[mdata.RowCount];
            foreach (int col in catCols){
                string[][] cat = mdata.GetCategoryColumnAt(col);
                for (int i = 0; i < cat.Length; i++){
                    foreach (string s in cat[i]){
                        if (ContainsAnyTerm(s, terms)){
                            indicator[i] = true;
                            // One hit is enough; the remaining categories of this row cannot change the flag.
                            break;
                        }
                    }
                }
            }
            foreach (int col in stringCols){
                string[] txt = mdata.StringColumns[col];
                for (int i = 0; i < txt.Length; i++){
                    if (ContainsAnyTerm(txt[i], terms)){
                        indicator[i] = true;
                    }
                }
            }
            string[][] newCol = new string[indicator.Length][];
            for (int i = 0; i < newCol.Length; i++){
                bool yes = inverse ? !indicator[i] : indicator[i];
                newCol[i] = yes ? new[]{"+"} : new string[0];
            }
            mdata.AddCategoryColumn(colName, "", newCol);
        }

        /// <summary>
        /// Tells whether <paramref name="text"/> contains any of the given terms, ignoring case.
        /// The text is lower-cased once, not once per term.
        /// </summary>
        /// <param name="text">Cell content to search.</param>
        /// <param name="lowerTerms">Search terms, already lower-cased with the invariant culture.</param>
        /// <returns>True when at least one term occurs in the text.</returns>
        private static bool ContainsAnyTerm(string text, string[] lowerTerms){
            string lowered = text.ToLowerInvariant();
            foreach (string term in lowerTerms){
                if (lowered.Contains(term)){
                    return true;
                }
            }
            return false;
        }