コード例 #1
0
        /// <summary>
        /// Reads the counting parameters, optionally flags the rows whose entry in the
        /// selection column contains the requested value, and hands the work over to
        /// <c>CountCategories</c> and <c>CreateMatrixData</c>.
        /// </summary>
        /// <param name="mdata">Matrix whose category columns are inspected.</param>
        /// <param name="param">Supplies "Min. count", "Selection", "Value" and "Categories".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int minCount = param.GetParam<int>("Min. count").Value;
            int selCol = param.GetParam<int>("Selection").Value;
            string value = param.GetParam<string>("Value").Value;
            int[] catIndices = param.GetParam<int[]>("Categories").Value;

            // Remains null when the selection index does not address a category column.
            bool[] selection = null;
            if (selCol < mdata.CategoryColumnCount)
            {
                selection = new bool[mdata.RowCount];
                string[][] selectionColumn = mdata.GetCategoryColumnAt(selCol);
                for (int row = 0; row < selection.Length; row++)
                {
                    string[] terms = selectionColumn[row];
                    if (terms == null)
                    {
                        continue;
                    }
                    foreach (string term in terms)
                    {
                        if (term.Equals(value))
                        {
                            // One matching term is enough to select the row.
                            selection[row] = true;
                            break;
                        }
                    }
                }
            }

            CountingResult counts = CountCategories(mdata, selection, selCol, catIndices);
            CreateMatrixData(counts, mdata, minCount, selection);
        }
コード例 #2
0
        /// <summary>
        /// Collects the category columns of <paramref name="mdata"/> located at the given
        /// indices, in the order of <paramref name="inds"/>.
        /// </summary>
        /// <param name="mdata">Matrix to read the category columns from.</param>
        /// <param name="inds">Indices of the category columns to fetch.</param>
        /// <returns>One entry per requested index.</returns>
        public static List<string[][]> GetCategoryColumns(IMatrixData mdata, IList<int> inds)
        {
            List<string[][]> columns = new List<string[][]>();
            for (int pos = 0; pos < inds.Count; pos++)
            {
                string[][] column = mdata.GetCategoryColumnAt(inds[pos]);
                columns.Add(column);
            }
            return columns;
        }
コード例 #3
0
        /// <summary>
        /// Returns every category column of <paramref name="mdata"/>, ordered by column index.
        /// </summary>
        /// <param name="mdata">Matrix to read the category columns from.</param>
        /// <returns>A list with <c>mdata.CategoryColumnCount</c> entries.</returns>
        public static List<string[][]> GetCategoryColumns(IMatrixData mdata)
        {
            List<string[][]> columns = new List<string[][]>();
            int col = 0;
            while (col < mdata.CategoryColumnCount)
            {
                columns.Add(mdata.GetCategoryColumnAt(col));
                col++;
            }
            return columns;
        }
コード例 #4
0
        /// <summary>
        /// Feeds every requested category column except the selection column into
        /// <c>CountTerms</c> and returns the sorted counting result.
        /// </summary>
        /// <param name="data">Matrix providing the category columns and their names.</param>
        /// <param name="selection">Row selection forwarded unchanged to <c>CountTerms</c>; may be null.</param>
        /// <param name="selCol">Index of the selection column, which is excluded from counting.</param>
        /// <param name="catIndices">Indices of the category columns to count.</param>
        private static CountingResult CountCategories(IMatrixData data, bool[] selection, int selCol,
                                                      IEnumerable<int> catIndices)
        {
            CountingResult counts = new CountingResult();

            IEnumerable<int> columnsToCount = from column in catIndices
                                              where column != selCol
                                              select column;
            foreach (int column in columnsToCount)
            {
                string columnName = data.CategoryColumnNames[column];
                CountTerms(columnName, data.GetCategoryColumnAt(column), counts, selection);
            }

            counts.Sort();
            return counts;
        }
コード例 #5
0
        /// <summary>
        /// Returns the indices of all category columns that <c>IsInvalidCatColumn</c>
        /// does not reject, in ascending order.
        /// </summary>
        /// <param name="data">Matrix whose category columns are checked.</param>
        private static int[] GetValidCatCols(IMatrixData data)
        {
            List<int> accepted = new List<int>();
            for (int col = 0; col < data.CategoryColumnCount; col++)
            {
                string[][] column = data.GetCategoryColumnAt(col);
                if (IsInvalidCatColumn(column))
                {
                    continue;
                }
                accepted.Add(col);
            }
            return accepted.ToArray();
        }
コード例 #6
0
        /// <summary>
        /// Adds a new category column whose rows are produced by <c>CombineTerms</c> from the
        /// corresponding rows of the two selected category columns. The new column is named
        /// "&lt;first&gt;_&lt;second&gt;" after the source columns.
        /// </summary>
        /// <param name="mdata">Matrix that is read and extended in place.</param>
        /// <param name="param">Supplies "First column" and "Second column".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when fewer than two category columns exist.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            if (mdata.CategoryColumnCount < 2)
            {
                processInfo.ErrString = "There are less than two categorical columns available.";
                return;
            }

            int firstIndex = param.GetParam<int>("First column").Value;
            int secondIndex = param.GetParam<int>("Second column").Value;
            string[][] firstColumn = mdata.GetCategoryColumnAt(firstIndex);
            string[][] secondColumn = mdata.GetCategoryColumnAt(secondIndex);

            // The first column determines the number of rows of the combined column.
            string[][] combined = new string[firstColumn.Length][];
            for (int row = 0; row < combined.Length; row++)
            {
                combined[row] = CombineTerms(firstColumn[row], secondColumn[row]);
            }

            string combinedName = string.Concat(mdata.CategoryColumnNames[firstIndex], "_",
                                                mdata.CategoryColumnNames[secondIndex]);
            mdata.AddCategoryColumn(combinedName, "", combined);
        }
コード例 #7
0
 /// <summary>
 /// Duplicates the selected main, numerical, multi-numerical, categorical and text columns
 /// of <paramref name="data"/>. Every copy receives a name obtained from
 /// <c>StringUtils.GetNextAvailableName</c> so that it does not collide with the names
 /// already present for its column type.
 /// </summary>
 /// <param name="data">Matrix that is extended in place.</param>
 /// <param name="param">
 /// Supplies the index arrays "Main columns", "Numerical columns", "Multi-numerical columns",
 /// "Categorical columns" and "Text columns".
 /// </param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="documents">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     int[] exColInds       = param.GetParam<int[]>("Main columns").Value;
     int[] numColInds      = param.GetParam<int[]>("Numerical columns").Value;
     int[] multiNumColInds = param.GetParam<int[]>("Multi-numerical columns").Value;
     int[] catColInds      = param.GetParam<int[]>("Categorical columns").Value;
     int[] textColInds     = param.GetParam<int[]>("Text columns").Value;
     if (exColInds.Length > 0)
     {
         // Re-extract all existing main columns followed by the selected ones, so the
         // copies end up at positions ncol .. ncol + exColInds.Length - 1.
         int ncol = data.ColumnCount;
         data.ExtractColumns(ArrayUtils.Concat(ArrayUtils.ConsecutiveInts(data.ColumnCount), exColInds));
         HashSet<string> taken = new HashSet<string>(data.ColumnNames);
         for (int i = 0; i < exColInds.Length; i++)
         {
             string s = StringUtils.GetNextAvailableName(data.ColumnNames[ncol + i], taken);
             data.ColumnNames[ncol + i] = s;
             taken.Add(s);
         }
     }
     // In the loops below the set of taken names is rebuilt from the live name list on every
     // iteration, so names of copies added earlier in the same loop are taken into account.
     foreach (int ind in numColInds)
     {
         HashSet<string> taken = new HashSet<string>(data.NumericColumnNames);
         string s = StringUtils.GetNextAvailableName(data.NumericColumnNames[ind], taken);
         data.AddNumericColumn(s, data.NumericColumnDescriptions[ind], (double[])data.NumericColumns[ind].Clone());
     }
     foreach (int ind in multiNumColInds)
     {
         HashSet<string> taken = new HashSet<string>(data.MultiNumericColumnNames);
         string s = StringUtils.GetNextAvailableName(data.MultiNumericColumnNames[ind], taken);
         // NOTE(review): Clone() on a jagged array is shallow - the copy shares the inner
         // double[] rows with the source column. Confirm that this is acceptable.
         data.AddMultiNumericColumn(s, data.MultiNumericColumnDescriptions[ind],
                                    (double[][])data.MultiNumericColumns[ind].Clone());
     }
     foreach (int ind in catColInds)
     {
         HashSet<string> taken = new HashSet<string>(data.CategoryColumnNames);
         string s = StringUtils.GetNextAvailableName(data.CategoryColumnNames[ind], taken);
         data.AddCategoryColumn(s, data.CategoryColumnDescriptions[ind], data.GetCategoryColumnAt(ind));
     }
     foreach (int ind in textColInds)
     {
         HashSet<string> taken = new HashSet<string>(data.StringColumnNames);
         string s = StringUtils.GetNextAvailableName(data.StringColumnNames[ind], taken);
         // The description must come from the string-column descriptions; indexing the
         // main-column descriptions with a string-column index yields the wrong text or
         // an out-of-range error.
         data.AddStringColumn(s, data.StringColumnDescriptions[ind], (string[])data.StringColumns[ind].Clone());
     }
 }
コード例 #8
0
 /// <summary>
 /// Builds a per-row flag telling whether the category column at
 /// <paramref name="catColInd"/> contains <paramref name="word"/> in that row.
 /// </summary>
 /// <param name="falseAreIndicated">When true the flags are inverted, i.e. rows NOT containing the word are marked.</param>
 /// <param name="catColInd">Index of the category column to inspect.</param>
 /// <param name="word">Term to look for (exact match).</param>
 /// <param name="data">Matrix providing the category column and the row count.</param>
 /// <returns>An array with one flag per row of <paramref name="data"/>.</returns>
 public static bool[] GetIndicatorColumn(bool falseAreIndicated, int catColInd, string word, IMatrixData data)
 {
     string[][] catCol = data.GetCategoryColumnAt(catColInd);
     bool[] result = new bool[data.RowCount];
     for (int i = 0; i < result.Length; i++){
         string[] cats = catCol[i];
         // A plain linear search: sorting the row first would reorder the caller-visible
         // term array as a side effect of a read-only query. A null row contains nothing.
         bool contains = cats != null && Array.IndexOf(cats, word) >= 0;
         result[i] = falseAreIndicated ? !contains : contains;
     }
     return result;
 }
コード例 #9
0
        /// <summary>
        /// Selects the rows whose indicator column contains the given term, centers each of
        /// those rows on its own mean, takes the column-wise median of the centered rows
        /// (ignoring NaN and infinite entries) and subtracts that median profile from every
        /// row of the matrix.
        /// </summary>
        /// <param name="mdata">Matrix whose main values are modified in place.</param>
        /// <param name="param">Supplies "Indicator column" and "Value".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int indicatorIndex = param.GetParam<int>("Indicator column").Value;
            string[][] indicator = mdata.GetCategoryColumnAt(indicatorIndex);
            string term = param.GetParam<string>("Value").Value;

            // Rows that carry the term in the indicator column.
            List<int> matchingRows = new List<int>();
            for (int row = 0; row < indicator.Length; row++)
            {
                if (Contains(indicator[row], term))
                {
                    matchingRows.Add(row);
                }
            }

            // Mean-centered copies of the matching rows.
            double[][] centered = new double[matchingRows.Count][];
            for (int p = 0; p < centered.Length; p++)
            {
                centered[p] = ArrayUtils.ToDoubles(mdata.Values.GetRow(matchingRows[p]));
                double rowMean = ArrayUtils.Mean(centered[p]);
                for (int c = 0; c < centered[p].Length; c++)
                {
                    centered[p][c] -= rowMean;
                }
            }

            // Column-wise median over the finite values; NaN when a column has none.
            double[] medianProfile = new double[mdata.ColumnCount];
            for (int col = 0; col < medianProfile.Length; col++)
            {
                List<double> finite = new List<double>();
                foreach (double[] profile in centered)
                {
                    double v = profile[col];
                    if (!double.IsNaN(v) && !double.IsInfinity(v))
                    {
                        finite.Add(v);
                    }
                }
                medianProfile[col] = finite.Count > 0 ? ArrayUtils.Median(finite) : double.NaN;
            }

            for (int row = 0; row < mdata.RowCount; row++)
            {
                for (int col = 0; col < mdata.ColumnCount; col++)
                {
                    mdata.Values.Set(row, col, mdata.Values.Get(row, col) - medianProfile[col]);
                }
            }
        }
コード例 #10
0
        /// <summary>
        /// Collapses the rows listed in <paramref name="rowIdxs"/> into the first listed row:
        /// for every column of every column type the values of the listed rows are passed to
        /// the matching combine function and the result is written to that first row.
        /// Does nothing when the list is empty. The other listed rows are left as they are.
        /// </summary>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="rowIdxs">Rows to combine; the first entry receives the result.</param>
        /// <param name="combineNumeric">Combines main values and numeric-column values.</param>
        /// <param name="combineString">Combines string-column values.</param>
        /// <param name="combineCategory">Combines category-column values.</param>
        /// <param name="combineMultiNumeric">Combines multi-numeric-column values.</param>
        public static void CombineRows(this IMatrixData mdata, List<int> rowIdxs, Func<double[], double> combineNumeric,
                                       Func<string[], string> combineString, Func<string[][], string[]> combineCategory,
                                       Func<double[][], double[]> combineMultiNumeric)
        {
            if (!rowIdxs.Any())
            {
                return;
            }

            int targetRow = rowIdxs[0];

            // Main matrix values.
            for (int col = 0; col < mdata.Values.ColumnCount; col++)
            {
                BaseVector whole = mdata.Values.GetColumn(col);
                BaseVector picked = whole.SubArray(rowIdxs);
                mdata.Values[targetRow, col] = combineNumeric(ArrayUtils.ToDoubles(picked));
            }

            // Numeric annotation columns.
            for (int col = 0; col < mdata.NumericColumnCount; col++)
            {
                double[] whole = mdata.NumericColumns[col];
                double[] picked = ArrayUtils.SubArray(whole, rowIdxs);
                whole[targetRow] = combineNumeric(picked);
            }

            // String annotation columns.
            for (int col = 0; col < mdata.StringColumnCount; col++)
            {
                string[] whole = mdata.StringColumns[col];
                string[] picked = ArrayUtils.SubArray(whole, rowIdxs);
                whole[targetRow] = combineString(picked);
            }

            // Category columns are fetched, edited and written back explicitly.
            for (int col = 0; col < mdata.CategoryColumnCount; col++)
            {
                string[][] whole = mdata.GetCategoryColumnAt(col);
                string[][] picked = ArrayUtils.SubArray(whole, rowIdxs);
                whole[targetRow] = combineCategory(picked);
                mdata.SetCategoryColumnAt(whole, col);
            }

            // Multi-numeric annotation columns.
            for (int col = 0; col < mdata.MultiNumericColumnCount; col++)
            {
                double[][] whole = mdata.MultiNumericColumns[col];
                double[][] picked = ArrayUtils.SubArray(whole, rowIdxs);
                whole[targetRow] = combineMultiNumeric(picked);
            }
        }
コード例 #11
0
 /// <summary>
 /// Builds a per-row flag telling whether the category column at
 /// <paramref name="catColInd"/> contains <paramref name="word"/> in that row.
 /// </summary>
 /// <param name="falseAreIndicated">When true the flags are inverted, i.e. rows NOT containing the word are marked.</param>
 /// <param name="catColInd">Index of the category column to inspect.</param>
 /// <param name="word">Term to look for (exact match).</param>
 /// <param name="data">Matrix providing the category column and the row count.</param>
 /// <returns>An array with one flag per row of <paramref name="data"/>.</returns>
 public static bool[] GetIndicatorColumn(bool falseAreIndicated, int catColInd, string word, IMatrixData data)
 {
     string[][] catCol = data.GetCategoryColumnAt(catColInd);
     bool[]     result = new bool[data.RowCount];
     for (int i = 0; i < result.Length; i++)
     {
         string[] cats = catCol[i];
         // A plain linear search: sorting the row first would reorder the caller-visible
         // term array as a side effect of a read-only query. A null row contains nothing.
         bool contains = cats != null && Array.IndexOf(cats, word) >= 0;
         result[i] = falseAreIndicated ? !contains : contains;
     }
     return(result);
 }
コード例 #12
0
        /// <summary>
        /// Selects the rows whose indicator column contains the given term, centers each of
        /// those rows on its own mean (rounded to single precision), takes the column-wise
        /// median of the centered rows (ignoring NaN and infinite entries) and subtracts that
        /// median profile, cast to single precision, from every row of the matrix.
        /// </summary>
        /// <param name="mdata">Matrix whose main values are modified in place.</param>
        /// <param name="param">Supplies "Indicator column" and "Value".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int indicatorIndex = param.GetParam<int>("Indicator column").Value;
            string[][] indicator = mdata.GetCategoryColumnAt(indicatorIndex);
            string term = param.GetParam<string>("Value").Value;

            // Rows that carry the term in the indicator column.
            List<int> matchingRows = new List<int>();
            for (int row = 0; row < indicator.Length; row++){
                if (Contains(indicator[row], term)){
                    matchingRows.Add(row);
                }
            }

            // Mean-centered copies of the matching rows.
            double[][] centered = new double[matchingRows.Count][];
            for (int p = 0; p < centered.Length; p++){
                centered[p] = ArrayUtils.ToDoubles(mdata.Values.GetRow(matchingRows[p]));
                float rowMean = (float) ArrayUtils.Mean(centered[p]);
                for (int c = 0; c < centered[p].Length; c++){
                    centered[p][c] -= rowMean;
                }
            }

            // Column-wise median over the finite values; NaN when a column has none.
            double[] medianProfile = new double[mdata.ColumnCount];
            for (int col = 0; col < medianProfile.Length; col++){
                List<double> finite = new List<double>();
                foreach (double[] profile in centered){
                    double v = profile[col];
                    if (!double.IsNaN(v) && !double.IsInfinity(v)){
                        finite.Add(v);
                    }
                }
                medianProfile[col] = finite.Count > 0 ? ArrayUtils.Median(finite) : double.NaN;
            }

            for (int row = 0; row < mdata.RowCount; row++){
                for (int col = 0; col < mdata.ColumnCount; col++){
                    mdata.Values.Set(row, col, mdata.Values.Get(row, col)-(float) medianProfile[col]);
                }
            }
        }
コード例 #13
0
 /// <summary>
 /// Returns the indices of all rows whose entry in the category column named
 /// <paramref name="categoryName"/> contains at least one term from <paramref name="values"/>.
 /// </summary>
 /// <param name="data">Matrix providing the category column.</param>
 /// <param name="categoryName">Name of the category column; resolved through <c>GetIndexOf</c>.</param>
 /// <param name="values">Terms to look for.</param>
 /// <returns>Matching row indices in ascending order.</returns>
 public static int[] GetIndicesOf(IMatrixData data, string categoryName, HashSet<string> values)
 {
     int index = GetIndexOf(data, categoryName);
     List<int> result = new List<int>();
     int rowCount = data.RowCount;
     if (rowCount == 0){
         // Nothing to scan; avoid touching the column at all, as before.
         return result.ToArray();
     }
     // Fetch the column once instead of once per row: the lookup is loop-invariant and
     // may be costly if the implementation materializes the column on every call.
     string[][] column = data.GetCategoryColumnAt(index);
     for (int i = 0; i < rowCount; i++){
         string[] terms = column[i];
         if (terms == null){
             // A row without terms cannot match.
             continue;
         }
         foreach (string term in terms){
             if (values.Contains(term)){
                 result.Add(i);
                 break;
             }
         }
     }
     return result.ToArray();
 }
コード例 #14
0
        /// <summary>
        /// Expands rows along the selected multi-numeric and string columns. For each input row
        /// <c>GetEntryCount</c> determines how many output rows are produced (at least one).
        /// Selected multi-numeric columns contribute one value per output row and are finally
        /// turned into numeric columns via <c>Transform</c>; selected string columns are split
        /// at ';' with one piece per output row. All other data is repeated on every output row.
        /// The matrix content is replaced through <c>mdata.SetData</c>.
        /// </summary>
        /// <param name="mdata">Matrix that is read and then overwritten.</param>
        /// <param name="param1">Supplies "Multi-numeric columns" and "String columns".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when no column is selected or when <c>GetEntryCount</c> reports an error.</param>
        public void ProcessData(IMatrixData mdata, Parameters param1, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] multiNumCols = param1.GetMultiChoiceParam("Multi-numeric columns").Value;
            Array.Sort(multiNumCols);
            int[] stringCols = param1.GetMultiChoiceParam("String columns").Value;
            Array.Sort(stringCols);
            // Set versions of the selections for membership tests inside the row loop.
            HashSet<int> multinumCols2 = new HashSet<int>(multiNumCols);
            HashSet<int> stringCols2 = new HashSet<int>(stringCols);
            if (multiNumCols.Length + stringCols.Length == 0){
                processInfo.ErrString = "Please select some columns.";
                return;
            }
            // Allocate the output containers for the expanded row count.
            int rowCount = GetNewRowCount(mdata, multiNumCols, stringCols);
            float[,] expVals = new float[rowCount,mdata.ExpressionColumnCount];
            List<string[]> stringC = new List<string[]>();
            for (int i = 0; i < mdata.StringColumnCount; i++){
                stringC.Add(new string[rowCount]);
            }
            List<double[]> numC = new List<double[]>();
            for (int i = 0; i < mdata.NumericColumnCount; i++){
                numC.Add(new double[rowCount]);
            }
            List<string[][]> catC = new List<string[][]>();
            for (int i = 0; i < mdata.CategoryColumnCount; i++){
                catC.Add(new string[rowCount][]);
            }
            List<double[][]> multiNumC = new List<double[][]>();
            for (int i = 0; i < mdata.MultiNumericColumnCount; i++){
                multiNumC.Add(new double[rowCount][]);
            }
            // 'count' is the index of the first output row belonging to input row i.
            int count = 0;
            for (int i = 0; i < mdata.RowCount; i++){
                string err;
                int entryCount = GetEntryCount(i, mdata, multiNumCols, stringCols, out err);
                if (err != null){
                    processInfo.ErrString = err;
                    return;
                }
                // A row without entries still yields exactly one output row.
                bool empty = entryCount == 0;
                entryCount = Math.Max(entryCount, 1);
                // Expression, numeric and category values are repeated on every output row.
                for (int j = 0; j < entryCount; j++){
                    for (int k = 0; k < mdata.ExpressionColumnCount; k++){
                        expVals[count + j, k] = mdata[i, k];
                    }
                    for (int k = 0; k < mdata.NumericColumnCount; k++){
                        numC[k][count + j] = mdata.NumericColumns[k][i];
                    }
                    for (int k = 0; k < mdata.CategoryColumnCount; k++){
                        catC[k][count + j] = mdata.GetCategoryColumnAt(k)[i];
                    }
                }
                for (int k = 0; k < mdata.MultiNumericColumnCount; k++){
                    if (multinumCols2.Contains(k)){
                        // Selected column: one single-element array per output row.
                        if (empty){
                            multiNumC[k][count] = new double[0];
                        } else{
                            double[] vals = mdata.MultiNumericColumns[k][i];
                            for (int j = 0; j < entryCount; j++){
                                multiNumC[k][count + j] = new[]{vals[j]};
                            }
                        }
                    } else{
                        // Unselected column: the same array reference is reused for every output row.
                        for (int j = 0; j < entryCount; j++){
                            multiNumC[k][count + j] = mdata.MultiNumericColumns[k][i];
                        }
                    }
                }
                for (int k = 0; k < mdata.StringColumnCount; k++){
                    if (stringCols2.Contains(k)){
                        // Selected column: split at ';' and distribute the pieces over the output rows.
                        if (empty){
                            stringC[k][count] = "";
                        } else{
                            string[] vals = mdata.StringColumns[k][i].Split(';');
                            for (int j = 0; j < entryCount; j++){
                                stringC[k][count + j] = vals[j];
                            }
                        }
                    } else{
                        for (int j = 0; j < entryCount; j++){
                            stringC[k][count + j] = mdata.StringColumns[k][i];
                        }
                    }
                }
                count += entryCount;
            }
            // Move the selected multi-numeric columns to the end of the numeric columns
            // (converted by Transform) and keep only the unselected ones as multi-numeric.
            int[] multiNumComplement = ArrayUtils.Complement(multiNumCols, mdata.MultiNumericColumnCount);
            List<double[][]> toBeTransformed = ArrayUtils.SubList(multiNumC, multiNumCols);
            multiNumC = ArrayUtils.SubList(multiNumC, multiNumComplement);
            foreach (double[][] d in toBeTransformed){
                numC.Add(Transform(d));
            }
            // The name lists are rearranged the same way as the columns above.
            mdata.SetData(mdata.Name, mdata.ExpressionColumnNames, expVals, mdata.StringColumnNames, stringC,
                mdata.CategoryColumnNames, catC,
                new List<string>(ArrayUtils.Concat(mdata.NumericColumnNames,
                    ArrayUtils.SubList(mdata.MultiNumericColumnNames, multiNumCols))), numC,
                new List<string>(ArrayUtils.SubArray(mdata.MultiNumericColumnNames, multiNumComplement)), multiNumC);
        }
コード例 #15
0
        /// <summary>
        /// Adds a category column that marks with "+" every row in which one of the selected
        /// category or string columns contains one of the search terms as a case-insensitive
        /// substring. With "Inverse" set, the rows WITHOUT a hit are marked instead.
        /// </summary>
        /// <param name="mdata">Matrix that is searched and extended in place.</param>
        /// <param name="param">
        /// Supplies "Name of new column", "Categories", "Inverse" and "Search terms". When the
        /// column name is empty, the first search term is used as the name.
        /// </param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when no search term is given.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            string colName = param.GetStringParam("Name of new column").Value;
            int[] columns = param.GetMultiChoiceParam("Categories").Value;
            bool inverse = param.GetBoolParam("Inverse").Value;
            int[] catCols;
            int[] stringCols;
            Split(columns, out catCols, out stringCols, mdata.CategoryColumnCount);
            string[] word1 = param.GetMultiStringParam("Search terms").Value;
            if (word1.Length == 0){
                processInfo.ErrString = "Please specify one or more search terms.";
                return;
            }
            if (string.IsNullOrEmpty(colName)){
                colName = word1[0];
            }
            // Normalize the terms once; the texts are lower-cased the same way when compared.
            string[] word = new string[word1.Length];
            for (int i = 0; i < word.Length; i++){
                word[i] = word1[i].ToLower().Trim();
            }
            bool[] indicator = new bool[mdata.RowCount];
            foreach (int col in catCols){
                string[][] cat = mdata.GetCategoryColumnAt(col);
                for (int i = 0; i < cat.Length; i++){
                    // Rows already flagged by an earlier column need no further scanning.
                    if (indicator[i] || cat[i] == null){
                        continue;
                    }
                    foreach (string s in cat[i]){
                        if (ContainsAnySearchTerm(s, word)){
                            indicator[i] = true;
                            break;
                        }
                    }
                }
            }
            foreach (int col in stringCols){
                string[] txt = mdata.StringColumns[col];
                for (int i = 0; i < txt.Length; i++){
                    if (!indicator[i] && ContainsAnySearchTerm(txt[i], word)){
                        indicator[i] = true;
                    }
                }
            }
            string[][] newCol = new string[indicator.Length][];
            for (int i = 0; i < newCol.Length; i++){
                bool yes = inverse ? !indicator[i] : indicator[i];
                newCol[i] = yes ? new[]{"+"} : new string[0];
            }
            mdata.AddCategoryColumn(colName, "", newCol);
        }

        /// <summary>
        /// Tells whether the lower-cased <paramref name="text"/> contains any of the
        /// already lower-cased terms. A null text never matches.
        /// </summary>
        private static bool ContainsAnySearchTerm(string text, string[] lowerCaseTerms)
        {
            if (text == null){
                return false;
            }
            // Lower-case the text once rather than once per term.
            string lowered = text.ToLower();
            foreach (string term in lowerCaseTerms){
                if (lowered.Contains(term)){
                    return true;
                }
            }
            return false;
        }
コード例 #16
0
        /// <summary>
        /// Merges rows that are grouped together by <c>ClusterRows</c> on the basis of the
        /// identifiers in the chosen ID column. Every column of every column type is replaced
        /// by a column with one entry per row group, computed by the matching <c>Average</c>
        /// overload. Rows without an identifier are kept as additional groups only when
        /// "Keep rows without ID" is set.
        /// </summary>
        /// <param name="mdata">Matrix whose columns are replaced in place.</param>
        /// <param name="param">
        /// Supplies "Keep rows without ID", "Average type for expression columns", "ID column"
        /// and one "Average type for &lt;name&gt;" choice per numeric column.
        /// </param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            bool keepRowsWithoutId = param.GetBoolParam("Keep rows without ID").Value;
            AverageType expressionAverage =
                GetAverageType(param.GetSingleChoiceParam("Average type for expression columns").Value);
            string[] rawIds = mdata.StringColumns[param.GetSingleChoiceParam("ID column").Value];
            string[][] idsPerRow = SplitIds(rawIds);

            int[] presentRows;
            int[] absentRows;
            GetPresentAbsentIndices(idsPerRow, out presentRows, out absentRows);
            idsPerRow = ArrayUtils.SubArray(idsPerRow, presentRows);

            // Start with one single-row group per row that has an identifier.
            int[][] groups = new int[presentRows.Length][];
            for (int g = 0; g < groups.Length; g++){
                groups[g] = new[]{presentRows[g]};
            }
            ClusterRows(ref groups, ref idsPerRow);
            if (keepRowsWithoutId){
                groups = ProlongRowInds(groups, absentRows);
            }

            int groupCount = groups.Length;
            int expressionColumnCount = mdata.ExpressionColumnCount;

            // Expression matrix.
            float[,] mergedExpression = new float[groupCount,expressionColumnCount];
            for (int col = 0; col < expressionColumnCount; col++){
                float[] source = mdata.GetExpressionColumn(col);
                for (int g = 0; g < groupCount; g++){
                    float[] members = ArrayUtils.SubArray(source, groups[g]);
                    mergedExpression[g, col] = Average(members, expressionAverage);
                }
            }
            mdata.ExpressionValues = mergedExpression;

            // Numeric columns, each with its own averaging choice.
            for (int col = 0; col < mdata.NumericColumnCount; col++){
                string columnName = mdata.NumericColumnNames[col];
                AverageType columnAverage =
                    GetAverageType(param.GetSingleChoiceParam("Average type for " + columnName).Value);
                double[] source = mdata.NumericColumns[col];
                double[] merged = new double[groupCount];
                for (int g = 0; g < groupCount; g++){
                    double[] members = ArrayUtils.SubArray(source, groups[g]);
                    merged[g] = Average(members, columnAverage);
                }
                mdata.NumericColumns[col] = merged;
            }

            // Category columns.
            for (int col = 0; col < mdata.CategoryColumnCount; col++){
                string[][] source = mdata.GetCategoryColumnAt(col);
                string[][] merged = new string[groupCount][];
                for (int g = 0; g < groupCount; g++){
                    string[][] members = ArrayUtils.SubArray(source, groups[g]);
                    merged[g] = Average(members);
                }
                mdata.SetCategoryColumnAt(merged,col);
            }

            // String columns.
            for (int col = 0; col < mdata.StringColumnCount; col++){
                string[] source = mdata.StringColumns[col];
                string[] merged = new string[groupCount];
                for (int g = 0; g < groupCount; g++){
                    string[] members = ArrayUtils.SubArray(source, groups[g]);
                    merged[g] = Average(members);
                }
                mdata.StringColumns[col] = merged;
            }

            // Multi-numeric columns.
            for (int col = 0; col < mdata.MultiNumericColumnCount; col++){
                double[][] source = mdata.MultiNumericColumns[col];
                double[][] merged = new double[groupCount][];
                for (int g = 0; g < groupCount; g++){
                    double[][] members = ArrayUtils.SubArray(source, groups[g]);
                    merged[g] = Average(members);
                }
                mdata.MultiNumericColumns[col] = merged;
            }
        }
コード例 #17
0
 /// <summary>
 /// Collects the category columns of <paramref name="mdata"/> located at the given
 /// indices, in the order of <paramref name="inds"/>.
 /// </summary>
 /// <param name="mdata">Matrix to read the category columns from.</param>
 /// <param name="inds">Indices of the category columns to fetch.</param>
 /// <returns>One entry per requested index.</returns>
 public static List<string[][]> GetCategoryColumns(IMatrixData mdata, IList<int> inds)
 {
     List<string[][]> columns = new List<string[][]>();
     for (int pos = 0; pos < inds.Count; pos++){
         string[][] column = mdata.GetCategoryColumnAt(inds[pos]);
         columns.Add(column);
     }
     return columns;
 }
コード例 #18
0
        /// <summary>
        /// Reads the counting parameters, optionally flags the rows whose entry in the
        /// selection column contains the requested value, and hands the work over to
        /// <c>CountCategories</c> and <c>CreateMatrixData</c>.
        /// </summary>
        /// <param name="mdata">Matrix whose category columns are inspected.</param>
        /// <param name="param">Supplies "Min. count", "Selection", "Value" and "Categories".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int minCount = param.GetIntParam("Min. count").Value;
            int selCol = param.GetSingleChoiceParam("Selection").Value;
            string value = param.GetStringParam("Value").Value;
            int[] catIndices = param.GetMultiChoiceParam("Categories").Value;

            // Remains null when the selection index does not address a category column.
            bool[] selection = null;
            if (selCol < mdata.CategoryColumnCount){
                selection = new bool[mdata.RowCount];
                string[][] selectionColumn = mdata.GetCategoryColumnAt(selCol);
                for (int row = 0; row < selection.Length; row++){
                    string[] terms = selectionColumn[row];
                    if (terms == null){
                        continue;
                    }
                    foreach (string term in terms){
                        if (term.Equals(value)){
                            // One matching term is enough to select the row.
                            selection[row] = true;
                            break;
                        }
                    }
                }
            }

            CountingResult counts = CountCategories(mdata, selection, selCol, catIndices);
            CreateMatrixData(counts, mdata, minCount, selection);
        }
コード例 #19
0
        protected void GetExperminetValues(IMatrixData summary, IMatrixData experimentalDesignTemplate,
                                           IMatrixData experiment, IMatrixData spectraRef, ref List<MsRunImpl> msruns,
                                           ref List<StudyVariable> studyvariables, ref List<Assay> assays,
                                           ref List<Sample> samples, ref List<Instrument> instruments)
        {
            if (msruns == null) {
                msruns = new List<MsRunImpl>();
            }

            if (studyvariables == null) {
                studyvariables = new List<StudyVariable>();
            }

            if (assays == null) {
                assays = new List<Assay>();
            }

            if (samples == null) {
                samples = new List<Sample>();
            }

            if (instruments == null) {
                instruments = new List<Instrument>();
            }

            #region parse experiment

            if (experiment != null) {
                int studyvarIndex = experiment.StringColumnNames.IndexOf(MetadataElement.STUDY_VARIABLE.Name);
                int assayIndex = experiment.StringColumnNames.IndexOf(MetadataElement.ASSAY.Name);
                int msrunIndex = experiment.StringColumnNames.IndexOf(MetadataElement.MS_RUN.Name);
                int sampleIndex = experiment.StringColumnNames.IndexOf(MetadataElement.SAMPLE.Name);

                Regex sampleRegex = new Regex(@"^([^\[]+) <([^;]*);([^;]*);([^;]*);([^;]*)>");
                Regex runRegex = new Regex(@"^([^\[]+) <([^;]*);([^;]*);([^;]*);([^;]*)>");
                Regex assayRegex = new Regex(@"^([^\[]+) <([^>]*)>");

                for (int row = 0; row < experiment.RowCount; row++) {
                    string studyvariableDescription = experiment.StringColumns[studyvarIndex][row];
                    string assayReagent = experiment.StringColumns[assayIndex][row];
                    string msrunText = experiment.StringColumns[msrunIndex][row];
                    string sampleDescription = experiment.StringColumns[sampleIndex][row];
                    Lib.Model.Param specie = null;
                    Lib.Model.Param tissue = null;
                    Lib.Model.Param cellType = null;
                    Lib.Model.Param disease = null;
                    IList<Lib.Model.Param> mod = new List<Lib.Model.Param>();

                    if (sampleDescription != null && sampleRegex.IsMatch(sampleDescription)) {
                        var match = sampleRegex.Match(sampleDescription);
                        sampleDescription = match.Groups[1].Value;

                        string temp = match.Groups[2].Value;
                        if (!String.IsNullOrEmpty(temp)) {
                            specie = cv.GetParam(temp, "NEWT");
                        }

                        temp = match.Groups[3].Value;
                        if (!String.IsNullOrEmpty(temp)) {
                            tissue = cv.GetParam(temp, "BTO");
                        }

                        temp = match.Groups[4].Value;
                        if (!String.IsNullOrEmpty(temp)) {
                            cellType = cv.GetParam(temp, "CL");
                        }

                        temp = match.Groups[5].Value;
                        if (!String.IsNullOrEmpty(temp)) {
                            disease = cv.GetParam(temp, "DOID");
                        }
                    }
                    if (assayRegex != null && assayRegex.IsMatch(assayReagent)) {
                        var match = assayRegex.Match(assayReagent);
                        string temp = match.Groups[2].Value;
                        if (!String.IsNullOrEmpty(temp)) {
                            foreach (var t in temp.Split(';')) {
                                mod.Add(cv.GetParam(t, "PRIDE"));
                            }
                        }

                        assayReagent = match.Groups[1].Value;
                    }

                    string filename = null;
                    string path = null;
                    Lib.Model.Param format = null;
                    Lib.Model.Param idformat = null;
                    Lib.Model.Param fragementaion = null;
                    if (runRegex != null && runRegex.IsMatch(msrunText)) {
                        var match = runRegex.Match(msrunText);
                        filename = match.Groups[1].Value;

                        string temp = match.Groups[2].Value;
                        if (!String.IsNullOrEmpty(temp)) {
                            path = temp;
                        }

                        temp = match.Groups[3].Value;
                        if (!String.IsNullOrEmpty(temp)) {
                            format = cv.GetParam(temp, "MS");
                        }

                        temp = match.Groups[4].Value;
                        if (!String.IsNullOrEmpty(temp)) {
                            idformat = cv.GetParam(temp, "MS");
                        }

                        temp = match.Groups[5].Value;
                        if (!String.IsNullOrEmpty(temp)) {
                            fragementaion = cv.GetParam(temp, "MS");
                        }
                    }

                    StudyVariable studyvariable;
                    if (!studyvariables.Any(x => x.Description.Equals(studyvariableDescription))) {
                        studyvariable = new StudyVariable(studyvariables.Count + 1) {
                            Description = studyvariableDescription
                        };
                        studyvariables.Add(studyvariable);
                    } else {
                        studyvariable = studyvariables.First(x => x.Description.Equals(studyvariableDescription));
                    }

                    Assay assay = new Assay(assays.Count + 1) {
                        QuantificationReagent = cv.GetParam(assayReagent, "PRIDE")
                    };

                    foreach (var m in mod) {
                        if (m == null) {
                            continue;
                        }
                        assay.addQuantificationMod(new AssayQuantificationMod(assay,
                                                                              assay.QuantificationModMap.Count + 1) {
                                                                                  Param = m
                                                                              });
                    }

                    assays.Add(assay);

                    MsRunImpl msrun;
                    if (!String.IsNullOrEmpty(filename) &&
                        !msruns.Any(x => x.Description != null && x.Description.Equals(filename))) {
                        msrun = new MsRunImpl(msruns.Count + 1) {
                            Format = format,
                            IdFormat = idformat,
                            FragmentationMethod = fragementaion
                        };

                        msruns.Add(msrun);
                        msrun.Location = new Url(String.IsNullOrEmpty(path) ? filename : Path.Combine(path, filename));
                    } else {
                        msrun = msruns.First(x => x.Description != null && x.Description.Equals(filename));
                    }

                    Sample sample;
                    if (!samples.Any(x => x.Description.Equals(sampleDescription))) {
                        sample = new Sample(samples.Count + 1) { Description = sampleDescription };
                        if (specie != null) {
                            sample.AddSpecies(specie);
                        }
                        if (tissue != null) {
                            sample.AddTissue(tissue);
                        }
                        if (cellType != null) {
                            sample.AddCellType(cellType);
                        }
                        if (disease != null) {
                            sample.AddDisease(disease);
                        }
                        samples.Add(sample);
                    } else {
                        sample = samples.First(x => x.Description.Equals(sampleDescription));
                    }

                    if (!studyvariable.AssayMap.ContainsKey(assay.Id)) {
                        studyvariable.AddAssay(assay);
                    }
                    if (!studyvariable.SampleMap.ContainsKey(sample.Id)) {
                        studyvariable.AddSample(sample);
                    }

                    assay.MsRun = msrun;
                    assay.Sample = sample;
                }
            }

            #endregion

            Dictionary<int, IList<string>> dictionary = new Dictionary<int, IList<string>>();

            #region parse experimentalDesign

            if (experimentalDesignTemplate != null) {
                string[] rawfiles = null;

                int index = Constants.GetKeywordIndex(experimentalDesign.rawfile,
                                                     experimentalDesignTemplate.StringColumnNames);
                if (index != -1) {
                    rawfiles = experimentalDesignTemplate.StringColumns[index];
                }

                string[] experimentNames = null;
                if (
                    (index =
                     Constants.GetKeywordIndex(experimentalDesign.variable,
                                              experimentalDesignTemplate.StringColumnNames)) !=
                    -1) {
                    experimentNames = experimentalDesignTemplate.StringColumns[index];
                } else if (
                      (index =
                       Constants.GetKeywordIndex(experimentalDesign.variable,
                                                experimentalDesignTemplate.CategoryColumnNames)) != -1) {
                    experimentNames = MzTabMatrixUtils.ConvertToStringArray(experimentalDesignTemplate.GetCategoryColumnAt(index));
                }

                if (rawfiles != null && experimentNames != null) {
                    for (int i = 0; i < rawfiles.Length && i < experimentNames.Length; i++) {
                        string name = experimentNames[i];
                        StudyVariable variable = studyvariables.FirstOrDefault(x => x.Description.Equals(name));
                        if (variable == null) {
                            variable = new StudyVariable(studyvariables.Count + 1) { Description = name };
                            studyvariables.Add(variable);
                        }

                        string rawfile = rawfiles[i];
                        MsRunImpl runImpl = msruns.FirstOrDefault(x => x.Description.Equals(rawfile));
                        if (runImpl == null) {
                            runImpl = new MsRunImpl(msruns.Count + 1) {
                                Location = new Url(rawfile),
                                Format = cv.GetParam("MS:1000563", "MS"),
                                IdFormat = cv.GetParam("MS:1000768", "MS")
                            };
                            msruns.Add(runImpl);
                        }

                        if (rawfile != null) {
                            if (!dictionary.ContainsKey(variable.Id)) {
                                dictionary.Add(variable.Id, new List<string>());
                            }
                            dictionary[variable.Id].Add(rawfile);
                        }
                    }
                } else {
                    Console.Out.WriteLine("Rawfiles " + rawfiles);
                    Console.Out.WriteLine("experimentNames " + experimentNames);
                    throw new Exception("Could not parse " + Matrix.ExperimentalDesign);
                }
            }

            #endregion

            #region add default samples from studyvariables

            if (studyvariables != null && studyvariables.Count > 0) {
                foreach (StudyVariable variable in studyvariables) {
                    string text = variable.Description;

                    Sample sample = samples.FirstOrDefault(x => text.Contains(x.Description));
                    if (sample == null) {
                        sample = new Sample(samples.Count + 1) { Description = text };
                        samples.Add(sample);
                    }
                    variable.AddSample(sample);
                }
            }

            #endregion

            #region parse summary

            if (summary != null) {
                int maxRow = msruns.Count;

                string multi = "1";
                string[] labels0 = null;
                int index;

                if ((index = Constants.GetKeywordIndex(Utils.summary.labels0, summary.StringColumnNames)) != -1) {
                    labels0 = summary.StringColumns[index];
                    multi = "1";
                } else if ((index = Constants.GetKeywordIndex(Utils.summary.labels0, summary.CategoryColumnNames)) !=
                           -1) {
                    labels0 = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index));
                    multi = "1";
                }

                string[] labels1 = null;
                if ((index = Constants.GetKeywordIndex(Utils.summary.labels1, summary.StringColumnNames)) != -1) {
                    labels1 = summary.StringColumns[index];
                    multi = "2";
                } else if ((index = Constants.GetKeywordIndex(Utils.summary.labels1, summary.CategoryColumnNames)) !=
                           -1) {
                    labels1 = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index));
                    multi = "2";
                }

                string[] labels2 = null;
                if ((index = Constants.GetKeywordIndex(Utils.summary.labels2, summary.StringColumnNames)) != -1) {
                    labels2 = summary.StringColumns[index];
                    multi = "3";
                } else if ((index = Constants.GetKeywordIndex(Utils.summary.labels2, summary.CategoryColumnNames)) !=
                           -1) {
                    labels2 = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index));
                    multi = "3";
                }

                string[] multiplicity;
                if ((index = Constants.GetKeywordIndex(Utils.summary.multiplicity, summary.StringColumnNames)) !=
                    -1) {
                    multiplicity = summary.StringColumns[index];
                    multiplicity = multiplicity.Where(x => !String.IsNullOrEmpty(x)).ToArray();
                } else if (
                      (index =
                       Constants.GetKeywordIndex(Utils.summary.multiplicity, summary.CategoryColumnNames)) !=
                      -1) {
                    multiplicity = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index));
                    multiplicity = multiplicity.Where(x => !String.IsNullOrEmpty(x)).ToArray();
                } else {
                    multiplicity = new string[maxRow];
                    for (int i = 0; i < multiplicity.Length; i++) {
                        multiplicity[i] = multi;
                    }
                }

                string[] labels;
                switch (multi) {
                    case "1":
                        labels = null;
                        break;
                    case "2":
                        labels = new[] { "L", "H" };
                        break;
                    case "3":
                        labels = new[] { "L", "H", "M" };
                        break;
                    default:
                        labels = null;
                        break;
                }

                if (labels != null) {
                    List<StudyVariable> list = new List<StudyVariable>();
                    Dictionary<int, IList<string>> dict = new Dictionary<int, IList<string>>();

                    foreach (StudyVariable studyVariable in studyvariables){
                        foreach (var variable in SILAC(studyVariable, labels)) {
                            IList<string> rawfile = null;
                            if (dictionary.ContainsKey(variable.Id)) {
                                rawfile = dictionary[variable.Id];
                            }

                            StudyVariable tmp = new StudyVariable(list.Count + 1){Description = variable.Description};
                            tmp.AddAllAssays(variable.AssayMap.Values.ToList());
                            tmp.AddAllSamples(variable.SampleMap.Values.ToList());

                            list.Add(tmp);

                            if (rawfile != null) {
                                if (!dict.ContainsKey(tmp.Id)) {
                                    dict.Add(tmp.Id, rawfile);
                                }
                            }
                        }
                    }
                    studyvariables = list;
                    dictionary = dict;
                }

                string[] rawfiles = null;
                if ((index = Constants.GetKeywordIndex(Utils.summary.rawfile, summary.StringColumnNames)) != -1) {
                    rawfiles = summary.StringColumns[index];
                    rawfiles = rawfiles.Where(x => !String.IsNullOrEmpty(x)).ToArray();
                } else if ((index = Constants.GetKeywordIndex(Utils.summary.rawfile, summary.CategoryColumnNames)) !=
                           -1) {
                    rawfiles = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index));
                    rawfiles = rawfiles.Where(x => !String.IsNullOrEmpty(x)).ToArray();
                }

                string[] orbitrapInstruments = new[] { "LTQ Orbitrap", "LTQ Orbitrap XL", "LTQ Orbitrap Velos", "LTQ Orbitrap Elite", "Q Exactive" };
                string[] instrument = null;
                if ((index = Constants.GetKeywordIndex(Utils.summary.instrument, summary.StringColumnNames)) != -1) {
                    instrument = summary.StringColumns[index];
                    instrument = instrument.Where(x => !String.IsNullOrEmpty(x)).ToArray();
                } else if ((index = Constants.GetKeywordIndex(Utils.summary.instrument, summary.CategoryColumnNames)) !=
                           -1) {
                    instrument = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index));
                    instrument = instrument.Where(x => !String.IsNullOrEmpty(x)).ToArray();
                }

                if (rawfiles != null) {
                    for (int i = 0; i < rawfiles.Length; i++) {
                        int id = assays.Count + 1;
                        string rawfile = rawfiles[i];

                        if (!dictionary.Values.Any(x => x.Contains(rawfile))) {
                            continue;
                        }

                        IList<StudyVariable> temp = new List<StudyVariable>();
                        foreach (var v in dictionary.Where(x => x.Value.Contains(rawfile))) {
                            temp.Add(studyvariables.FirstOrDefault(x => x.Id == v.Key));
                        }

                        StudyVariable variable1 = null;
                        StudyVariable variable2 = null;
                        StudyVariable variable3 = null;
                        if (temp != null) {
                            if (temp.Any()) {
                                variable1 = temp[0];
                            }
                            if (temp.Count() > 1) {
                                variable2 = temp[1];
                            }
                            if (temp.Count() > 2) {
                                variable3 = temp[2];
                            }
                        }

                        if (multiplicity[i].Equals("1")) {
                            #region Add assay for label free

                            Assay assay = new Assay(id) {
                                QuantificationReagent = cv.GetParam("Unlabeled sample", "PRIDE"),
                                MsRun = msruns[i]
                            };
                            if (variable1 != null) {
                                assay.Sample = variable1.SampleMap.Values.FirstOrDefault();
                                variable1.AddAssay(assay);
                            }
                            assays.Add(assay);

                            #endregion
                        } else if (multiplicity[i].Equals("2")) {
                            #region Add assays for Double SILAC labeling

                            Assay assay = new Assay(id) {
                                QuantificationReagent = cv.GetParam("SILAC light", "PRIDE"),
                                MsRun = msruns[i]
                            };
                            IList<AssayQuantificationMod> mods = MzTabMatrixUtils.GetQuantificationMod(labels0, i, assay);
                            if (mods != null) {
                                foreach (var m in mods) {
                                    assay.addQuantificationMod(m);
                                }
                            }
                            if (variable1 != null) {
                                assay.Sample = variable1.SampleMap.Values.FirstOrDefault();
                                variable1.AddAssay(assay);
                            }
                            assays.Add(assay);

                            assay = new Assay(id + 1) {
                                QuantificationReagent = cv.GetParam("SILAC heavy", "PRIDE"),
                                MsRun = msruns[i]
                            };
                            mods = MzTabMatrixUtils.GetQuantificationMod(labels1, i, assay);
                            if (mods != null) {
                                foreach (var m in mods) {
                                    assay.addQuantificationMod(m);
                                }
                            }
                            if (variable2 != null) {
                                assay.Sample = variable2.SampleMap.Values.FirstOrDefault();
                                variable2.AddAssay(assay);
                            }
                            assays.Add(assay);

                            #endregion
                        } else if (multiplicity[i].Equals("3")) {
                            #region Add assays for Triple SILAC labeling

                            Assay assay = new Assay(id) {
                                QuantificationReagent = cv.GetParam("SILAC light", "PRIDE"),
                                MsRun = msruns[i]
                            };
                            IList<AssayQuantificationMod> mods = MzTabMatrixUtils.GetQuantificationMod(labels0, i, assay);
                            if (mods != null) {
                                foreach (var m in mods) {
                                    assay.addQuantificationMod(m);
                                }
                            }
                            if (variable1 != null) {
                                assay.Sample = variable1.SampleMap.Values.FirstOrDefault();
                                variable1.AddAssay(assay);
                            }
                            assays.Add(assay);

                            assay = new Assay(id + 1) {
                                QuantificationReagent = cv.GetParam("SILAC medium", "PRIDE"),
                                MsRun = msruns[i]
                            };
                            mods = MzTabMatrixUtils.GetQuantificationMod(labels1, i, assay);
                            if (mods != null) {
                                foreach (var m in mods) {
                                    assay.addQuantificationMod(m);
                                }
                            }
                            if (variable2 != null) {
                                assay.Sample = variable2.SampleMap.Values.FirstOrDefault();
                                variable2.AddAssay(assay);
                            }
                            assays.Add(assay);

                            assay = new Assay(id + 2) {
                                QuantificationReagent = cv.GetParam("SILAC heavy", "PRIDE"),
                                MsRun = msruns[i]
                            };
                            mods = MzTabMatrixUtils.GetQuantificationMod(labels2, i, assay);
                            if (mods != null) {
                                foreach (var m in mods) {
                                    assay.addQuantificationMod(m);
                                }
                            }
                            if (variable3 != null) {
                                assay.Sample = variable3.SampleMap.Values.FirstOrDefault();
                                variable3.AddAssay(assay);
                            }
                            assays.Add(assay);

                            #endregion
                        }

                        if (instrument != null && !String.IsNullOrEmpty(instrument[i])) {
                            var tmp = new Instrument(instruments.Count + 1) { Name = cv.GetParam(instrument[i], "MS") };
                            if (orbitrapInstruments.Contains(instrument[i])) {
                                tmp.Source = cv.GetParam("electrospray ionization", "MS");
                                tmp.Analyzer = cv.GetParam("orbitrap", "MS");
                            }
                            instruments.Add(tmp);
                        }
                    }
                }
            }

            #endregion

            #region parse search

            Lib.Model.Param run_idFormat = cv.GetParam("MS:1000774", "MS");
            Lib.Model.Param run_format = cv.GetParam("Andromeda Peak list file", "MS");

            if (spectraRef != null && Constants.GetKeywordName(Utils.spectraRef.location, spectraRef.StringColumnNames) != null) {
                int colindex = Constants.GetKeywordIndex(Utils.spectraRef.location, spectraRef.StringColumnNames);
                string[] values =
                    ArrayUtils.UniqueValues(spectraRef.StringColumns[colindex]);

                for (int i = 0; i < values.Length; i++) {
                    Lib.Model.Param frag = null;
                    if (values[i].Contains("CID")) {
                        frag = cv.GetParam("MS:1000133", "MS");
                    } else if (values[i].Contains("HCD")) {
                        frag = cv.GetParam("MS:1000422", "MS");
                    }

                    msruns.Add(new MsRunImpl(msruns.Count + 1) {
                        IdFormat = run_idFormat,
                        Format = run_format,
                        FragmentationMethod = frag,
                        Location = new Url(values[i])
                    });
                }
            } else {
                msruns.Add(new MsRunImpl(msruns.Count + 1) {
                    IdFormat = run_idFormat,
                    Format = run_format
                });
            }

            #endregion
        }
コード例 #20
0
 /// <summary>
 /// Writes the matrix to the tab-separated text file named by the "File name" parameter.
 /// </summary>
 /// <remarks>
 /// Lines are emitted in this order: a header line with the column names; a
 /// "#!{Description}" line (only when HasAnyDescription(data) returns true); a "#!{Type}"
 /// line tagging every column as E, C, N, T or M; one "#!{N:name}" line per numeric
 /// annotation row; one "#!{C:name}" line per category annotation row; and finally one
 /// line per data row with all double-quote characters removed.
 /// Within every line the columns appear as expression, category, numeric, string,
 /// multi-numeric. Most cells are passed through Trunc (defined elsewhere in this class).
 /// </remarks>
 /// <param name="parameters">Supplies the output path via the "File name" file parameter.</param>
 /// <param name="data">The matrix to export.</param>
 /// <param name="processInfo">
 /// Receives the exception message in ErrString when the output file cannot be opened;
 /// in that case nothing is written.
 /// </param>
 public void Export(Parameters parameters, IMatrixData data, ProcessInfo processInfo)
 {
     string filename = parameters.GetFileParam("File name").Value;
     StreamWriter writer;
     try{
         writer = new StreamWriter(filename);
     } catch (Exception e){
         processInfo.ErrString = e.Message;
         return;
     }
     // The using statement guarantees the file handle is flushed and released even when
     // an exception is thrown half-way through the export.
     using (writer){
         // Header line: column names.
         List<string> words = new List<string>();
         for (int i = 0; i < data.ExpressionColumnCount; i++){
             words.Add(Trunc(data.ExpressionColumnNames[i]));
         }
         for (int i = 0; i < data.CategoryColumnCount; i++){
             words.Add(Trunc(data.CategoryColumnNames[i]));
         }
         for (int i = 0; i < data.NumericColumnCount; i++){
             words.Add(Trunc(data.NumericColumnNames[i]));
         }
         for (int i = 0; i < data.StringColumnCount; i++){
             words.Add(Trunc(data.StringColumnNames[i]));
         }
         for (int i = 0; i < data.MultiNumericColumnCount; i++){
             words.Add(Trunc(data.MultiNumericColumnNames[i]));
         }
         writer.WriteLine(StringUtils.Concat("\t", words));

         // Optional description line; missing descriptions are written as empty cells.
         if (HasAnyDescription(data)){
             words = new List<string>();
             for (int i = 0; i < data.ExpressionColumnCount; i++){
                 words.Add(Trunc(data.ExpressionColumnDescriptions[i] ?? ""));
             }
             for (int i = 0; i < data.CategoryColumnCount; i++){
                 words.Add(Trunc(data.CategoryColumnDescriptions[i] ?? ""));
             }
             for (int i = 0; i < data.NumericColumnCount; i++){
                 words.Add(Trunc(data.NumericColumnDescriptions[i] ?? ""));
             }
             for (int i = 0; i < data.StringColumnCount; i++){
                 words.Add(Trunc(data.StringColumnDescriptions[i] ?? ""));
             }
             for (int i = 0; i < data.MultiNumericColumnCount; i++){
                 words.Add(Trunc(data.MultiNumericColumnDescriptions[i] ?? ""));
             }
             writer.WriteLine("#!{Description}" + StringUtils.Concat("\t", words));
         }

         // Type line: one letter per column identifying its kind.
         words = new List<string>();
         for (int i = 0; i < data.ExpressionColumnCount; i++){
             words.Add("E");
         }
         for (int i = 0; i < data.CategoryColumnCount; i++){
             words.Add("C");
         }
         for (int i = 0; i < data.NumericColumnCount; i++){
             words.Add("N");
         }
         for (int i = 0; i < data.StringColumnCount; i++){
             words.Add("T");
         }
         for (int i = 0; i < data.MultiNumericColumnCount; i++){
             words.Add("M");
         }
         writer.WriteLine("#!{Type}" + StringUtils.Concat("\t", words));

         // Numeric annotation rows: values only under expression columns, empty cells elsewhere.
         for (int i = 0; i < data.NumericRowCount; i++){
             words = new List<string>();
             for (int j = 0; j < data.ExpressionColumnCount; j++){
                 words.Add("" + data.NumericRows[i][j]);
             }
             for (int j = 0; j < data.CategoryColumnCount; j++){
                 words.Add("");
             }
             for (int j = 0; j < data.NumericColumnCount; j++){
                 words.Add("");
             }
             for (int j = 0; j < data.StringColumnCount; j++){
                 words.Add("");
             }
             for (int j = 0; j < data.MultiNumericColumnCount; j++){
                 words.Add("");
             }
             writer.WriteLine("#!{N:" + data.NumericRowNames[i] + "}" + StringUtils.Concat("\t", words));
         }

         // Category annotation rows: same layout as the numeric annotation rows.
         for (int i = 0; i < data.CategoryRowCount; i++){
             words = new List<string>();
             for (int j = 0; j < data.ExpressionColumnCount; j++){
                 // A null cell is written as an empty cell, matching how category
                 // column cells are handled in the data rows below.
                 string[] cell = data.GetCategoryRowAt(i)[j] ?? new string[0];
                 words.Add(cell.Length == 0 ? "" : StringUtils.Concat(";", cell));
             }
             for (int j = 0; j < data.CategoryColumnCount; j++){
                 words.Add("");
             }
             for (int j = 0; j < data.NumericColumnCount; j++){
                 words.Add("");
             }
             for (int j = 0; j < data.StringColumnCount; j++){
                 words.Add("");
             }
             for (int j = 0; j < data.MultiNumericColumnCount; j++){
                 words.Add("");
             }
             writer.WriteLine("#!{C:" + data.CategoryRowNames[i] + "}" + StringUtils.Concat("\t", words));
         }

         // Data rows.
         for (int j = 0; j < data.RowCount; j++){
             words = new List<string>();
             for (int i = 0; i < data.ExpressionColumnCount; i++){
                 words.Add(Trunc("" + data[j, i]));
             }
             for (int i = 0; i < data.CategoryColumnCount; i++){
                 string[] q = data.GetCategoryColumnAt(i)[j] ?? new string[0];
                 words.Add(Trunc((q.Length > 0 ? StringUtils.Concat(";", q) : "")));
             }
             for (int i = 0; i < data.NumericColumnCount; i++){
                 words.Add(Trunc("" + data.NumericColumns[i][j]));
             }
             for (int i = 0; i < data.StringColumnCount; i++){
                 words.Add(Trunc(data.StringColumns[i][j]));
             }
             for (int i = 0; i < data.MultiNumericColumnCount; i++){
                 // A null cell is written as an empty cell instead of throwing.
                 double[] q = data.MultiNumericColumns[i][j] ?? new double[0];
                 words.Add(Trunc((q.Length > 0 ? StringUtils.Concat(";", q) : "")));
             }
             string line = StringUtils.Concat("\t", words);
             // Double quotes are stripped from the finished line before it is written.
             line = line.Replace("\"", "");
             writer.WriteLine(line);
         }
     }
 }
コード例 #21
0
        /// <summary>
        /// Rebuilds every column of the matrix so that each output row is derived from a
        /// group of input rows, where the groups are formed from the selected ID column.
        /// </summary>
        /// <remarks>
        /// Rows are split into those with and without IDs by GetPresentAbsentIndices, the
        /// rows with IDs are grouped by ClusterRows, and (when "Keep rows without ID" is set)
        /// the ID-less rows are appended by ProlongRowInds. Each column type is then reduced
        /// per group through the matching Average overload. Those helpers are defined
        /// elsewhere; their exact semantics are not visible from this method.
        /// </remarks>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="param">Source of the "Keep rows without ID", "ID column" and average-type settings.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            bool retainRowsWithoutId = param.GetParam<bool>("Keep rows without ID").Value;
            AverageType expressionAverage =
                GetAverageType(param.GetParam<int>("Average type for expression columns").Value);

            string[] rawIds = mdata.StringColumns[param.GetParam<int>("ID column").Value];
            string[][] idsPerRow = SplitIds(rawIds);
            int[] rowsWithId;
            int[] rowsWithoutId;
            GetPresentAbsentIndices(idsPerRow, out rowsWithId, out rowsWithoutId);
            idsPerRow = ArrayUtils.SubArray(idsPerRow, rowsWithId);

            // Start with one singleton group per row that has an ID; ClusterRows regroups them.
            int[][] groups = new int[rowsWithId.Length][];
            for (int g = 0; g < groups.Length; g++)
            {
                groups[g] = new[] { rowsWithId[g] };
            }
            ClusterRows(ref groups, ref idsPerRow);
            if (retainRowsWithoutId)
            {
                groups = ProlongRowInds(groups, rowsWithoutId);
            }

            int groupCount = groups.Length;
            int expressionColumnCount = mdata.ColumnCount;

            // Expression values: one averaged value per (group, column).
            float[,] mergedValues = new float[groupCount, expressionColumnCount];
            for (int col = 0; col < expressionColumnCount; col++)
            {
                double[] columnValues = ArrayUtils.ToDoubles(mdata.Values.GetColumn(col));
                for (int g = 0; g < groupCount; g++)
                {
                    double[] groupValues = ArrayUtils.SubArray(columnValues, groups[g]);
                    mergedValues[g, col] = (float)Average(groupValues, expressionAverage);
                }
            }
            mdata.Values.Set(mergedValues);

            // Numeric columns: each column has its own "Average type for <name>" parameter.
            for (int col = 0; col < mdata.NumericColumnCount; col++)
            {
                string columnName = mdata.NumericColumnNames[col];
                AverageType columnAverage =
                    GetAverageType(param.GetParam<int>("Average type for " + columnName).Value);
                double[] source = mdata.NumericColumns[col];
                double[] merged = new double[groupCount];
                for (int g = 0; g < groupCount; g++)
                {
                    double[] groupValues = ArrayUtils.SubArray(source, groups[g]);
                    merged[g] = Average(groupValues, columnAverage);
                }
                mdata.NumericColumns[col] = merged;
            }

            // Category columns.
            for (int col = 0; col < mdata.CategoryColumnCount; col++)
            {
                string[][] source = mdata.GetCategoryColumnAt(col);
                string[][] merged = new string[groupCount][];
                for (int g = 0; g < groupCount; g++)
                {
                    string[][] groupValues = ArrayUtils.SubArray(source, groups[g]);
                    merged[g] = Average(groupValues);
                }
                mdata.SetCategoryColumnAt(merged, col);
            }

            // String columns.
            for (int col = 0; col < mdata.StringColumnCount; col++)
            {
                string[] source = mdata.StringColumns[col];
                string[] merged = new string[groupCount];
                for (int g = 0; g < groupCount; g++)
                {
                    string[] groupValues = ArrayUtils.SubArray(source, groups[g]);
                    merged[g] = Average(groupValues);
                }
                mdata.StringColumns[col] = merged;
            }

            // Multi-numeric columns.
            for (int col = 0; col < mdata.MultiNumericColumnCount; col++)
            {
                double[][] source = mdata.MultiNumericColumns[col];
                double[][] merged = new double[groupCount][];
                for (int g = 0; g < groupCount; g++)
                {
                    double[][] groupValues = ArrayUtils.SubArray(source, groups[g]);
                    merged[g] = Average(groupValues);
                }
                mdata.MultiNumericColumns[col] = merged;
            }
        }
コード例 #22
0
 /// <summary>
 /// Collects all category columns of the matrix into a list, in column-index order.
 /// </summary>
 /// <param name="mdata">Matrix whose category columns are read.</param>
 /// <returns>A new list holding the result of GetCategoryColumnAt for every category column index.</returns>
 public static List<string[][]> GetCategoryColumns(IMatrixData mdata)
 {
     List<string[][]> columns = new List<string[][]>();
     int position = 0;
     while (position < mdata.CategoryColumnCount){
         string[][] column = mdata.GetCategoryColumnAt(position);
         columns.Add(column);
         position++;
     }
     return columns;
 }
コード例 #23
0
        /// <summary>
        /// Builds a 4-row matrix whose "id" column is a, b, b, b, calls UniqueRows on that
        /// column, and checks that two rows remain with the expected combined values in
        /// every column type.
        /// </summary>
        public void SmallTest()
        {
            // Arrange
            double[,] expression =
            {
                { 0, 4 },
                { 1, 5 },
                { 2, 6 },
                { 3, 7 }
            };
            IMatrixData mdata = PerseusFactory.CreateMatrixData(expression);

            string[] idValues = { "a", "b", "b", "b" };
            string[] stringValues = { "a;b", "b;c", "c;d", "d;e" };
            string[][] categoryValues =
            {
                new[] { "a", "b" },
                new[] { "b", "c" },
                new[] { "c", "d" },
                new[] { "d", "e" }
            };
            // NOTE(review): five values are supplied for a four-row matrix; kept exactly as in
            // the original test - confirm whether the trailing 4.0 is intentional.
            double[] numericValues = { 0, 1, 2, 3, 4.0 };
            double[][] multiNumericValues =
            {
                new[] { 0, 4d },
                new[] { 1, 5d },
                new[] { 2, 6d },
                new[] { 3, 7d }
            };

            mdata.AddStringColumn("id", "", idValues);
            mdata.AddStringColumn("str", "", stringValues);
            mdata.AddCategoryColumn("cat", "", categoryValues);
            mdata.AddNumericColumn("num", "", numericValues);
            mdata.AddMultiNumericColumn("mnum", "", multiNumericValues);

            // Act
            mdata.UniqueRows(mdata.StringColumns[0], ArrayUtils.Median, UniqueRows.Union, UniqueRows.CatUnion,
                             UniqueRows.MultiNumUnion);

            // Assert
            int[] expectedFirstExpressionColumn = { 0, 2 };
            int[] expectedSecondExpressionColumn = { 4, 6 };
            string[] expectedStrings = { "a;b", "b;c;d;e" };
            string[][] expectedCategories =
            {
                new[] { "a", "b" },
                new[] { "b", "c", "d", "e" }
            };
            int[] expectedNumeric = { 0, 2 };
            double[][] expectedMultiNumeric =
            {
                new[] { 0d, 4 },
                new[] { 1d, 5, 2, 6, 3, 7 }
            };

            Assert.AreEqual(2, mdata.RowCount);
            CollectionAssert.AreEqual(expectedFirstExpressionColumn, mdata.Values.GetColumn(0));
            CollectionAssert.AreEqual(expectedSecondExpressionColumn, mdata.Values.GetColumn(1));
            CollectionAssert.AreEqual(expectedStrings, mdata.GetStringColumn("str"));
            CollectionAssert.AreEqual(expectedCategories, mdata.GetCategoryColumnAt(0));
            CollectionAssert.AreEqual(expectedNumeric, mdata.NumericColumns[0]);
            CollectionAssert.AreEqual(expectedMultiNumeric, mdata.MultiNumericColumns[0]);
        }
コード例 #24
0
ファイル: FilterDE.cs プロジェクト: neuhauser/perseus-plugins
        /// <summary>
        /// Keeps only those rows whose amino-acid terms in the chosen categorical column pass
        /// <c>JudgeIfKept</c> against the user-supplied amino acid string.
        /// </summary>
        /// <param name="mdata">Matrix that is filtered via <c>ExtractExpressionRows</c>.</param>
        /// <param name="param">Supplies "Column with second last AA" and "Amino acids".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message if any term is not exactly one character long.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int colIndex = param.GetSingleChoiceParam("Column with second last AA").Value;
            string aas = param.GetStringParam("Amino acids").Value;
            string[][] col = mdata.GetCategoryColumnAt(colIndex);
            List<int> keptRows = new List<int>();
            for (int row = 0; row < mdata.RowCount; row++)
            {
                string[] terms = col[row];
                foreach (string term in terms)
                {
                    if (term.Length == 1)
                    {
                        continue;
                    }
                    // Anything other than a single character cannot be an amino acid code: abort
                    // without touching the matrix.
                    processInfo.ErrString = "Some of the entries in column " + mdata.CategoryColumnNames[colIndex] +
                        " do not contain amino acids";
                    return;
                }
                if (JudgeIfKept(aas, terms))
                {
                    keptRows.Add(row);
                }
            }
            mdata.ExtractExpressionRows(keptRows.ToArray());
        }
コード例 #25
0
        /// <summary>
        /// Filters rows according to whether their entry in a chosen categorical column contains
        /// any of the user-selected terms.
        /// </summary>
        /// <param name="mdata">Matrix whose rows are filtered.</param>
        /// <param name="param">
        /// Supplies "Column" (with sub-parameter "Values", indices into the column's value list) and
        /// "Mode". Mode 0 keeps the rows that contain none of the selected terms; any other mode
        /// keeps the rows that contain at least one of them.
        /// </param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when no column or no term is selected.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            SingleChoiceWithSubParams p = param.GetSingleChoiceWithSubParams("Column");
            int colInd = p.Value;
            if (colInd < 0){
                processInfo.ErrString = "No categorical columns available.";
                return;
            }
            MultiChoiceParam mcp = p.GetSubParameters().GetMultiChoiceParam("Values");
            int[] inds = mcp.Value;
            if (inds.Length == 0){
                processInfo.ErrString = "Please select at least one term for filtering.";
                return;
            }
            // Look the column's value list up once instead of once per selected index.
            string[] allValues = mdata.GetCategoryColumnValuesAt(colInd);
            HashSet<string> selected = new HashSet<string>();
            foreach (int ind in inds){
                selected.Add(allValues[ind]);
            }
            bool remove = param.GetSingleChoiceParam("Mode").Value == 0;
            string[][] cats = mdata.GetCategoryColumnAt(colInd);
            List<int> valids = new List<int>();
            for (int i = 0; i < cats.Length; i++){
                bool hit = false;
                foreach (string w in cats[i]){
                    if (selected.Contains(w)){
                        hit = true;
                        break;
                    }
                }
                // remove: keep rows without a hit; otherwise: keep rows with a hit.
                if (hit != remove){
                    valids.Add(i);
                }
            }
            PerseusPluginUtils.FilterRows(mdata, param, valids.ToArray());
        }
コード例 #26
0
        /// <summary>
        /// Appends a new categorical column whose entries are produced row by row by
        /// <c>CombineTerms</c> from two existing categorical columns.
        /// </summary>
        /// <param name="mdata">Matrix that is read and receives the new column.</param>
        /// <param name="param">Supplies "First column" and "Second column".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when fewer than two categorical columns exist.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            if (mdata.CategoryColumnCount < 2)
            {
                processInfo.ErrString = "There are less than two categorical columns available.";
                return;
            }

            int firstInd = param.GetSingleChoiceParam("First column").Value;
            int secondInd = param.GetSingleChoiceParam("Second column").Value;
            string[][] first = mdata.GetCategoryColumnAt(firstInd);
            string[][] second = mdata.GetCategoryColumnAt(secondInd);

            // The new column is named after both sources, joined by an underscore.
            string combinedName = mdata.CategoryColumnNames[firstInd] + "_" + mdata.CategoryColumnNames[secondInd];

            string[][] combined = new string[first.Length][];
            for (int row = 0; row < combined.Length; row++)
            {
                combined[row] = CombineTerms(first[row], second[row]);
            }
            mdata.AddCategoryColumn(combinedName, "", combined);
        }
コード例 #27
0
 /// <summary>
 /// Returns all categorical columns of <paramref name="mdata"/>, ordered by column index.
 /// </summary>
 /// <param name="mdata">Matrix whose categorical columns are read.</param>
 /// <returns>List holding the result of <c>GetCategoryColumnAt</c> for each column index.</returns>
 private static List<string[][]> GetCategoryColumns(IMatrixData mdata)
 {
     List<string[][]> columns = new List<string[][]>();
     for (int col = 0; col < mdata.CategoryColumnCount; ++col)
     {
         string[][] column = mdata.GetCategoryColumnAt(col);
         columns.Add(column);
     }
     return columns;
 }
コード例 #28
0
 /// <summary>
 /// Runs <c>CountTerms</c> over each requested categorical column except the selection column,
 /// then sorts the accumulated result.
 /// </summary>
 /// <param name="data">Matrix providing the categorical columns and their names.</param>
 /// <param name="selection">Row selection passed through to <c>CountTerms</c>.</param>
 /// <param name="selCol">Index of the selection column; it is skipped when counting.</param>
 /// <param name="catIndices">Indices of the categorical columns to count.</param>
 /// <returns>The sorted counting result.</returns>
 public static CountingResult CountCategories(IMatrixData data, bool[] selection, int selCol, int[] catIndices)
 {
     CountingResult counts = new CountingResult();
     foreach (int catIndex in catIndices)
     {
         if (catIndex == selCol)
         {
             continue;
         }
         string columnName = data.CategoryColumnNames[catIndex];
         CountTerms(columnName, data.GetCategoryColumnAt(catIndex), counts, selection);
     }
     counts.Sort();
     return counts;
 }
コード例 #29
0
 /// <summary>
 /// Returns the indices of all categorical columns that <c>IsInvalidCatColumn</c> does not reject.
 /// </summary>
 /// <param name="data">Matrix whose categorical columns are inspected.</param>
 /// <returns>Accepted column indices in ascending order.</returns>
 private static int[] GetValidCatCols(IMatrixData data)
 {
     List<int> accepted = new List<int>();
     for (int col = 0; col < data.CategoryColumnCount; col++)
     {
         string[][] column = data.GetCategoryColumnAt(col);
         if (IsInvalidCatColumn(column))
         {
             continue;
         }
         accepted.Add(col);
     }
     return accepted.ToArray();
 }
コード例 #30
0
        /// <summary>
        /// Subtracts a reference profile from every row of the main matrix. The profile is built
        /// from the rows whose entry in the indicator column contains the given term: each such
        /// row is centered on its own mean, and the per-column median of the centered rows
        /// (ignoring NaN and infinite values) forms the profile.
        /// </summary>
        /// <param name="mdata">Matrix that is read and modified in place.</param>
        /// <param name="param">Supplies "Indicator column" and "Value".</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error message when no row contains the term.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            string[][] col = mdata.GetCategoryColumnAt(param.GetSingleChoiceParam("Indicator column").Value);
            string term = param.GetStringParam("Value").Value;
            List<int> inds = new List<int>();
            for (int i = 0; i < col.Length; i++){
                if (Contains(col[i], term)){
                    inds.Add(i);
                }
            }
            if (inds.Count == 0){
                // With no reference rows every profile entry would be NaN, and the subtraction
                // below would overwrite the whole matrix with NaN. Report it instead.
                processInfo.ErrString = "No rows contain the given value in the indicator column.";
                return;
            }
            float[][] profiles = new float[inds.Count][];
            for (int i = 0; i < profiles.Length; i++){
                // NOTE(review): the row is centered in place; presumably GetExpressionRow returns
                // a copy rather than the matrix's own storage - confirm.
                profiles[i] = mdata.GetExpressionRow(inds[i]);
                float mean = (float) ArrayUtils.Mean(profiles[i]);
                for (int j = 0; j < profiles[i].Length; j++){
                    profiles[i][j] -= mean;
                }
            }
            float[] totalProfile = new float[mdata.ExpressionColumnCount];
            for (int i = 0; i < totalProfile.Length; i++){
                List<float> vals = new List<float>();
                foreach (float[] t in profiles){
                    float val = t[i];
                    if (float.IsNaN(val) || float.IsInfinity(val)){
                        continue;
                    }
                    vals.Add(val);
                }
                // A column without any finite reference value gets NaN as its profile entry.
                totalProfile[i] = vals.Count > 0 ? ArrayUtils.Median(vals) : float.NaN;
            }
            for (int i = 0; i < mdata.RowCount; i++){
                for (int j = 0; j < mdata.ExpressionColumnCount; j++){
                    mdata[i, j] -= totalProfile[j];
                }
            }
        }