/// <summary>
/// Reads the "Min. count", "Selection", "Value" and "Categories" parameters, marks the rows whose
/// selection column contains the requested value, counts the category terms and passes the
/// counts on to <c>CreateMatrixData</c>.
/// </summary>
/// <param name="mdata">Matrix whose categorical columns are counted.</param>
/// <param name="param">Parameter set supplying the four values listed above.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int minCount = param.GetParam<int>("Min. count").Value;
    int selCol = param.GetParam<int>("Selection").Value;
    string value = param.GetParam<string>("Value").Value;
    int[] catIndices = param.GetParam<int[]>("Categories").Value;

    // The selection stays null when the chosen index does not address a categorical column.
    bool[] selection = null;
    if (selCol < mdata.CategoryColumnCount) {
        selection = new bool[mdata.RowCount];
        string[][] selectionColumn = mdata.GetCategoryColumnAt(selCol);
        for (int row = 0; row < selection.Length; row++) {
            string[] rowTerms = selectionColumn[row];
            if (rowTerms == null) {
                continue;
            }
            foreach (string term in rowTerms) {
                if (term.Equals(value)) {
                    selection[row] = true;
                    break;
                }
            }
        }
    }

    CountingResult counts = CountCategories(mdata, selection, selCol, catIndices);
    CreateMatrixData(counts, mdata, minCount, selection);
}
/// <summary>
/// Collects the categorical columns found at the given indices, in the order the indices are listed.
/// </summary>
/// <param name="mdata">Matrix to read the columns from.</param>
/// <param name="inds">Indices of the categorical columns to fetch.</param>
/// <returns>One entry per index, each obtained through <c>GetCategoryColumnAt</c>.</returns>
public static List<string[][]> GetCategoryColumns(IMatrixData mdata, IList<int> inds) {
    List<string[][]> columns = new List<string[][]>();
    for (int pos = 0; pos < inds.Count; pos++) {
        int columnIndex = inds[pos];
        string[][] column = mdata.GetCategoryColumnAt(columnIndex);
        columns.Add(column);
    }
    return columns;
}
/// <summary>
/// Collects every categorical column of the matrix, from index 0 up to <c>CategoryColumnCount - 1</c>.
/// </summary>
/// <param name="mdata">Matrix to read the columns from.</param>
/// <returns>All categorical columns in index order.</returns>
public static List<string[][]> GetCategoryColumns(IMatrixData mdata) {
    List<string[][]> columns = new List<string[][]>();
    int next = 0;
    while (next < mdata.CategoryColumnCount) {
        columns.Add(mdata.GetCategoryColumnAt(next));
        next++;
    }
    return columns;
}
/// <summary>
/// Feeds every requested categorical column except the selection column into <c>CountTerms</c>
/// and returns the sorted result.
/// </summary>
/// <param name="data">Matrix providing the category names and columns.</param>
/// <param name="selection">Row selection forwarded unchanged to <c>CountTerms</c>; may be null.</param>
/// <param name="selCol">Index of the selection column, which is left out of the counting.</param>
/// <param name="catIndices">Indices of the categorical columns to count.</param>
/// <returns>The accumulated counts after <c>Sort()</c> has been called on them.</returns>
private static CountingResult CountCategories(IMatrixData data, bool[] selection, int selCol, IEnumerable<int> catIndices) {
    CountingResult counts = new CountingResult();
    IEnumerable<int> columnsToCount = catIndices.Where(candidate => candidate != selCol);
    foreach (int column in columnsToCount) {
        string columnName = data.CategoryColumnNames[column];
        CountTerms(columnName, data.GetCategoryColumnAt(column), counts, selection);
    }
    counts.Sort();
    return counts;
}
/// <summary>
/// Returns the indices of all categorical columns that <c>IsInvalidCatColumn</c> does not reject.
/// </summary>
/// <param name="data">Matrix whose categorical columns are examined.</param>
/// <returns>Accepted column indices in ascending order.</returns>
private static int[] GetValidCatCols(IMatrixData data) {
    List<int> accepted = new List<int>();
    for (int column = 0; column < data.CategoryColumnCount; column++) {
        bool rejected = IsInvalidCatColumn(data.GetCategoryColumnAt(column));
        if (rejected) {
            continue;
        }
        accepted.Add(column);
    }
    return accepted.ToArray();
}
/// <summary>
/// Adds a categorical column whose rows are produced by <c>CombineTerms</c> from the rows of the
/// two columns chosen through "First column" and "Second column". The new column is named
/// "&lt;first&gt;_&lt;second&gt;" and gets an empty description.
/// </summary>
/// <param name="mdata">Matrix that is read and extended in place.</param>
/// <param name="param">Parameter set supplying the two column indices.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error string when fewer than two categorical columns exist.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    if (mdata.CategoryColumnCount < 2) {
        processInfo.ErrString = "There are less than two categorical columns available.";
        return;
    }

    int colInd1 = param.GetParam<int>("First column").Value;
    int colInd2 = param.GetParam<int>("Second column").Value;
    string[][] firstColumn = mdata.GetCategoryColumnAt(colInd1);
    string[][] secondColumn = mdata.GetCategoryColumnAt(colInd2);

    // The first column dictates the number of rows of the combined column.
    string[][] combined = new string[firstColumn.Length][];
    int row = 0;
    while (row < combined.Length) {
        combined[row] = CombineTerms(firstColumn[row], secondColumn[row]);
        row++;
    }

    string combinedName = mdata.CategoryColumnNames[colInd1] + "_" + mdata.CategoryColumnNames[colInd2];
    mdata.AddCategoryColumn(combinedName, "", combined);
}
/// <summary>
/// Duplicates the selected main, numerical, multi-numerical, categorical and text columns.
/// Every copy is appended to the matrix under a name obtained from
/// <c>StringUtils.GetNextAvailableName</c>, so it does not collide with an existing column name.
/// </summary>
/// <param name="data">Matrix that is extended in place.</param>
/// <param name="param">Parameter set supplying the column indices to duplicate, per column type.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] exColInds = param.GetParam<int[]>("Main columns").Value;
    int[] numColInds = param.GetParam<int[]>("Numerical columns").Value;
    int[] multiNumColInds = param.GetParam<int[]>("Multi-numerical columns").Value;
    int[] catColInds = param.GetParam<int[]>("Categorical columns").Value;
    int[] textColInds = param.GetParam<int[]>("Text columns").Value;

    if (exColInds.Length > 0) {
        // Keep all existing main columns and append the selected ones once more at the end.
        int ncol = data.ColumnCount;
        data.ExtractColumns(ArrayUtils.Concat(ArrayUtils.ConsecutiveInts(ncol), exColInds));
        HashSet<string> taken = new HashSet<string>(data.ColumnNames);
        for (int i = 0; i < exColInds.Length; i++) {
            string s = StringUtils.GetNextAvailableName(data.ColumnNames[ncol + i], taken);
            data.ColumnNames[ncol + i] = s;
            taken.Add(s);
        }
    }

    // In the loops below the set of taken names is rebuilt from the live name list on every
    // iteration, so it already contains the names of the copies added so far.
    foreach (int ind in numColInds) {
        HashSet<string> taken = new HashSet<string>(data.NumericColumnNames);
        string s = StringUtils.GetNextAvailableName(data.NumericColumnNames[ind], taken);
        data.AddNumericColumn(s, data.NumericColumnDescriptions[ind], (double[])data.NumericColumns[ind].Clone());
    }

    foreach (int ind in multiNumColInds) {
        HashSet<string> taken = new HashSet<string>(data.MultiNumericColumnNames);
        string s = StringUtils.GetNextAvailableName(data.MultiNumericColumnNames[ind], taken);
        // NOTE(review): Clone() is shallow here - the per-row arrays are shared with the source column.
        data.AddMultiNumericColumn(s, data.MultiNumericColumnDescriptions[ind], (double[][])data.MultiNumericColumns[ind].Clone());
    }

    foreach (int ind in catColInds) {
        HashSet<string> taken = new HashSet<string>(data.CategoryColumnNames);
        string s = StringUtils.GetNextAvailableName(data.CategoryColumnNames[ind], taken);
        data.AddCategoryColumn(s, data.CategoryColumnDescriptions[ind], data.GetCategoryColumnAt(ind));
    }

    foreach (int ind in textColInds) {
        HashSet<string> taken = new HashSet<string>(data.StringColumnNames);
        string s = StringUtils.GetNextAvailableName(data.StringColumnNames[ind], taken);
        // The description must come from the text columns; indexing the main-column descriptions
        // with a text-column index yields the wrong text or runs out of range.
        data.AddStringColumn(s, data.StringColumnDescriptions[ind], (string[])data.StringColumns[ind].Clone());
    }
}
/// <summary>
/// Builds a per-row flag telling whether the given categorical column contains <paramref name="word"/>.
/// </summary>
/// <param name="falseAreIndicated">When true the flags are inverted, i.e. rows lacking the word are flagged.</param>
/// <param name="catColInd">Index of the categorical column to inspect.</param>
/// <param name="word">Category term to look for (exact match).</param>
/// <param name="data">Matrix providing the column and the row count.</param>
/// <returns>An array with one flag per matrix row.</returns>
public static bool[] GetIndicatorColumn(bool falseAreIndicated, int catColInd, string word, IMatrixData data) {
    string[][] catCol = data.GetCategoryColumnAt(catColInd);
    bool[] result = new bool[data.RowCount];
    for (int i = 0; i < result.Length; i++){
        string[] cats = catCol[i];
        // A plain scan leaves the row's term order untouched (no in-place sort is needed just to
        // test membership) and treats a missing row as "does not contain the word".
        bool contains = cats != null && Array.IndexOf(cats, word) >= 0;
        result[i] = falseAreIndicated ? !contains : contains;
    }
    return result;
}
/// <summary>
/// Subtracts a reference profile from every row of the matrix. The reference is built from the
/// rows whose "Indicator column" contains "Value": each such row is mean-centred, and the
/// per-column median of the centred rows (ignoring NaN and infinite entries) is the profile.
/// </summary>
/// <param name="mdata">Matrix whose main values are modified in place.</param>
/// <param name="param">Parameter set supplying "Indicator column" and "Value".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error string when no row carries the requested value.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    string[][] col = mdata.GetCategoryColumnAt(param.GetParam<int>("Indicator column").Value);
    string term = param.GetParam<string>("Value").Value;

    List<int> inds = new List<int>();
    for (int i = 0; i < col.Length; i++) {
        if (Contains(col[i], term)) {
            inds.Add(i);
        }
    }

    // Without reference rows every profile entry would be NaN and the subtraction below would
    // overwrite the complete matrix with NaN, so stop with an error instead.
    if (inds.Count == 0) {
        processInfo.ErrString = "No rows contain the value '" + term + "' in the indicator column.";
        return;
    }

    // Mean-centre each reference row.
    double[][] profiles = new double[inds.Count][];
    for (int i = 0; i < profiles.Length; i++) {
        profiles[i] = ArrayUtils.ToDoubles(mdata.Values.GetRow(inds[i]));
        double mean = ArrayUtils.Mean(profiles[i]);
        for (int j = 0; j < profiles[i].Length; j++) {
            profiles[i][j] -= mean;
        }
    }

    // Column-wise median over the valid entries of the centred reference rows.
    double[] totalProfile = new double[mdata.ColumnCount];
    for (int i = 0; i < totalProfile.Length; i++) {
        List<double> vals = new List<double>();
        foreach (double[] t in profiles) {
            double val = t[i];
            if (double.IsNaN(val) || double.IsInfinity(val)) {
                continue;
            }
            vals.Add(val);
        }
        totalProfile[i] = vals.Count > 0 ? ArrayUtils.Median(vals) : double.NaN;
    }

    for (int i = 0; i < mdata.RowCount; i++) {
        for (int j = 0; j < mdata.ColumnCount; j++) {
            mdata.Values.Set(i, j, mdata.Values.Get(i, j) - totalProfile[j]);
        }
    }
}
/// <summary>
/// Merges the rows listed in <paramref name="rowIdxs"/> into the first listed row. For each column
/// the values of the listed rows are passed to the combiner matching the column type, and the
/// combiner's result is written to the first listed row. The other rows are left as they are.
/// </summary>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="rowIdxs">Rows to combine; nothing happens when the list is empty.</param>
/// <param name="combineNumeric">Combiner used for main values and numeric columns.</param>
/// <param name="combineString">Combiner used for string columns.</param>
/// <param name="combineCategory">Combiner used for categorical columns.</param>
/// <param name="combineMultiNumeric">Combiner used for multi-numeric columns.</param>
public static void CombineRows(this IMatrixData mdata, List<int> rowIdxs, Func<double[], double> combineNumeric, Func<string[], string> combineString, Func<string[][], string[]> combineCategory, Func<double[][], double[]> combineMultiNumeric) {
    if (!rowIdxs.Any()) {
        return;
    }
    int targetRow = rowIdxs[0];

    // Main matrix values.
    for (int c = 0; c < mdata.Values.ColumnCount; c++) {
        BaseVector mainColumn = mdata.Values.GetColumn(c);
        BaseVector picked = mainColumn.SubArray(rowIdxs);
        mdata.Values[targetRow, c] = combineNumeric(ArrayUtils.ToDoubles(picked));
    }

    // Numeric annotation columns.
    for (int c = 0; c < mdata.NumericColumnCount; c++) {
        double[] numericColumn = mdata.NumericColumns[c];
        double[] picked = ArrayUtils.SubArray(numericColumn, rowIdxs);
        numericColumn[targetRow] = combineNumeric(picked);
    }

    // String annotation columns.
    for (int c = 0; c < mdata.StringColumnCount; c++) {
        string[] stringColumn = mdata.StringColumns[c];
        string[] picked = ArrayUtils.SubArray(stringColumn, rowIdxs);
        stringColumn[targetRow] = combineString(picked);
    }

    // Categorical columns are written back through SetCategoryColumnAt after the change.
    for (int c = 0; c < mdata.CategoryColumnCount; c++) {
        string[][] categoryColumn = mdata.GetCategoryColumnAt(c);
        string[][] picked = ArrayUtils.SubArray(categoryColumn, rowIdxs);
        categoryColumn[targetRow] = combineCategory(picked);
        mdata.SetCategoryColumnAt(categoryColumn, c);
    }

    // Multi-numeric annotation columns.
    for (int c = 0; c < mdata.MultiNumericColumnCount; c++) {
        double[][] multiColumn = mdata.MultiNumericColumns[c];
        double[][] picked = ArrayUtils.SubArray(multiColumn, rowIdxs);
        multiColumn[targetRow] = combineMultiNumeric(picked);
    }
}
/// <summary>
/// Computes, for every row, whether the categorical column at <paramref name="catColInd"/>
/// lists <paramref name="word"/> among its terms.
/// </summary>
/// <param name="falseAreIndicated">If true, the returned flags mark the rows that do NOT list the word.</param>
/// <param name="catColInd">Index of the categorical column to inspect.</param>
/// <param name="word">Category term to look for (exact match).</param>
/// <param name="data">Matrix providing the column and the row count.</param>
/// <returns>An array with one flag per matrix row.</returns>
public static bool[] GetIndicatorColumn(bool falseAreIndicated, int catColInd, string word, IMatrixData data) {
    string[][] catCol = data.GetCategoryColumnAt(catColInd);
    bool[] result = new bool[data.RowCount];
    for (int i = 0; i < result.Length; i++) {
        string[] cats = catCol[i];
        // Membership is tested with a linear scan: sorting the row in place only to binary-search
        // it would reorder the caller's terms, and a null row would throw.
        bool contains = false;
        if (cats != null) {
            contains = Array.IndexOf(cats, word) >= 0;
        }
        if (falseAreIndicated) {
            result[i] = !contains;
        } else {
            result[i] = contains;
        }
    }
    return(result);
}
/// <summary>
/// Subtracts a reference profile from every row of the matrix. The reference is built from the
/// rows whose "Indicator column" contains "Value": each such row is mean-centred, and the
/// per-column median of the centred rows (ignoring NaN and infinite entries) is the profile.
/// </summary>
/// <param name="mdata">Matrix whose main values are modified in place.</param>
/// <param name="param">Parameter set supplying "Indicator column" and "Value".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error string when no row carries the requested value.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    string[][] col = mdata.GetCategoryColumnAt(param.GetParam<int>("Indicator column").Value);
    string term = param.GetParam<string>("Value").Value;

    List<int> inds = new List<int>();
    for (int i = 0; i < col.Length; i++){
        if (Contains(col[i], term)){
            inds.Add(i);
        }
    }

    // With no reference rows the profile would consist of NaN only and the final loop would
    // replace every matrix value by NaN; report the problem instead of destroying the data.
    if (inds.Count == 0){
        processInfo.ErrString = "No rows contain the value '" + term + "' in the indicator column.";
        return;
    }

    // Mean-centre each reference row (the mean is deliberately kept as float, as before).
    double[][] profiles = new double[inds.Count][];
    for (int i = 0; i < profiles.Length; i++){
        profiles[i] = ArrayUtils.ToDoubles(mdata.Values.GetRow(inds[i]));
        float mean = (float) ArrayUtils.Mean(profiles[i]);
        for (int j = 0; j < profiles[i].Length; j++){
            profiles[i][j] -= mean;
        }
    }

    // Column-wise median over the valid entries of the centred reference rows.
    double[] totalProfile = new double[mdata.ColumnCount];
    for (int i = 0; i < totalProfile.Length; i++){
        List<double> vals = new List<double>();
        foreach (double[] t in profiles){
            double val = t[i];
            if (double.IsNaN(val) || double.IsInfinity(val)){
                continue;
            }
            vals.Add(val);
        }
        totalProfile[i] = vals.Count > 0 ? ArrayUtils.Median(vals) : double.NaN;
    }

    for (int i = 0; i < mdata.RowCount; i++){
        for (int j = 0; j < mdata.ColumnCount; j++){
            mdata.Values.Set(i, j, mdata.Values.Get(i, j)-(float) totalProfile[j]);
        }
    }
}
/// <summary>
/// Returns the indices of all rows whose entry in the named categorical column contains at
/// least one of the given values.
/// </summary>
/// <param name="data">Matrix to search.</param>
/// <param name="categoryName">Name of the categorical column; resolved through <c>GetIndexOf</c>.</param>
/// <param name="values">Terms to look for.</param>
/// <returns>Matching row indices in ascending order.</returns>
public static int[] GetIndicesOf(IMatrixData data, string categoryName, HashSet<string> values) {
    int index = GetIndexOf(data, categoryName);
    List<int> result = new List<int>();
    int rowCount = data.RowCount;
    if (rowCount == 0){
        // Nothing to scan; the column is not fetched in this case.
        return result.ToArray();
    }
    // Fetch the column once instead of once per row.
    string[][] column = data.GetCategoryColumnAt(index);
    for (int i = 0; i < rowCount; i++){
        string[] terms = column[i];
        if (terms == null){
            continue;
        }
        foreach (string term in terms){
            if (values.Contains(term)){
                result.Add(i);
                break;
            }
        }
    }
    return result.ToArray();
}
/// <summary>
/// Expands rows along the selected multi-numeric and string columns: a row whose selected
/// columns hold several entries (string entries are separated by ';') is replaced by one row per
/// entry. All other columns are repeated for every produced row. The selected multi-numeric
/// columns are passed through <c>Transform</c> and appended to the numeric columns.
/// </summary>
/// <param name="mdata">Matrix that is rebuilt in place through <c>SetData</c>.</param>
/// <param name="param1">Parameter set supplying "Multi-numeric columns" and "String columns".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error string when nothing is selected or <c>GetEntryCount</c> reports an error.</param>
public void ProcessData(IMatrixData mdata, Parameters param1, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] multiNumCols = param1.GetMultiChoiceParam("Multi-numeric columns").Value;
    Array.Sort(multiNumCols);
    int[] stringCols = param1.GetMultiChoiceParam("String columns").Value;
    Array.Sort(stringCols);
    HashSet<int> multinumCols2 = new HashSet<int>(multiNumCols);
    HashSet<int> stringCols2 = new HashSet<int>(stringCols);
    if (multiNumCols.Length + stringCols.Length == 0){
        processInfo.ErrString = "Please select some columns.";
        return;
    }

    // Allocate the expanded containers.
    int rowCount = GetNewRowCount(mdata, multiNumCols, stringCols);
    float[,] expVals = new float[rowCount,mdata.ExpressionColumnCount];
    List<string[]> stringC = new List<string[]>();
    for (int i = 0; i < mdata.StringColumnCount; i++){
        stringC.Add(new string[rowCount]);
    }
    List<double[]> numC = new List<double[]>();
    for (int i = 0; i < mdata.NumericColumnCount; i++){
        numC.Add(new double[rowCount]);
    }
    List<string[][]> catC = new List<string[][]>();
    for (int i = 0; i < mdata.CategoryColumnCount; i++){
        catC.Add(new string[rowCount][]);
    }
    List<double[][]> multiNumC = new List<double[][]>();
    for (int i = 0; i < mdata.MultiNumericColumnCount; i++){
        multiNumC.Add(new double[rowCount][]);
    }

    // Fetch each categorical source column once up front; requesting it inside the
    // row/entry/column loops below would repeat the fetch for every produced cell.
    List<string[][]> sourceCats = new List<string[][]>();
    for (int k = 0; k < mdata.CategoryColumnCount; k++){
        sourceCats.Add(mdata.GetCategoryColumnAt(k));
    }

    int count = 0;
    for (int i = 0; i < mdata.RowCount; i++){
        string err;
        int entryCount = GetEntryCount(i, mdata, multiNumCols, stringCols, out err);
        if (err != null){
            processInfo.ErrString = err;
            return;
        }
        // A row without entries still produces exactly one output row.
        bool empty = entryCount == 0;
        entryCount = Math.Max(entryCount, 1);

        // Columns that are simply repeated for every produced row.
        for (int j = 0; j < entryCount; j++){
            for (int k = 0; k < mdata.ExpressionColumnCount; k++){
                expVals[count + j, k] = mdata[i, k];
            }
            for (int k = 0; k < mdata.NumericColumnCount; k++){
                numC[k][count + j] = mdata.NumericColumns[k][i];
            }
            for (int k = 0; k < sourceCats.Count; k++){
                catC[k][count + j] = sourceCats[k][i];
            }
        }

        for (int k = 0; k < mdata.MultiNumericColumnCount; k++){
            if (multinumCols2.Contains(k)){
                if (empty){
                    multiNumC[k][count] = new double[0];
                } else{
                    // Selected column: one single-valued entry per produced row.
                    double[] vals = mdata.MultiNumericColumns[k][i];
                    for (int j = 0; j < entryCount; j++){
                        multiNumC[k][count + j] = new[]{vals[j]};
                    }
                }
            } else{
                for (int j = 0; j < entryCount; j++){
                    multiNumC[k][count + j] = mdata.MultiNumericColumns[k][i];
                }
            }
        }

        for (int k = 0; k < mdata.StringColumnCount; k++){
            if (stringCols2.Contains(k)){
                if (empty){
                    stringC[k][count] = "";
                } else{
                    // Selected column: one ';'-separated part per produced row.
                    string[] vals = mdata.StringColumns[k][i].Split(';');
                    for (int j = 0; j < entryCount; j++){
                        stringC[k][count + j] = vals[j];
                    }
                }
            } else{
                for (int j = 0; j < entryCount; j++){
                    stringC[k][count + j] = mdata.StringColumns[k][i];
                }
            }
        }
        count += entryCount;
    }

    // The expanded multi-numeric columns move to the numeric columns; the rest stay multi-numeric.
    int[] multiNumComplement = ArrayUtils.Complement(multiNumCols, mdata.MultiNumericColumnCount);
    List<double[][]> toBeTransformed = ArrayUtils.SubList(multiNumC, multiNumCols);
    multiNumC = ArrayUtils.SubList(multiNumC, multiNumComplement);
    foreach (double[][] d in toBeTransformed){
        numC.Add(Transform(d));
    }
    mdata.SetData(mdata.Name, mdata.ExpressionColumnNames, expVals, mdata.StringColumnNames, stringC,
        mdata.CategoryColumnNames, catC,
        new List<string>(ArrayUtils.Concat(mdata.NumericColumnNames,
            ArrayUtils.SubList(mdata.MultiNumericColumnNames, multiNumCols))), numC,
        new List<string>(ArrayUtils.SubArray(mdata.MultiNumericColumnNames, multiNumComplement)), multiNumC);
}
/// <summary>
/// Adds a categorical column that marks with "+" every row in which one of the selected
/// categorical or string columns contains one of the search terms (case-insensitive substring
/// match). With "Inverse" set, the rows without a hit are marked instead.
/// </summary>
/// <param name="mdata">Matrix that is read and extended in place.</param>
/// <param name="param">Parameter set supplying the column name, the columns, the inverse flag and the search terms.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error string when no search term is given.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    string colName = param.GetStringParam("Name of new column").Value;
    int[] columns = param.GetMultiChoiceParam("Categories").Value;
    bool inverse = param.GetBoolParam("Inverse").Value;
    int[] catCols;
    int[] stringCols;
    Split(columns, out catCols, out stringCols, mdata.CategoryColumnCount);
    string[] word1 = param.GetMultiStringParam("Search terms").Value;
    if (word1.Length == 0){
        processInfo.ErrString = "Please specify one or more search terms.";
        return;
    }
    if (string.IsNullOrEmpty(colName)){
        colName = word1[0];
    }

    // Fold case with the invariant culture so the result does not depend on the machine locale.
    string[] word = new string[word1.Length];
    for (int i = 0; i < word.Length; i++){
        word[i] = word1[i].ToLowerInvariant().Trim();
    }

    bool[] indicator = new bool[mdata.RowCount];
    foreach (int col in catCols){
        string[][] cat = mdata.GetCategoryColumnAt(col);
        for (int i = 0; i < cat.Length; i++){
            // Rows that are already flagged need no further checks.
            if (indicator[i] || cat[i] == null){
                continue;
            }
            foreach (string s in cat[i]){
                if (ContainsAnyTerm(s, word)){
                    indicator[i] = true;
                    break;
                }
            }
        }
    }
    foreach (string[] txt in stringCols.Select(col => mdata.StringColumns[col])){
        for (int i = 0; i < txt.Length; i++){
            if (!indicator[i] && ContainsAnyTerm(txt[i], word)){
                indicator[i] = true;
            }
        }
    }

    string[][] newCol = new string[indicator.Length][];
    for (int i = 0; i < newCol.Length; i++){
        bool yes = inverse ? !indicator[i] : indicator[i];
        newCol[i] = yes ? new[]{"+"} : new string[0];
    }
    mdata.AddCategoryColumn(colName, "", newCol);
}

/// <summary>Tells whether the text contains any of the already lower-cased terms, ignoring case; null text never matches.</summary>
private static bool ContainsAnyTerm(string text, string[] lowerCasedTerms) {
    if (text == null){
        return false;
    }
    // Lower-case the text once rather than once per term.
    string lowered = text.ToLowerInvariant();
    foreach (string term in lowerCasedTerms){
        if (lowered.Contains(term)){
            return true;
        }
    }
    return false;
}
/// <summary>
/// Collapses rows into groups derived from the chosen "ID column": the IDs are split with
/// <c>SplitIds</c>, rows with IDs are grouped by <c>ClusterRows</c>, and every column is replaced
/// by one value per group computed with the matching <c>Average</c> overload. With
/// "Keep rows without ID" set, <c>ProlongRowInds</c> extends the groups with the rows lacking an ID.
/// </summary>
/// <param name="mdata">Matrix whose columns are replaced in place.</param>
/// <param name="param">Parameter set supplying the ID column, the keep flag and the average types.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    bool keepEmpty = param.GetBoolParam("Keep rows without ID").Value;
    AverageType expressionAverage = GetAverageType(param.GetSingleChoiceParam("Average type for expression columns").Value);
    string[] rawIds = mdata.StringColumns[param.GetSingleChoiceParam("ID column").Value];
    string[][] ids = SplitIds(rawIds);

    int[] present;
    int[] absent;
    GetPresentAbsentIndices(ids, out present, out absent);
    ids = ArrayUtils.SubArray(ids, present);

    // Start with one single-row group per row that has an ID, then let ClusterRows merge them.
    int[][] groups = new int[present.Length][];
    for (int g = 0; g < groups.Length; g++){
        groups[g] = new[]{present[g]};
    }
    ClusterRows(ref groups, ref ids);
    if (keepEmpty){
        groups = ProlongRowInds(groups, absent);
    }

    int groupCount = groups.Length;
    int expressionCount = mdata.ExpressionColumnCount;

    // Expression values.
    float[,] collapsed = new float[groupCount,expressionCount];
    for (int col = 0; col < expressionCount; col++){
        float[] source = mdata.GetExpressionColumn(col);
        for (int g = 0; g < groupCount; g++){
            float[] members = ArrayUtils.SubArray(source, groups[g]);
            collapsed[g, col] = Average(members, expressionAverage);
        }
    }
    mdata.ExpressionValues = collapsed;

    // Numeric columns, each with its own average type looked up by column name.
    for (int col = 0; col < mdata.NumericColumnCount; col++){
        string columnName = mdata.NumericColumnNames[col];
        AverageType columnAverage = GetAverageType(param.GetSingleChoiceParam("Average type for " + columnName).Value);
        double[] source = mdata.NumericColumns[col];
        double[] replacement = new double[groupCount];
        for (int g = 0; g < groupCount; g++){
            double[] members = ArrayUtils.SubArray(source, groups[g]);
            replacement[g] = Average(members, columnAverage);
        }
        mdata.NumericColumns[col] = replacement;
    }

    // Categorical columns.
    for (int col = 0; col < mdata.CategoryColumnCount; col++){
        string[][] source = mdata.GetCategoryColumnAt(col);
        string[][] replacement = new string[groupCount][];
        for (int g = 0; g < groupCount; g++){
            string[][] members = ArrayUtils.SubArray(source, groups[g]);
            replacement[g] = Average(members);
        }
        mdata.SetCategoryColumnAt(replacement,col);
    }

    // String columns.
    for (int col = 0; col < mdata.StringColumnCount; col++){
        string[] source = mdata.StringColumns[col];
        string[] replacement = new string[groupCount];
        for (int g = 0; g < groupCount; g++){
            string[] members = ArrayUtils.SubArray(source, groups[g]);
            replacement[g] = Average(members);
        }
        mdata.StringColumns[col] = replacement;
    }

    // Multi-numeric columns.
    for (int col = 0; col < mdata.MultiNumericColumnCount; col++){
        double[][] source = mdata.MultiNumericColumns[col];
        double[][] replacement = new double[groupCount][];
        for (int g = 0; g < groupCount; g++){
            double[][] members = ArrayUtils.SubArray(source, groups[g]);
            replacement[g] = Average(members);
        }
        mdata.MultiNumericColumns[col] = replacement;
    }
}
/// <summary>
/// Returns the categorical columns addressed by <paramref name="inds"/>, keeping the order of the indices.
/// </summary>
/// <param name="mdata">Matrix to read the columns from.</param>
/// <param name="inds">Indices of the categorical columns to fetch.</param>
/// <returns>One entry per index, each obtained through <c>GetCategoryColumnAt</c>.</returns>
public static List<string[][]> GetCategoryColumns(IMatrixData mdata, IList<int> inds) {
    List<string[][]> fetched = new List<string[][]>();
    int position = 0;
    while (position < inds.Count){
        fetched.Add(mdata.GetCategoryColumnAt(inds[position]));
        position++;
    }
    return fetched;
}
/// <summary>
/// Reads the "Min. count", "Selection", "Value" and "Categories" parameters, flags the rows whose
/// selection column lists the requested value, counts the category terms and passes the counts
/// on to <c>CreateMatrixData</c>.
/// </summary>
/// <param name="mdata">Matrix whose categorical columns are counted.</param>
/// <param name="param">Parameter set supplying the four values listed above.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int minCount = param.GetIntParam("Min. count").Value;
    int selCol = param.GetSingleChoiceParam("Selection").Value;
    string value = param.GetStringParam("Value").Value;
    int[] catIndices = param.GetMultiChoiceParam("Categories").Value;

    // No selection is built when the chosen index lies beyond the categorical columns.
    bool[] selection = null;
    if (selCol < mdata.CategoryColumnCount){
        selection = new bool[mdata.RowCount];
        string[][] selectionColumn = mdata.GetCategoryColumnAt(selCol);
        for (int row = 0; row < selection.Length; row++){
            string[] terms = selectionColumn[row];
            if (terms == null){
                continue;
            }
            // Advance to the first term equal to the value, or to the end of the row.
            int pos = 0;
            while (pos < terms.Length && !terms[pos].Equals(value)){
                pos++;
            }
            if (pos < terms.Length){
                selection[row] = true;
            }
        }
    }

    CountingResult counts = CountCategories(mdata, selection, selCol, catIndices);
    CreateMatrixData(counts, mdata, minCount, selection);
}
// Fills the five ref lists (ms runs, study variables, assays, samples, instruments) from up to four
// input matrices. Any list passed in as null is replaced by a new empty list first. The work is done
// in the regions marked in the body:
//   1. "parse experiment": for every row of `experiment`, reads the study variable, assay, ms run and
//      sample text columns, splits the "<...;...>" suffixes with the three regexes, and creates or
//      re-uses the matching StudyVariable / Assay / MsRunImpl / Sample objects.
//   2. "parse experimentalDesign": pairs raw files with experiment names, creating missing study
//      variables and ms runs, and records the raw files per study-variable id in `dictionary`.
//      Throws a plain Exception when either column cannot be found.
//   3. "add default samples from studyvariables": gives every study variable a sample, re-using a
//      sample whose description is contained in the variable's description.
//   4. "parse summary": reads the labels0/labels1/labels2, multiplicity, raw file and instrument
//      columns; when label columns imply multiplicity "2" or "3" the study variables are rebuilt
//      from SILAC(...); then one, two or three assays are added per raw file, plus an Instrument
//      entry when an instrument name is present.
//   5. "parse search": adds one ms run per unique spectra-reference location, or a single ms run
//      without a location when no such column is available.
// NOTE(review): in region 1, when `filename` is empty or already known, msruns.First(...) is used for
// the lookup; First throws if no element matches - confirm this cannot happen for an empty filename.
// NOTE(review): in region 4, `multiplicity`, `instrument` and `msruns` are all indexed with the raw-file
// index `i`, although each array was filtered for empty entries on its own - confirm the lengths agree.
protected void GetExperminetValues(IMatrixData summary, IMatrixData experimentalDesignTemplate, IMatrixData experiment, IMatrixData spectraRef, ref List<MsRunImpl> msruns, ref List<StudyVariable> studyvariables, ref List<Assay> assays, ref List<Sample> samples, ref List<Instrument> instruments) { if (msruns == null) { msruns = new List<MsRunImpl>(); } if (studyvariables == null) { studyvariables = new List<StudyVariable>(); } if (assays == null) { assays = new List<Assay>(); } if (samples == null) { samples = new List<Sample>(); } if (instruments == null) { instruments = new List<Instrument>(); } #region parse experiment if (experiment != null) { int studyvarIndex = experiment.StringColumnNames.IndexOf(MetadataElement.STUDY_VARIABLE.Name); int assayIndex = experiment.StringColumnNames.IndexOf(MetadataElement.ASSAY.Name); int msrunIndex = experiment.StringColumnNames.IndexOf(MetadataElement.MS_RUN.Name); int sampleIndex = experiment.StringColumnNames.IndexOf(MetadataElement.SAMPLE.Name); Regex sampleRegex = new Regex(@"^([^\[]+) <([^;]*);([^;]*);([^;]*);([^;]*)>"); Regex runRegex = new Regex(@"^([^\[]+) <([^;]*);([^;]*);([^;]*);([^;]*)>"); Regex assayRegex = new Regex(@"^([^\[]+) <([^>]*)>"); for (int row = 0; row < experiment.RowCount; row++) { string studyvariableDescription = experiment.StringColumns[studyvarIndex][row]; string assayReagent = experiment.StringColumns[assayIndex][row]; string msrunText = experiment.StringColumns[msrunIndex][row]; string sampleDescription = experiment.StringColumns[sampleIndex][row]; Lib.Model.Param specie = null; Lib.Model.Param tissue = null; Lib.Model.Param cellType = null; Lib.Model.Param disease = null; IList<Lib.Model.Param> mod = new List<Lib.Model.Param>(); if (sampleDescription != null && sampleRegex.IsMatch(sampleDescription)) { var match = sampleRegex.Match(sampleDescription); sampleDescription = match.Groups[1].Value; string temp = match.Groups[2].Value; if (!String.IsNullOrEmpty(temp)) { specie = cv.GetParam(temp, 
"NEWT"); } temp = match.Groups[3].Value; if (!String.IsNullOrEmpty(temp)) { tissue = cv.GetParam(temp, "BTO"); } temp = match.Groups[4].Value; if (!String.IsNullOrEmpty(temp)) { cellType = cv.GetParam(temp, "CL"); } temp = match.Groups[5].Value; if (!String.IsNullOrEmpty(temp)) { disease = cv.GetParam(temp, "DOID"); } } if (assayRegex != null && assayRegex.IsMatch(assayReagent)) { var match = assayRegex.Match(assayReagent); string temp = match.Groups[2].Value; if (!String.IsNullOrEmpty(temp)) { foreach (var t in temp.Split(';')) { mod.Add(cv.GetParam(t, "PRIDE")); } } assayReagent = match.Groups[1].Value; } string filename = null; string path = null; Lib.Model.Param format = null; Lib.Model.Param idformat = null; Lib.Model.Param fragementaion = null; if (runRegex != null && runRegex.IsMatch(msrunText)) { var match = runRegex.Match(msrunText); filename = match.Groups[1].Value; string temp = match.Groups[2].Value; if (!String.IsNullOrEmpty(temp)) { path = temp; } temp = match.Groups[3].Value; if (!String.IsNullOrEmpty(temp)) { format = cv.GetParam(temp, "MS"); } temp = match.Groups[4].Value; if (!String.IsNullOrEmpty(temp)) { idformat = cv.GetParam(temp, "MS"); } temp = match.Groups[5].Value; if (!String.IsNullOrEmpty(temp)) { fragementaion = cv.GetParam(temp, "MS"); } } StudyVariable studyvariable; if (!studyvariables.Any(x => x.Description.Equals(studyvariableDescription))) { studyvariable = new StudyVariable(studyvariables.Count + 1) { Description = studyvariableDescription }; studyvariables.Add(studyvariable); } else { studyvariable = studyvariables.First(x => x.Description.Equals(studyvariableDescription)); } Assay assay = new Assay(assays.Count + 1) { QuantificationReagent = cv.GetParam(assayReagent, "PRIDE") }; foreach (var m in mod) { if (m == null) { continue; } assay.addQuantificationMod(new AssayQuantificationMod(assay, assay.QuantificationModMap.Count + 1) { Param = m }); } assays.Add(assay); MsRunImpl msrun; if (!String.IsNullOrEmpty(filename) && 
!msruns.Any(x => x.Description != null && x.Description.Equals(filename))) { msrun = new MsRunImpl(msruns.Count + 1) { Format = format, IdFormat = idformat, FragmentationMethod = fragementaion }; msruns.Add(msrun); msrun.Location = new Url(String.IsNullOrEmpty(path) ? filename : Path.Combine(path, filename)); } else { msrun = msruns.First(x => x.Description != null && x.Description.Equals(filename)); } Sample sample; if (!samples.Any(x => x.Description.Equals(sampleDescription))) { sample = new Sample(samples.Count + 1) { Description = sampleDescription }; if (specie != null) { sample.AddSpecies(specie); } if (tissue != null) { sample.AddTissue(tissue); } if (cellType != null) { sample.AddCellType(cellType); } if (disease != null) { sample.AddDisease(disease); } samples.Add(sample); } else { sample = samples.First(x => x.Description.Equals(sampleDescription)); } if (!studyvariable.AssayMap.ContainsKey(assay.Id)) { studyvariable.AddAssay(assay); } if (!studyvariable.SampleMap.ContainsKey(sample.Id)) { studyvariable.AddSample(sample); } assay.MsRun = msrun; assay.Sample = sample; } } #endregion Dictionary<int, IList<string>> dictionary = new Dictionary<int, IList<string>>(); #region parse experimentalDesign if (experimentalDesignTemplate != null) { string[] rawfiles = null; int index = Constants.GetKeywordIndex(experimentalDesign.rawfile, experimentalDesignTemplate.StringColumnNames); if (index != -1) { rawfiles = experimentalDesignTemplate.StringColumns[index]; } string[] experimentNames = null; if ( (index = Constants.GetKeywordIndex(experimentalDesign.variable, experimentalDesignTemplate.StringColumnNames)) != -1) { experimentNames = experimentalDesignTemplate.StringColumns[index]; } else if ( (index = Constants.GetKeywordIndex(experimentalDesign.variable, experimentalDesignTemplate.CategoryColumnNames)) != -1) { experimentNames = MzTabMatrixUtils.ConvertToStringArray(experimentalDesignTemplate.GetCategoryColumnAt(index)); } if (rawfiles != null && experimentNames 
!= null) { for (int i = 0; i < rawfiles.Length && i < experimentNames.Length; i++) { string name = experimentNames[i]; StudyVariable variable = studyvariables.FirstOrDefault(x => x.Description.Equals(name)); if (variable == null) { variable = new StudyVariable(studyvariables.Count + 1) { Description = name }; studyvariables.Add(variable); } string rawfile = rawfiles[i]; MsRunImpl runImpl = msruns.FirstOrDefault(x => x.Description.Equals(rawfile)); if (runImpl == null) { runImpl = new MsRunImpl(msruns.Count + 1) { Location = new Url(rawfile), Format = cv.GetParam("MS:1000563", "MS"), IdFormat = cv.GetParam("MS:1000768", "MS") }; msruns.Add(runImpl); } if (rawfile != null) { if (!dictionary.ContainsKey(variable.Id)) { dictionary.Add(variable.Id, new List<string>()); } dictionary[variable.Id].Add(rawfile); } } } else { Console.Out.WriteLine("Rawfiles " + rawfiles); Console.Out.WriteLine("experimentNames " + experimentNames); throw new Exception("Could not parse " + Matrix.ExperimentalDesign); } } #endregion #region add default samples from studyvariables if (studyvariables != null && studyvariables.Count > 0) { foreach (StudyVariable variable in studyvariables) { string text = variable.Description; Sample sample = samples.FirstOrDefault(x => text.Contains(x.Description)); if (sample == null) { sample = new Sample(samples.Count + 1) { Description = text }; samples.Add(sample); } variable.AddSample(sample); } } #endregion #region parse summary if (summary != null) { int maxRow = msruns.Count; string multi = "1"; string[] labels0 = null; int index; if ((index = Constants.GetKeywordIndex(Utils.summary.labels0, summary.StringColumnNames)) != -1) { labels0 = summary.StringColumns[index]; multi = "1"; } else if ((index = Constants.GetKeywordIndex(Utils.summary.labels0, summary.CategoryColumnNames)) != -1) { labels0 = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index)); multi = "1"; } string[] labels1 = null; if ((index = 
Constants.GetKeywordIndex(Utils.summary.labels1, summary.StringColumnNames)) != -1) { labels1 = summary.StringColumns[index]; multi = "2"; } else if ((index = Constants.GetKeywordIndex(Utils.summary.labels1, summary.CategoryColumnNames)) != -1) { labels1 = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index)); multi = "2"; } string[] labels2 = null; if ((index = Constants.GetKeywordIndex(Utils.summary.labels2, summary.StringColumnNames)) != -1) { labels2 = summary.StringColumns[index]; multi = "3"; } else if ((index = Constants.GetKeywordIndex(Utils.summary.labels2, summary.CategoryColumnNames)) != -1) { labels2 = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index)); multi = "3"; } string[] multiplicity; if ((index = Constants.GetKeywordIndex(Utils.summary.multiplicity, summary.StringColumnNames)) != -1) { multiplicity = summary.StringColumns[index]; multiplicity = multiplicity.Where(x => !String.IsNullOrEmpty(x)).ToArray(); } else if ( (index = Constants.GetKeywordIndex(Utils.summary.multiplicity, summary.CategoryColumnNames)) != -1) { multiplicity = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index)); multiplicity = multiplicity.Where(x => !String.IsNullOrEmpty(x)).ToArray(); } else { multiplicity = new string[maxRow]; for (int i = 0; i < multiplicity.Length; i++) { multiplicity[i] = multi; } } string[] labels; switch (multi) { case "1": labels = null; break; case "2": labels = new[] { "L", "H" }; break; case "3": labels = new[] { "L", "H", "M" }; break; default: labels = null; break; } if (labels != null) { List<StudyVariable> list = new List<StudyVariable>(); Dictionary<int, IList<string>> dict = new Dictionary<int, IList<string>>(); foreach (StudyVariable studyVariable in studyvariables){ foreach (var variable in SILAC(studyVariable, labels)) { IList<string> rawfile = null; if (dictionary.ContainsKey(variable.Id)) { rawfile = dictionary[variable.Id]; } StudyVariable tmp = new 
StudyVariable(list.Count + 1){Description = variable.Description}; tmp.AddAllAssays(variable.AssayMap.Values.ToList()); tmp.AddAllSamples(variable.SampleMap.Values.ToList()); list.Add(tmp); if (rawfile != null) { if (!dict.ContainsKey(tmp.Id)) { dict.Add(tmp.Id, rawfile); } } } } studyvariables = list; dictionary = dict; } string[] rawfiles = null; if ((index = Constants.GetKeywordIndex(Utils.summary.rawfile, summary.StringColumnNames)) != -1) { rawfiles = summary.StringColumns[index]; rawfiles = rawfiles.Where(x => !String.IsNullOrEmpty(x)).ToArray(); } else if ((index = Constants.GetKeywordIndex(Utils.summary.rawfile, summary.CategoryColumnNames)) != -1) { rawfiles = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index)); rawfiles = rawfiles.Where(x => !String.IsNullOrEmpty(x)).ToArray(); } string[] orbitrapInstruments = new[] { "LTQ Orbitrap", "LTQ Orbitrap XL", "LTQ Orbitrap Velos", "LTQ Orbitrap Elite", "Q Exactive" }; string[] instrument = null; if ((index = Constants.GetKeywordIndex(Utils.summary.instrument, summary.StringColumnNames)) != -1) { instrument = summary.StringColumns[index]; instrument = instrument.Where(x => !String.IsNullOrEmpty(x)).ToArray(); } else if ((index = Constants.GetKeywordIndex(Utils.summary.instrument, summary.CategoryColumnNames)) != -1) { instrument = MzTabMatrixUtils.ConvertToStringArray(summary.GetCategoryColumnAt(index)); instrument = instrument.Where(x => !String.IsNullOrEmpty(x)).ToArray(); } if (rawfiles != null) { for (int i = 0; i < rawfiles.Length; i++) { int id = assays.Count + 1; string rawfile = rawfiles[i]; if (!dictionary.Values.Any(x => x.Contains(rawfile))) { continue; } IList<StudyVariable> temp = new List<StudyVariable>(); foreach (var v in dictionary.Where(x => x.Value.Contains(rawfile))) { temp.Add(studyvariables.FirstOrDefault(x => x.Id == v.Key)); } StudyVariable variable1 = null; StudyVariable variable2 = null; StudyVariable variable3 = null; if (temp != null) { if (temp.Any()) { variable1 
= temp[0]; } if (temp.Count() > 1) { variable2 = temp[1]; } if (temp.Count() > 2) { variable3 = temp[2]; } } if (multiplicity[i].Equals("1")) { #region Add assay for label free Assay assay = new Assay(id) { QuantificationReagent = cv.GetParam("Unlabeled sample", "PRIDE"), MsRun = msruns[i] }; if (variable1 != null) { assay.Sample = variable1.SampleMap.Values.FirstOrDefault(); variable1.AddAssay(assay); } assays.Add(assay); #endregion } else if (multiplicity[i].Equals("2")) { #region Add assays for Double SILAC labeling Assay assay = new Assay(id) { QuantificationReagent = cv.GetParam("SILAC light", "PRIDE"), MsRun = msruns[i] }; IList<AssayQuantificationMod> mods = MzTabMatrixUtils.GetQuantificationMod(labels0, i, assay); if (mods != null) { foreach (var m in mods) { assay.addQuantificationMod(m); } } if (variable1 != null) { assay.Sample = variable1.SampleMap.Values.FirstOrDefault(); variable1.AddAssay(assay); } assays.Add(assay); assay = new Assay(id + 1) { QuantificationReagent = cv.GetParam("SILAC heavy", "PRIDE"), MsRun = msruns[i] }; mods = MzTabMatrixUtils.GetQuantificationMod(labels1, i, assay); if (mods != null) { foreach (var m in mods) { assay.addQuantificationMod(m); } } if (variable2 != null) { assay.Sample = variable2.SampleMap.Values.FirstOrDefault(); variable2.AddAssay(assay); } assays.Add(assay); #endregion } else if (multiplicity[i].Equals("3")) { #region Add assays for Triple SILAC labeling Assay assay = new Assay(id) { QuantificationReagent = cv.GetParam("SILAC light", "PRIDE"), MsRun = msruns[i] }; IList<AssayQuantificationMod> mods = MzTabMatrixUtils.GetQuantificationMod(labels0, i, assay); if (mods != null) { foreach (var m in mods) { assay.addQuantificationMod(m); } } if (variable1 != null) { assay.Sample = variable1.SampleMap.Values.FirstOrDefault(); variable1.AddAssay(assay); } assays.Add(assay); assay = new Assay(id + 1) { QuantificationReagent = cv.GetParam("SILAC medium", "PRIDE"), MsRun = msruns[i] }; mods = 
MzTabMatrixUtils.GetQuantificationMod(labels1, i, assay); if (mods != null) { foreach (var m in mods) { assay.addQuantificationMod(m); } } if (variable2 != null) { assay.Sample = variable2.SampleMap.Values.FirstOrDefault(); variable2.AddAssay(assay); } assays.Add(assay); assay = new Assay(id + 2) { QuantificationReagent = cv.GetParam("SILAC heavy", "PRIDE"), MsRun = msruns[i] }; mods = MzTabMatrixUtils.GetQuantificationMod(labels2, i, assay); if (mods != null) { foreach (var m in mods) { assay.addQuantificationMod(m); } } if (variable3 != null) { assay.Sample = variable3.SampleMap.Values.FirstOrDefault(); variable3.AddAssay(assay); } assays.Add(assay); #endregion } if (instrument != null && !String.IsNullOrEmpty(instrument[i])) { var tmp = new Instrument(instruments.Count + 1) { Name = cv.GetParam(instrument[i], "MS") }; if (orbitrapInstruments.Contains(instrument[i])) { tmp.Source = cv.GetParam("electrospray ionization", "MS"); tmp.Analyzer = cv.GetParam("orbitrap", "MS"); } instruments.Add(tmp); } } } } #endregion #region parse search Lib.Model.Param run_idFormat = cv.GetParam("MS:1000774", "MS"); Lib.Model.Param run_format = cv.GetParam("Andromeda Peak list file", "MS"); if (spectraRef != null && Constants.GetKeywordName(Utils.spectraRef.location, spectraRef.StringColumnNames) != null) { int colindex = Constants.GetKeywordIndex(Utils.spectraRef.location, spectraRef.StringColumnNames); string[] values = ArrayUtils.UniqueValues(spectraRef.StringColumns[colindex]); for (int i = 0; i < values.Length; i++) { Lib.Model.Param frag = null; if (values[i].Contains("CID")) { frag = cv.GetParam("MS:1000133", "MS"); } else if (values[i].Contains("HCD")) { frag = cv.GetParam("MS:1000422", "MS"); } msruns.Add(new MsRunImpl(msruns.Count + 1) { IdFormat = run_idFormat, Format = run_format, FragmentationMethod = frag, Location = new Url(values[i]) }); } } else { msruns.Add(new MsRunImpl(msruns.Count + 1) { IdFormat = run_idFormat, Format = run_format }); } #endregion }
/// <summary>
/// Writes the matrix to a tab-separated text file. The output consists of a header line with the
/// (truncated) column names, an optional <c>#!{Description}</c> line, a <c>#!{Type}</c> line
/// (E/C/N/T/M per column), one <c>#!{N:name}</c> line per numeric annotation row, one
/// <c>#!{C:name}</c> line per category annotation row and finally one line per data row.
/// Columns are always emitted in the order expression, category, numeric, string, multi-numeric.
/// </summary>
/// <param name="parameters">Provides the output path through its "File name" file parameter.</param>
/// <param name="data">Matrix whose column metadata, annotation rows and values are written.</param>
/// <param name="processInfo">Receives the exception message in <c>ErrString</c> if the file cannot be opened.</param>
public void Export(Parameters parameters, IMatrixData data, ProcessInfo processInfo) {
    string filename = parameters.GetFileParam("File name").Value;
    StreamWriter writer;
    try {
        writer = new StreamWriter(filename);
    } catch (Exception e) {
        processInfo.ErrString = e.Message;
        return;
    }
    // 'using' guarantees the file handle is released even if one of the writes below throws;
    // previously the writer was only closed when every write succeeded.
    using (writer) {
        // Header line: column names.
        List<string> words = new List<string>();
        for (int i = 0; i < data.ExpressionColumnCount; i++) {
            words.Add(Trunc(data.ExpressionColumnNames[i]));
        }
        for (int i = 0; i < data.CategoryColumnCount; i++) {
            words.Add(Trunc(data.CategoryColumnNames[i]));
        }
        for (int i = 0; i < data.NumericColumnCount; i++) {
            words.Add(Trunc(data.NumericColumnNames[i]));
        }
        for (int i = 0; i < data.StringColumnCount; i++) {
            words.Add(Trunc(data.StringColumnNames[i]));
        }
        for (int i = 0; i < data.MultiNumericColumnCount; i++) {
            words.Add(Trunc(data.MultiNumericColumnNames[i]));
        }
        writer.WriteLine(StringUtils.Concat("\t", words));

        // Optional description line; missing descriptions are written as empty fields.
        if (HasAnyDescription(data)) {
            words = new List<string>();
            for (int i = 0; i < data.ExpressionColumnCount; i++) {
                words.Add(Trunc(data.ExpressionColumnDescriptions[i] ?? ""));
            }
            for (int i = 0; i < data.CategoryColumnCount; i++) {
                words.Add(Trunc(data.CategoryColumnDescriptions[i] ?? ""));
            }
            for (int i = 0; i < data.NumericColumnCount; i++) {
                words.Add(Trunc(data.NumericColumnDescriptions[i] ?? ""));
            }
            for (int i = 0; i < data.StringColumnCount; i++) {
                words.Add(Trunc(data.StringColumnDescriptions[i] ?? ""));
            }
            for (int i = 0; i < data.MultiNumericColumnCount; i++) {
                words.Add(Trunc(data.MultiNumericColumnDescriptions[i] ?? ""));
            }
            writer.WriteLine("#!{Description}" + StringUtils.Concat("\t", words));
        }

        // Type line: one letter per column identifying its kind.
        words = new List<string>();
        for (int i = 0; i < data.ExpressionColumnCount; i++) {
            words.Add("E");
        }
        for (int i = 0; i < data.CategoryColumnCount; i++) {
            words.Add("C");
        }
        for (int i = 0; i < data.NumericColumnCount; i++) {
            words.Add("N");
        }
        for (int i = 0; i < data.StringColumnCount; i++) {
            words.Add("T");
        }
        for (int i = 0; i < data.MultiNumericColumnCount; i++) {
            words.Add("M");
        }
        writer.WriteLine("#!{Type}" + StringUtils.Concat("\t", words));

        // Annotation rows only carry values for expression columns; every other column gets an empty field.
        int annotationPadding = data.CategoryColumnCount + data.NumericColumnCount + data.StringColumnCount +
            data.MultiNumericColumnCount;

        for (int i = 0; i < data.NumericRowCount; i++) {
            words = new List<string>();
            for (int j = 0; j < data.ExpressionColumnCount; j++) {
                words.Add("" + data.NumericRows[i][j]);
            }
            for (int j = 0; j < annotationPadding; j++) {
                words.Add("");
            }
            writer.WriteLine("#!{N:" + data.NumericRowNames[i] + "}" + StringUtils.Concat("\t", words));
        }

        for (int i = 0; i < data.CategoryRowCount; i++) {
            words = new List<string>();
            // Fetch the annotation row once instead of once per expression column.
            var categoryRow = data.GetCategoryRowAt(i);
            for (int j = 0; j < data.ExpressionColumnCount; j++) {
                string[] terms = categoryRow[j];
                words.Add(terms.Length == 0 ? "" : StringUtils.Concat(";", terms));
            }
            for (int j = 0; j < annotationPadding; j++) {
                words.Add("");
            }
            writer.WriteLine("#!{C:" + data.CategoryRowNames[i] + "}" + StringUtils.Concat("\t", words));
        }

        // Fetch every category column once up front instead of once per cell inside the row loop.
        string[][][] categoryColumns = new string[data.CategoryColumnCount][][];
        for (int i = 0; i < categoryColumns.Length; i++) {
            categoryColumns[i] = data.GetCategoryColumnAt(i);
        }

        // Data rows.
        for (int j = 0; j < data.RowCount; j++) {
            words = new List<string>();
            for (int i = 0; i < data.ExpressionColumnCount; i++) {
                words.Add(Trunc("" + data[j, i]));
            }
            for (int i = 0; i < categoryColumns.Length; i++) {
                // A null entry is written like an empty term list.
                string[] terms = categoryColumns[i][j] ?? new string[0];
                words.Add(Trunc((terms.Length > 0 ? StringUtils.Concat(";", terms) : "")));
            }
            for (int i = 0; i < data.NumericColumnCount; i++) {
                words.Add(Trunc("" + data.NumericColumns[i][j]));
            }
            for (int i = 0; i < data.StringColumnCount; i++) {
                words.Add(Trunc(data.StringColumns[i][j]));
            }
            for (int i = 0; i < data.MultiNumericColumnCount; i++) {
                double[] numbers = data.MultiNumericColumns[i][j];
                words.Add(Trunc((numbers.Length > 0 ? StringUtils.Concat(";", numbers) : "")));
            }
            // Double quotes are stripped from the finished line before it is written.
            string line = StringUtils.Concat("\t", words);
            line = line.Replace("\"", "");
            writer.WriteLine(line);
        }
    }
}
/// <summary>
/// Collapses groups of rows into single rows, in place. Rows start out as one group each (only rows
/// reported as "present" by <c>GetPresentAbsentIndices</c> for the split IDs of the chosen ID column),
/// the groups are then merged by <c>ClusterRows</c>, and, if requested, the "absent" rows are appended
/// through <c>ProlongRowInds</c>. Every column of every kind is rebuilt with one value per group using
/// the matching <c>Average</c> overload.
/// </summary>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="param">
/// Reads "Keep rows without ID", "Average type for expression columns", "ID column" and, per numeric
/// column, "Average type for " followed by the column name.
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    bool keepRowsWithoutId = param.GetParam<bool>("Keep rows without ID").Value;
    AverageType expressionAverage = GetAverageType(param.GetParam<int>("Average type for expression columns").Value);
    string[] rawIds = mdata.StringColumns[param.GetParam<int>("ID column").Value];
    string[][] idSets = SplitIds(rawIds);

    int[] rowsWithId;
    int[] rowsWithoutId;
    GetPresentAbsentIndices(idSets, out rowsWithId, out rowsWithoutId);
    idSets = ArrayUtils.SubArray(idSets, rowsWithId);

    // Every row that has an ID starts as its own single-member group.
    int[][] groups = new int[rowsWithId.Length][];
    for (int g = 0; g < groups.Length; g++) {
        groups[g] = new[] { rowsWithId[g] };
    }
    ClusterRows(ref groups, ref idSets);
    if (keepRowsWithoutId) {
        groups = ProlongRowInds(groups, rowsWithoutId);
    }

    int groupCount = groups.Length;
    int columnCount = mdata.ColumnCount;

    // Main (expression) values.
    float[,] collapsedValues = new float[groupCount, columnCount];
    for (int col = 0; col < columnCount; col++) {
        double[] source = ArrayUtils.ToDoubles(mdata.Values.GetColumn(col));
        for (int g = 0; g < groupCount; g++) {
            double[] members = ArrayUtils.SubArray(source, groups[g]);
            collapsedValues[g, col] = (float) Average(members, expressionAverage);
        }
    }
    mdata.Values.Set(collapsedValues);

    // Numeric columns: each has its own average type parameter.
    for (int col = 0; col < mdata.NumericColumnCount; col++) {
        string columnName = mdata.NumericColumnNames[col];
        AverageType columnAverage = GetAverageType(param.GetParam<int>("Average type for " + columnName).Value);
        double[] source = mdata.NumericColumns[col];
        double[] collapsed = new double[groupCount];
        for (int g = 0; g < groupCount; g++) {
            double[] members = ArrayUtils.SubArray(source, groups[g]);
            collapsed[g] = Average(members, columnAverage);
        }
        mdata.NumericColumns[col] = collapsed;
    }

    // Category columns.
    for (int col = 0; col < mdata.CategoryColumnCount; col++) {
        string[][] source = mdata.GetCategoryColumnAt(col);
        string[][] collapsed = new string[groupCount][];
        for (int g = 0; g < groupCount; g++) {
            string[][] members = ArrayUtils.SubArray(source, groups[g]);
            collapsed[g] = Average(members);
        }
        mdata.SetCategoryColumnAt(collapsed, col);
    }

    // String columns.
    for (int col = 0; col < mdata.StringColumnCount; col++) {
        string[] source = mdata.StringColumns[col];
        string[] collapsed = new string[groupCount];
        for (int g = 0; g < groupCount; g++) {
            string[] members = ArrayUtils.SubArray(source, groups[g]);
            collapsed[g] = Average(members);
        }
        mdata.StringColumns[col] = collapsed;
    }

    // Multi-numeric columns.
    for (int col = 0; col < mdata.MultiNumericColumnCount; col++) {
        double[][] source = mdata.MultiNumericColumns[col];
        double[][] collapsed = new double[groupCount][];
        for (int g = 0; g < groupCount; g++) {
            double[][] members = ArrayUtils.SubArray(source, groups[g]);
            collapsed[g] = Average(members);
        }
        mdata.MultiNumericColumns[col] = collapsed;
    }
}
/// <summary>
/// Collects every category column of the matrix, in column order.
/// </summary>
/// <param name="mdata">Matrix to read the category columns from.</param>
/// <returns>A new list holding the result of <c>GetCategoryColumnAt</c> for each column index.</returns>
public static List<string[][]> GetCategoryColumns(IMatrixData mdata) {
    List<string[][]> columns = new List<string[][]>();
    int next = 0;
    while (next < mdata.CategoryColumnCount) {
        columns.Add(mdata.GetCategoryColumnAt(next));
        next++;
    }
    return columns;
}
/// <summary>
/// Checks <c>UniqueRows</c> on a 4x2 matrix whose id column is a, b, b, b: the three "b" rows must be
/// merged into one, leaving two rows, with the median taken for expression and numeric values and
/// unions taken for string, category and multi-numeric values.
/// </summary>
public void SmallTest() {
    IMatrixData mdata = PerseusFactory.CreateMatrixData(new double[,] { { 0, 4 }, { 1, 5 }, { 2, 6 }, { 3, 7 } });
    mdata.AddStringColumn("id", "", new[] { "a", "b", "b", "b" });
    mdata.AddStringColumn("str", "", new[] { "a;b", "b;c", "c;d", "d;e" });
    mdata.AddCategoryColumn("cat", "",
        new[] { new[] { "a", "b" }, new[] { "b", "c" }, new[] { "c", "d" }, new[] { "d", "e" } });
    // One value per matrix row (the fixture previously carried a stray fifth value for a 4-row matrix).
    // The 'd' suffix makes the array a double[].
    mdata.AddNumericColumn("num", "", new[] { 0, 1, 2, 3d });
    mdata.AddMultiNumericColumn("mnum", "",
        new[] { new[] { 0, 4d }, new[] { 1, 5d }, new[] { 2, 6d }, new[] { 3, 7d } });

    mdata.UniqueRows(mdata.StringColumns[0], ArrayUtils.Median, UniqueRows.Union, UniqueRows.CatUnion,
        UniqueRows.MultiNumUnion);

    Assert.AreEqual(2, mdata.RowCount);
    // Row "a" keeps its own values; row "b" gets the median of rows 1..3.
    CollectionAssert.AreEqual(new[] { 0, 2 }, mdata.Values.GetColumn(0));
    CollectionAssert.AreEqual(new[] { 4, 6 }, mdata.Values.GetColumn(1));
    CollectionAssert.AreEqual(new[] { "a;b", "b;c;d;e" }, mdata.GetStringColumn("str"));
    CollectionAssert.AreEqual(new[] { new[] { "a", "b" }, new[] { "b", "c", "d", "e" } },
        mdata.GetCategoryColumnAt(0));
    CollectionAssert.AreEqual(new[] { 0, 2 }, mdata.NumericColumns[0]);
    CollectionAssert.AreEqual(new[] { new[] { 0d, 4 }, new[] { 1d, 5, 2, 6, 3, 7 } },
        mdata.MultiNumericColumns[0]);
}
/// <summary>
/// Keeps only the rows for which <c>JudgeIfKept</c> accepts the entries of the selected category column
/// against the configured amino-acid string. Aborts with an error, leaving the matrix untouched, as soon
/// as an entry that is not exactly one character long is found.
/// </summary>
/// <param name="mdata">Matrix that is filtered in place via <c>ExtractExpressionRows</c>.</param>
/// <param name="param">Reads "Column with second last AA" and "Amino acids".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error message in <c>ErrString</c> on invalid entries.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int columnIndex = param.GetSingleChoiceParam("Column with second last AA").Value;
    string aminoAcids = param.GetStringParam("Amino acids").Value;
    string[][] column = mdata.GetCategoryColumnAt(columnIndex);
    List<int> keptRows = new List<int>();
    for (int row = 0; row < mdata.RowCount; row++) {
        string[] entries = column[row];
        // Every entry must be a single character before the row is judged.
        foreach (string entry in entries) {
            if (entry.Length == 1) {
                continue;
            }
            processInfo.ErrString = "Some of the entries in column " + mdata.CategoryColumnNames[columnIndex] +
                " do not contain amino acids";
            return;
        }
        if (JudgeIfKept(aminoAcids, entries)) {
            keptRows.Add(row);
        }
    }
    mdata.ExtractExpressionRows(keptRows.ToArray());
}
/// <summary>
/// Filters rows by the terms of one category column. A row "matches" when at least one of its terms is
/// among the selected values. With "Mode" equal to 0 the matching rows are dropped, otherwise only the
/// matching rows are passed on. The surviving row indices are handed to
/// <c>PerseusPluginUtils.FilterRows</c>.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Reads "Column" (with sub-parameter "Values") and "Mode".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">
/// Receives an error in <c>ErrString</c> when no category column is selected or no term is chosen.
/// </param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    SingleChoiceWithSubParams p = param.GetSingleChoiceWithSubParams("Column");
    int colInd = p.Value;
    if (colInd < 0) {
        processInfo.ErrString = "No categorical columns available.";
        return;
    }
    MultiChoiceParam mcp = p.GetSubParameters().GetMultiChoiceParam("Values");
    int[] inds = mcp.Value;
    if (inds.Length == 0) {
        processInfo.ErrString = "Please select at least one term for filtering.";
        return;
    }

    // Look the term list up once; it does not change while the selected indices are resolved.
    var allTerms = mdata.GetCategoryColumnValuesAt(colInd);
    HashSet<string> selectedTerms = new HashSet<string>();
    foreach (int ind in inds) {
        selectedTerms.Add(allTerms[ind]);
    }

    bool remove = param.GetSingleChoiceParam("Mode").Value == 0;
    string[][] cats = mdata.GetCategoryColumnAt(colInd);
    List<int> valids = new List<int>();
    for (int i = 0; i < cats.Length; i++) {
        // 'valid' means the row contains none of the selected terms.
        bool valid = true;
        foreach (string w in cats[i]) {
            if (selectedTerms.Contains(w)) {
                valid = false;
                break;
            }
        }
        // Remove mode keeps the non-matching rows, keep mode keeps the matching ones.
        if (valid == remove) {
            valids.Add(i);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, valids.ToArray());
}
/// <summary>
/// Adds a new category column whose entry for each row is <c>CombineTerms</c> applied to that row's
/// entries in the two selected category columns. The new column is named
/// "&lt;first name&gt;_&lt;second name&gt;" and has an empty description.
/// </summary>
/// <param name="mdata">Matrix that receives the additional category column.</param>
/// <param name="param">Reads "First column" and "Second column".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error in <c>ErrString</c> when fewer than two category columns exist.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    bool enoughColumns = mdata.CategoryColumnCount >= 2;
    if (!enoughColumns) {
        processInfo.ErrString = "There are less than two categorical columns available.";
        return;
    }
    int firstIndex = param.GetSingleChoiceParam("First column").Value;
    int secondIndex = param.GetSingleChoiceParam("Second column").Value;
    string[][] firstColumn = mdata.GetCategoryColumnAt(firstIndex);
    string[][] secondColumn = mdata.GetCategoryColumnAt(secondIndex);

    // One combined entry per row of the first column.
    string[][] combined = new string[firstColumn.Length][];
    for (int row = 0; row < combined.Length; row++) {
        combined[row] = CombineTerms(firstColumn[row], secondColumn[row]);
    }

    string combinedName = mdata.CategoryColumnNames[firstIndex] + "_" + mdata.CategoryColumnNames[secondIndex];
    mdata.AddCategoryColumn(combinedName, "", combined);
}
/// <summary>
/// Returns all category columns of the matrix as a list, ordered by column index.
/// </summary>
/// <param name="mdata">Matrix whose category columns are read.</param>
/// <returns>A new list with one entry per category column.</returns>
private static List<string[][]> GetCategoryColumns(IMatrixData mdata) {
    List<string[][]> columns = new List<string[][]>();
    for (int col = 0; col < mdata.CategoryColumnCount; col++) {
        string[][] column = mdata.GetCategoryColumnAt(col);
        columns.Add(column);
    }
    return columns;
}
/// <summary>
/// Feeds every requested category column, except the selection column itself, into <c>CountTerms</c>
/// and returns the sorted result.
/// </summary>
/// <param name="data">Matrix providing the category column names and contents.</param>
/// <param name="selection">Row selection that is passed through to <c>CountTerms</c> unchanged.</param>
/// <param name="selCol">Index of the selection column; it is skipped if it appears in <paramref name="catIndices"/>.</param>
/// <param name="catIndices">Indices of the category columns to count.</param>
/// <returns>The accumulated counting result after <c>Sort</c> has been called on it.</returns>
public static CountingResult CountCategories(IMatrixData data, bool[] selection, int selCol, int[] catIndices) {
    CountingResult counts = new CountingResult();
    var columnsToCount = catIndices.Where(col => col != selCol);
    foreach (int col in columnsToCount) {
        string columnName = data.CategoryColumnNames[col];
        CountTerms(columnName, data.GetCategoryColumnAt(col), counts, selection);
    }
    counts.Sort();
    return counts;
}
/// <summary>
/// Returns the indices of all category columns that <c>IsInvalidCatColumn</c> does not reject,
/// in ascending order.
/// </summary>
/// <param name="data">Matrix whose category columns are examined.</param>
/// <returns>Array of accepted category column indices.</returns>
private static int[] GetValidCatCols(IMatrixData data) {
    List<int> accepted = new List<int>();
    for (int col = 0; col < data.CategoryColumnCount; col++) {
        if (IsInvalidCatColumn(data.GetCategoryColumnAt(col))) {
            continue;
        }
        accepted.Add(col);
    }
    return accepted.ToArray();
}
/// <summary>
/// Subtracts a reference profile from every row of the expression matrix. The reference is built from
/// the rows whose entry in the indicator column contains the given value: each such row is centered on
/// its own mean, and for every expression column the median of the centered values (ignoring NaN and
/// infinite values) is taken. Columns without any usable value get NaN as reference.
/// </summary>
/// <param name="mdata">Matrix whose expression values are modified in place.</param>
/// <param name="param">Reads "Indicator column" and "Value".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">
/// Receives an error in <c>ErrString</c> when no row contains the value; the matrix is left unchanged then.
/// </param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    string[][] col = mdata.GetCategoryColumnAt(param.GetSingleChoiceParam("Indicator column").Value);
    string term = param.GetStringParam("Value").Value;
    List<int> inds = new List<int>();
    for (int i = 0; i < col.Length; i++) {
        if (Contains(col[i], term)) {
            inds.Add(i);
        }
    }
    // Without any reference row every profile entry would be NaN and the subtraction below would
    // overwrite the whole expression matrix with NaN, so stop here instead.
    if (inds.Count == 0) {
        processInfo.ErrString = "No row contains the value '" + term + "' in the indicator column.";
        return;
    }

    // Center each reference row on its own mean.
    // NOTE(review): the row returned by GetExpressionRow is modified in place; this assumes it is a
    // copy and not a view onto the matrix - confirm.
    float[][] profiles = new float[inds.Count][];
    for (int i = 0; i < profiles.Length; i++) {
        profiles[i] = mdata.GetExpressionRow(inds[i]);
        float mean = (float) ArrayUtils.Mean(profiles[i]);
        for (int j = 0; j < profiles[i].Length; j++) {
            profiles[i][j] -= mean;
        }
    }

    // Per-column median over the centered reference rows, skipping NaN and infinite values.
    float[] totalProfile = new float[mdata.ExpressionColumnCount];
    for (int i = 0; i < totalProfile.Length; i++) {
        List<float> vals = new List<float>();
        foreach (float[] t in profiles) {
            float val = t[i];
            if (float.IsNaN(val) || float.IsInfinity(val)) {
                continue;
            }
            vals.Add(val);
        }
        totalProfile[i] = vals.Count > 0 ? ArrayUtils.Median(vals) : float.NaN;
    }

    // Subtract the reference profile from every row.
    for (int i = 0; i < mdata.RowCount; i++) {
        for (int j = 0; j < mdata.ExpressionColumnCount; j++) {
            mdata[i, j] -= totalProfile[j];
        }
    }
}