/// <summary>
/// Z-scores the matrix, either over the whole matrix (per row or per column, depending on the
/// "Matrix access" parameter) or separately within the groups of a categorical row.
/// When "Report mean and std. dev." is set, the location/spread values returned by the
/// z-scoring helpers are appended to the matrix as numeric annotations.
/// </summary>
/// <param name="mdata">Matrix that is processed and that receives the optional report annotations.</param>
/// <param name="param">Supplies "Matrix access" (with sub-parameter "Grouping"),
/// "Report mean and std. dev." and "Use median".</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Provides the thread count and receives the error text when groups overlap.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    ParameterWithSubParams<int> access = param.GetParamWithSubParams<int>("Matrix access");
    bool rows = access.Value == 0;
    // The "Grouping" choice is only read in row mode; its first entry (value 0) maps to -1,
    // i.e. "no grouping". Column mode never groups.
    int groupInd = rows ? access.GetSubParameters().GetParam<int>("Grouping").Value - 1 : -1;
    bool report = param.GetParam<bool>("Report mean and std. dev.").Value;
    bool median = param.GetParam<bool>("Use median").Value;

    if (groupInd < 0) {
        Zscore(rows, mdata, processInfo.NumThreads, report, median, out double[] centers, out double[] spreads);
        if (!report) {
            return;
        }
        if (rows) {
            mdata.AddNumericColumn("Mean", "Mean", centers);
            mdata.AddNumericColumn("Std. dev.", "Std. dev.", spreads);
        } else {
            mdata.AddNumericRow("Mean", "Mean", centers);
            mdata.AddNumericRow("Std. dev.", "Std. dev.", spreads);
        }
        return;
    }

    string[][] groupOfColumn = mdata.GetCategoryRowAt(groupInd);
    // Every entry of the grouping row may carry at most one group name.
    for (int c = 0; c < groupOfColumn.Length; c++) {
        if (groupOfColumn[c].Length > 1) {
            processInfo.ErrString = "The groups are overlapping.";
            return;
        }
    }
    string[] groupVals = ArrayUtils.UniqueValuesPreserveOrder(groupOfColumn);
    ZscoreGroups(mdata, groupOfColumn, processInfo.NumThreads, report, median, groupVals,
        out double[][] groupCenters, out double[][] groupSpreads);
    if (!report) {
        return;
    }
    // One pair of report columns per group, in the order of first appearance.
    for (int g = 0; g < groupVals.Length; g++) {
        mdata.AddNumericColumn($"Mean {groupVals[g]}", "Mean", groupCenters[g]);
        mdata.AddNumericColumn($"Std. dev. {groupVals[g]}", "Std. dev.", groupSpreads[g]);
    }
}
/// <summary>
/// Round-trip test: builds a small matrix with every kind of annotation row and column,
/// writes it with <c>PerseusUtils.WriteMatrix</c>, reads the text back with
/// <c>PerseusUtils.ReadMatrix</c> and checks the dimensions and annotation counts.
/// </summary>
public void WriteMatrixTest() {
    // Main data: 2 rows x 3 columns.
    double[,] mainValues = { { 1, 2, 3 }, { 3, 4, 5 } };
    List<string> mainColumnNames = new List<string> { "col1", "col2", "col3" };
    IMatrixData original = PerseusFactory.CreateMatrixData(mainValues, mainColumnNames);

    // Annotation rows (one entry per main column).
    string[][] catRowValues = { new[] { "cat1" }, new[] { "cat1", "cat2" }, new[] { "cat2" } };
    original.AddCategoryRow("catrow", "this is catrow", catRowValues);
    original.AddNumericRow("numrow", "this is numrow", new[] { -1.0, 1, 2 });

    // Annotation columns (one entry per row).
    original.AddStringColumn("strcol1", "this is stringcol1", new[] { "1", "2" });
    original.AddStringColumn("strcol2", "", new[] { "", "hallo" });
    original.AddNumericColumn("numcol", "", new[] { 1.0, 2.0 });
    original.AddMultiNumericColumn("multnumcol", "this is multnumcol",
        new[] { new[] { -2.0, 2.0 }, new double[] {} });
    original.AddCategoryColumn("catcol", "", new[] { new[] { "cat1", "cat1.1" }, new[] { "cat2", "cat1" } });

    // Serialize into an in-memory buffer and decode it as UTF-8 text.
    string serialized;
    using (MemoryStream buffer = new MemoryStream())
    using (StreamWriter writer = new StreamWriter(buffer)) {
        PerseusUtils.WriteMatrix(original, writer);
        writer.Flush();
        serialized = Encoding.UTF8.GetString(buffer.ToArray());
    }

    // Parse the text back into a fresh matrix; the reader factory hands out a new reader on every call.
    IMatrixData reloaded = PerseusFactory.CreateMatrixData();
    ProcessInfo info = new ProcessInfo(new Settings(), status => { }, progress => { }, 1);
    PerseusUtils.ReadMatrix(reloaded, info,
        () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(serialized))), "matrix1", '\t');

    Assert.AreEqual(2, reloaded.RowCount);
    Assert.AreEqual(3, reloaded.ColumnCount);
    Assert.AreEqual(2, reloaded.StringColumnCount);
    Assert.AreEqual(1, reloaded.NumericColumnCount);
    Assert.AreEqual(1, reloaded.CategoryColumnCount);
    Assert.AreEqual(1, reloaded.MultiNumericColumnCount);
    int strcol2Index = reloaded.StringColumnNames.FindIndex(col => col.Equals("strcol2"));
    Assert.AreEqual("hallo", reloaded.StringColumns[strcol2Index][1]);
    Assert.AreEqual(1, reloaded.CategoryRowCount);
    Assert.AreEqual(1, reloaded.NumericRowCount);
}
/// <summary>
/// Reads the data lines from <paramref name="reader"/> and stores them in <paramref name="matrixData"/>:
/// main values, the four kinds of annotation columns, and the categorical/numeric annotation rows.
/// </summary>
/// <remarks>
/// The first line of <paramref name="reader"/> is discarded (presumably the header line - confirm
/// against the caller). Comment lines and lines rejected by <c>IsValidLine</c> are skipped.
/// At most <paramref name="nrows"/> accepted lines are stored; reading stops once that many rows
/// have been filled. The reader is always closed, even when parsing throws.
/// </remarks>
/// <param name="colNames">Names of all file columns; the index lists below select from it.</param>
/// <param name="colDescriptions">Descriptions aligned with <paramref name="colNames"/>, or null to skip them.</param>
/// <param name="mainColIndices">File column indices that become main (expression) columns.</param>
/// <param name="catColIndices">File column indices parsed as ';'-separated categorical cells.</param>
/// <param name="numColIndices">File column indices parsed as numbers (NaN when unparsable or missing).</param>
/// <param name="textColIndices">File column indices kept as text.</param>
/// <param name="multiNumColIndices">File column indices parsed as ';'-separated lists of numbers.</param>
/// <param name="origin">Stored as both name and origin of the matrix.</param>
/// <param name="matrixData">Matrix that receives all loaded data.</param>
/// <param name="annotationRows">Annotation rows keyed by name, one string per file column;
/// split into categorical and numeric rows by <c>SplitAnnotRows</c>.</param>
/// <param name="progress">Receives a percentage while reading and 0 when done.</param>
/// <param name="status">Receives "Reading data" at the start and "" at the end.</param>
/// <param name="separator">Column separator passed to the line splitter.</param>
/// <param name="reader">Source of the lines; closed by this method.</param>
/// <param name="auxReader">Optional reader used only to detect additional (quality/imputation) matrices; may be null.</param>
/// <param name="nrows">Number of data rows to allocate.</param>
/// <param name="shortenExpressionNames">When true, common substrings are removed from the main column names.</param>
/// <param name="filters">Row filters forwarded to <c>IsValidLine</c>.</param>
private static void LoadMatrixData(IList<string> colNames, IList<string> colDescriptions,
    IList<int> mainColIndices, IList<int> catColIndices, IList<int> numColIndices, IList<int> textColIndices,
    IList<int> multiNumColIndices, string origin, IMatrixData matrixData,
    IDictionary<string, string[]> annotationRows, Action<int> progress, Action<string> status, char separator,
    TextReader reader, StreamReader auxReader, int nrows, bool shortenExpressionNames,
    List<Tuple<Relation[], int[], bool>> filters){
    Dictionary<string, string[]> catAnnotatRows;
    Dictionary<string, string[]> numAnnotatRows;
    status("Reading data");
    SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);

    // Preallocate nrows slots for every annotation column and for the main matrix.
    List<string[][]> categoryAnnotation = new List<string[][]>();
    for (int i = 0; i < catColIndices.Count; i++){
        categoryAnnotation.Add(new string[nrows][]);
    }
    List<double[]> numericAnnotation = new List<double[]>();
    for (int i = 0; i < numColIndices.Count; i++){
        numericAnnotation.Add(new double[nrows]);
    }
    List<double[][]> multiNumericAnnotation = new List<double[][]>();
    for (int i = 0; i < multiNumColIndices.Count; i++){
        multiNumericAnnotation.Add(new double[nrows][]);
    }
    List<string[]> stringAnnotation = new List<string[]>();
    for (int i = 0; i < textColIndices.Count; i++){
        stringAnnotation.Add(new string[nrows]);
    }
    float[,] mainValues = new float[nrows, mainColIndices.Count];
    float[,] qualityValues = null;
    bool[,] isImputedValues = null;
    bool hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);
    if (hasAddtlMatrices){
        qualityValues = new float[nrows, mainColIndices.Count];
        isImputedValues = new bool[nrows, mainColIndices.Count];
    }

    try{
        // Discard the first line before reading data lines.
        reader.ReadLine();
        int count = 0;
        string line;
        while ((line = reader.ReadLine()) != null){
            // Avoid an integer division by zero when no rows were expected.
            if (nrows > 0){
                progress(100*(count + 1)/nrows);
            }
            if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions)){
                continue;
            }
            string[] w;
            if (!IsValidLine(line, separator, filters, out w, hasAddtlMatrices)){
                continue;
            }
            // All buffers hold exactly nrows rows; surplus lines cannot be stored, so stop here
            // instead of writing past the end of some buffers and silently skipping others.
            if (count >= nrows){
                break;
            }
            // Main values; fields missing from a short line become NaN.
            for (int i = 0; i < mainColIndices.Count; i++){
                int col = mainColIndices[i];
                if (col >= w.Length){
                    mainValues[count, i] = float.NaN;
                    continue;
                }
                string s = StringUtils.RemoveWhitespace(w[col]);
                if (hasAddtlMatrices){
                    ParseExp(s, out mainValues[count, i], out isImputedValues[count, i],
                        out qualityValues[count, i]);
                } else if (!float.TryParse(s, out mainValues[count, i])){
                    mainValues[count, i] = float.NaN;
                }
            }
            // Numeric annotation columns.
            for (int i = 0; i < numColIndices.Count; i++){
                int col = numColIndices[i];
                double q;
                numericAnnotation[i][count] = col < w.Length && double.TryParse(w[col].Trim(), out q)
                    ? q
                    : double.NaN;
            }
            // Multi-numeric annotation columns: optional surrounding quotes, then ';'-separated numbers.
            for (int i = 0; i < multiNumColIndices.Count; i++){
                int col = multiNumColIndices[i];
                if (col >= w.Length){
                    multiNumericAnnotation[i][count] = new double[0];
                    continue;
                }
                string q = w[col].Trim();
                if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"'){
                    q = q.Substring(1, q.Length - 2);
                }
                if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\''){
                    q = q.Substring(1, q.Length - 2);
                }
                string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                double[] parsed = new double[ww.Length];
                for (int j = 0; j < ww.Length; j++){
                    double q1;
                    parsed[j] = double.TryParse(ww[j], out q1) ? q1 : double.NaN;
                }
                multiNumericAnnotation[i][count] = parsed;
            }
            // Categorical annotation columns: unquote, split on ';', trim, drop empty terms, sort.
            for (int i = 0; i < catColIndices.Count; i++){
                int col = catColIndices[i];
                if (col >= w.Length){
                    categoryAnnotation[i][count] = new string[0];
                    continue;
                }
                string q = w[col].Trim();
                if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"'){
                    q = q.Substring(1, q.Length - 2);
                }
                if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\''){
                    q = q.Substring(1, q.Length - 2);
                }
                string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                List<int> valids = new List<int>();
                for (int j = 0; j < ww.Length; j++){
                    ww[j] = ww[j].Trim();
                    if (ww[j].Length > 0){
                        valids.Add(j);
                    }
                }
                ww = ArrayUtils.SubArray(ww, valids);
                Array.Sort(ww);
                categoryAnnotation[i][count] = ww;
            }
            // Text annotation columns.
            for (int i = 0; i < textColIndices.Count; i++){
                int col = textColIndices[i];
                stringAnnotation[i][count] = col >= w.Length
                    ? ""
                    : RemoveSplitWhitespace(RemoveQuotes(w[col].Trim()));
            }
            count++;
        }
    } finally{
        // Release the reader even if a line could not be processed.
        reader.Close();
    }

    string[] columnNames = ArrayUtils.SubArray(colNames, mainColIndices);
    if (shortenExpressionNames){
        columnNames = StringUtils.RemoveCommonSubstrings(columnNames, true);
    }
    string[] catColnames = ArrayUtils.SubArray(colNames, catColIndices);
    string[] numColnames = ArrayUtils.SubArray(colNames, numColIndices);
    string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
    string[] textColnames = ArrayUtils.SubArray(colNames, textColIndices);
    matrixData.Name = origin;
    matrixData.ColumnNames = RemoveQuotes(columnNames);
    matrixData.Values.Set(mainValues);
    if (hasAddtlMatrices){
        matrixData.Quality.Set(qualityValues);
        matrixData.IsImputed.Set(isImputedValues);
    } else{
        // No additional matrices in the file: install default-initialized ones of the same shape.
        matrixData.Quality.Set(new float[mainValues.GetLength(0), mainValues.GetLength(1)]);
        matrixData.IsImputed.Set(new bool[mainValues.GetLength(0), mainValues.GetLength(1)]);
    }
    matrixData.SetAnnotationColumns(RemoveQuotes(textColnames), stringAnnotation, RemoveQuotes(catColnames),
        categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation, RemoveQuotes(multiNumColnames),
        multiNumericAnnotation);
    if (colDescriptions != null){
        string[] columnDesc = ArrayUtils.SubArray(colDescriptions, mainColIndices);
        string[] catColDesc = ArrayUtils.SubArray(colDescriptions, catColIndices);
        string[] numColDesc = ArrayUtils.SubArray(colDescriptions, numColIndices);
        string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
        string[] textColDesc = ArrayUtils.SubArray(colDescriptions, textColIndices);
        matrixData.ColumnDescriptions = new List<string>(columnDesc);
        matrixData.NumericColumnDescriptions = new List<string>(numColDesc);
        matrixData.CategoryColumnDescriptions = new List<string>(catColDesc);
        matrixData.StringColumnDescriptions = new List<string>(textColDesc);
        matrixData.MultiNumericColumnDescriptions = new List<string>(multiNumColDesc);
    }
    // Categorical annotation rows: one ';'-separated, trimmed, sorted term list per main column.
    foreach (string key in catAnnotatRows.Keys){
        string name = key;
        string[] svals = ArrayUtils.SubArray(catAnnotatRows[key], mainColIndices);
        string[][] cat = new string[svals.Length][];
        for (int i = 0; i < cat.Length; i++){
            string s = svals[i].Trim();
            cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
            List<int> valids = new List<int>();
            for (int j = 0; j < cat[i].Length; j++){
                cat[i][j] = cat[i][j].Trim();
                if (cat[i][j].Length > 0){
                    valids.Add(j);
                }
            }
            cat[i] = ArrayUtils.SubArray(cat[i], valids);
            Array.Sort(cat[i]);
        }
        matrixData.AddCategoryRow(name, name, cat);
    }
    // Numeric annotation rows: one number per main column.
    foreach (string key in numAnnotatRows.Keys){
        string name = key;
        string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], mainColIndices);
        double[] num = new double[svals.Length];
        for (int i = 0; i < num.Length; i++){
            // TryParse writes 0 into its out argument on failure, so NaN has to be
            // assigned explicitly for unparsable entries.
            double parsed;
            num[i] = double.TryParse(svals[i].Trim(), out parsed) ? parsed : double.NaN;
        }
        matrixData.AddNumericRow(name, name, num);
    }
    matrixData.Origin = origin;
    progress(0);
    status("");
}
/// <summary>
/// Adds a numeric annotation row whose name comes from the "Row name" parameter and whose
/// values are read from one double parameter per expression column (looked up by column name).
/// </summary>
/// <param name="mdata">Matrix that supplies the expression column names and receives the new row.</param>
/// <param name="param">Holds "Row name" and one double parameter named after each expression column.</param>
private static void ProcessDataCreate(IMatrixData mdata, Parameters param) {
    string rowName = param.GetStringParam("Row name").Value;
    int columnCount = mdata.ExpressionColumnCount;
    double[] rowValues = new double[columnCount];
    for (int col = 0; col < columnCount; col++){
        // The parameter for a column carries the same name as the column itself.
        rowValues[col] = param.GetDoubleParam(mdata.ExpressionColumnNames[col]).Value;
    }
    mdata.AddNumericRow(rowName, rowName, rowValues);
}
/// <summary>
/// Loads the data lines supplied by <paramref name="reader"/> into <paramref name="matrixData"/>:
/// the main value matrix, string/categorical/numeric/multi-numeric annotation columns, and the
/// categorical and numeric annotation rows.
/// </summary>
/// <remarks>
/// The first line read from <paramref name="reader"/> is thrown away (presumably the header - confirm
/// against the caller). Comment lines and lines rejected by <c>IsValidLine</c> are ignored.
/// No more than <paramref name="nrows"/> accepted lines are stored; once the buffers are full,
/// reading stops. The reader is closed in all cases, including when parsing throws.
/// </remarks>
/// <param name="colNames">Names of all file columns; the index lists select entries from it.</param>
/// <param name="colDescriptions">Descriptions parallel to <paramref name="colNames"/>; null means "no descriptions".</param>
/// <param name="mainColIndices">File columns loaded as main (expression) columns.</param>
/// <param name="catColIndices">File columns loaded as categorical annotation (';'-separated terms).</param>
/// <param name="numColIndices">File columns loaded as numeric annotation (NaN if unparsable or missing).</param>
/// <param name="textColIndices">File columns loaded as text annotation.</param>
/// <param name="multiNumColIndices">File columns loaded as multi-numeric annotation (';'-separated numbers).</param>
/// <param name="origin">Assigned to both <c>Name</c> and <c>Origin</c> of the matrix.</param>
/// <param name="matrixData">Destination matrix.</param>
/// <param name="annotationRows">Named annotation rows with one string per file column;
/// <c>SplitAnnotRows</c> separates them into categorical and numeric rows.</param>
/// <param name="progress">Called with a percentage per line read and with 0 at the end.</param>
/// <param name="status">Called with "Reading data" first and "" last.</param>
/// <param name="separator">Field separator handed to the line splitter.</param>
/// <param name="reader">Line source; this method closes it.</param>
/// <param name="auxReader">Optional second reader, used only to detect additional matrices; may be null.</param>
/// <param name="nrows">Number of data rows the buffers are sized for.</param>
/// <param name="shortenExpressionNames">If true, common substrings are stripped from the main column names.</param>
/// <param name="filters">Row filters passed through to <c>IsValidLine</c>.</param>
private static void LoadMatrixData(IList<string> colNames, IList<string> colDescriptions,
    IList<int> mainColIndices, IList<int> catColIndices, IList<int> numColIndices, IList<int> textColIndices,
    IList<int> multiNumColIndices, string origin, IMatrixData matrixData,
    IDictionary<string, string[]> annotationRows, Action<int> progress, Action<string> status, char separator,
    TextReader reader, StreamReader auxReader, int nrows, bool shortenExpressionNames,
    List<Tuple<Relation[], int[], bool>> filters) {
    Dictionary<string, string[]> catAnnotatRows;
    Dictionary<string, string[]> numAnnotatRows;
    status("Reading data");
    SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);

    // Every buffer below is sized for exactly nrows data rows.
    List<string[][]> categoryAnnotation = new List<string[][]>();
    for (int i = 0; i < catColIndices.Count; i++) {
        categoryAnnotation.Add(new string[nrows][]);
    }
    List<double[]> numericAnnotation = new List<double[]>();
    for (int i = 0; i < numColIndices.Count; i++) {
        numericAnnotation.Add(new double[nrows]);
    }
    List<double[][]> multiNumericAnnotation = new List<double[][]>();
    for (int i = 0; i < multiNumColIndices.Count; i++) {
        multiNumericAnnotation.Add(new double[nrows][]);
    }
    List<string[]> stringAnnotation = new List<string[]>();
    for (int i = 0; i < textColIndices.Count; i++) {
        stringAnnotation.Add(new string[nrows]);
    }
    float[,] mainValues = new float[nrows, mainColIndices.Count];
    float[,] qualityValues = null;
    bool[,] isImputedValues = null;
    bool hasAddtlMatrices = auxReader != null && GetHasAddtlMatrices(auxReader, mainColIndices, separator);
    if (hasAddtlMatrices) {
        qualityValues = new float[nrows, mainColIndices.Count];
        isImputedValues = new bool[nrows, mainColIndices.Count];
    }

    try {
        // The first line is not data; drop it.
        reader.ReadLine();
        int count = 0;
        string line;
        while ((line = reader.ReadLine()) != null) {
            // nrows == 0 would otherwise raise DivideByZeroException here.
            if (nrows > 0) {
                progress(100 * (count + 1) / nrows);
            }
            if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions)) {
                continue;
            }
            string[] w;
            if (!IsValidLine(line, separator, filters, out w, hasAddtlMatrices)) {
                continue;
            }
            // The buffers are full: further lines cannot be stored. Stopping here replaces the
            // former mix of per-write guards and unguarded writes that could throw.
            if (count >= nrows) {
                break;
            }
            // Main matrix; a field missing from a short line is stored as NaN.
            for (int i = 0; i < mainColIndices.Count; i++) {
                int col = mainColIndices[i];
                if (col >= w.Length) {
                    mainValues[count, i] = float.NaN;
                    continue;
                }
                string s = StringUtils.RemoveWhitespace(w[col]);
                if (hasAddtlMatrices) {
                    ParseExp(s, out mainValues[count, i], out isImputedValues[count, i],
                        out qualityValues[count, i]);
                } else if (!float.TryParse(s, out mainValues[count, i])) {
                    mainValues[count, i] = float.NaN;
                }
            }
            // Numeric annotation.
            for (int i = 0; i < numColIndices.Count; i++) {
                int col = numColIndices[i];
                double q;
                numericAnnotation[i][count] = col < w.Length && double.TryParse(w[col].Trim(), out q)
                    ? q
                    : double.NaN;
            }
            // Multi-numeric annotation: strip one pair of surrounding quotes, then split on ';'.
            for (int i = 0; i < multiNumColIndices.Count; i++) {
                int col = multiNumColIndices[i];
                if (col >= w.Length) {
                    multiNumericAnnotation[i][count] = new double[0];
                    continue;
                }
                string q = w[col].Trim();
                if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"') {
                    q = q.Substring(1, q.Length - 2);
                }
                if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'') {
                    q = q.Substring(1, q.Length - 2);
                }
                string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                double[] parsed = new double[ww.Length];
                for (int j = 0; j < ww.Length; j++) {
                    double q1;
                    parsed[j] = double.TryParse(ww[j], out q1) ? q1 : double.NaN;
                }
                multiNumericAnnotation[i][count] = parsed;
            }
            // Categorical annotation: unquote, split on ';', trim, drop empty terms, sort.
            for (int i = 0; i < catColIndices.Count; i++) {
                int col = catColIndices[i];
                if (col >= w.Length) {
                    categoryAnnotation[i][count] = new string[0];
                    continue;
                }
                string q = w[col].Trim();
                if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"') {
                    q = q.Substring(1, q.Length - 2);
                }
                if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'') {
                    q = q.Substring(1, q.Length - 2);
                }
                string[] ww = q.Length == 0 ? new string[0] : q.Split(';');
                List<int> valids = new List<int>();
                for (int j = 0; j < ww.Length; j++) {
                    ww[j] = ww[j].Trim();
                    if (ww[j].Length > 0) {
                        valids.Add(j);
                    }
                }
                ww = ArrayUtils.SubArray(ww, valids);
                Array.Sort(ww);
                categoryAnnotation[i][count] = ww;
            }
            // Text annotation.
            for (int i = 0; i < textColIndices.Count; i++) {
                int col = textColIndices[i];
                stringAnnotation[i][count] = col >= w.Length
                    ? ""
                    : RemoveSplitWhitespace(RemoveQuotes(w[col].Trim()));
            }
            count++;
        }
    } finally {
        // Close the reader on every path so a parsing failure does not leak it.
        reader.Close();
    }

    string[] columnNames = ArrayUtils.SubArray(colNames, mainColIndices);
    if (shortenExpressionNames) {
        columnNames = StringUtils.RemoveCommonSubstrings(columnNames, true);
    }
    string[] catColnames = ArrayUtils.SubArray(colNames, catColIndices);
    string[] numColnames = ArrayUtils.SubArray(colNames, numColIndices);
    string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
    string[] textColnames = ArrayUtils.SubArray(colNames, textColIndices);
    matrixData.Name = origin;
    matrixData.ColumnNames = RemoveQuotes(columnNames);
    matrixData.Values.Set(mainValues);
    if (hasAddtlMatrices) {
        matrixData.Quality.Set(qualityValues);
        matrixData.IsImputed.Set(isImputedValues);
    } else {
        // Nothing was read for these matrices; install default-initialized ones of matching shape.
        matrixData.Quality.Set(new float[mainValues.GetLength(0), mainValues.GetLength(1)]);
        matrixData.IsImputed.Set(new bool[mainValues.GetLength(0), mainValues.GetLength(1)]);
    }
    matrixData.SetAnnotationColumns(RemoveQuotes(textColnames), stringAnnotation, RemoveQuotes(catColnames),
        categoryAnnotation, RemoveQuotes(numColnames), numericAnnotation, RemoveQuotes(multiNumColnames),
        multiNumericAnnotation);
    if (colDescriptions != null) {
        string[] columnDesc = ArrayUtils.SubArray(colDescriptions, mainColIndices);
        string[] catColDesc = ArrayUtils.SubArray(colDescriptions, catColIndices);
        string[] numColDesc = ArrayUtils.SubArray(colDescriptions, numColIndices);
        string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
        string[] textColDesc = ArrayUtils.SubArray(colDescriptions, textColIndices);
        matrixData.ColumnDescriptions = new List<string>(columnDesc);
        matrixData.NumericColumnDescriptions = new List<string>(numColDesc);
        matrixData.CategoryColumnDescriptions = new List<string>(catColDesc);
        matrixData.StringColumnDescriptions = new List<string>(textColDesc);
        matrixData.MultiNumericColumnDescriptions = new List<string>(multiNumColDesc);
    }
    // Categorical annotation rows, restricted to the main columns.
    foreach (string key in catAnnotatRows.Keys) {
        string name = key;
        string[] svals = ArrayUtils.SubArray(catAnnotatRows[key], mainColIndices);
        string[][] cat = new string[svals.Length][];
        for (int i = 0; i < cat.Length; i++) {
            string s = svals[i].Trim();
            cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
            List<int> valids = new List<int>();
            for (int j = 0; j < cat[i].Length; j++) {
                cat[i][j] = cat[i][j].Trim();
                if (cat[i][j].Length > 0) {
                    valids.Add(j);
                }
            }
            cat[i] = ArrayUtils.SubArray(cat[i], valids);
            Array.Sort(cat[i]);
        }
        matrixData.AddCategoryRow(name, name, cat);
    }
    // Numeric annotation rows, restricted to the main columns.
    foreach (string key in numAnnotatRows.Keys) {
        string name = key;
        string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], mainColIndices);
        double[] num = new double[svals.Length];
        for (int i = 0; i < num.Length; i++) {
            // A failed TryParse sets its out argument to 0, which would overwrite a NaN
            // assigned beforehand; pick NaN explicitly from the success flag instead.
            double parsed;
            num[i] = double.TryParse(svals[i].Trim(), out parsed) ? parsed : double.NaN;
        }
        matrixData.AddNumericRow(name, name, num);
    }
    matrixData.Origin = origin;
    progress(0);
    status("");
}
/// <summary>
/// Estimates per-protein copy numbers, concentrations and abundances from intensity columns,
/// scaling intensities either to a given total protein amount or to the histone signal, and
/// appends the selected results to the matrix as numeric columns, annotation rows and/or a
/// supplementary summary table.
/// </summary>
/// <remarks>
/// Input annotation columns (molecular masses, correction factors, numeric intensity columns) are
/// copied before they are rescaled or sanitized, so existing data in <paramref name="mdata"/>
/// is not modified; only new columns and rows are added.
/// </remarks>
/// <param name="mdata">Matrix providing intensities, protein ids and molecular masses; receives the output.</param>
/// <param name="param">Reads "Output", "Protein IDs", "Intensities", "Averaging mode" (sub-parameter "Grouping"),
/// "Logarithmized" (sub-parameter "log base"), "Molecular masses", "Detectability correction"
/// (sub-parameter "Correction factor"), "Scaling mode" (sub-parameters "Protein amount per cell [pg]" or
/// "Ploidy") and "Total cellular protein concentration [g/l]".</param>
/// <param name="supplTables">Set to a one-element array holding the summary matrix when output 7 is selected
/// and the averaging mode is not 3.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives error/warning text in <c>ErrString</c>.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] outputColumns = param.GetParam<int[]>("Output").Value;
    int proteinIdColumnInd = param.GetParam<int>("Protein IDs").Value;
    string[] proteinIds = mdata.StringColumns[proteinIdColumnInd];
    int[] intensityCols = param.GetParam<int[]>("Intensities").Value;
    if (intensityCols.Length == 0) {
        processInfo.ErrString = "Please select at least one column containing protein intensities.";
        return;
    }
    int averagingMode = param.GetParamWithSubParams<int>("Averaging mode").Value;

    // Collect the intensity values. Indices below ColumnCount address main columns,
    // larger indices address numeric annotation columns.
    List<double[]> columns = new List<double[]>();
    string[] inputNames = new string[intensityCols.Length];
    string[] sampleNames = new string[intensityCols.Length];
    // Strips an optional "Intensity " / "LFQ intensity " style prefix from the column name.
    Regex samplePrefix = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$");
    for (int col = 0; col < intensityCols.Length; col++) {
        double[] values;
        if (intensityCols[col] < mdata.ColumnCount) {
            values = ArrayUtils.ToDoubles(mdata.Values.GetColumn(intensityCols[col]));
            inputNames[col] = mdata.ColumnNames[intensityCols[col]];
        } else {
            int numericInd = intensityCols[col] - mdata.ColumnCount;
            // Work on a copy: the logarithm is reverted in place further down and must not
            // overwrite the annotation column stored in the matrix.
            values = (double[]) mdata.NumericColumns[numericInd].Clone();
            inputNames[col] = mdata.NumericColumnNames[numericInd];
        }
        sampleNames[col] = samplePrefix.Match(inputNames[col]).Groups[1].Value;
        columns.Add(values);
    }

    // Averaging mode 3: collapse all selected columns into a single column of row medians.
    if (averagingMode == 3) {
        double[] column = new double[mdata.RowCount];
        for (int row = 0; row < mdata.RowCount; row++) {
            double[] values = new double[intensityCols.Length];
            for (int col = 0; col < intensityCols.Length; col++) {
                values[col] = columns[col][row];
            }
            column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
        }
        columns = new List<double[]> { column };
        sampleNames = new[] { "" };
    }

    // Revert the logarithm if necessary.
    if (param.GetParamWithSubParams<bool>("Logarithmized").Value) {
        double[] logBases = new[] { 2, Math.E, 10 };
        double logBase =
            logBases[
                param.GetParamWithSubParams<bool>("Logarithmized").GetSubParameters().GetParam<int>("log base")
                    .Value];
        foreach (double[] t in columns) {
            for (int row = 0; row < mdata.RowCount; row++) {
                if (t[row] == 0) {
                    // Reported as a warning only; processing continues.
                    processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                }
                t[row] = Math.Pow(logBase, t[row]);
            }
        }
    }

    // Copy the molecular masses so that the unit conversion and the sanitizing step below
    // do not rewrite the annotation column of the matrix.
    double[] mw = (double[]) mdata.NumericColumns[param.GetParam<int>("Molecular masses").Value].Clone();
    // Decide whether the masses are given in Da or kDa.
    if (ArrayUtils.Median(mw) < 250) // most likely kDa
    {
        for (int i = 0; i < mw.Length; i++) {
            mw[i] *= 1000;
        }
    }
    // Without a detectability correction the (local) mass array itself is the normalization
    // factor. It is deliberately the same array instance: the zero/NaN replacement below then
    // also applies to mw, which keeps the numerical results of the previous implementation.
    double[] detectabilityNormFactor = mw;
    if (param.GetParamWithSubParams<bool>("Detectability correction").Value) {
        detectabilityNormFactor = (double[]) mdata.NumericColumns[
            param.GetParamWithSubParams<bool>("Detectability correction").GetSubParameters()
                .GetParam<int>("Correction factor").Value].Clone();
    }
    // The normalization factor needs to be nonzero for all proteins; replace 0 and NaN with 1.
    for (int row = 0; row < mdata.RowCount; row++) {
        if (detectabilityNormFactor[row] == 0 || double.IsNaN(detectabilityNormFactor[row])) {
            detectabilityNormFactor[row] = 1;
        }
    }

    // Detect the organism.
    Organism organism = DetectOrganism(proteinIds);
    // C value: the amount of DNA per haploid genome, see http://en.wikipedia.org/wiki/C-value
    double cValue = organism.genomeSize * basePairWeight / avogadro;
    // Find the histones and write a categorical column marking them.
    int[] histoneRows = FindHistones(proteinIds, organism);
    string[][] histoneCol = new string[mdata.RowCount][];
    for (int row = 0; row < mdata.RowCount; row++) {
        histoneCol[row] = ArrayUtils.Contains(histoneRows, row) ? new[] { "+" } : new string[0];
    }
    mdata.AddCategoryColumn("Histones", "", histoneCol);

    // Buffers for the summary annotation rows (one entry per main column).
    string[] sampleNameRow = new string[mdata.ColumnCount];
    string[] inputNameRow = new string[mdata.ColumnCount];
    double[] totalProteinRow = new double[mdata.ColumnCount];
    double[] totalMoleculesRow = new double[mdata.ColumnCount];
    string[][] organismRow = new string[mdata.ColumnCount][];
    // Default to "N/A" rather than null, which may cause errors when the annotations are written.
    for (int i = 0; i < organismRow.Length; i++) {
        organismRow[i] = new[] { "N/A" };
    }
    double[] histoneMassRow = new double[mdata.ColumnCount];
    double[] ploidyRow = new double[mdata.ColumnCount];
    double[] cellVolumeRow = new double[mdata.ColumnCount];

    // Normalization factor (intensity -> copies) for each column.
    double[] normalizationFactors = new double[columns.Count];
    int scalingMode = param.GetParamWithSubParams<int>("Scaling mode").Value;
    for (int col = 0; col < columns.Count; col++) {
        double[] column = columns[col];
        double factor;
        switch (scalingMode) {
            case 0: // total protein amount
                double mwWeightedNormalizedSummedIntensities = 0;
                for (int row = 0; row < mdata.RowCount; row++) {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                        mwWeightedNormalizedSummedIntensities +=
                            column[row] / detectabilityNormFactor[row] * mw[row];
                    }
                }
                factor =
                    param.GetParamWithSubParams<int>("Scaling mode").GetSubParameters()
                        .GetParam<double>("Protein amount per cell [pg]").Value * 1e-12 * avogadro /
                    mwWeightedNormalizedSummedIntensities;
                break;
            case 1: // histone mode
                double mwWeightedNormalizedSummedHistoneIntensities = 0;
                foreach (int row in histoneRows) {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                        mwWeightedNormalizedSummedHistoneIntensities +=
                            column[row] / detectabilityNormFactor[row] * mw[row];
                    }
                }
                double ploidy =
                    param.GetParamWithSubParams<int>("Scaling mode").GetSubParameters().GetParam<double>("Ploidy")
                        .Value;
                factor = cValue * ploidy * avogadro / mwWeightedNormalizedSummedHistoneIntensities;
                break;
            default:
                factor = 1;
                break;
        }
        normalizationFactors[col] = factor;
    }

    // Averaging mode 1: the same factor for all columns.
    if (averagingMode == 1) {
        double factor = ArrayUtils.Mean(normalizationFactors);
        for (int i = 0; i < normalizationFactors.Length; i++) {
            normalizationFactors[i] = factor;
        }
    }
    // Averaging mode 2: the same factor within each group of a categorical row.
    if (averagingMode == 2) {
        int groupingInd =
            param.GetParamWithSubParams<int>("Averaging mode").GetSubParameters().GetParam<int>("Grouping").Value;
        if (groupingInd == -1) {
            processInfo.ErrString = "No grouping selected.";
            return;
        }
        string[][] groupNames = mdata.GetCategoryRowAt(groupingInd);
        string[] uniqueGroupNames = Unique(groupNames);
        int[] grouping = new int[columns.Count];
        for (int i = 0; i < columns.Count; i++) {
            // Default: the column forms a group of its own. The offset keeps these ids
            // disjoint from the indices of real groups (0 .. uniqueGroupNames.Length - 1).
            grouping[i] = uniqueGroupNames.Length + i;
            int mainCol = intensityCols[i];
            if (mainCol >= mdata.ColumnCount) {
                // Numeric annotation columns cannot be grouped.
                continue;
            }
            // The category row is indexed by main column, not by position in the selection.
            string[] namesOfColumn = groupNames[mainCol];
            if (namesOfColumn.Length == 0) {
                continue;
            }
            if (ArrayUtils.Contains(uniqueGroupNames, namesOfColumn[0])) {
                grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, namesOfColumn[0]);
            }
        }
        Dictionary<int, List<double>> factors = new Dictionary<int, List<double>>();
        for (int i = 0; i < columns.Count; i++) {
            List<double> groupFactors;
            if (!factors.TryGetValue(grouping[i], out groupFactors)) {
                groupFactors = new List<double>();
                factors.Add(grouping[i], groupFactors);
            }
            groupFactors.Add(normalizationFactors[i]);
        }
        double[] averagedNormalizationFactors = new double[columns.Count];
        for (int i = 0; i < columns.Count; i++) {
            averagedNormalizationFactors[i] = ArrayUtils.Mean(factors[grouping[i]]);
        }
        normalizationFactors = averagedNormalizationFactors;
    }

    // Loop over all selected columns and calculate copy numbers.
    for (int col = 0; col < columns.Count; col++) {
        string sampleName = sampleNames[col];
        double[] column = columns[col];
        double factor = normalizationFactors[col];
        double[] copyNumbers = new double[mdata.RowCount];
        double[] concentrations = new double[mdata.RowCount]; // nanomolar
        double[] massFraction = new double[mdata.RowCount]; // ppm
        double[] moleFraction = new double[mdata.RowCount]; // ppm
        double totalProtein = 0; // picograms
        double histoneMass = 0; // picograms
        double totalMolecules = 0;
        for (int row = 0; row < mdata.RowCount; row++) {
            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                copyNumbers[row] = column[row] / detectabilityNormFactor[row] * factor;
                totalMolecules += copyNumbers[row];
                totalProtein += copyNumbers[row] * mw[row] * 1e12 / avogadro; // picograms
                if (ArrayUtils.Contains(histoneRows, row)) {
                    histoneMass += copyNumbers[row] * mw[row] * 1e12 / avogadro; // picograms
                }
            }
        }
        double totalVolume = totalProtein /
                             param.GetParam<double>("Total cellular protein concentration [g/l]").Value *
                             1000; // femtoliters
        for (int row = 0; row < mdata.RowCount; row++) {
            if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                concentrations[row] = copyNumbers[row] / (totalVolume * 1e-15) / avogadro * 1e9; // nanomolar
                massFraction[row] = copyNumbers[row] * mw[row] * 1e12 / avogadro / totalProtein * 1e6; // ppm
                moleFraction[row] = copyNumbers[row] / totalMolecules * 1e6; // ppm
            }
        }
        string suffix = sampleName == "" ? "" : " " + sampleName;
        if (ArrayUtils.Contains(outputColumns, 0)) {
            mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
        }
        if (ArrayUtils.Contains(outputColumns, 1)) {
            mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
        }
        if (ArrayUtils.Contains(outputColumns, 2)) {
            mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
        }
        if (ArrayUtils.Contains(outputColumns, 3)) {
            mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
        }
        double[] rank = ArrayUtils.Rank(copyNumbers);
        double[] relativeRank = new double[mdata.RowCount];
        double validRanks = mdata.RowCount;
        for (int row = 0; row < mdata.RowCount; row++) {
            // Remove the rank of proteins without copy number information.
            if (double.IsNaN(copyNumbers[row]) || copyNumbers[row] == 0) {
                rank[row] = double.NaN;
                validRanks--; // do not consider as valid
            }
            // Invert the ranking so that the most abundant protein gets the smallest rank
            // (NaN stays NaN under the subtraction).
            rank[row] = mdata.RowCount - rank[row];
        }
        for (int row = 0; row < mdata.RowCount; row++) {
            relativeRank[row] = rank[row] / validRanks;
        }
        if (ArrayUtils.Contains(outputColumns, 4)) {
            mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
        }
        if (ArrayUtils.Contains(outputColumns, 5)) {
            mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
        }
        // Summary values exist only for main columns and only when columns were not collapsed.
        if (intensityCols[col] < mdata.ColumnCount && averagingMode != 3) {
            int mainCol = intensityCols[col];
            inputNameRow[mainCol] = inputNames[col];
            sampleNameRow[mainCol] = sampleNames[col];
            totalProteinRow[mainCol] = Math.Round(totalProtein, 2);
            totalMoleculesRow[mainCol] = Math.Round(totalMolecules, 0);
            organismRow[mainCol] = new[] { organism.name };
            histoneMassRow[mainCol] = Math.Round(histoneMass, 4);
            ploidyRow[mainCol] = Math.Round(histoneMass * 1e-12 / cValue, 2);
            cellVolumeRow[mainCol] = Math.Round(totalVolume, 2); // femtoliters
        }
    }

    // Summary annotation rows.
    if (averagingMode != 3 && ArrayUtils.Contains(outputColumns, 6)) {
        mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
        mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
        mdata.AddCategoryRow("Organism", "", organismRow);
        mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
        mdata.AddNumericRow("Ploidy", "", ploidyRow);
        mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
    }
    // Summary matrix: one row per main column, no main values, only annotation columns.
    if (averagingMode != 3 && ArrayUtils.Contains(outputColumns, 7)) {
        supplTables = new IMatrixData[1];
        IMatrixData supplTab = PerseusFactory.CreateMatrixData();
        supplTab.ColumnNames = new List<string>();
        supplTab.Values.Init(totalProteinRow.Length, 0);
        supplTab.SetAnnotationColumns(new List<string> { "Sample", "Input Column" },
            new List<string[]>() { sampleNameRow, inputNameRow }, new List<string>() { "Organism" },
            new List<string[][]>() { organismRow },
            new List<string>() {
                "Total protein [pg/cell]",
                "Total molecules per cell",
                "Histone mass [pg/cell]",
                "Ploidy",
                "Cell volume [fl]"
            },
            new List<double[]>() { totalProteinRow, totalMoleculesRow, histoneMassRow, ploidyRow, cellVolumeRow },
            new List<string>(), new List<double[][]>());
        supplTables[0] = supplTab;
    }
}
/// <summary>
/// Converts the selected protein intensity columns into copy number estimates and adds the
/// requested results to <paramref name="mdata"/>.
/// </summary>
/// <remarks>
/// The method reads the parameters "Output", "Protein IDs", "Intensities", "Averaging mode",
/// "Logarithmized", "Molecular masses", "Detectability correction", "Scaling mode" and
/// "Total cellular protein concentration [g/l]". It always adds a "Histones" category column;
/// the numeric result columns and the summary annotation rows are only added when the matching
/// entry of the "Output" selection is set.
/// </remarks>
/// <param name="mdata">Matrix that provides the input columns and receives the results.</param>
/// <param name="param">Parameter set of the activity (see remarks for the names that are read).</param>
/// <param name="supplTables">Not touched by this method.</param>
/// <param name="documents">Not touched by this method.</param>
/// <param name="processInfo">Receives an error message in <c>ErrString</c> when the input is unusable.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] outputColumns = param.GetMultiChoiceParam("Output").Value;
    int proteinIdColumnInd = param.GetSingleChoiceParam("Protein IDs").Value;
    string[] proteinIds = mdata.StringColumns[proteinIdColumnInd];
    int[] intensityCols = param.GetMultiChoiceParam("Intensities").Value;
    if (intensityCols.Length == 0) {
        processInfo.ErrString = "Please select at least one column containing protein intensities.";
        return;
    }
    int averagingMode = param.GetSingleChoiceWithSubParams("Averaging mode").Value;
    int rowCount = mdata.RowCount;

    // Collect the intensity values and derive a sample name for every selected column.
    // Indices below ExpressionColumnCount address expression columns, the rest address
    // numeric annotation columns.
    // NOTE(review): numeric annotation columns are used by reference, so the de-logarithmizing
    // step further down rewrites them in place inside mdata - confirm that this is intended.
    Regex sampleNamePattern = new Regex(@"^(?:(?:LFQ )?[Ii]ntensity )?(.*)$");
    List<double[]> columns = new List<double[]>();
    string[] sampleNames = new string[intensityCols.Length];
    for (int col = 0; col < intensityCols.Length; col++) {
        int index = intensityCols[col];
        double[] values;
        string name;
        if (index < mdata.ExpressionColumnCount) {
            values = ArrayUtils.ToDoubles(mdata.GetExpressionColumn(index));
            name = mdata.ExpressionColumnNames[index];
        } else {
            int numericIndex = index - mdata.ExpressionColumnCount;
            values = mdata.NumericColumns[numericIndex];
            name = mdata.NumericColumnNames[numericIndex];
        }
        // strip a leading "Intensity " / "LFQ intensity " prefix from the column name
        sampleNames[col] = sampleNamePattern.Match(name).Groups[1].Value;
        columns.Add(values);
    }

    // Averaging mode 3: collapse all selected columns into a single column of row-wise medians.
    if (averagingMode == 3) {
        double[] column = new double[rowCount];
        for (int row = 0; row < rowCount; row++) {
            double[] values = new double[intensityCols.Length];
            for (int col = 0; col < intensityCols.Length; col++) {
                values[col] = columns[col][row];
            }
            column[row] = ArrayUtils.Median(ExtractValidValues(values, false));
        }
        // replace the original list of columns by the single averaged one
        columns = new List<double[]> {column};
        sampleNames = new[] {""};
    }

    // Revert the logarithm if the input is declared to be logarithmized.
    if (param.GetBoolWithSubParams("Logarithmized").Value) {
        double[] logBases = new[] {2, Math.E, 10};
        double logBase =
            logBases[param.GetBoolWithSubParams("Logarithmized").GetSubParameters().GetSingleChoiceParam("log base").Value];
        foreach (double[] t in columns) {
            for (int row = 0; row < rowCount; row++) {
                if (t[row] == 0) {
                    // The error is only recorded here; the conversion itself still continues.
                    processInfo.ErrString = "Are the columns really logarithmized?\nThey contain zeroes!";
                }
                t[row] = Math.Pow(logBase, t[row]);
            }
        }
    }

    double[] mw = mdata.NumericColumns[param.GetSingleChoiceParam("Molecular masses").Value];
    // Detect whether the molecular masses are given in Da or kDa; a median below 250 is taken as kDa.
    // NOTE(review): the conversion is done in place on the column held by mdata.
    if (ArrayUtils.Median(mw) < 250) {
        for (int i = 0; i < mw.Length; i++) {
            mw[i] *= 1000;
        }
    }

    double[] detectabilityNormFactor = mw;
    if (param.GetBoolWithSubParams("Detectability correction").Value) {
        detectabilityNormFactor = mdata.NumericColumns[
            param.GetBoolWithSubParams("Detectability correction")
                .GetSubParameters()
                .GetSingleChoiceParam("Correction factor")
                .Value];
    }
    // The normalization factor is used as a divisor and must be a usable non-zero number for
    // every protein; unusable entries are replaced by 1.
    // NaN never compares equal to anything (not even itself), so it has to be detected with
    // double.IsNaN. When the factors ARE the molecular masses (same array), NaN entries are
    // left alone so that the NaN-mass checks below keep skipping those proteins.
    bool factorsAreMasses = ReferenceEquals(detectabilityNormFactor, mw);
    for (int row = 0; row < rowCount; row++) {
        double normFactor = detectabilityNormFactor[row];
        if (normFactor == 0 || (!factorsAreMasses && double.IsNaN(normFactor))) {
            detectabilityNormFactor[row] = 1;
        }
    }

    // detect the organism
    Organism organism = DetectOrganism(proteinIds);
    // c value: the amount of DNA per cell, see: http://en.wikipedia.org/wiki/C-value
    double cValue = (organism.genomeSize*basePairWeight)/avogadro;

    // find the histones; the set gives constant-time membership tests inside the row loops
    int[] histoneRows = FindHistones(proteinIds, organism);
    HashSet<int> histoneRowSet = new HashSet<int>(histoneRows);

    // write a categorical column indicating the histones
    string[][] histoneCol = new string[rowCount][];
    for (int row = 0; row < rowCount; row++) {
        histoneCol[row] = histoneRowSet.Contains(row) ? new[] {"+"} : new[] {""};
    }
    mdata.AddCategoryColumn("Histones", "", histoneCol);

    // initialize the variables for the annotation rows (one entry per expression column)
    double[] totalProteinRow = new double[mdata.ExpressionColumnCount];
    double[] totalMoleculesRow = new double[mdata.ExpressionColumnCount];
    string[][] organismRow = new string[mdata.ExpressionColumnCount][];
    double[] histoneMassRow = new double[mdata.ExpressionColumnCount];
    double[] ploidyRow = new double[mdata.ExpressionColumnCount];
    double[] cellVolumeRow = new double[mdata.ExpressionColumnCount];

    // Calculate the normalization factor (intensity -> copies) for each column, either from the
    // total protein amount or from the histone signal, depending on the scaling mode.
    double[] normalizationFactors = new double[columns.Count];
    for (int col = 0; col < columns.Count; col++) {
        double[] column = columns[col];
        double factor;
        switch (param.GetSingleChoiceWithSubParams("Scaling mode").Value) {
            case 0: // total protein amount
                double mwWeightedNormalizedSummedIntensities = 0;
                for (int row = 0; row < rowCount; row++) {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                        mwWeightedNormalizedSummedIntensities += (column[row]/detectabilityNormFactor[row])*mw[row];
                    }
                }
                factor = (param.GetSingleChoiceWithSubParams("Scaling mode")
                    .GetSubParameters()
                    .GetDoubleParam("Protein amount per cell [pg]")
                    .Value*1e-12*avogadro)/mwWeightedNormalizedSummedIntensities;
                break;
            case 1: // histone mode
                double mwWeightedNormalizedSummedHistoneIntensities = 0;
                foreach (int row in histoneRows) {
                    if (!double.IsNaN(column[row]) && !double.IsNaN(mw[row])) {
                        mwWeightedNormalizedSummedHistoneIntensities +=
                            (column[row]/detectabilityNormFactor[row])*mw[row];
                    }
                }
                double ploidy =
                    param.GetSingleChoiceWithSubParams("Scaling mode").GetSubParameters().GetDoubleParam("Ploidy").Value;
                factor = (cValue*ploidy*avogadro)/mwWeightedNormalizedSummedHistoneIntensities;
                break;
            default:
                factor = 1;
                break;
        }
        normalizationFactors[col] = factor;
    }

    // Averaging mode 1: use the same (mean) factor for all columns.
    if (averagingMode == 1) {
        double factor = ArrayUtils.Mean(normalizationFactors);
        for (int i = 0; i < normalizationFactors.Length; i++) {
            normalizationFactors[i] = factor;
        }
    }

    // Averaging mode 2: use the same (mean) factor within each group of the selected category row.
    if (averagingMode == 2) {
        int groupingInd = param.GetSingleChoiceWithSubParams("Averaging mode")
            .GetSubParameters()
            .GetSingleChoiceParam("Grouping")
            .Value;
        if (groupingInd == -1) {
            processInfo.ErrString = "No grouping selected.";
            return;
        }
        string[][] groupNames = mdata.GetCategoryRowAt(groupingInd);
        string[] uniqueGroupNames = Unique(groupNames);
        int[] grouping = new int[columns.Count];
        for (int i = 0; i < columns.Count; i++) {
            // Columns without a usable group form a group of their own. The key is offset by the
            // number of real groups so that it can never collide with a real group index.
            grouping[i] = uniqueGroupNames.Length + i;
            int index = intensityCols[i];
            if (index >= mdata.ExpressionColumnCount) {
                // Numeric annotation columns cannot be grouped
                continue;
            }
            // The category row has one entry per expression column, so it has to be addressed
            // with the expression column index, not with the position in the selection.
            string[] names = groupNames[index];
            if (names == null || names.Length == 0) {
                continue;
            }
            if (ArrayUtils.Contains(uniqueGroupNames, names[0])) {
                grouping[i] = ArrayUtils.IndexOf(uniqueGroupNames, names[0]);
            }
        }
        // gather the factors of every group
        Dictionary<int, List<double>> factors = new Dictionary<int, List<double>>();
        for (int i = 0; i < columns.Count; i++) {
            List<double> members;
            if (!factors.TryGetValue(grouping[i], out members)) {
                members = new List<double>();
                factors.Add(grouping[i], members);
            }
            members.Add(normalizationFactors[i]);
        }
        double[] averagedNormalizationFactors = new double[columns.Count];
        for (int i = 0; i < columns.Count; i++) {
            averagedNormalizationFactors[i] = ArrayUtils.Mean(factors[grouping[i]]);
        }
        normalizationFactors = averagedNormalizationFactors;
    }

    double proteinConcentration = param.GetDoubleParam("Total cellular protein concentration [g/l]").Value;

    // loop over all selected columns and calculate copy numbers
    for (int col = 0; col < columns.Count; col++) {
        string sampleName = sampleNames[col];
        double[] column = columns[col];
        double factor = normalizationFactors[col];
        double[] copyNumbers = new double[rowCount];
        double[] concentrations = new double[rowCount]; // nanomolar
        double[] massFraction = new double[rowCount]; // ppm
        double[] moleFraction = new double[rowCount]; // ppm
        double totalProtein = 0; // picograms
        double histoneMass = 0; // picograms
        double totalMolecules = 0;
        for (int row = 0; row < rowCount; row++) {
            if (double.IsNaN(column[row]) || double.IsNaN(mw[row])) {
                continue;
            }
            copyNumbers[row] = (column[row]/detectabilityNormFactor[row])*factor;
            totalMolecules += copyNumbers[row];
            double mass = (copyNumbers[row]*mw[row]*1e12)/avogadro; // picograms
            totalProtein += mass;
            if (histoneRowSet.Contains(row)) {
                histoneMass += mass;
            }
        }
        double totalVolume = (totalProtein/proteinConcentration)*1000; // femtoliters
        for (int row = 0; row < rowCount; row++) {
            if (double.IsNaN(column[row]) || double.IsNaN(mw[row])) {
                continue;
            }
            concentrations[row] = ((copyNumbers[row]/(totalVolume*1e-15))/avogadro)*1e9; // nanomolar
            massFraction[row] = (((copyNumbers[row]*mw[row]*1e12)/avogadro)/totalProtein)*1e6; // ppm
            moleFraction[row] = (copyNumbers[row]/totalMolecules)*1e6; // ppm
        }

        string suffix = (sampleName == "") ? "" : " " + sampleName;
        if (ArrayUtils.Contains(outputColumns, 0)) {
            mdata.AddNumericColumn("Copy number" + suffix, "", copyNumbers);
        }
        if (ArrayUtils.Contains(outputColumns, 1)) {
            mdata.AddNumericColumn("Concentration [nM]" + suffix, "", concentrations);
        }
        if (ArrayUtils.Contains(outputColumns, 2)) {
            mdata.AddNumericColumn("Abundance (mass/total mass) [*10^-6]" + suffix, "", massFraction);
        }
        if (ArrayUtils.Contains(outputColumns, 3)) {
            mdata.AddNumericColumn("Abundance (molecules/total molecules) [*10^-6]" + suffix, "", moleFraction);
        }

        double[] rank = ArrayUtils.Rank(copyNumbers);
        double[] relativeRank = new double[rowCount];
        double validRanks = rowCount;
        for (int row = 0; row < rowCount; row++) {
            // remove rank for protein with no copy number information
            if (double.IsNaN(copyNumbers[row]) || copyNumbers[row] == 0) {
                rank[row] = double.NaN;
                validRanks--; // do not consider as valid
            }
            // invert ranking, so that rank 0 is the most abundant protein (NaN stays NaN)
            rank[row] = rowCount - rank[row];
        }
        for (int row = 0; row < rowCount; row++) {
            relativeRank[row] = rank[row]/validRanks;
        }
        if (ArrayUtils.Contains(outputColumns, 4)) {
            mdata.AddNumericColumn("Copy number rank" + suffix, "", rank);
        }
        if (ArrayUtils.Contains(outputColumns, 5)) {
            mdata.AddNumericColumn("Relative copy number rank" + suffix, "", relativeRank);
        }

        // Per-sample summary values are only recorded for expression columns, and not when all
        // columns were collapsed into one (averaging mode 3).
        if (intensityCols[col] < mdata.ExpressionColumnCount && averagingMode != 3) {
            int target = intensityCols[col];
            totalProteinRow[target] = Math.Round(totalProtein, 2);
            totalMoleculesRow[target] = Math.Round(totalMolecules, 0);
            organismRow[target] = new string[] {organism.name};
            histoneMassRow[target] = Math.Round(histoneMass, 4);
            ploidyRow[target] = Math.Round((histoneMass*1e-12)/cValue, 2);
            cellVolumeRow[target] = Math.Round(totalVolume, 2); // femtoliters
        }
    }

    // summary annotation rows
    if (averagingMode != 3 && ArrayUtils.Contains(outputColumns, 6)) {
        mdata.AddNumericRow("Total protein [pg/cell]", "", totalProteinRow);
        mdata.AddNumericRow("Total molecules per cell", "", totalMoleculesRow);
        mdata.AddCategoryRow("Organism", "", organismRow);
        mdata.AddNumericRow("Histone mass [pg/cell]", "", histoneMassRow);
        mdata.AddNumericRow("Ploidy", "", ploidyRow);
        mdata.AddNumericRow("Cell volume [fl]", "", cellVolumeRow);
    }
}
/// <summary>
/// Reads a tab-separated file into <paramref name="matrixData"/>.
/// </summary>
/// <remarks>
/// The first line of the file is skipped, and lines recognized by <c>TabSep.IsCommentLine</c>
/// are ignored. Every remaining line is split at tabs and distributed over the expression,
/// category, numeric, multi-numeric and text columns according to the given index lists.
/// Cells that are missing or cannot be parsed become NaN (numbers), an empty array (category
/// and multi-numeric values) or an empty string (text).
/// </remarks>
/// <param name="colNames">Names of all columns in the file.</param>
/// <param name="colDescriptions">Descriptions of all columns in the file; may be null.</param>
/// <param name="expressionColIndices">File column indices loaded as expression columns.</param>
/// <param name="catColIndices">File column indices loaded as category columns.</param>
/// <param name="numColIndices">File column indices loaded as numeric columns.</param>
/// <param name="textColIndices">File column indices loaded as text columns.</param>
/// <param name="multiNumColIndices">File column indices loaded as multi-numeric columns.</param>
/// <param name="filename">Path of the file to read; also stored as the origin of the matrix.</param>
/// <param name="matrixData">Matrix that receives the data.</param>
/// <param name="annotationRows">Annotation rows, split by <c>SplitAnnotRows</c> into category and numeric rows.</param>
/// <param name="progress">Callback receiving the reading progress in percent.</param>
/// <param name="status">Callback receiving status messages.</param>
private static void LoadData(IList<string> colNames, IList<string> colDescriptions,
    IList<int> expressionColIndices, IList<int> catColIndices, IList<int> numColIndices,
    IList<int> textColIndices, IList<int> multiNumColIndices, string filename, IMatrixData matrixData,
    IDictionary<string, string[]> annotationRows, Action<int> progress, Action<string> status) {
    Dictionary<string, string[]> catAnnotatRows;
    Dictionary<string, string[]> numAnnotatRows;
    status("Reading data");
    SplitAnnotRows(annotationRows, out catAnnotatRows, out numAnnotatRows);
    int nrows = TabSep.GetRowCount(filename, 0, commentPrefix, commentPrefixExceptions);

    // allocate the storage for all column types
    float[,] expressionValues = new float[nrows, expressionColIndices.Count];
    List<string[][]> categoryAnnotation = new List<string[][]>();
    for (int i = 0; i < catColIndices.Count; i++) {
        categoryAnnotation.Add(new string[nrows][]);
    }
    List<double[]> numericAnnotation = new List<double[]>();
    for (int i = 0; i < numColIndices.Count; i++) {
        numericAnnotation.Add(new double[nrows]);
    }
    List<double[][]> multiNumericAnnotation = new List<double[][]>();
    for (int i = 0; i < multiNumColIndices.Count; i++) {
        multiNumericAnnotation.Add(new double[nrows][]);
    }
    List<string[]> stringAnnotation = new List<string[]>();
    for (int i = 0; i < textColIndices.Count; i++) {
        stringAnnotation.Add(new string[nrows]);
    }

    // The using block closes the file even when reading or parsing throws.
    using (StreamReader reader = new StreamReader(filename)) {
        reader.ReadLine(); // skip the first line
        int count = 0;
        string line;
        while ((line = reader.ReadLine()) != null) {
            // nrows is zero when the file has no data rows; skip the report instead of dividing by zero
            if (nrows > 0) {
                progress((100*(count + 1))/nrows);
            }
            if (TabSep.IsCommentLine(line, commentPrefix, commentPrefixExceptions)) {
                continue;
            }
            string[] w = line.Split('\t');

            // expression values
            for (int i = 0; i < expressionColIndices.Count; i++) {
                if (expressionColIndices[i] >= w.Length) {
                    expressionValues[count, i] = float.NaN;
                    continue;
                }
                string s = StringUtils.RemoveWhitespace(w[expressionColIndices[i]]);
                if (!float.TryParse(s, out expressionValues[count, i])) {
                    expressionValues[count, i] = float.NaN;
                }
            }

            // multi-numeric values: semicolon-separated numbers
            for (int i = 0; i < multiNumColIndices.Count; i++) {
                if (multiNumColIndices[i] >= w.Length) {
                    multiNumericAnnotation[i][count] = new double[0];
                    continue;
                }
                string[] ww = SplitQuotedSemicolonField(w[multiNumColIndices[i]]);
                double[] parsed = new double[ww.Length];
                for (int j = 0; j < ww.Length; j++) {
                    double q1;
                    parsed[j] = double.TryParse(ww[j], out q1) ? q1 : double.NaN;
                }
                multiNumericAnnotation[i][count] = parsed;
            }

            // category values: semicolon-separated terms, stored sorted
            for (int i = 0; i < catColIndices.Count; i++) {
                if (catColIndices[i] >= w.Length) {
                    categoryAnnotation[i][count] = new string[0];
                    continue;
                }
                string[] ww = SplitQuotedSemicolonField(w[catColIndices[i]]);
                Array.Sort(ww);
                categoryAnnotation[i][count] = ww;
            }

            // numeric values
            for (int i = 0; i < numColIndices.Count; i++) {
                if (numColIndices[i] >= w.Length) {
                    numericAnnotation[i][count] = double.NaN;
                    continue;
                }
                double q;
                bool success = double.TryParse(w[numColIndices[i]].Trim(), out q);
                numericAnnotation[i][count] = success ? q : double.NaN;
            }

            // text values
            for (int i = 0; i < textColIndices.Count; i++) {
                if (textColIndices[i] >= w.Length) {
                    stringAnnotation[i][count] = "";
                    continue;
                }
                string q = w[textColIndices[i]].Trim();
                stringAnnotation[i][count] = RemoveSplitWhitespace(RemoveQuotes(q));
            }
            count++;
        }
    }

    // hand the collected data over to the matrix
    string[] columnNames = ArrayUtils.SubArray(colNames, expressionColIndices);
    string[] catColnames = ArrayUtils.SubArray(colNames, catColIndices);
    string[] numColnames = ArrayUtils.SubArray(colNames, numColIndices);
    string[] multiNumColnames = ArrayUtils.SubArray(colNames, multiNumColIndices);
    string[] textColnames = ArrayUtils.SubArray(colNames, textColIndices);
    matrixData.SetData(filename, RemoveQuotes(columnNames), expressionValues, RemoveQuotes(textColnames),
        stringAnnotation, RemoveQuotes(catColnames), categoryAnnotation, RemoveQuotes(numColnames),
        numericAnnotation, RemoveQuotes(multiNumColnames), multiNumericAnnotation);

    if (colDescriptions != null) {
        string[] columnDesc = ArrayUtils.SubArray(colDescriptions, expressionColIndices);
        string[] catColDesc = ArrayUtils.SubArray(colDescriptions, catColIndices);
        string[] numColDesc = ArrayUtils.SubArray(colDescriptions, numColIndices);
        string[] multiNumColDesc = ArrayUtils.SubArray(colDescriptions, multiNumColIndices);
        string[] textColDesc = ArrayUtils.SubArray(colDescriptions, textColIndices);
        matrixData.ExpressionColumnDescriptions = new List<string>(columnDesc);
        matrixData.NumericColumnDescriptions = new List<string>(numColDesc);
        matrixData.CategoryColumnDescriptions = new List<string>(catColDesc);
        matrixData.StringColumnDescriptions = new List<string>(textColDesc);
        matrixData.MultiNumericColumnDescriptions = new List<string>(multiNumColDesc);
    }

    // category annotation rows: one semicolon-separated entry per expression column
    foreach (string key in ArrayUtils.GetKeys(catAnnotatRows)) {
        string[] svals = ArrayUtils.SubArray(catAnnotatRows[key], expressionColIndices);
        string[][] cat = new string[svals.Length][];
        for (int i = 0; i < cat.Length; i++) {
            string s = svals[i].Trim();
            cat[i] = s.Length > 0 ? s.Split(';') : new string[0];
        }
        matrixData.AddCategoryRow(key, key, cat);
    }

    // numeric annotation rows
    foreach (string key in ArrayUtils.GetKeys(numAnnotatRows)) {
        string[] svals = ArrayUtils.SubArray(numAnnotatRows[key], expressionColIndices);
        double[] num = new double[svals.Length];
        for (int i = 0; i < num.Length; i++) {
            // double.TryParse writes 0 to its out argument on failure, so the NaN fallback has
            // to be applied after the call rather than pre-assigned before it.
            double parsed;
            num[i] = double.TryParse(svals[i].Trim(), out parsed) ? parsed : double.NaN;
        }
        matrixData.AddNumericRow(key, key, num);
    }

    matrixData.Origin = filename;
    status("");
}

/// <summary>
/// Trims a raw cell, removes one pair of enclosing double quotes and then one pair of enclosing
/// single quotes (if present), and splits the remainder at semicolons. An empty cell yields an
/// empty array.
/// </summary>
private static string[] SplitQuotedSemicolonField(string field) {
    string q = field.Trim();
    if (q.Length >= 2 && q[0] == '\"' && q[q.Length - 1] == '\"') {
        q = q.Substring(1, q.Length - 2);
    }
    if (q.Length >= 2 && q[0] == '\'' && q[q.Length - 1] == '\'') {
        q = q.Substring(1, q.Length - 2);
    }
    return q.Length == 0 ? new string[0] : q.Split(';');
}