/// <summary>
/// Reads the tab-separated file named by the "Input file" parameter and adds one numeric row to
/// <paramref name="mdata"/> for every file column other than the name column.
/// </summary>
/// <param name="mdata">Data whose column names are looked up among the entries of the file's name column.</param>
/// <param name="param">Parameter set that provides the "Input file" path.</param>
/// <returns>An error message when the file cannot be read or lacks the required columns; otherwise <c>null</c>.</returns>
private static string ProcessDataReadFromFile(IDataWithAnnotationRows mdata, Parameters param) {
    string filename = param.GetParam<string>("Input file").Value;
    string[] headers;
    try {
        headers = TabSep.GetColumnNames(filename, '\t');
    } catch (Exception) {
        return "Could not open file " + filename + ". It maybe open in another program.";
    }
    int nameIndex = GetNameIndex(headers);
    if (nameIndex < 0) {
        return "Error: the file has to contain a column called 'Name'.";
    }
    if (headers.Length < 2) {
        return "Error: the file does not contain a numerical column.";
    }
    string[] names = TabSep.GetColumn(headers[nameIndex], filename, '\t');
    // Maps each entry of the name column to its position in the file.
    Dictionary<string, int> fileRowByName = ArrayUtils.InverseMap(names);
    for (int c = 0; c < headers.Length; c++) {
        if (c == nameIndex) {
            continue;
        }
        string rowName = headers[c];
        string[] rawValues = TabSep.GetColumn(rowName, filename, '\t');
        double[] values = new double[mdata.ColumnCount];
        for (int j = 0; j < values.Length; j++) {
            // NaN is the result for unknown names, blank cells and unparsable text.
            values[j] = double.NaN;
            int fileRow;
            if (!fileRowByName.TryGetValue(mdata.ColumnNames[j], out fileRow)) {
                continue;
            }
            string text = (rawValues[fileRow] ?? "").Trim();
            if (string.IsNullOrEmpty(text)) {
                continue;
            }
            double parsed;
            if (Parser.TryDouble(text, out parsed)) {
                values[j] = parsed;
            }
        }
        mdata.AddNumericRow(rowName, rowName, values);
    }
    return null;
}
/// <summary>
/// Reads the name column of the tab-separated file named by the "Input file" parameter and shows
/// every entry in a message box (debugging aid; <paramref name="mdata"/> is not modified).
/// </summary>
/// <param name="mdata">Not used by this implementation.</param>
/// <param name="param">Parameter set that provides the "Input file" path.</param>
/// <returns>An error message when the file cannot be read or lacks the required columns; otherwise <c>null</c>.</returns>
private static string ProcessDataReadFromFile(IDataWithAnnotationRows mdata, Parameters param) {
    string filename = param.GetParam<string>("Input file").Value;
    string[] headers;
    try {
        headers = TabSep.GetColumnNames(filename, '\t');
    } catch (Exception) {
        return "Could not open file " + filename + ". It maybe open in another program.";
    }
    int nameIndex = GetNameIndex(headers);
    if (nameIndex < 0) {
        return "Error: the file has to contain a column called 'Name'.";
    }
    if (headers.Length < 2) {
        return "Error: the file does not contain a grouping column.";
    }
    string[] names = TabSep.GetColumn(headers[nameIndex], filename, '\t');
    foreach (string entry in names) {
        MessageBox.Show(entry.ToString());
    }
    return null;
}
/// <summary>
/// Reads the name column of the tab-separated file named by the "Input file" parameter and shows
/// its length and the column count of <paramref name="mdata"/> in message boxes (debugging aid).
/// </summary>
/// <param name="mdata">Matrix whose column count is displayed; it is not modified.</param>
/// <param name="param">Parameter set that provides the "Input file" path.</param>
/// <returns>An error message when the file cannot be read or has no name column; otherwise <c>null</c>.</returns>
private static string ProcessDataReadFromFileList(IMatrixData mdata, Parameters param) {
    Parameter<string> fp = param.GetParam<string>("Input file");
    string filename = fp.Value;
    string[] colNames;
    try {
        colNames = TabSep.GetColumnNames(filename, '\t');
    } catch (Exception) {
        return "Could not open file " + filename + ". It maybe open in another program.";
    }
    int nameIndex = GetNameIndex(colNames);
    // Without this guard a negative index would be used to subscript colNames below.
    if (nameIndex < 0) {
        return "Error: the file has to contain a column called 'Name'.";
    }
    string[] nameCol = TabSep.GetColumn(colNames[nameIndex], filename, '\t');
    MessageBox.Show(nameCol.Length.ToString());
    MessageBox.Show(mdata.ColumnCount.ToString());
    for (int i = 0; i < mdata.ColumnCount; i++) {
        MessageBox.Show(nameCol.Length.ToString());
        MessageBox.Show(mdata.ColumnCount.ToString());
        // Disabled logic kept for reference: return "ERROR" when nameCol.Length differs from
        // mdata.ColumnCount, otherwise assign nameCol[i] to mdata.ColumnNames[i].
    }
    return null;
}
/// <summary>
/// Gene enrichment analysis for both Basic PECA and PECA-N: loads "Goterms.txt" from
/// <paramref name="workingDir"/> into a new matrix created from <paramref name="mdata"/>.
/// </summary>
/// <param name="mdata">Matrix used only to create the new, empty instance.</param>
/// <param name="workingDir">Directory that contains "Goterms.txt".</param>
/// <param name="option">0 names the result "GSA_Degradation", 1 "GSA_Synthesis"; any other value "GSA".</param>
/// <returns>The new matrix holding the columns of the file.</returns>
public static IMatrixData GetGOEnr(IMatrixData mdata, string workingDir, int option) //, out string errString)
{
    char separator = '\t';
    string filename = Path.Combine(workingDir, @".\Goterms.txt");
    IMatrixData result = (IMatrixData)mdata.CreateNewInstance();

    string suffix;
    switch (option) {
        case 0:
            suffix = "_Degradation";
            break;
        case 1:
            suffix = "_Synthesis";
            break;
        default:
            suffix = "";
            break;
    }
    string title = "GSA" + suffix;

    result.Clear();
    result.Name = title;
    result.AltName = title;

    // Every column of the file is first loaded as a string column.
    string[] headers = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
        PerseusUtils.commentPrefixExceptions, null, separator);
    string[][] columns = TabSep.GetColumns(headers, filename, 0, PerseusUtils.commentPrefix,
        PerseusUtils.commentPrefixExceptions, separator);
    int rowCount = TabSep.GetRowCount(filename);
    result.Values.Init(rowCount, 0);
    result.SetAnnotationColumns(
        new List<string>(headers), new List<string>(headers), new List<string[]>(columns),
        new List<string>(), new List<string>(), new List<string[][]>(),
        new List<string>(), new List<string>(), new List<double[]>(),
        new List<string>(), new List<string>(), new List<double[][]>());

    // The pattern matches only names that contain none of "id", "name" or "members"
    // (case-sensitive); those columns are handed to StringToNumerical.
    Regex numericNameFilter = new Regex(@"^((?!id|name|members).)*$");
    List<int> numericColumns = new List<int>();
    int position = 0;
    foreach (string header in headers) {
        if (numericNameFilter.Match(header).Success) {
            numericColumns.Add(position);
        }
        position++;
    }
    StringToNumerical(numericColumns, result);
    return result;
}
/// <summary>
/// Reads the tab-separated file named by the "Input file" parameter and adds one category row to
/// <paramref name="mdata"/> for every file column other than the name column.
/// </summary>
/// <param name="mdata">Matrix whose column names are looked up among the entries of the file's name column.</param>
/// <param name="param">Parameter set that provides the "Input file" path.</param>
/// <returns>An error message when the file lacks the required columns; otherwise <c>null</c>.</returns>
private static string ProcessDataReadFromFile(IMatrixData mdata, Parameters param) {
    Parameter<string> fp = param.GetParam<string>("Input file");
    string filename = fp.Value;
    string[] colNames = TabSep.GetColumnNames(filename, '\t');
    int nameIndex = GetNameIndex(colNames);
    if (nameIndex < 0) {
        return "Error: the file has to contain a column called 'Name'.";
    }
    if (colNames.Length < 2) {
        return "Error: the file does not contain a grouping column.";
    }
    string[] nameCol = TabSep.GetColumn(colNames[nameIndex], filename, '\t');
    // Maps each entry of the name column to its position in the file.
    Dictionary<string, int> map = ArrayUtils.InverseMap(nameCol);
    for (int i = 0; i < colNames.Length; i++) {
        if (i == nameIndex) {
            continue;
        }
        string groupName = colNames[i];
        string[] groupCol = TabSep.GetColumn(groupName, filename, '\t');
        string[][] newCol = new string[mdata.ColumnCount][];
        for (int j = 0; j < newCol.Length; j++) {
            int ind;
            if (!map.TryGetValue(mdata.ColumnNames[j], out ind)) {
                // Matrix column is not listed in the file: no categories.
                newCol[j] = new string[0];
                continue;
            }
            string group = (groupCol[ind] ?? "").Trim();
            if (string.IsNullOrEmpty(group)) {
                newCol[j] = new string[0];
                continue;
            }
            // A cell may hold several ';'-separated terms.
            string[] w = group.Split(';');
            // Trim before sorting: surrounding whitespace would otherwise decide the order
            // and leave the trimmed terms unsorted.
            for (int k = 0; k < w.Length; k++) {
                w[k] = w[k].Trim();
            }
            Array.Sort(w);
            newCol[j] = w;
        }
        mdata.AddCategoryRow(groupName, groupName, newCol);
    }
    return null;
}
/// <summary>
/// Reads the name column of the tab-separated file named by the "Input file" parameter.
/// The column is read but not yet applied to <paramref name="mdata"/>.
/// </summary>
/// <param name="mdata">Not used by this implementation.</param>
/// <param name="param">Parameter set that provides the "Input file" path.</param>
/// <returns>An error message when the file cannot be read or has no name column; otherwise <c>null</c>.</returns>
private static string ProcessDataReadFromFileList(IMatrixData mdata, Parameters param) {
    Parameter<string> fp = param.GetParam<string>("Input file");
    string filename = fp.Value;
    string[] colNames;
    try {
        colNames = TabSep.GetColumnNames(filename, '\t');
    } catch (Exception) {
        return "Could not open file " + filename + ". It maybe open in another program.";
    }
    int nameIndex = GetNameIndex(colNames);
    // Without this guard a negative index would be used to subscript colNames below.
    if (nameIndex < 0) {
        return "Error: the file has to contain a column called 'Name'.";
    }
    string[] nameCol = TabSep.GetColumn(colNames[nameIndex], filename, '\t');
    return null;
}
/// <summary>
/// Loads every column of a delimited text file into <paramref name="mdata"/> as a string column
/// and records the file as the matrix origin.
/// </summary>
/// <param name="mdata">Matrix to fill.</param>
/// <param name="filename">Path of the file to read.</param>
/// <param name="csv"><c>true</c> to split on commas, <c>false</c> to split on tabs.</param>
private static void LoadSplit(IMatrixData mdata, string filename, bool csv) {
    char separator;
    if (csv) {
        separator = ',';
    } else {
        separator = '\t';
    }
    string[] headers = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
        PerseusUtils.commentPrefixExceptions, null, separator);
    string[][] columns = TabSep.GetColumns(headers, filename, 0, PerseusUtils.commentPrefix,
        PerseusUtils.commentPrefixExceptions, separator);
    int rowCount = TabSep.GetRowCount(filename);
    // The main value matrix gets one row per file row and no columns.
    mdata.Values.Init(rowCount, 0);
    // Only the first three arguments carry data; the headers are passed twice.
    mdata.SetAnnotationColumns(
        new List<string>(headers), new List<string>(headers), new List<string[]>(columns),
        new List<string>(), new List<string>(), new List<string[][]>(),
        new List<string>(), new List<string>(), new List<double[]>(),
        new List<string>(), new List<string>(), new List<double[][]>());
    mdata.Origin = filename;
}
/// <summary>
/// Writes a small gzip-compressed table to a temporary file and checks that
/// <c>TabSep.GetColumnNames</c> returns the header fields from it.
/// </summary>
public void TestGetColumnNamesFromGzippedFile() {
    var lines = new[] { "Col_1 Col_2", "a b", };
    // Path.GetTempFileName() creates an empty file on disk, and appending ".gz" names a second
    // file, so both have to be removed afterwards.
    var placeholder = Path.GetTempFileName();
    var tmpFile = placeholder + ".gz";
    try {
        using (var memory = new MemoryStream(Encoding.UTF8.GetBytes(string.Join("\n", lines))))
        using (var outFile = File.Create(tmpFile))
        using (var gzip = new GZipStream(outFile, CompressionMode.Compress)) {
            memory.CopyTo(gzip);
        }
        var columnNames = TabSep.GetColumnNames(tmpFile, ' ');
        CollectionAssert.AreEqual(new[] { "Col_1", "Col_2" }, columnNames);
    } finally {
        try {
            File.Delete(placeholder);
            File.Delete(tmpFile);
        } catch (IOException) {
            // Best-effort cleanup: a failed delete must not hide the test result.
        }
    }
}
/// <summary>
/// Loads the output of fdr.exe (so only applicable to PECA CORE and N) into <paramref name="mdata"/>,
/// replacing its previous content. Modified from the LoadSplit function in
/// PerseusPluginLib/Load/UnstructuredTxtUpload.cs.
/// </summary>
/// <param name="mdata">Matrix that is cleared and refilled from the file.</param>
/// <param name="param">Not used by this implementation.</param>
/// <param name="dataParam">Parameter set holding the <c>int[]</c> parameter named by <paramref name="expSeries1"/>.</param>
/// <param name="filename">Tab-separated output file; its first line is rewritten before reading.</param>
/// <param name="geneName">Gene name column name passed to ReplaceFirstLine.</param>
/// <param name="expSeries1">Name of the parameter whose length gives the number of columns per series.</param>
/// <param name="numOfSeries">Number of expression series in the file.</param>
public static void GetOutput(IMatrixData mdata, Parameters param, Parameters dataParam, string filename, string geneName, string expSeries1 = "Expression Series 1", int numOfSeries = 2) {
    char separator = '\t';
    // The gene name column name is not included in the file, so the first line is replaced.
    ReplaceFirstLine(filename, geneName);
    string[] colNames = TabSep.GetColumnNames(filename, 0, PerseusUtils.commentPrefix,
        PerseusUtils.commentPrefixExceptions, null, separator);
    string[][] cols = TabSep.GetColumns(colNames, filename, 0, PerseusUtils.commentPrefix,
        PerseusUtils.commentPrefixExceptions, separator);
    int nrows = TabSep.GetRowCount(filename);
    mdata.Clear();
    mdata.Name = "PECA Analysis";
    mdata.Values.Init(nrows, 0);
    // Everything is loaded as string columns first and converted below.
    mdata.SetAnnotationColumns(
        new List<string>(colNames), new List<string>(colNames), new List<string[]>(cols),
        new List<string>(), new List<string>(), new List<string[][]>(),
        new List<string>(), new List<string>(), new List<double[]>(),
        new List<string>(), new List<string>(), new List<double[][]>());
    // Be careful with changes of the number of time points in the future.
    int numOfExpCols = numOfSeries * dataParam.GetParam<int[]>(expSeries1).Value.Length;
    // The file is structured so that expression columns come before numeric ones, and the first
    // column is guaranteed to be the name column. The numeric columns are converted before the
    // expression columns.
    int[] expList = Enumerable.Range(1, numOfExpCols).ToArray();
    int[] numericList = Enumerable.Range(numOfExpCols + 1, colNames.Length - numOfExpCols - 1).ToArray();
    StringToNumerical(numericList, mdata);
    StringToExpression(expList, mdata);
}
/// <summary>
/// Loads the gene list file of the organism selected by the "Organism" parameter into
/// <paramref name="matrixData"/>, with every file column stored as a string column.
/// </summary>
/// <param name="matrixData">Matrix to fill.</param>
/// <param name="parameters">Parameter set holding the "Organism" index into GetOrganismFiles().</param>
/// <param name="supplTables">Not used by this implementation.</param>
/// <param name="documents">Not used by this implementation.</param>
/// <param name="processInfo">Receives an error message when the file contains no columns.</param>
public void LoadData(IMatrixData matrixData, Parameters parameters, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    int ind = parameters.GetParam<int>("Organism").Value;
    string filename = GetOrganismFiles()[ind];
    List<string> stringColnames = new List<string>(TabSep.GetColumnNames(filename, '\t'));
    // The row count below is taken from the first column, so at least one column is required.
    if (stringColnames.Count == 0) {
        processInfo.ErrString = "File '" + filename + "' does not contain any columns.";
        return;
    }
    List<string[]> stringCols = new List<string[]>();
    foreach (string t in stringColnames) {
        string[] col = TabSep.GetColumn(t, filename, '\t');
        stringCols.Add(col);
    }
    matrixData.Name = "Gene list";
    matrixData.ColumnNames = new List<string>();
    matrixData.Values.Init(stringCols[0].Length, 0);
    matrixData.SetAnnotationColumns(stringColnames, stringCols, new List<string>(), new List<string[][]>(),
        new List<string>(), new List<double[]>(), new List<string>(), new List<double[][]>());
    matrixData.Origin = "Gene list";
}
/// <summary>
/// Shows <paramref name="filename"/> in the text box, reads its column names and fills the
/// column selector with them. The selection comes from the file's "Type" annotation row when
/// present, otherwise from a heuristic.
/// </summary>
/// <param name="filename">Path of the file to inspect; ".csv" and ".csv.gz" files are split on commas, all others on tabs.</param>
internal void UpdateFile(string filename) {
    textBox1.Text = filename;
    // Compressed CSV files must be split on commas as well, and the extension check must not
    // depend on the current culture.
    bool csv = filename.EndsWith(".csv", StringComparison.OrdinalIgnoreCase) ||
               filename.EndsWith(".csv.gz", StringComparison.OrdinalIgnoreCase);
    char separator = csv ? ',' : '\t';
    string[] colNames;
    Dictionary<string, string[]> annotationRows = new Dictionary<string, string[]>();
    try {
        colNames = TabSep.GetColumnNames(filename, PerseusUtils.commentPrefix,
            PerseusUtils.commentPrefixExceptions, annotationRows, separator);
    } catch (Exception) {
        MessageBox.Show("Could not open the file '" + filename + "'. It is probably opened by another program.");
        return;
    }
    string[] colTypes = null;
    if (annotationRows.TryGetValue("Type", out colTypes)) {
        annotationRows.Remove("Type");
    } else {
        colTypes = null;
    }
    string msg = TabSep.CanOpen(filename);
    if (msg != null) {
        MessageBox.Show(msg);
        return;
    }
    multiListSelectorControl1.Init(colNames);
    if (colTypes != null) {
        FormUtils.SelectExact(colNames, colTypes, multiListSelectorControl1);
    } else {
        FormUtils.SelectHeuristic(colNames, multiListSelectorControl1);
    }
}
/// <summary>
/// Loads the file selected in the "File" parameter into <paramref name="mdata"/>, using the
/// column assignments and filters configured on that parameter.
/// </summary>
/// <param name="mdata">Matrix to fill.</param>
/// <param name="parameters">Parameter set holding the "File" parameter.</param>
/// <param name="supplTables">Not used by this implementation.</param>
/// <param name="documents">Not used by this implementation.</param>
/// <param name="processInfo">Receives an error message when the file is missing, unreadable or a filter is invalid.</param>
public void LoadData(IMatrixData mdata, Parameters parameters, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    PerseusLoadMatrixParam par = (PerseusLoadMatrixParam)parameters.GetParam("File");
    string filename = par.Filename;
    if (string.IsNullOrEmpty(filename)) {
        processInfo.ErrString = "Please specify a filename";
        return;
    }
    if (!File.Exists(filename)) {
        processInfo.ErrString = "File '" + filename + "' does not exist.";
        return;
    }
    string ftl = filename.ToLower();
    bool csv = ftl.EndsWith(".csv") || ftl.EndsWith(".csv.gz");
    char separator = csv ? ',' : '\t';
    string[] colNames;
    Dictionary<string, string[]> annotationRows = new Dictionary<string, string[]>();
    try {
        colNames = TabSep.GetColumnNames(filename, PerseusUtils.commentPrefix,
            PerseusUtils.commentPrefixExceptions, annotationRows, separator);
    } catch (Exception) {
        processInfo.ErrString = "Could not open the file '" + filename + "'. It is probably opened in another program.";
        return;
    }
    string origin = filename;
    int[] eInds = par.MainColumnIndices;
    int[] nInds = par.NumericalColumnIndices;
    int[] cInds = par.CategoryColumnIndices;
    int[] tInds = par.TextColumnIndices;
    int[] mInds = par.MultiNumericalColumnIndices;
    List<Tuple<Relation[], int[], bool>> filters = new List<Tuple<Relation[], int[], bool>>();
    string errString;
    foreach (Parameters p in par.MainFilterParameters) {
        PerseusUtils.AddFilter(filters, p, eInds, out errString);
        if (errString != null) {
            processInfo.ErrString = errString;
            return;
        }
    }
    foreach (Parameters p in par.NumericalFilterParameters) {
        PerseusUtils.AddFilter(filters, p, nInds, out errString);
        if (errString != null) {
            processInfo.ErrString = errString;
            return;
        }
    }
    int nrows = GetRowCount(filename, eInds, filters, separator);
    // Both readers are released even when loading throws.
    using (StreamReader reader = FileUtils.GetReader(filename))
    using (StreamReader auxReader = FileUtils.GetReader(filename)) {
        PerseusUtils.LoadMatrixData(annotationRows, eInds, cInds, nInds, tInds, mInds, processInfo, colNames, mdata, reader,
            auxReader, nrows, origin, separator, par.ShortenExpressionColumnNames, filters);
    }
    // Explicit collection kept from the original implementation.
    GC.Collect();
}