/// <summary>
/// Writes one tab-separated row per parsed database entry into a temporary file
/// ("databaseref.txt" in the temp folder) and loads that file into a new matrix.
/// </summary>
/// <param name="processInfo">Receives progress and status updates.</param>
/// <param name="nThreads">Thread count handed to the <c>ThreadDistributor</c>.</param>
/// <param name="databases">Databases to parse; may be null or empty.</param>
/// <returns>
/// The matrix loaded from the temporary file, or null when <paramref name="databases"/> is null or empty.
/// </returns>
private IMatrixData ProcessDbFiles(ProcessInfo processInfo, int nThreads, IList<Database> databases) {
    string tempFile = Path.Combine(FileUtils.GetTempFolder(), "databaseref.txt");
    IMatrixData matrix;
    StreamWriter writer = null;
    // Deliberately no catch block: the former "catch (Exception ex) { throw ex; }" only reset the
    // stack trace. Exceptions now propagate unchanged while the finally block still cleans up.
    try {
        processInfo.Progress(0);
        processInfo.Status(string.Format("Read database files [{0}|{1}]", 0, "?"));
        Enum[] enums = new Enum[] {
            databaseRef.file, databaseRef.source, databaseRef.specie, databaseRef.taxid, databaseRef.version,
            databaseRef.identifier
        };
        IList<string> header = enums.Select(Constants.GetPattern).ToList();
        if (databases == null || databases.Count == 0) {
            return null;
        }
        writer = new StreamWriter(tempFile);
        int nTasks = databases.Count;
        // First line: column names. Second line: a "#!{Type}" row with one "T" per column.
        writer.WriteLine(StringUtils.Concat("\t", header));
        writer.WriteLine("#!{Type}" + StringUtils.Concat("\t", header.Select(x => "T")));
        ThreadDistributor distr = new ThreadDistributor(nThreads, nTasks,
            x => ParseDatabase(writer, databases[x],
                string.Format("Read database files [{0}|{1}]", x + 1, nTasks), (x + 1)*100/nTasks, processInfo));
        // NOTE(review): assumes Start() returns only after all tasks finished, since the writer is
        // closed right afterwards - confirm against ThreadDistributor.
        distr.Start();
        processInfo.Status("Close all files");
        // Close() already disposes the writer; a separate Dispose() call is not needed.
        writer.Close();
        writer = null;
        processInfo.Progress(0);
        processInfo.Status("Create DatabaseRef Matrix");
        matrix = new MatrixData();
        LoadData(matrix, tempFile, processInfo);
    } finally {
        // Runs on success, on the early "return null" and on failure.
        if (writer != null) {
            writer.Close();
        }
        if (File.Exists(tempFile)) {
            File.Delete(tempFile);
        }
    }
    return matrix;
}
/// <summary>
/// Writes one tab-separated row per parsed peaklist entry into a temporary file
/// ("spectraref.txt" in the temp folder) and loads that file into a new matrix.
/// </summary>
/// <param name="processInfo">Receives progress and status updates.</param>
/// <param name="nThreads">Thread count handed to the <c>ThreadDistributor</c>.</param>
/// <param name="aplfiles">MS runs whose files are parsed; may be null or empty.</param>
/// <returns>
/// The matrix loaded from the temporary file, or null when <paramref name="aplfiles"/> is null or empty.
/// </returns>
private IMatrixData ProcessAplFiles(ProcessInfo processInfo, int nThreads, IList<MsRunImpl> aplfiles) {
    string tempFile = Path.Combine(FileUtils.GetTempFolder(), "spectraref.txt");
    // Remove a leftover file from an earlier run before starting.
    if (File.Exists(tempFile)) {
        File.Delete(tempFile);
    }
    IMatrixData matrix;
    StreamWriter writer = null;
    // Deliberately no catch block: the former "catch (Exception ex) { throw ex; }" only reset the
    // stack trace. Exceptions now propagate unchanged while the finally block still cleans up.
    try {
        Enum[] enums = new Enum[] {
            spectraRef.raw_file, spectraRef.charge, spectraRef.scan_number, spectraRef.location, spectraRef.format,
            spectraRef.id_format, spectraRef.fragmentation, spectraRef.mz, spectraRef.index
        };
        IList<string> header = enums.Select(Constants.GetPattern).ToList();
        if (aplfiles == null || aplfiles.Count == 0) {
            return null;
        }
        int nTasks = aplfiles.Count;
        processInfo.Progress(0);
        processInfo.Status(string.Format("Read Andromeda peaklist files [{0}|{1}]", 0, nTasks));
        writer = new StreamWriter(tempFile);
        // First line: column names. Second line: a "#!{Type}" row with one "T" per column.
        writer.WriteLine(StringUtils.Concat("\t", header));
        writer.WriteLine("#!{Type}" + StringUtils.Concat("\t", header.Select(x => "T")));
        ThreadDistributor distr = new ThreadDistributor(nThreads, nTasks,
            x => ParseAplFile(aplfiles[x], writer,
                string.Format("Read Andromeda peaklist files [{0}|{1}]", x + 1, nTasks), (x + 1)*100/nTasks,
                processInfo));
        // NOTE(review): assumes Start() returns only after all tasks finished, since the writer is
        // closed right afterwards - confirm against ThreadDistributor.
        distr.Start();
        processInfo.Status("Close all files");
        // Close() already disposes the writer; a separate Dispose() call is not needed.
        writer.Close();
        writer = null;
        processInfo.Progress(0);
        processInfo.Status("Create SpectraRef matrix");
        matrix = new MatrixData();
        LoadData(matrix, tempFile, processInfo);
    } finally {
        // Runs on success, on the early "return null" and on failure.
        if (writer != null) {
            writer.Close();
        }
        if (File.Exists(tempFile)) {
            File.Delete(tempFile);
        }
    }
    return matrix;
}
/// <summary>
/// Parses one peaklist file and writes a tab-separated row to <paramref name="writer"/> for every
/// entry whose title matches the "RawFile: ... Index: ..." pattern.
/// </summary>
/// <param name="aplfile">Run whose <c>Location.Value</c> is the file to parse.</param>
/// <param name="writer">Shared output writer; every access is guarded by a lock on the writer.</param>
/// <param name="status">Status text reported before parsing starts.</param>
/// <param name="progress">Progress value reported before parsing starts.</param>
/// <param name="processInfo">Receives the progress and status update (under a lock).</param>
private static void ParseAplFile(MsRunImpl aplfile, StreamWriter writer, string status, int progress,
    ProcessInfo processInfo) {
    lock (processInfo) {
        processInfo.Progress(progress);
        processInfo.Status(status);
    }
    string file = aplfile.Location.Value;
    string form = aplfile.Format == null ? "" : aplfile.Format.Name;
    string idform = aplfile.IdFormat == null ? "" : aplfile.IdFormat.Name;
    // Running number of matched entries in this file; written as the last column.
    int m = 0;
    // "[fF]" instead of the former "[f|F]", which also accepted a literal '|' inside the class.
    Regex regex = new Regex("Raw[fF]ile: (.*) Index: ([0-9]+)");
    AplParser parser = new AplParser(delegate(AplEntry entry) {
        // Evaluate the pattern once per entry and reuse the match for both groups.
        Match match = regex.Match(entry.Title);
        if (!match.Success) {
            return;
        }
        string rawfile = match.Groups[1].Value;
        string scannumber = match.Groups[2].Value;
        m++;
        object[] items = new object[] {
            rawfile, entry.PrecursorCharge, scannumber, file, form, idform, entry.Fragmentation, entry.Mz,
            m.ToString(CultureInfo.InvariantCulture)
        };
        lock (writer) {
            writer.WriteLine(StringUtils.Concat("\t", items));
        }
    });
    parser.Parse(file);
    lock (writer) {
        writer.Flush();
    }
}
/// <summary>
/// Reads the file of <paramref name="db"/> line by line and writes one tab-separated row to
/// <paramref name="writer"/> for every line starting with '&gt;'.
/// </summary>
/// <remarks>
/// Returns without writing or reporting anything when <c>db.File</c> is null or does not exist.
/// </remarks>
/// <param name="writer">Shared output writer; every write is guarded by a lock on the writer.</param>
/// <param name="db">Database description (file, search expression, prefix and metadata columns).</param>
/// <param name="status">Status text reported after the file was read.</param>
/// <param name="progress">Progress value reported after the file was read.</param>
/// <param name="processInfo">Receives the progress and status update (under a lock).</param>
private static void ParseDatabase(StreamWriter writer, Database db, string status, int progress,
    ProcessInfo processInfo) {
    if (db.File == null || !File.Exists(db.File)) {
        return;
    }
    // "using" guarantees the reader is closed even if the regex construction or a write throws.
    using (StreamReader reader = new StreamReader(db.File)) {
        Regex regex = new Regex(db.SearchExpression);
        string line;
        while ((line = reader.ReadLine()) != null) {
            // Ordinal comparison: the marker is a plain character, not culture-dependent text.
            if (!line.StartsWith(">", StringComparison.Ordinal)) {
                continue;
            }
            // Group 1 of the search expression is the identifier; empty when the line does not match.
            string identifier = regex.Match(line).Groups[1].Value;
            object[] items = new object[] {
                db.File, db.Source, db.Species, db.Taxid, db.Version,
                db.Prefix == null ? identifier : db.Prefix + identifier
            };
            lock (writer) {
                writer.WriteLine(StringUtils.Concat("\t", items));
            }
        }
    }
    lock (processInfo) {
        processInfo.Progress(progress);
        processInfo.Status(status);
    }
}
/// <summary>
/// Builds the experiment matrix (one row per assay) from the selected parameters and produces
/// supplementary matrices for the peaklist (".apl") files and the database files.
/// </summary>
/// <remarks>
/// While the method runs, <c>Console.Out</c> and <c>Console.Error</c> are redirected into the first
/// document (if there is one); both are restored in the finally block. Any exception is reported
/// through a message box, the logger and <paramref name="processInfo"/>, and null is returned.
/// </remarks>
/// <param name="inputData">Input matrices; the first one is used to create the output instance.</param>
/// <param name="param">Parameters holding the MS run, study variable, sample, assay and database choices.</param>
/// <param name="supplTables">Set to the supplementary matrices on success.</param>
/// <param name="documents">Created with <c>NumDocuments</c> entries when null is passed in.</param>
/// <param name="processInfo">Receives progress and status updates.</param>
/// <returns>The experiment matrix, or null when processing failed.</returns>
public override IMatrixData ProcessData(IMatrixData[] inputData, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    TextWriter defaultOut = Console.Out;
    TextWriter defaultErr = Console.Error;
    // Declared outside the try block so the finally block can flush and close it on every path.
    TextWriter logger = null;
    try {
        if (documents == null) {
            documents = new IDocumentData[NumDocuments];
            for (int i = 0; i < NumDocuments; i++) {
                documents[i] = new DocumentData();
            }
        }
        if (documents.Length > 0) {
            logger = new StreamWriter(new DocumentStream(documents[0]));
            Console.SetOut(logger);
            Console.SetError(logger);
        }
        int nThreads = GetMaxThreads(param);

        // Collect the selected MS runs.
        IList<MsRunImpl> runs = new List<MsRunImpl>();
        SingleChoiceWithSubParams singleSub =
            param.GetParam(MetadataElement.MS_RUN.Name) as SingleChoiceWithSubParams;
        if (singleSub != null) {
            MsRunParam sub =
                singleSub.SubParams[singleSub.Value].GetAllParameters().FirstOrDefault() as MsRunParam;
            if (sub != null && sub.Value != null) {
                foreach (MsRunImpl run in sub.Value) {
                    runs.Add(run);
                }
            }
        }

        // Collect the selected study variables.
        IList<StudyVariable> studyVariables = new List<StudyVariable>();
        singleSub = param.GetParam(MetadataElement.STUDY_VARIABLE.Name) as SingleChoiceWithSubParams;
        if (singleSub != null) {
            StudyVariableParam sub =
                singleSub.SubParams[singleSub.Value].GetAllParameters().FirstOrDefault() as StudyVariableParam;
            if (sub != null && sub.Value != null) {
                foreach (StudyVariable variable in sub.Value) {
                    studyVariables.Add(variable);
                }
            }
        }

        // Collect the selected samples.
        IList<Sample> samples = new List<Sample>();
        singleSub = param.GetParam(MetadataElement.SAMPLE.Name) as SingleChoiceWithSubParams;
        if (singleSub != null) {
            SampleParam sub =
                singleSub.SubParams[singleSub.Value].GetAllParameters().FirstOrDefault() as SampleParam;
            if (sub != null && sub.Value != null) {
                foreach (Sample sample in sub.Value) {
                    samples.Add(sample);
                }
            }
        }

        // Collect the selected assays.
        IList<Assay> assays = new List<Assay>();
        singleSub = param.GetParam(MetadataElement.ASSAY.Name) as SingleChoiceWithSubParams;
        if (singleSub != null) {
            AssayParam sub =
                singleSub.SubParams[singleSub.Value].GetAllParameters().FirstOrDefault() as AssayParam;
            if (sub != null && sub.Value != null) {
                foreach (Assay assay in sub.Value) {
                    assays.Add(assay);
                }
            }
        }

        // Collect the selected databases.
        IList<Database> databases = new List<Database>();
        singleSub = param.GetParam("database") as SingleChoiceWithSubParams;
        if (singleSub != null) {
            DatabaseParam sub =
                singleSub.SubParams[singleSub.Value].GetAllParameters().FirstOrDefault() as DatabaseParam;
            if (sub != null && sub.Value != null) {
                foreach (Database db in sub.Value) {
                    databases.Add(db);
                }
            }
        }

        IMatrixData output = (IMatrixData) inputData[0].CreateNewInstance(DataType.Matrix);
        List<string> columnnames = new List<string> {
            MetadataElement.STUDY_VARIABLE.Name, MetadataElement.ASSAY.Name, MetadataElement.MS_RUN.Name,
            MetadataElement.SAMPLE.Name, MetadataElement.INSTRUMENT.Name
        };
        // One string array per column, each holding one value per assay.
        List<string[]> matrix = new List<string[]>();
        for (int i = 0; i < columnnames.Count; i++) {
            matrix.Add(new string[assays.Count]);
        }
        for (int i = 0; i < assays.Count; i++) {
            Assay assay = assays[i];
            MsRunImpl runImpl = runs.FirstOrDefault(x => x.Id.Equals(assay.MsRun.Id));
            // NOTE(review): the instrument is looked up by the assay's MS run id - confirm this is intended.
            Instrument instrument = instruments.FirstOrDefault(x => x.Id.Equals(assay.MsRun.Id));
            if (runImpl == null) {
                continue;
            }
            // Positional defaults, overridden below when a study variable references this assay.
            var studyVariable = i < studyVariables.Count ? studyVariables[i] : null;
            var sample = i < samples.Count ? samples[i] : null;
            foreach (var s in studyVariables) {
                if (s.AssayMap.ContainsKey(assay.Id)) {
                    studyVariable = s;
                    try {
                        int sampleId = studyVariable.SampleMap.FirstOrDefault().Key;
                        sample = samples.FirstOrDefault(x => x.Id.Equals(sampleId));
                    } catch (Exception) {
                        Console.Error.WriteLine("Can not find sample");
                    }
                    break;
                }
            }
            AddRow(matrix, columnnames, i, runImpl, assay, sample, studyVariable, instrument);
        }
        output.SetData(Matrix.Experiment, new List<string>(), new float[assays.Count,columnnames.Count],
            columnnames, matrix, new List<string>(), new List<string[][]>(), new List<string>(),
            new List<double[]>(), new List<string>(), new List<double[][]>(), new List<string>(),
            new List<string[][]>(), new List<string>(), new List<double[]>());

        IList<IMatrixData> supplement = new List<IMatrixData>();
        try {
            IList<MsRunImpl> aplfiles =
                runs.Where(x => x.Location != null && x.Location.Value.EndsWith(".apl")).ToList();
            IMatrixData temp = ProcessAplFiles(processInfo, nThreads, aplfiles);
            if (temp != null) {
                supplement.Add(temp);
            }
        } catch (Exception e) {
            // Message text unchanged; the original exception is additionally kept as InnerException.
            throw new Exception("Could not parse spectra file(s)! " + e.Message + "\n" + e.StackTrace, e);
        }
        try {
            // Falls back to a single thread when there are fewer databases than threads.
            IMatrixData temp = ProcessDbFiles(processInfo, databases.Count < nThreads ? 1 : nThreads, databases);
            if (temp != null) {
                supplement.Add(temp);
            }
        } catch (Exception e) {
            // Message text unchanged; the original exception is additionally kept as InnerException.
            throw new Exception("Could not parse database file(s)! " + e.Message + "\n" + e.StackTrace, e);
        }
        supplTables = supplement.ToArray();
        processInfo.Status("Define Experiment: DONE!");
        processInfo.Progress(100);
        return output;
    } catch (Exception e) {
        string msg = "Process aborted! " + e.Message;
        MessageBox.Show(msg);
        Logger.Error(Name, msg);
        processInfo.Status(msg);
    } finally {
        // Restore the console first, then flush and close the document logger. Previously the logger
        // was disposed only on the success path, so a failure left it unflushed and open.
        Console.SetOut(defaultOut);
        Console.SetError(defaultErr);
        if (logger != null) {
            logger.Dispose();
        }
    }
    return null;
}