/// <summary>
/// Builds chromatograph profiles for the peptides read from <c>options.InputFile</c>, writes each profile
/// as a text file plus an ".xml" companion, and finally runs the boundary R template over the target directory.
/// </summary>
/// <remarks>
/// Peptides are grouped by the lower-cased experimental name and each group is processed in parallel against
/// the "*.raw" file of the same name found (recursively) under <c>options.RawDirectory</c>.
/// </remarks>
/// <returns>An array containing only <c>options.OutputFile</c>.</returns>
/// <exception cref="Exception">
/// Thrown when a peptide refers to an experiment that has no raw file in the raw directory. Failures inside
/// the parallel loop (for example a raw file without any full MS scan, or user cancellation) are raised from
/// within <c>Parallel.ForEach</c>.
/// </exception>
public override IEnumerable<string> Process()
{
    var format = new MascotPeptideTextFormat();
    var spectra = format.ReadFromFile(options.InputFile);
    var peptideMap = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
    var rawfiles = Directory.GetFiles(options.RawDirectory, "*.raw", SearchOption.AllDirectories).ToDictionary(m => Path.GetFileNameWithoutExtension(m).ToLower());
    var rententionWindow = options.RetentionTimeWindow;

    var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();
    if (missed.Length > 0)
    {
        throw new Exception(string.Format("Cannot find raw file of {0} in directory {1}", missed.Merge("/"), options.RawDirectory));
    }

    var option = new ParallelOptions()
    {
        // ParallelOptions rejects a value of 0, which Math.Min would produce for an empty peptide map,
        // so always keep at least one worker.
        MaxDegreeOfParallelism = Math.Max(1, Math.Min(Environment.ProcessorCount, peptideMap.Count)),
    };

    Parallel.ForEach(peptideMap, option, raw =>
    {
        var peptides = raw.Value;

        Progress.SetMessage("Preparing isotopic for " + raw.Key + " ...");

        // One profile candidate per identified peptide of this raw file.
        var waitingPeaks = new List<ChromatographProfile>();
        foreach (var peptide in peptides)
        {
            string file = GetTargetFile(peptide);
            var chro = new ChromatographProfile()
            {
                Experimental = peptide.Query.FileScan.Experimental,
                IdentifiedScan = peptide.Query.FileScan.FirstScan,
                ObservedMz = peptide.GetPrecursorMz(),
                TheoreticalMz = peptide.GetTheoreticalMz(),
                Charge = peptide.Query.Charge,
                Sequence = peptide.Peptide.PureSequence,
                FileName = Path.GetFileName(file)
            };
            chro.InitializeIsotopicIons(options.MzTolerancePPM);
            waitingPeaks.Add(chro);
        }

        if (waitingPeaks.Count == 0)
        {
            return;
        }

        List<FullMS> fullMSList = new List<FullMS>();

        Progress.SetMessage("Reading full ms list from " + rawfiles[raw.Key] + "...");
        using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawfiles[raw.Key])))
        {
            // Collect every MS level 1 scan in ascending scan order; peaks are not loaded here.
            var firstScan = rawReader.GetFirstSpectrumNumber();
            var lastScan = rawReader.GetLastSpectrumNumber();
            for (int scan = firstScan; scan <= lastScan; scan++)
            {
                var mslevel = rawReader.GetMsLevel(scan);
                if (mslevel == 1)
                {
                    fullMSList.Add(new FullMS()
                    {
                        Scan = scan,
                        RetentionTime = rawReader.ScanToRetentionTime(scan),
                        Peaks = null
                    });
                }
            }

            // Without any full MS scan the master-scan lookup below would index an empty list.
            if (fullMSList.Count == 0)
            {
                throw new Exception(string.Format("Cannot find full MS scan in raw file {0}", rawfiles[raw.Key]));
            }

            // Identifications sharing sequence and theoretical m/z (4 decimals) are handled together.
            var chroGroups = waitingPeaks.GroupBy(chro => string.Format("{0}_{1:0.0000}", chro.Sequence, chro.TheoreticalMz));
            foreach (var chroGroup in chroGroups)
            {
                List<ChromatographProfile> profileChros = new List<ChromatographProfile>();
                foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                {
                    // Master scan: the last full MS scan whose number does not exceed the identified scan
                    // (index 0 when no full MS scan precedes it).
                    var masterScanIndex = 0;
                    for (int i = 1; i < fullMSList.Count; i++)
                    {
                        if (chro.IdentifiedScan < fullMSList[i].Scan)
                        {
                            break;
                        }
                        masterScanIndex = i;
                    }
                    var masterScan = fullMSList[masterScanIndex].Scan;
                    var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                    // If a profile already built for this group contains the master scan, only flag that
                    // scan as identified instead of building a duplicate profile.
                    bool bExist = false;
                    foreach (var profileChro in profileChros)
                    {
                        foreach (var pkl in profileChro.Profiles)
                        {
                            if (pkl.Scan == masterScan)
                            {
                                pkl.Identified = true;
                                bExist = true;
                                break;
                            }
                        }

                        if (bExist)
                        {
                            break;
                        }
                    }

                    if (bExist)
                    {
                        continue;
                    }

                    Progress.SetMessage("Processing {0} : {1:0.#####} : {2} : {3}", chro.Sequence, chro.ObservedMz, chro.IdentifiedScan, Path.GetFileName(chro.FileName));

                    // Walk backwards from the master scan until the retention time window is exceeded
                    // or AddEnvelope reports failure.
                    for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                    {
                        if (Progress.IsCancellationPending())
                        {
                            throw new UserTerminatedException();
                        }

                        var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                        if (masterRetentionTime - curRetentionTime > rententionWindow)
                        {
                            break;
                        }

                        if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                        {
                            break;
                        }

                        // NOTE(review): relies on AddEnvelope appending to chro.Profiles when it returns true - confirm.
                        if (scanIndex == masterScanIndex)
                        {
                            chro.Profiles.Last().Identified = true;
                        }
                    }

                    // The backward walk collected scans newest-first; restore ascending order.
                    chro.Profiles.Reverse();

                    // Walk forwards from the scan after the master scan under the same stop conditions.
                    for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                    {
                        if (Progress.IsCancellationPending())
                        {
                            throw new UserTerminatedException();
                        }

                        var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                        if (curRetentionTime - masterRetentionTime > rententionWindow)
                        {
                            break;
                        }

                        if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                        {
                            break;
                        }
                    }

                    profileChros.Add(chro);
                }

                // Drop short profiles and put the longest profile first.
                profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);
                profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                // The first (longest) profile goes to the target directory; the others go to the
                // sub directory with ".sub" inserted before the original extension.
                bool bMain = true;
                foreach (var chro in profileChros)
                {
                    string filename;
                    if (bMain)
                    {
                        filename = Path.Combine(GetTargetDirectory(chro.Experimental), chro.FileName);
                    }
                    else
                    {
                        filename = Path.Combine(GetTargetSubDirectory(chro.Experimental), Path.ChangeExtension(chro.FileName, ".sub" + Path.GetExtension(chro.FileName)));
                    }
                    bMain = false;

                    new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                    new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                }
            }
        }
    });

    Progress.SetMessage("Finding boundaries ...");
    var boundaryOptions = new RTemplateProcessorOptions()
    {
        InputFile = targetDir,
        OutputFile = options.OutputFile,
        RTemplate = BoundaryR,
        RExecute = SystemUtils.GetRExecuteLocation(),
        CreateNoWindow = true
    };
    new RTemplateProcessor(boundaryOptions) { Progress = this.Progress }.Process();

    return new string[] { options.OutputFile };
}
/// <summary>
/// Builds chromatograph profiles for the peptides read from <c>options.InputFile</c>, writes them to disk
/// together with a tab-separated index file (<c>options.OutputFile</c> with the extension ".chros.tsv"),
/// and then runs the boundary R template on that index.
/// </summary>
/// <remarks>
/// Each of the two stages is skipped when its output file already exists and <c>options.Overwrite</c> is
/// not set. Numbers written to the index file and passed to the R template are formatted with the invariant
/// culture so that the output does not depend on the machine's regional settings.
/// </remarks>
/// <returns>An array containing only <c>options.OutputFile</c>.</returns>
/// <exception cref="Exception">
/// Thrown when a peptide refers to an experiment that is not in <c>options.RawFiles</c>, or when no
/// chromatograph profile was detected at all. Failures inside the parallel loop (for example a raw file
/// without any full MS scan, or user cancellation) are raised from within <c>Parallel.ForEach</c>.
/// </exception>
public override IEnumerable<string> Process()
{
    var boundaryInput = Path.ChangeExtension(options.OutputFile, ".chros.tsv");
    if (!File.Exists(boundaryInput) || options.Overwrite)
    {
        var format = GetPeptideReader();
        var spectra = format.ReadFromFile(options.InputFile);
        var peptideMap = spectra.ToGroupDictionary(m => m.Query.FileScan.Experimental.ToLower());
        var rawfiles = options.RawFiles.ToDictionary(m => RawFileFactory.GetExperimental(m).ToLower());
        var rententionWindow = options.MaximumRetentionTimeWindow;

        var missed = peptideMap.Keys.Except(rawfiles.Keys).ToArray();
        if (missed.Length > 0)
        {
            throw new Exception(string.Format("Cannot find raw file of {0} in file list", missed.Merge("/")));
        }

        var optionThreadCount = options.ThreadCount == 0 ? Environment.ProcessorCount : options.ThreadCount;
        var maxThreads = Math.Min(optionThreadCount, peptideMap.Count);
        if (maxThreads == 0 || maxThreads < -1)
        {
            // ParallelOptions accepts only -1 or a positive value. An empty peptide map would yield 0 here
            // and fail before the "Cannot find chromotograph!" check below could report the real problem.
            maxThreads = 1;
        }
        var option = new ParallelOptions()
        {
            MaxDegreeOfParallelism = maxThreads,
        };

        // Prepare, per raw file key, one profile candidate for every identified peptide.
        var chroMap = new List<Tuple<string, List<ChromatographProfile>>>();
        foreach (var raw in peptideMap)
        {
            var peptides = raw.Value;
            var waitingPeaks = new List<ChromatographProfile>();
            foreach (var peptide in peptides)
            {
                var chro = new ChromatographProfile()
                {
                    Experimental = peptide.Query.FileScan.Experimental,
                    IdentifiedScan = peptide.Query.FileScan.FirstScan,
                    IdentifiedRetentionTime = peptide.Query.FileScan.RetentionTime,
                    ObservedMz = peptide.GetPrecursorMz(),
                    TheoreticalMz = peptide.GetTheoreticalMz(),
                    Charge = peptide.Query.Charge,
                    Sequence = peptide.Peptide.PureSequence,
                    FileName = GetTargetFile(peptide),
                    SubFileName = GetTargetSubFile(peptide)
                };
                chro.InitializeIsotopicIons(options.MzTolerancePPM, options.MinimumIsotopicPercentage);
                waitingPeaks.Add(chro);
            }

            chroMap.Add(new Tuple<string, List<ChromatographProfile>>(raw.Key, waitingPeaks));
        }

        // Main profiles found by the parallel workers; a concurrent collection because workers add to it.
        ConcurrentBag<ChromatographProfile> detected = new ConcurrentBag<ChromatographProfile>();

        Parallel.ForEach(chroMap, option, raw =>
        {
            var rawFileName = raw.Item1;
            var waitingPeaks = raw.Item2;

            List<FullMS> fullMSList = new List<FullMS>();

            Progress.SetMessage("Reading full ms list from " + rawfiles[rawFileName] + "...");
            using (var rawReader = new CacheRawFile(RawFileFactory.GetRawFileReader(rawfiles[rawFileName])))
            {
                // Collect every MS level 1 scan in ascending scan order; peaks are not loaded here.
                var firstScan = rawReader.GetFirstSpectrumNumber();
                var lastScan = rawReader.GetLastSpectrumNumber();
                for (int scan = firstScan; scan <= lastScan; scan++)
                {
                    var mslevel = rawReader.GetMsLevel(scan);
                    if (mslevel == 1)
                    {
                        fullMSList.Add(new FullMS()
                        {
                            Scan = scan,
                            RetentionTime = rawReader.ScanToRetentionTime(scan),
                            Peaks = null
                        });
                    }
                }

                // Without any full MS scan the master-scan lookup below would index an empty list.
                if (fullMSList.Count == 0)
                {
                    throw new Exception(string.Format("Cannot find full MS scan in raw file {0}", rawfiles[rawFileName]));
                }

                // Identifications that carry only a retention time get a scan number derived from the
                // full MS scans: one past the last full MS scan (from index 1 on) that is not later than it.
                foreach (var chro in waitingPeaks)
                {
                    if (chro.IdentifiedScan == 0 && chro.IdentifiedRetentionTime > 0)
                    {
                        for (int i = 1; i < fullMSList.Count; i++)
                        {
                            if (chro.IdentifiedRetentionTime < fullMSList[i].RetentionTime)
                            {
                                break;
                            }
                            chro.IdentifiedScan = fullMSList[i].Scan + 1;
                        }
                    }
                }

                var chroGroups = waitingPeaks.GroupBy(chro => chro.GetPeptideId());
                foreach (var chroGroup in chroGroups)
                {
                    List<ChromatographProfile> profileChros = new List<ChromatographProfile>();
                    foreach (var chro in chroGroup.OrderBy(m => m.IdentifiedScan))
                    {
                        // Master scan: the last full MS scan whose number does not exceed the identified
                        // scan (index 0 when no full MS scan precedes it).
                        var masterScanIndex = 0;
                        for (int i = 1; i < fullMSList.Count; i++)
                        {
                            if (chro.IdentifiedScan < fullMSList[i].Scan)
                            {
                                break;
                            }
                            masterScanIndex = i;
                        }
                        var masterScan = fullMSList[masterScanIndex].Scan;
                        var masterRetentionTime = fullMSList[masterScanIndex].RetentionTime;

                        // If a profile already built for this group contains the master scan, only flag
                        // that scan as identified instead of building a duplicate profile.
                        bool bExist = false;
                        foreach (var profileChro in profileChros)
                        {
                            foreach (var pkl in profileChro.Profiles)
                            {
                                if (pkl.Scan == masterScan)
                                {
                                    pkl.Identified = true;
                                    bExist = true;
                                    break;
                                }
                            }

                            if (bExist)
                            {
                                break;
                            }
                        }

                        if (bExist)
                        {
                            continue;
                        }

                        // Allow one missed scan: the walk stops at the second scan for which
                        // AddEnvelope fails.
                        int naCount = 2;

                        // Walk backwards from the master scan while inside the retention time window.
                        for (int scanIndex = masterScanIndex; scanIndex >= 0; scanIndex--)
                        {
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }

                            var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                            if (masterRetentionTime - curRetentionTime > rententionWindow)
                            {
                                break;
                            }

                            if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                            {
                                naCount--;
                                if (naCount == 0)
                                {
                                    break;
                                }
                                else
                                {
                                    continue;
                                }
                            }

                            // NOTE(review): relies on AddEnvelope appending to chro.Profiles when it returns true - confirm.
                            if (scanIndex == masterScanIndex)
                            {
                                chro.Profiles.Last().Identified = true;
                            }
                        }

                        // The backward walk collected scans newest-first; restore ascending order.
                        chro.Profiles.Reverse();

                        // Walk forwards from the scan after the master scan with a fresh miss allowance.
                        naCount = 2;
                        for (int scanIndex = masterScanIndex + 1; scanIndex < fullMSList.Count; scanIndex++)
                        {
                            if (Progress.IsCancellationPending())
                            {
                                throw new UserTerminatedException();
                            }

                            var curRetentionTime = fullMSList[scanIndex].RetentionTime;
                            if (curRetentionTime - masterRetentionTime > rententionWindow)
                            {
                                break;
                            }

                            if (!AddEnvelope(chro, rawReader, fullMSList, scanIndex))
                            {
                                naCount--;
                                if (naCount == 0)
                                {
                                    break;
                                }
                                else
                                {
                                    continue;
                                }
                            }
                        }

                        profileChros.Add(chro);
                    }

                    // Drop short profiles and put the longest profile first.
                    profileChros.RemoveAll(l => l.Profiles.Count < options.MinimumScanCount);
                    profileChros.Sort((m1, m2) => m2.Profiles.Count.CompareTo(m1.Profiles.Count));

                    // The first (longest) profile is the main one: it is written to FileName and reported
                    // in the index file. The others are written to their SubFileName only.
                    bool bMain = true;
                    foreach (var chro in profileChros)
                    {
                        var filename = bMain ? chro.FileName : chro.SubFileName;
                        if (bMain)
                        {
                            detected.Add(chro);
                        }
                        bMain = false;

                        new ChromatographProfileTextWriter().WriteToFile(filename, chro);
                        new ChromatographProfileXmlFormat().WriteToFile(filename + ".xml", chro);
                    }
                }
            }
        });

        // ConcurrentBag has no defined order; sort by file name for a stable index file.
        var chroList = new List<ChromatographProfile>(detected);
        chroList.Sort((m1, m2) => m1.FileName.CompareTo(m2.FileName));

        if (chroList.Count == 0)
        {
            throw new Exception("Cannot find chromotograph!");
        }

        using (var sw = new StreamWriter(boundaryInput))
        {
            sw.WriteLine("ChroDirectory\tChroFile\tSample\tPeptideId\tTheoreticalMz\tCharge\tIdentifiedScan");
            foreach (var chro in chroList)
            {
                // Invariant culture: the file is parsed by the R template, so a locale-specific decimal
                // separator in TheoreticalMz would corrupt it.
                sw.WriteLine(string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}",
                    Path.GetDirectoryName(chro.FileName).Replace("\\", "/"),
                    Path.GetFileNameWithoutExtension(chro.FileName),
                    chro.Experimental,
                    chro.GetPeptideId(),
                    chro.TheoreticalMz,
                    chro.Charge,
                    chro.IdentifiedScan));
            }
        }
    }

    if (!File.Exists(options.OutputFile) || options.Overwrite)
    {
        Progress.SetMessage("Finding boundaries ...");
        var boundaryOptions = new RTemplateProcessorOptions()
        {
            InputFile = boundaryInput,
            OutputFile = options.OutputFile,
            RTemplate = BoundaryR,
            RExecute = ExternalProgramConfig.GetExternalProgram("R"),
            CreateNoWindow = true
        };
        boundaryOptions.Parameters.Add("outputImage<-" + (options.DrawImage ? "1" : "0"));
        // The value becomes part of an R assignment, so it must use '.' as the decimal separator.
        boundaryOptions.Parameters.Add(string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            "maximumProfileDistance<-{0}",
            options.MaximumProfileDistance));
        new RTemplateProcessor(boundaryOptions) { Progress = this.Progress }.Process();
    }

    return new string[] { options.OutputFile };
}