/// <summary>
/// Reads each CSV file in <paramref name="filePaths"/>, resolves the quantitation scan of every
/// record in the matching raw file, builds one <c>QuantPeak</c> per used tag and yields one
/// populated <c>QuantFile</c> per input path.
/// </summary>
/// <remarks>
/// This is an iterator: nothing runs until the result is enumerated. As side effects it sets
/// <c>MSDataFile.CacheScans</c> to <c>false</c>, renumbers every tag in <c>UsedTags</c> and
/// zeroes each tag's <c>TotalSignal</c> and <c>NormalizedTotalSignal</c>. All raw files in
/// <c>RawFiles</c> are disposed after each CSV file, whether it succeeded or failed.
/// </remarks>
/// <param name="filePaths">
/// Paths of the CSV files to process. Each file must provide the "Spectrum number",
/// "Filename/id" and "DEFLINE" columns.
/// </param>
/// <param name="ms3Quant">
/// Not read by this method; the <c>MS3Quant</c> member decides whether MS3 scans are used.
/// NOTE(review): confirm whether this parameter was meant to replace that member.
/// </param>
/// <returns>One <c>QuantFile</c> per entry of <paramref name="filePaths"/>, in input order.</returns>
/// <exception cref="ArgumentException">
/// The raw file named by a record is not in <c>RawFiles</c>, the record's scan is not an
/// <c>MsnDataScan</c>, no MS3 scan points back to the record's scan (MS3 mode only), or the
/// noise level cannot be read from the spectrum (noise-band capping only).
/// </exception>
private IEnumerable<QuantFile> LoadFiles(IEnumerable<string> filePaths, bool ms3Quant = false)
{
    MSDataFile.CacheScans = false;

    // Give every used tag a dense, zero-based index and clear its running totals.
    // The final counter value is therefore the size needed for the per-PSM peak array.
    int tagCount = 0;
    foreach (TagInformation tag in UsedTags.Values)
    {
        tag.UniqueTagNumber = tagCount++;
        tag.TotalSignal = 0;
        tag.NormalizedTotalSignal = 0;
    }

    foreach (string filePath in filePaths)
    {
        Log("Processing file:\t" + filePath);
        OnUpdateLog("Processing File " + filePath + "...");

        QuantFile quantFile = new QuantFile(filePath);
        int oldProgress = -1;

        try
        {
            // Own the StreamReader explicitly so the file handle is released even if the
            // CsvReader cannot be constructed.
            using (StreamReader basestreamReader = new StreamReader(filePath))
            using (CsvReader reader = new CsvReader(basestreamReader, true))
            {
                // Walk the CSV and pull the matching scan data out of the raw file.
                while (reader.ReadNextRecord())
                {
                    int scanNumber = int.Parse(reader["Spectrum number"]);
                    string filenameID = reader["Filename/id"];
                    string rawFileName = filenameID.Split('.')[0];
                    bool isDecoy = reader["DEFLINE"].StartsWith("DECOY_");

                    ThermoRawFile rawFile;
                    if (!RawFiles.TryGetValue(rawFileName, out rawFile))
                    {
                        throw new ArgumentException("Cannot find this raw file: " + rawFileName + ".raw");
                    }

                    if (!rawFile.IsOpen)
                    {
                        rawFile.Open();
                    }

                    // Progress is the fraction of the CSV stream consumed so far; only report changes.
                    int progress = (int)(100 * (double)basestreamReader.BaseStream.Position / basestreamReader.BaseStream.Length);
                    if (progress != oldProgress)
                    {
                        OnProgressUpdate(progress);
                        oldProgress = progress;
                    }

                    // Get the scan object for the sequence MS2 scan. Validate it BEFORE any member
                    // access so an invalid scan number reports a meaningful error instead of a
                    // NullReferenceException from the purity calculation below.
                    MsnDataScan quantitationMsnScan = rawFile[scanNumber] as MsnDataScan;
                    if (quantitationMsnScan == null)
                    {
                        throw new ArgumentException("Spectrum Number " + scanNumber + " is not a valid MS2 scan from: " + rawFile.FilePath);
                    }

                    double purity = 1;
                    if (CalculatePurity)
                    {
                        double mz = quantitationMsnScan.PrecursorMz;
                        int charge = quantitationMsnScan.PrecursorCharge;
                        DoubleRange isolationRange = MzRange.FromDa(mz, PurityWindowInTh);
                        MSDataScan parentScan = rawFile[quantitationMsnScan.ParentScanNumber];
                        purity = DeterminePurity(parentScan, mz, charge, isolationRange);
                    }

                    if (MS3Quant)
                    {
                        // Look forward for the MS3 scan whose parent is this MS2 scan.
                        // NOTE(review): the bound excludes rawFile.LastSpectrumNumber itself; if that
                        // value is inclusive the final spectrum is never considered - confirm.
                        quantitationMsnScan = null;
                        for (int ms3ScanNumber = scanNumber + 1; ms3ScanNumber < rawFile.LastSpectrumNumber; ms3ScanNumber++)
                        {
                            if (rawFile.GetParentSpectrumNumber(ms3ScanNumber) == scanNumber)
                            {
                                quantitationMsnScan = rawFile[ms3ScanNumber] as MsnDataScan;
                                break;
                            }
                        }

                        if (quantitationMsnScan == null)
                        {
                            throw new ArgumentException("Cannot find a MS3 spectrum associated with spectrum number " + scanNumber);
                        }
                    }

                    // Ion-trap scans use the IT tolerance; every other analyzer uses the FT tolerance.
                    Tolerance tolerance = quantitationMsnScan.MzAnalyzer == MZAnalyzerType.IonTrap2D
                        ? ItMassTolerance
                        : FtMassTolerance;
                    bool isETD = quantitationMsnScan.DissociationType == DissociationType.ETD;
                    double injectionTime = quantitationMsnScan.InjectionTime;
                    var massSpectrum = quantitationMsnScan.MassSpectrum;

                    double noise = 0;
                    if (NoisebandCap)
                    {
                        // Noise is pretty constant over a small region, so take it from the peak
                        // closest to the range spanned by the first and last used tag keys.
                        MassRange range = new MassRange(UsedTags.Keys[0], UsedTags.Keys[UsedTags.Count - 1]);
                        ThermoSpectrum thermoSpectrum = massSpectrum as ThermoSpectrum;
                        if (thermoSpectrum == null)
                        {
                            throw new ArgumentException("Spectrum (#" + quantitationMsnScan.SpectrumNumber + ") is low-resolution data without noise information");
                        }

                        var noisePeak = thermoSpectrum.GetClosestPeak(range);
                        if (noisePeak == null)
                        {
                            throw new ArgumentException("Spectrum (#" + quantitationMsnScan.SpectrumNumber + ") has no m/z peaks");
                        }

                        noise = noisePeak.Noise;
                    }

                    // Read in the peak data: one slot per tag, addressed by the index assigned above.
                    QuantPeak[] peaks = new QuantPeak[tagCount];
                    foreach (TagInformation tag in UsedTags.Values)
                    {
                        double tagMz = isETD ? tag.MassEtd : tag.MassCAD;
                        var peak = massSpectrum.GetClosestPeak(tolerance.GetRange(tagMz));
                        peaks[tag.UniqueTagNumber] = new QuantPeak(tag, peak, injectionTime, quantitationMsnScan, noise, peak == null && NoisebandCap);
                    }

                    PurityCorrect(peaks, isDecoy);
                    quantFile.AddPSM(new PSM(filenameID, scanNumber, peaks, purity));
                }
            }
        }
        finally
        {
            // Dispose of all raw files, also when a record aborted processing with an exception.
            foreach (ThermoRawFile openedRawFile in RawFiles.Values)
            {
                openedRawFile.Dispose();
            }
        }

        OnUpdateLog("PSMs loaded " + quantFile.Psms.Count);
        Log("PSMs Loaded:\t" + quantFile.Psms.Count);
        yield return quantFile;
    }
}
/// <summary>
/// Reads each CSV file in <paramref name="filePaths"/>, resolves the quantitation scan of every
/// record in the matching raw file, builds one <c>QuantPeak</c> per used tag and yields one
/// populated <c>QuantFile</c> per input path. Records whose scan data cannot be used are
/// reported through <c>OnUpdateLog</c> and skipped.
/// </summary>
/// <remarks>
/// This is an iterator: nothing runs until the result is enumerated. As side effects it sets
/// <c>MSDataFile.CacheScans</c> to <c>false</c>, renumbers every tag in <c>UsedTags</c> and
/// zeroes each tag's <c>TotalSignal</c> and <c>NormalizedTotalSignal</c>. All raw files in
/// <c>RawFiles</c> are disposed after each CSV file, whether it succeeded or failed.
/// </remarks>
/// <param name="filePaths">
/// Paths of the CSV files to process. Each file must provide the "Spectrum number",
/// "Filename/id" and "DEFLINE" columns.
/// </param>
/// <param name="ms3Quant">
/// Not read by this method; the <c>MS3Quant</c> member decides whether MS3 scans are used.
/// NOTE(review): confirm whether this parameter was meant to replace that member.
/// </param>
/// <returns>One <c>QuantFile</c> per entry of <paramref name="filePaths"/>, in input order.</returns>
/// <exception cref="ArgumentException">
/// The raw file named by a record is not in <c>RawFiles</c>.
/// </exception>
private IEnumerable<QuantFile> LoadFiles(IEnumerable<string> filePaths, bool ms3Quant = false)
{
    MSDataFile.CacheScans = false;

    // Give every used tag a dense, zero-based index and clear its running totals.
    // The final counter value is therefore the size needed for the per-PSM peak array.
    int tagCount = 0;
    foreach (TagInformation tag in UsedTags.Values)
    {
        tag.UniqueTagNumber = tagCount++;
        tag.TotalSignal = 0;
        tag.NormalizedTotalSignal = 0;
    }

    foreach (string filePath in filePaths)
    {
        Log("Processing file:\t" + filePath);
        OnUpdateLog("Processing File " + filePath + "...");

        QuantFile quantFile = new QuantFile(filePath);
        int oldProgress = -1;

        try
        {
            // Own the StreamReader explicitly so the file handle is released even if the
            // CsvReader cannot be constructed.
            using (StreamReader basestreamReader = new StreamReader(filePath))
            using (CsvReader reader = new CsvReader(basestreamReader, true))
            {
                // Walk the CSV and pull the matching scan data out of the raw file.
                while (reader.ReadNextRecord())
                {
                    int scanNumber = int.Parse(reader["Spectrum number"]);
                    string filenameID = reader["Filename/id"];
                    string rawFileName = filenameID.Split('.')[0];
                    bool isDecoy = reader["DEFLINE"].StartsWith("DECOY_");

                    ThermoRawFile rawFile;
                    if (!RawFiles.TryGetValue(rawFileName, out rawFile))
                    {
                        throw new ArgumentException("Cannot find this raw file: " + rawFileName + ".raw");
                    }

                    if (!rawFile.IsOpen)
                    {
                        rawFile.Open();
                    }

                    // Progress is the fraction of the CSV stream consumed so far; only report changes.
                    int progress = (int)(100 * (double)basestreamReader.BaseStream.Position / basestreamReader.BaseStream.Length);
                    if (progress != oldProgress)
                    {
                        OnProgressUpdate(progress);
                        oldProgress = progress;
                    }

                    // Get the scan object for the sequence MS2 scan. Validate it BEFORE any member
                    // access so an invalid scan number is skipped as intended instead of crashing
                    // with a NullReferenceException in the purity calculation below.
                    MsnDataScan quantitationMsnScan = rawFile[scanNumber] as MsnDataScan;
                    if (quantitationMsnScan == null)
                    {
                        OnUpdateLog("Spectrum Number " + scanNumber + " is not a valid MS2 scan from: " + rawFile.FilePath + ". Skipping PSM...");
                        continue;
                    }

                    double purity = 1;
                    if (CalculatePurity)
                    {
                        double mz = quantitationMsnScan.PrecursorMz;
                        int charge = quantitationMsnScan.PrecursorCharge;
                        DoubleRange isolationRange = MzRange.FromDa(mz, PurityWindowInTh);
                        MSDataScan parentScan = rawFile[quantitationMsnScan.ParentScanNumber];
                        purity = DeterminePurity(parentScan, mz, charge, isolationRange);
                    }

                    if (MS3Quant)
                    {
                        // Look forward for the MS3 scan whose parent is this MS2 scan.
                        // NOTE(review): the bound excludes rawFile.LastSpectrumNumber itself; if that
                        // value is inclusive the final spectrum is never considered - confirm.
                        quantitationMsnScan = null;
                        for (int ms3ScanNumber = scanNumber + 1; ms3ScanNumber < rawFile.LastSpectrumNumber; ms3ScanNumber++)
                        {
                            if (rawFile.GetParentSpectrumNumber(ms3ScanNumber) == scanNumber)
                            {
                                quantitationMsnScan = rawFile[ms3ScanNumber] as MsnDataScan;
                                break;
                            }
                        }

                        if (quantitationMsnScan == null)
                        {
                            OnUpdateLog("Cannot find a MS3 spectrum associated with spectrum number " + scanNumber + ". Skipping PSM...");
                            continue;
                        }
                    }

                    // Ion-trap scans use the IT tolerance; every other analyzer uses the FT tolerance.
                    Tolerance tolerance = quantitationMsnScan.MzAnalyzer == MZAnalyzerType.IonTrap2D
                        ? ItMassTolerance
                        : FtMassTolerance;
                    bool isETD = quantitationMsnScan.DissociationType == DissociationType.ETD;
                    double injectionTime = quantitationMsnScan.InjectionTime;

                    // NOTE(review): thermoSpectrum is only null-checked when NoisebandCap is set, yet
                    // it is dereferenced in the tag loop either way - confirm GetLabeledSpectrum
                    // cannot return null.
                    var thermoSpectrum = rawFile.GetLabeledSpectrum(quantitationMsnScan.SpectrumNumber);

                    double noise = 0;
                    if (NoisebandCap)
                    {
                        if (thermoSpectrum == null)
                        {
                            OnUpdateLog("Spectrum (#" + quantitationMsnScan.SpectrumNumber + ") is low-resolution data without noise information. Skipping PSM...");
                            continue;
                        }

                        // Noise is pretty constant over a small region, so take it from the peak
                        // closest to the mean of the first and last used tag keys.
                        // NOTE(review): the meaning/units of the second argument (500) are not
                        // visible here - presumably a search tolerance; confirm.
                        MassRange range = new MassRange(UsedTags.Keys[0], UsedTags.Keys[UsedTags.Count - 1]);
                        var noisePeak = thermoSpectrum.GetClosestPeak(range.Mean, 500);
                        if (noisePeak == null)
                        {
                            OnUpdateLog("Spectrum (#" + quantitationMsnScan.SpectrumNumber + ") has no m/z peaks. Skipping PSM...");
                            continue;
                        }

                        noise = noisePeak.Noise;
                    }

                    // Read in the peak data: one slot per tag, addressed by the index assigned above.
                    QuantPeak[] peaks = new QuantPeak[tagCount];
                    foreach (TagInformation tag in UsedTags.Values)
                    {
                        double tagMz = isETD ? tag.MassEtd : tag.MassCAD;
                        var peak = thermoSpectrum.GetClosestPeak(tolerance.GetRange(tagMz));
                        peaks[tag.UniqueTagNumber] = new QuantPeak(tag, peak, injectionTime, quantitationMsnScan, noise, peak == null && NoisebandCap);
                    }

                    PurityCorrect(peaks, isDecoy);
                    quantFile.AddPSM(new PSM(filenameID, scanNumber, peaks, purity));
                }
            }
        }
        finally
        {
            // Dispose of all raw files, also when a record aborted processing with an exception.
            foreach (ThermoRawFile openedRawFile in RawFiles.Values)
            {
                openedRawFile.Dispose();
            }
        }

        OnUpdateLog("PSMs loaded " + quantFile.Psms.Count);
        Log("PSMs Loaded:\t" + quantFile.Psms.Count);
        yield return quantFile;
    }
}