/// <summary>
/// Loads the spectra of every sample of the project and stores them in <c>AllSpectras</c>.
/// For each sample, previously exported "_Tracks.csv" / "_MSMSIons.csv" files located next to the
/// raw file are imported when <c>dbOptions.LoadSpectraIfFound</c> is set and both files exist;
/// otherwise (or when that import is rejected) the raw file is read through ProteoWizard.
/// </summary>
/// <param name="loadMS">When true, track information is loaded as well (track file on import, MS scans on raw load).</param>
/// <param name="filterMS2">Forwarded to <c>Spectra.Load</c> when reading a raw file.</param>
/// <returns>The freshly built sample-to-spectra dictionary (same instance as <c>AllSpectras</c>).</returns>
public Dictionary<Sample, Spectra> LoadSpectras(bool loadMS = true, bool filterMS2 = true)
{
    //TODO test compatibility with QExactive, mzML ... other known formats
    AllSpectras = new Dictionary<Sample, Spectra>();
    for (int i = 0; i < Project.Count; i++)
    {
        Sample sample = Project[i];
        string cachePrefix = vsCSV.GetFolder(sample.sSDF) + vsCSV.GetFileName_NoExtension(sample.sSDF);
        string trackFile = cachePrefix + "_Tracks.csv";
        string msmsIonFile = cachePrefix + "_MSMSIons.csv";

        Spectra spectra = null;
        if (dbOptions.LoadSpectraIfFound && System.IO.File.Exists(trackFile) && System.IO.File.Exists(msmsIonFile))
        {
            dbOptions.ConSole.WriteLine("Loading Sectra from " + trackFile + " AND " + msmsIonFile);
            // Spectra.Import returns null when the MSMS file is empty or its title line does not match.
            spectra = Spectra.Import(msmsIonFile, loadMS ? trackFile : null, dbOptions);
        }

        if (spectra == null)
        {
            // No usable cache: read the raw file. A null entry must never reach AllSpectras.
            dbOptions.ConSole.WriteLine("Loading Sectra " + sample.sSDF);
            // The data file wraps native ProteoWizard resources; release them as soon as we are done.
            using (pwiz.CLI.msdata.MSDataFile msFile = new pwiz.CLI.msdata.MSDataFile(sample.sSDF))
            {
                spectra = Spectra.Load(msFile, dbOptions, sample.sSDF, loadMS, filterMS2);
                spectra.Sort(ProductSpectrum.AscendingPrecursorMassComparison);
                dbOptions.ConSole.WriteLine(sample.sSDF + " [" + spectra.Count + " msms scans]");
                if (dbOptions.SaveMS1Peaks)
                {
                    spectra.ExportTracks(trackFile);
                }
                if (dbOptions.SaveMSMSPeaks)
                {
                    spectra.ExportMSMS(msmsIonFile);
                }
            }
        }

        AllSpectras.Add(sample, spectra);
    }
    return AllSpectras;
}
/// <summary>
/// Rebuilds a <see cref="Spectra"/> from a previously exported MSMS ion file and, optionally, a track file.
/// The MSMS file must start with the <c>ProductSpectrum.TITLE</c> line; each spectrum is then stored as
/// one header line (split on <c>vsCSV._Generic_Separator</c>) followed by its tab separated peak lines
/// (m/z, intensity and an optional charge).
/// </summary>
/// <param name="filenameMSMS">Path of the MSMS ion file.</param>
/// <param name="filenameTracks">Path of the track file; null or empty to skip loading tracks.</param>
/// <param name="dbOptions">Options; its console receives parsing error messages.</param>
/// <returns>The imported spectra, or null when the file is empty or its first line is not the expected title.</returns>
public static Spectra Import(string filenameMSMS, string filenameTracks, DBOptions dbOptions)
{
    Spectra spectra = new Spectra();
    vsCSV csv = new vsCSV(filenameMSMS);
    if (csv.LINES_LIST.Count == 0 || csv.LINES_LIST[0].CompareTo(ProductSpectrum.TITLE) != 0)
    {
        return null;
    }

    // NOTE(review): numbers are parsed with the current culture; presumably the export uses the same one - confirm.
    for (int i = 1; i < csv.LINES_LIST.Count; i++)
    {
        string[] splits = csv.LINES_LIST[i].Split(vsCSV._Generic_Separator);
        double mz = double.Parse(splits[3]);
        int charge = int.Parse(splits[5]);
        int nbPeaks = int.Parse(splits[9]);
        GraphML_List<MsMsPeak> peaks = new GraphML_List<MsMsPeak>(nbPeaks);

        // Step onto the first peak line of this spectrum.
        // NOTE(review): after the peak lines the outer loop increments i once more; this relies on the
        // layout written by the export routine (not visible here) - verify no spectrum header is skipped.
        i++;
        int j = 0;
        // Stop at the end of the file so a truncated export cannot index past the last line
        // (previously the out-of-range access was repeated inside the catch handler and escaped).
        for (; j < nbPeaks && i < csv.LINES_LIST.Count; i++, j++)
        {
            string peakLine = csv.LINES_LIST[i];
            try
            {
                string[] splitPeaks = peakLine.Split('\t');
                // The third column (charge) is optional and defaults to 0.
                int peakCharge = splitPeaks.Length > 2 ? int.Parse(splitPeaks[2]) : 0;
                peaks.Add(new MsMsPeak(double.Parse(splitPeaks[0]), double.Parse(splitPeaks[1]), peakCharge));
            }
            catch (Exception)
            {
                // Best effort: a malformed peak line is reported and skipped.
                dbOptions.ConSole.WriteLine("Error parsing line : " + peakLine);
            }
        }
        if (j < nbPeaks)
        {
            dbOptions.ConSole.WriteLine("Unexpected end of file : expected " + nbPeaks + " peaks but found " + j + " in " + filenameMSMS);
        }

        spectra.AddMSMS(new ProductSpectrum(int.Parse(splits[0]), double.Parse(splits[1]), splits[2], mz, double.Parse(splits[4]), charge, Proteomics.Utilities.Numerics.MassFromMZ(mz, charge), peaks, double.Parse(splits[8]), double.Parse(splits[10]), double.Parse(splits[11])));
    }

    if (!string.IsNullOrEmpty(filenameTracks))
    {
        spectra.tracks = Tracks.Import(filenameTracks, dbOptions);
    }
    return spectra;
}
/// <summary>
/// Runs <c>MaxQuant.PeakDetection.Detect</c> on the given spectra (wrapped in a
/// <c>Trinity.MaxQuant.RawFileWrapper</c>) and converts every detected peak into a track.
/// </summary>
/// <param name="spectra">Spectra handed to the peak detection.</param>
/// <param name="options">Supplies the precursor mass tolerance and the peak file settings.</param>
/// <param name="outputFilename">File whose extension-less name is used to name the optional "_Peaks.txt" file.</param>
/// <param name="missingScan">Forwarded to the peak detection (values 1 to 5 were noted as candidates).</param>
/// <param name="centroid">Forwarded to the peak detection (values 1 to 10 were noted as candidates).</param>
/// <param name="minPeaks">Forwarded to the peak detection (values 1 to 10 were noted as candidates).</param>
/// <param name="valleyFactor">Forwarded to the peak detection (values 0.1 to 3.0 were noted as candidates).</param>
/// <param name="centroidMethod">Centroid position method forwarded to the peak detection.</param>
/// <returns>One track per detected peak.</returns>
private static Tracks ComputeSpectraTracks(Spectra spectra, DBOptions options, string outputFilename, int missingScan, int centroid, int minPeaks, double valleyFactor, MaxQuant.CentroidPosition centroidMethod)
{
    //TODO Cycle values to optimize missing scans and centroid values

    // The peak file is only produced on request; a null path is passed otherwise.
    string peakFilePath = options.WriteMaxQuantPeakFile
        ? options.OutputFolder + vsCSV.GetFileName_NoExtension(outputFilename) + "_Peaks.txt"
        : null;

    double[] peakMasses;
    float[] peakMassErrors;
    float[] peakIntensities;
    float[] peakStartTimes;
    float[] peakEndTimes;
    long[] peakFilePositions;

    MaxQuant.PeakDetection.Detect(
        peakFilePath,
        missingScan,
        centroid,
        centroidMethod,
        false,
        0,
        options.precursorMassTolerance.Value, //TODO ensure its always in ppm
        minPeaks,
        valleyFactor,
        true,
        0,
        new Trinity.MaxQuant.RawFileWrapper(spectra),
        true,
        null,
        out peakMasses,
        out peakMassErrors,
        out peakIntensities,
        out peakStartTimes,
        out peakEndTimes,
        out peakFilePositions);

    Tracks detected = new Tracks();
    int nbPeaksDetected = peakMasses.Length;
    for (int p = 0; p < nbPeaksDetected; p++)
    {
        float rtStart = peakStartTimes[p];
        float rtEnd = peakEndTimes[p];
        // The track retention time is the middle of the peak's time span.
        detected.AddTrack(peakMasses[p], (rtStart + rtEnd) * 0.5, rtStart, rtEnd, peakIntensities[p]);
    }
    return detected;
}
/// <summary>
/// Creates the queries (one per spectrum / precursor pair) of a sample and appends them to this list.
/// Every spectrum is paired with the precursors of the tracks found in its isolation window at its
/// retention time; a spectrum for which no precursor carries the spectrum's own charge additionally
/// gets a precursor built from the spectrum's precursor values. Afterwards the queries are sorted,
/// non-isotope precursors are collected in <c>Precursors</c>, and queries that target an isotope or
/// whose precursor intensity is too low are removed.
/// </summary>
/// <param name="entry">Sample the spectra belong to.</param>
/// <param name="spectra">MSMS spectra of the sample; sorted in place by precursor mass.</param>
/// <param name="tracks">Tracks searched for precursors.</param>
public void GenerateQueries(Sample entry, Spectra spectra, Tracks tracks)
{
    // Precursor created for each track, and every track already identified as an isotope of a precursor.
    Dictionary<Track, Precursor> precursorOfTrack = new Dictionary<Track, Precursor>();
    Dictionary<Track, Precursor> isotopeOfTrack = new Dictionary<Track, Precursor>();
    int nbMissedTrack = 0;

    //Create one query per Spectrum-Precursor duo, including Isotopes in the process to ease search
    //For further analysis, maintain a list of precursors (excluding isotopes)
    spectra.Sort(ProductSpectrum.AscendingPrecursorMassComparison);

    foreach (ProductSpectrum spectrum in spectra)
    {
        NbSpectrum++;
        List<Query> newQueries = new List<Query>();
        bool foundCharge = false;
        // closestTrack and intensityCumul are maintained but only consumed by filtering that is currently disabled.
        Track closestTrack = null;
        double intensityCumul = 0.0;

        //TODO No threshold on sdf files, and preferably a C# routine that does what MassSense do
        //TODO Optimize the isolation window ratio
        foreach (Track track in tracks.GetTracksInMzRange(spectrum.PrecursorMZ, spectrum.IsolationWindow * dbOptions.EffectiveIsolationWindowRatio))
        {
            bool coversScanTime = track.RT_Min <= spectrum.RetentionTimeInMin && track.RT_Max >= spectrum.RetentionTimeInMin;
            if (!coversScanTime)
            {
                continue;
            }

            if (closestTrack == null || Math.Abs(track.MZ - spectrum.PrecursorMZ) < Math.Abs(closestTrack.MZ - spectrum.PrecursorMZ))
            {
                closestTrack = track;
            }

            // NOTE(review): reaching an isotope track ends the scan of the remaining tracks for this spectrum.
            if (isotopeOfTrack.ContainsKey(track))
            {
                break;
            }

            Precursor prec;
            if (!precursorOfTrack.TryGetValue(track, out prec))
            {
                // First time this track is met: it becomes a precursor only if isotopes are found for it.
                GraphML_List<Precursor> isotopes = GetIsotopes(track, dbOptions, tracks, entry);
                if (isotopes.Count > 0)
                {
                    prec = new Precursor(track, isotopes[0].Charge, entry, 0.0, isotopes);
                    precursorOfTrack.Add(track, prec);
                    prec.OtherCharges = GetOtherCharges(prec, dbOptions, tracks, entry);
                    foreach (Precursor isotope in prec.Isotopes)
                    {
                        if (!isotopeOfTrack.ContainsKey(isotope.Track))
                        {
                            isotopeOfTrack.Add(isotope.Track, isotope);
                        }
                    }
                }
            }

            if (prec == null)
            {
                continue;
            }

            intensityCumul += track.INTENSITY;
            newQueries.Add(new Query(dbOptions, entry, spectrum, prec, NbSpectrum));
            if (prec.Charge == spectrum.PrecursorCharge)
            {
                foundCharge = true;
            }
        }

        if (!foundCharge)
        {
            // No precursor with the spectrum's charge: build a track and a precursor from the spectrum's own precursor values.
            nbMissedTrack++;
            closestTrack = new Track((float)spectrum.PrecursorMZ, (float)spectrum.RetentionTimeInMin, spectrum.PrecursorIntensity, (float)(spectrum.RetentionTimeInMin - dbOptions.ComputedRetentionTimeDiff), (float)(spectrum.RetentionTimeInMin + dbOptions.ComputedRetentionTimeDiff), true);
            Precursor tracklessPrecursor = new Precursor(closestTrack, spectrum.PrecursorCharge, entry);
            precursorOfTrack.Add(closestTrack, tracklessPrecursor);
            Add(new Query(dbOptions, entry, spectrum, tracklessPrecursor, NbSpectrum));
        }

        // All track based queries are kept at this point; intensity based filtering happens further down.
        foreach (Query pending in newQueries)
        {
            this.Add(pending);
        }

        Console.Write("\r{0}% ", ((100 * NbSpectrum) / spectra.Count));
    }
    Console.Write("\r{0}% ", 100);

    //Sort queries to ease search
    this.Sort(AscendingPrecursorMassComparison);

    // Only tracks that are not an isotope of another precursor are reported as precursors.
    foreach (KeyValuePair<Track, Precursor> pair in precursorOfTrack)
    {
        if (!isotopeOfTrack.ContainsKey(pair.Key))
        {
            Precursors.Add(pair.Value);
        }
    }

    //TODO Validate this approach
    // Pass 1: drop queries whose precursor track is an isotope, and sum the remaining track intensities per spectrum.
    Dictionary<ProductSpectrum, double> intensityPerSpectrum = new Dictionary<ProductSpectrum, double>();
    int position = 0;
    while (position < this.Count)
    {
        Query query = this[position];
        if (isotopeOfTrack.ContainsKey(query.precursor.Track))
        {
            this.RemoveAt(position);
            continue;
        }

        double summed;
        if (intensityPerSpectrum.TryGetValue(query.spectrum, out summed))
        {
            intensityPerSpectrum[query.spectrum] = summed + query.precursor.Track.INTENSITY;
        }
        else
        {
            intensityPerSpectrum.Add(query.spectrum, query.precursor.Track.INTENSITY);
        }
        position++;
    }

    // Pass 2: drop queries whose precursor contributes less than the configured share of its spectrum's summed intensity.
    position = 0;
    while (position < this.Count)
    {
        Query query = this[position];
        if (query.precursor.Track.INTENSITY < intensityPerSpectrum[query.spectrum] * dbOptions.MinimumPrecursorIntensityRatioInIsolationWindow)
        {
            this.RemoveAt(position);
        }
        else
        {
            position++;
        }
    }

    // Pass 3: count the remaining queries per spectrum (the isotope check is repeated as in pass 1).
    Dictionary<ProductSpectrum, int> queriesPerSpectrum = new Dictionary<ProductSpectrum, int>();
    position = 0;
    while (position < this.Count)
    {
        Query query = this[position];
        if (isotopeOfTrack.ContainsKey(query.precursor.Track))
        {
            this.RemoveAt(position);
            continue;
        }

        int counted;
        if (queriesPerSpectrum.TryGetValue(query.spectrum, out counted))
        {
            queriesPerSpectrum[query.spectrum] = counted + 1;
        }
        else
        {
            queriesPerSpectrum.Add(query.spectrum, 1);
        }
        position++;
    }

    double averageNbPrecursorPerSpectrum = 0;
    int nbSpectrumMatchedToTrack = 0;
    foreach (KeyValuePair<ProductSpectrum, int> pair in queriesPerSpectrum)
    {
        nbSpectrumMatchedToTrack++;
        averageNbPrecursorPerSpectrum += pair.Value;
    }

    dbOptions.ConSole.WriteLine(entry.sSDF + " :" + Precursors.Count + " precursors [" + isotopeOfTrack.Count + " isotopes] spreaded in " + Count + " queries [" + nbMissedTrack + " trackless precursors]");
    dbOptions.ConSole.WriteLine("Average Precursors per Spectrum : " + averageNbPrecursorPerSpectrum / (double)nbSpectrumMatchedToTrack);
}
/// <summary>
/// Reads every spectrum of an opened ProteoWizard data file into a new <see cref="Spectra"/>.
/// Spectra that have precursors or an MS level above 1 are stored as MSMS product spectra; the other
/// ones are treated as MS scans and, when <paramref name="loadMS"/> is set, kept to compute tracks.
/// Any exception raised while reading is written to the options console and the spectra gathered so
/// far are returned.
/// </summary>
/// <param name="msFile">Opened data file to read from.</param>
/// <param name="options">Search options (tolerances, charge and fragment limits, console).</param>
/// <param name="filePath">Path of the data file; used to name the optional peak file during track computation.</param>
/// <param name="loadMS">When true, MS scans are stored and used to compute tracks.</param>
/// <param name="filterMS2">When true, MSMS peaks are charge-assigned, deisotoped and limited in number.</param>
/// <returns>The loaded spectra (possibly partial when reading failed).</returns>
public static Spectra Load(pwiz.CLI.msdata.MSDataFile msFile, DBOptions options, string filePath, bool loadMS = true, bool filterMS2 = true)
{
    string mzMlFilepath = filePath;
    int num_spectra = msFile.run.spectrumList.size();
    Spectra spectra = new Spectra(num_spectra);
    MS1Spectrum previousMS1 = null;
    try
    {
        // Injection time of the most recent MS scan, attached to the MSMS spectra that follow it.
        double LastMs1InjectionTime = 0;
        for (int i = 0; i < num_spectra; i++)
        {
            // 'using' releases the native spectrum even when parsing throws.
            using (pwiz.CLI.msdata.Spectrum spec = msFile.run.spectrumList.spectrum(i, true))
            {
                if (spec.precursors.Count > 0 || spec.cvParam(pwiz.CLI.cv.CVID.MS_ms_level).value > 1)//is an MSMS
                {
                    double retention_time = spec.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_scan_start_time).timeInSeconds() / 60.0;

                    // Defaults used when the file does not provide the corresponding value.
                    double precursor_mz = 0;
                    int charge = 2;
                    double precursor_intensity = 0;
                    string fragmentation_method = "unknown";
                    double isolationWindow = 1.0;
                    double injectionTime = spec.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_ion_injection_time).value;

                    foreach (pwiz.CLI.msdata.Precursor precursor in spec.precursors)
                    {
                        // A precursor without activation parameters keeps the "unknown" label instead of aborting the whole load.
                        if (precursor.activation.cvParams.Count > 0)
                        {
                            fragmentation_method = precursor.activation.cvParams[0].name;
                        }

                        // The window is only taken when the second and third isolation window parameters agree.
                        if (precursor.isolationWindow.cvParams.Count > 2 && (double)precursor.isolationWindow.cvParams[1].value == (double)precursor.isolationWindow.cvParams[2].value)
                        {
                            isolationWindow = precursor.isolationWindow.cvParams[1].value;
                        }
                        else if (precursor.isolationWindow.cvParams.Count > 2)
                        {
                            options.ConSole.WriteLine("Weird Isolation Window");
                        }

                        // Selected ion parameters are read by position: m/z, then charge, then intensity.
                        foreach (pwiz.CLI.msdata.SelectedIon ion in precursor.selectedIons)
                        {
                            precursor_mz = ion.cvParams[0].value;
                            if (ion.cvParams.Count > 1)
                            {
                                charge = (int)ion.cvParams[1].value;
                            }
                            if (ion.cvParams.Count > 2)
                            {
                                precursor_intensity = ion.cvParams[2].value;
                            }
                        }
                    }

                    int scan_number = i + 1;

                    GraphML_List<MsMsPeak> peaks;
                    // The binary arrays are only needed while the peaks are copied; release them on every path.
                    using (pwiz.CLI.msdata.BinaryDataArray mz = spec.getMZArray())
                    using (pwiz.CLI.msdata.BinaryDataArray intensity = spec.getIntensityArray())
                    {
                        int num_peaks = mz.data.Count;
                        if (num_peaks != intensity.data.Count)
                        {
                            options.ConSole.WriteLine("PreoteWizard reports peaks arrays (mz/intensity) of different sizes : (" + num_peaks + "/" + intensity.data.Count + ")");
                            if (intensity.data.Count < num_peaks)
                            {
                                num_peaks = intensity.data.Count;
                            }
                        }

                        peaks = new GraphML_List<MsMsPeak>(num_peaks);
                        for (int k = 0; k < num_peaks; k++)
                        {
                            // Peaks without signal are dropped.
                            if (intensity.data[k] > 0)
                            {
                                peaks.Add(new MsMsPeak(mz.data[k], intensity.data[k], 0));
                            }
                        }
                    }

                    peaks.Sort(MsMsPeak.AscendingMzComparison);
                    if (filterMS2)
                    {
                        // Deisotoping uses half of the product mass tolerance.
                        peaks = AssignChargeStatesAndDeisotope(peaks, options.MaximumPrecursorChargeState, new MassTolerance(options.productMassTolerance.Value * 0.5, options.productMassTolerance.Units));
                        peaks = FilterPeaks(peaks, options.MaximumNumberOfFragmentsPerSpectrum);

                        //TODO Add Contaminant removal

                        //Can sometime be sorted by intensity after this call
                        peaks.Sort(MsMsPeak.AscendingMzComparison);
                    }

                    //TODO Validate that in most cases, next steps can calculate missing charge
                    double precursor_mass = Numerics.MassFromMZ(precursor_mz, charge);
                    ProductSpectrum spectrum = new ProductSpectrum(scan_number, retention_time, fragmentation_method, precursor_mz, precursor_intensity, charge, precursor_mass, peaks, isolationWindow, injectionTime, LastMs1InjectionTime);
                    spectra.AddMSMS(spectrum);
                }
                else //Is an MS
                {
                    LastMs1InjectionTime = spec.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_ion_injection_time).value;

                    if (loadMS)
                    {
                        double retention_time = spec.scanList.scans[0].cvParam(pwiz.CLI.cv.CVID.MS_scan_start_time).timeInSeconds() / 60.0;

                        // These arrays are deliberately not disposed: their data is handed to the MS1Spectrum,
                        // which presumably keeps referencing it - TODO confirm.
                        pwiz.CLI.msdata.BinaryDataArray mz = spec.getMZArray();
                        pwiz.CLI.msdata.BinaryDataArray intensity = spec.getIntensityArray();

                        // A scan's duration is only known once the next MS scan is met, so storage is delayed by one scan.
                        if (previousMS1 != null)
                        {
                            previousMS1.ScanDuration = retention_time - previousMS1.RetentionTimeInMin;
                            spectra.MS1s.Add(previousMS1);
                        }
                        previousMS1 = new MS1Spectrum(i, retention_time, intensity.data, mz.data, 1);
                    }
                }
            }
            Console.Write("\r{0}% ", ((100 * i) / num_spectra));
        }

        // The last MS scan has no successor; store it without a computed duration.
        if (previousMS1 != null)
        {
            spectra.MS1s.Add(previousMS1);
        }

        //TODO Optimize the track following parameters (missing scans, centroid, min peaks, valley factor, centroid method)
        if (spectra.MS1s.Count > 0)
        {
            spectra.tracks = ComputeSpectraTracks(spectra, options, mzMlFilepath, 3, 1, 3, 1.7, MaxQuant.CentroidPosition.weightedMean);
        }
        else
        {
            spectra.tracks = new Tracks();
        }
        spectra.tracks.Sort(Tracks.AscendingPrecursorMassComparison);
        Console.Write("\r{0}% ", 100);
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and return whatever was loaded so far.
        options.ConSole.WriteLine(ex.StackTrace);
        options.ConSole.WriteLine(ex.Message);
    }
    return spectra;
}