/// <summary>
/// Creates an entry for a spectra file together with a data file object supplied by the caller.
/// </summary>
/// <param name="fullFilePathWithExtension">Path of the spectra file, including its extension.</param>
/// <param name="dataFile">Data file object to store alongside the path.</param>
public RawFileInfo(string fullFilePathWithExtension, IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> dataFile)
{
    // This overload always records clearAfterDone as false.
    clearAfterDone = false;
    this.dataFile = dataFile;

    this.fullFilePathWithExtension = fullFilePathWithExtension;
    // The short name is the path's file name with directory and extension stripped.
    filenameWithoutExtension = System.IO.Path.GetFileNameWithoutExtension(fullFilePathWithExtension);
}
/// <summary>
/// Creates an entry for a spectra file that has no data file object attached yet.
/// </summary>
/// <param name="fullFilePathWithExtension">Path of the spectra file, including its extension.</param>
public RawFileInfo(string fullFilePathWithExtension)
{
    // This overload stores no data file and always records clearAfterDone as true.
    clearAfterDone = true;
    dataFile = null;

    this.fullFilePathWithExtension = fullFilePathWithExtension;
    // The short name is the path's file name with directory and extension stripped.
    filenameWithoutExtension = System.IO.Path.GetFileNameWithoutExtension(fullFilePathWithExtension);
}
/// <summary>
/// Stores the inputs of a localization run and derives the dissociation types from the product types.
/// </summary>
/// <param name="allResultingIdentifications">Identifications the engine will work on.</param>
/// <param name="lp">Product types; also passed to <c>DetermineDissociationType</c>.</param>
/// <param name="myMsDataFile">Data file associated with the identifications.</param>
/// <param name="fragmentTolerance">Tolerance stored for fragment matching.</param>
/// <param name="nestedIds">Identifiers forwarded unchanged to the base constructor.</param>
/// <param name="addCompIons">Flag stored as-is in the <c>addCompIons</c> field.</param>
public LocalizationEngine(
    IEnumerable<PeptideSpectralMatch> allResultingIdentifications,
    List<ProductType> lp,
    IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile,
    Tolerance fragmentTolerance,
    List<string> nestedIds,
    bool addCompIons)
    : base(nestedIds)
{
    // Plain copies of the constructor arguments.
    this.addCompIons = addCompIons;
    this.fragmentTolerance = fragmentTolerance;
    this.myMsDataFile = myMsDataFile;
    this.lp = lp;
    this.allResultingIdentifications = allResultingIdentifications;

    // Derived value; computed last, after every plain field has been assigned.
    dissociationTypes = DetermineDissociationType(lp);
}
/// <summary>
/// Lazily yields one <see cref="Ms2ScanWithSpecificMass"/> per precursor candidate (m/z, charge) of every
/// scan in <paramref name="myMSDataFile"/> that is an <c>IMsDataScanWithPrecursor</c>.
/// </summary>
/// <remarks>
/// Candidates come from two sources: envelopes returned by <c>GetIsolatedMassesAndCharges</c> on the
/// precursor spectrum (when <paramref name="doPrecursorDeconvolution"/> is set and the scan has a precursor
/// scan number), and the scan's own selected-ion values (when <paramref name="useProvidedPrecursorInfo"/> is
/// set and a charge guess exists). The scan's own values are added only if no existing candidate has a mass
/// within <paramref name="deconvolutionMassTolerance"/> of them.
/// </remarks>
/// <param name="myMSDataFile">Data file to enumerate and to look precursor scans up in.</param>
/// <param name="fullFilePath">Passed through to every yielded <see cref="Ms2ScanWithSpecificMass"/>.</param>
/// <param name="doPrecursorDeconvolution">Whether to collect candidates from the precursor spectrum.</param>
/// <param name="useProvidedPrecursorInfo">Whether to consider the scan's own selected-ion m/z and charge.</param>
/// <param name="deconvolutionIntensityRatio">Forwarded to <c>GetIsolatedMassesAndCharges</c>.</param>
/// <param name="deconvolutionMaxAssumedChargeState">Forwarded to <c>GetIsolatedMassesAndCharges</c>.</param>
/// <param name="deconvolutionMassTolerance">Its <c>Value</c> is forwarded to deconvolution; also used for de-duplication.</param>
/// <returns>A deferred sequence; nothing is computed until it is enumerated.</returns>
public static IEnumerable<Ms2ScanWithSpecificMass> GetMs2Scans(
    IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMSDataFile,
    string fullFilePath,
    bool doPrecursorDeconvolution,
    bool useProvidedPrecursorInfo,
    double deconvolutionIntensityRatio,
    int deconvolutionMaxAssumedChargeState,
    Tolerance deconvolutionMassTolerance)
{
    foreach (var fragmentScan in myMSDataFile.OfType<IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>>())
    {
        // (m/z, charge) pairs to report for this scan.
        var precursorCandidates = new List<(double, int)>();

        if (fragmentScan.OneBasedPrecursorScanNumber.HasValue)
        {
            var parentScan = myMSDataFile.GetOneBasedScan(fragmentScan.OneBasedPrecursorScanNumber.Value);

            // These calls mutate the fragment scan, so they run before its selected-ion values are read below.
            fragmentScan.RefineSelectedMzAndIntensity(parentScan.MassSpectrum);
            if (fragmentScan.SelectedIonMonoisotopicGuessMz.HasValue)
            {
                fragmentScan.ComputeMonoisotopicPeakIntensity(parentScan.MassSpectrum);
            }

            if (doPrecursorDeconvolution)
            {
                var envelopes = fragmentScan.GetIsolatedMassesAndCharges(
                    parentScan.MassSpectrum,
                    1,
                    deconvolutionMaxAssumedChargeState,
                    deconvolutionMassTolerance.Value,
                    deconvolutionIntensityRatio);

                foreach (var envelope in envelopes)
                {
                    precursorCandidates.Add((envelope.monoisotopicMass.ToMz(envelope.charge), envelope.charge));
                }
            }
        }

        if (useProvidedPrecursorInfo && fragmentScan.SelectedIonChargeStateGuess.HasValue)
        {
            var reportedCharge = fragmentScan.SelectedIonChargeStateGuess.Value;

            // Prefer the monoisotopic guess; fall back to the selected-ion m/z when there is none.
            var reportedMz = fragmentScan.SelectedIonMonoisotopicGuessMz ?? fragmentScan.SelectedIonMZ;

            bool alreadyListed = precursorCandidates.Any(candidate =>
                deconvolutionMassTolerance.Within(
                    reportedMz.ToMass(reportedCharge),
                    candidate.Item1.ToMass(candidate.Item2)));

            if (!alreadyListed)
            {
                precursorCandidates.Add((reportedMz, reportedCharge));
            }
        }

        foreach (var (candidateMz, candidateCharge) in precursorCandidates)
        {
            yield return new Ms2ScanWithSpecificMass(fragmentScan, candidateMz, candidateCharge, fullFilePath);
        }
    }
}
/// <summary>
/// Wraps <paramref name="raw"/> and records the averaging parameters.
/// </summary>
/// <remarks>
/// The base class is initialised with <c>raw.NumSpectra - numScansToAverage + 1</c> and with a new
/// <c>SourceFile</c> that copies the fields of <c>raw.SourceFile</c> but uses the fixed native-ID format
/// string "scan number only nativeID format".
/// </remarks>
/// <param name="raw">Underlying data file.</param>
/// <param name="numScansToAverage">Stored in the field of the same name; also used in the count passed to the base class.</param>
/// <param name="ppmToleranceForPeakCombination">Stored in the field of the same name.</param>
public SummedMsDataFile(
    IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> raw,
    int numScansToAverage,
    double ppmToleranceForPeakCombination)
    : base(
        raw.NumSpectra - numScansToAverage + 1,
        new SourceFile(
            @"scan number only nativeID format",
            raw.SourceFile.MassSpectrometerFileFormat,
            raw.SourceFile.CheckSum,
            raw.SourceFile.FileChecksumType,
            raw.SourceFile.Uri,
            raw.SourceFile.Id,
            raw.SourceFile.FileName))
{
    this.ppmToleranceForPeakCombination = ppmToleranceForPeakCombination;
    this.numScansToAverage = numScansToAverage;
    this.raw = raw;
}
/// <summary>
/// Stores the inputs of a data-point acquisition run; every argument is copied into the field of the same name.
/// </summary>
/// <param name="goodIdentifications">Identifications to acquire data points from.</param>
/// <param name="myMsDataFile">Data file the identifications refer to.</param>
/// <param name="mzToleranceForMs1Search">Tolerance stored for MS1 searching.</param>
/// <param name="mzToleranceForMs2Search">Tolerance stored for MS2 searching.</param>
/// <param name="numFragmentsNeededForEveryIdentification">Stored as-is.</param>
/// <param name="minMS1isotopicPeaksNeededForConfirmedIdentification">Stored as-is.</param>
/// <param name="minMS2isotopicPeaksNeededForConfirmedIdentification">Stored as-is.</param>
/// <param name="fragmentTypesForCalibration">Stored as-is.</param>
/// <param name="nestedIds">Identifiers forwarded unchanged to the base constructor.</param>
public DataPointAcquisitionEngine(
    List<PeptideSpectralMatch> goodIdentifications,
    IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile,
    Tolerance mzToleranceForMs1Search,
    Tolerance mzToleranceForMs2Search,
    int numFragmentsNeededForEveryIdentification,
    int minMS1isotopicPeaksNeededForConfirmedIdentification,
    int minMS2isotopicPeaksNeededForConfirmedIdentification,
    FragmentTypes fragmentTypesForCalibration,
    List<string> nestedIds)
    : base(nestedIds)
{
    // Inputs
    this.myMsDataFile = myMsDataFile;
    this.goodIdentifications = goodIdentifications;

    // Tolerances
    this.mzToleranceForMs2Search = mzToleranceForMs2Search;
    this.mzToleranceForMs1Search = mzToleranceForMs1Search;

    // Thresholds and fragment selection
    this.fragmentTypesForCalibration = fragmentTypesForCalibration;
    this.minMS2isotopicPeaksNeededForConfirmedIdentification = minMS2isotopicPeaksNeededForConfirmedIdentification;
    this.minMS1isotopicPeaksNeededForConfirmedIdentification = minMS1isotopicPeaksNeededForConfirmedIdentification;
    this.numFragmentsNeededForEveryIdentification = numFragmentsNeededForEveryIdentification;
}
/// <summary>
/// Loads "tiny.pwiz.1.1.mzML" and checks the spectrum size of scans 1-4, then checks
/// <c>GetClosestOneBasedSpectrumNumber(5)</c> through the <c>IMsDataFile</c> interface.
/// </summary>
public void LoadMzmlTest()
{
    Mzml tinyFile = Mzml.LoadAllStaticData(@"tiny.pwiz.1.1.mzML");

    // Expected spectrum sizes for one-based scans 1, 2, 3 and 4, in that order.
    int[] expectedSizes = { 15, 10, 0, 15 };
    for (int scanNumber = 1; scanNumber <= expectedSizes.Length; scanNumber++)
    {
        var spectrum = tinyFile.GetOneBasedScan(scanNumber).MassSpectrum;
        Assert.AreEqual(expectedSizes[scanNumber - 1], spectrum.Size);
    }

    // The same object must also be usable through the generic data-file interface.
    IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> viaInterface = tinyFile;
    Assert.AreEqual(1, viaInterface.GetClosestOneBasedSpectrumNumber(5));
}
/// <summary>
/// Builds an mzML document from every scan of <paramref name="myMsDataFile"/> (one-based scans
/// 1..NumSpectra) and serializes it to <paramref name="outputFile"/> with <c>mzmlSerializer</c>.
/// </summary>
/// <remarks>
/// Per spectrum the writer emits: MS1/MSn type, ms level, centroid flag, polarity (when known), title,
/// lowest/highest observed m/z (when the spectrum is non-empty), total ion current, one scan entry
/// (start time, filter string, optional injection time, optional monoisotopic m/z user parameter, scan
/// window), the precursor section for non-MS1 scans that carry precursor information, and the binary arrays.
/// Only m/z and intensity arrays are written, plus three noise arrays when the scan has noise data;
/// charge information is not written. The instrument configuration list and the chromatogram are
/// placeholders.
/// </remarks>
/// <param name="myMsDataFile">Source of the scans to write.</param>
/// <param name="outputFile">Path of the file to create or overwrite.</param>
/// <param name="writeIndexed">Must be <c>false</c>; indexed output is not implemented.</param>
/// <exception cref="ArgumentNullException"><paramref name="myMsDataFile"/> is <c>null</c>.</exception>
/// <exception cref="NotImplementedException"><paramref name="writeIndexed"/> is <c>true</c>.</exception>
public static void CreateAndWriteMyMzmlWithCalibratedSpectra(IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile, string outputFile, bool writeIndexed)
{
    if (myMsDataFile == null)
    {
        throw new ArgumentNullException(nameof(myMsDataFile));
    }

    // Reject the unsupported mode before doing the work of building the whole document.
    if (writeIndexed)
    {
        throw new NotImplementedException("Writing indexed mzMLs not yet supported");
    }

    var mzML = new Generated.mzMLType()
    {
        version = "1",
        cvList = new Generated.CVListType()
        {
            count = "1",
            cv = new Generated.CVType[1]
        }
    };
    mzML.cvList.cv[0] = new Generated.CVType()
    {
        URI = @"https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo",
        fullName = "Proteomics Standards Initiative Mass Spectrometry Ontology",
        id = "MS"
    };

    mzML.fileDescription = new Generated.FileDescriptionType()
    {
        fileContent = new Generated.ParamGroupType()
        {
            cvParam = new Generated.CVParamType[2]
        }
    };
    mzML.fileDescription.fileContent.cvParam[0] = new Generated.CVParamType() { accession = "MS:1000579" }; // MS1 Data
    mzML.fileDescription.fileContent.cvParam[1] = new Generated.CVParamType() { accession = "MS:1000580" }; // MSn Data

    // TODO: add the raw file fields
    mzML.softwareList = new Generated.SoftwareListType()
    {
        count = "1",
        software = new Generated.SoftwareType[1]
    };
    mzML.softwareList.software[0] = new Generated.SoftwareType()
    {
        id = "mzLib",
        version = "1",
        cvParam = new Generated.CVParamType[1]
    };
    mzML.softwareList.software[0].cvParam[0] = new Generated.CVParamType()
    {
        accession = "MS:1000799",
        value = "mzLib"
    };

    // Leaving empty. Can't figure out the configurations.
    // ToDo: read instrumentConfigurationList from mzML file
    mzML.instrumentConfigurationList = new Generated.InstrumentConfigurationListType();

    // Only writing this library's own processing entry. The source might have had other data processing
    // (but not if it is a raw file).
    // ToDo: read dataProcessingList from mzML file
    mzML.dataProcessingList = new Generated.DataProcessingListType()
    {
        count = "1",
        dataProcessing = new Generated.DataProcessingType[1]
    };
    mzML.dataProcessingList.dataProcessing[0] = new Generated.DataProcessingType() { id = "mzLibProcessing" };

    // ToDo: Finish the chromatogram writing!
    mzML.run = new Generated.RunType()
    {
        chromatogramList = new Generated.ChromatogramListType()
        {
            count = "1",
            chromatogram = new Generated.ChromatogramType[1]
        }
    };
    mzML.run.chromatogramList.chromatogram[0] = new Generated.ChromatogramType();

    int numSpectra = myMsDataFile.NumSpectra;
    mzML.run.spectrumList = new Generated.SpectrumListType()
    {
        count = numSpectra.ToString(CultureInfo.InvariantCulture),
        defaultDataProcessingRef = "mzLibProcessing",
        spectrum = new Generated.SpectrumType[numSpectra]
    };

    for (int i = 1; i <= numSpectra; i++)
    {
        // Fetch the scan once per iteration instead of once per property access.
        var msScan = myMsDataFile.GetOneBasedScan(i);
        var scanWithPrecursor = msScan as IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>;

        var spectrum = new Generated.SpectrumType()
        {
            defaultArrayLength = msScan.MassSpectrum.Size,
            // The mzML schema defines the index attribute as zero-based; the id keeps the one-based scan number.
            index = (i - 1).ToString(CultureInfo.InvariantCulture),
            id = msScan.OneBasedScanNumber.ToString(),
            cvParam = new Generated.CVParamType[8]
        };
        mzML.run.spectrumList.spectrum[i - 1] = spectrum;

        // cvParam[0]: spectrum type. It stays without an accession for a non-MS1 scan lacking precursor info.
        spectrum.cvParam[0] = new Generated.CVParamType();
        if (msScan.MsnOrder == 1)
        {
            spectrum.cvParam[0].accession = "MS:1000579";
        }
        else if (scanWithPrecursor != null)
        {
            spectrum.cvParam[0].accession = "MS:1000580";

            // An MSn spectrum needs a precursor section.
            spectrum.precursorList = BuildPrecursorList(scanWithPrecursor);
        }

        spectrum.cvParam[1] = new Generated.CVParamType()
        {
            name = "ms level",
            accession = "MS:1000511",
            value = msScan.MsnOrder.ToString(CultureInfo.InvariantCulture)
        };
        spectrum.cvParam[2] = new Generated.CVParamType()
        {
            name = CentroidNames[msScan.IsCentroid],
            accession = CentroidAccessions[msScan.IsCentroid]
        };

        // Polarity is written only when both lookup tables know it.
        if (PolarityNames.TryGetValue(msScan.Polarity, out string polarityName)
            && PolarityAccessions.TryGetValue(msScan.Polarity, out string polarityAccession))
        {
            spectrum.cvParam[3] = new Generated.CVParamType()
            {
                name = polarityName,
                accession = polarityAccession
            };
        }

        // Spectrum title
        spectrum.cvParam[4] = new Generated.CVParamType()
        {
            name = "spectrum title",
            accession = "MS:1000796",
            value = msScan.OneBasedScanNumber.ToString()
        };

        // First/last m/z are only read when the spectrum is non-empty.
        if (msScan.MassSpectrum.Size > 0)
        {
            spectrum.cvParam[5] = new Generated.CVParamType()
            {
                name = "lowest observed m/z",
                accession = "MS:1000528",
                value = msScan.MassSpectrum.FirstX.ToString(CultureInfo.InvariantCulture)
            };
            spectrum.cvParam[6] = new Generated.CVParamType()
            {
                name = "highest observed m/z",
                accession = "MS:1000527",
                value = msScan.MassSpectrum.LastX.ToString(CultureInfo.InvariantCulture)
            };
        }

        // Total ion current
        spectrum.cvParam[7] = new Generated.CVParamType()
        {
            name = "total ion current",
            accession = "MS:1000285",
            value = msScan.TotalIonCurrent.ToString(CultureInfo.InvariantCulture)
        };

        // Scan entry: retention time, filter string, optional injection time.
        var scanEntry = new Generated.ScanType() { cvParam = new Generated.CVParamType[3] };
        spectrum.scanList = new Generated.ScanListType()
        {
            count = "1",
            scan = new Generated.ScanType[] { scanEntry }
        };
        scanEntry.cvParam[0] = new Generated.CVParamType()
        {
            name = "scan start time",
            accession = "MS:1000016",
            value = msScan.RetentionTime.ToString(CultureInfo.InvariantCulture),
            unitCvRef = "UO",
            unitAccession = "UO:0000031",
            unitName = "minute"
        };
        scanEntry.cvParam[1] = new Generated.CVParamType()
        {
            name = "filter string",
            accession = "MS:1000512",
            value = msScan.ScanFilter
        };
        if (msScan.InjectionTime.HasValue)
        {
            scanEntry.cvParam[2] = new Generated.CVParamType()
            {
                name = "ion injection time",
                accession = "MS:1000927",
                value = msScan.InjectionTime.Value.ToString(CultureInfo.InvariantCulture)
            };
        }

        // The monoisotopic guess has no CV term here, so it is written as a user parameter.
        if (scanWithPrecursor != null && scanWithPrecursor.SelectedIonMonoisotopicGuessMz.HasValue)
        {
            scanEntry.userParam = new Generated.UserParamType[]
            {
                new Generated.UserParamType()
                {
                    name = "[mzLib]Monoisotopic M/Z:",
                    value = scanWithPrecursor.SelectedIonMonoisotopicGuessMz.Value.ToString(CultureInfo.InvariantCulture)
                }
            };
        }

        var scanWindow = new Generated.ParamGroupType() { cvParam = new Generated.CVParamType[2] };
        scanWindow.cvParam[0] = new Generated.CVParamType()
        {
            name = "scan window lower limit",
            accession = "MS:1000501",
            value = msScan.ScanWindowRange.Minimum.ToString(CultureInfo.InvariantCulture)
        };
        scanWindow.cvParam[1] = new Generated.CVParamType()
        {
            name = "scan window upper limit",
            accession = "MS:1000500",
            value = msScan.ScanWindowRange.Maximum.ToString(CultureInfo.InvariantCulture)
        };
        scanEntry.scanWindowList = new Generated.ScanWindowListType()
        {
            count = 1,
            scanWindow = new Generated.ParamGroupType[] { scanWindow }
        };

        // ONLY WRITING M/Z AND INTENSITY DATA, NOT THE CHARGE! (but can add charge info later)
        // CHARGE (and other stuff) CAN BE IMPORTANT IN ML APPLICATIONS!!!!!
        // The declared count must match the number of arrays actually written: 2, or 5 with noise data.
        bool hasNoiseData = msScan.NoiseData != null;
        int binaryArrayCount = hasNoiseData ? 5 : 2;
        var binaryArrays = new Generated.BinaryDataArrayType[binaryArrayCount];

        binaryArrays[0] = BuildBinaryDataArray(msScan.MassSpectrum.Get64BitXarray(), "MS:1000514", "m/z array", null);
        binaryArrays[1] = BuildBinaryDataArray(msScan.MassSpectrum.Get64BitYarray(), "MS:1000515", "intensity array", null);

        if (hasNoiseData)
        {
            // NOTE(review): the labels on arrays 3 and 4 look swapped relative to the getter names
            // (Noise -> "noise baseline", Baseline -> "noise intensity"). They are kept exactly as before
            // because readers may rely on them - confirm against the consumer of "kelleherCustomType".
            binaryArrays[2] = BuildBinaryDataArray(msScan.Get64BitNoiseDataMass(), "MS:1000786", "non-standard data array", "noise m/z");
            binaryArrays[3] = BuildBinaryDataArray(msScan.Get64BitNoiseDataNoise(), "MS:1000786", "non-standard data array", "noise baseline");
            binaryArrays[4] = BuildBinaryDataArray(msScan.Get64BitNoiseDataBaseline(), "MS:1000786", "non-standard data array", "noise intensity");
        }

        spectrum.binaryDataArrayList = new Generated.BinaryDataArrayListType()
        {
            count = binaryArrayCount.ToString(CultureInfo.InvariantCulture),
            binaryDataArray = binaryArrays
        };
    }

    using (TextWriter writer = new StreamWriter(outputFile))
    {
        mzmlSerializer.Serialize(writer, mzML);
    }
}

// Builds the single-precursor list (selected ion, isolation window, activation) for an MSn spectrum.
private static Generated.PrecursorListType BuildPrecursorList(IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>> scanWithPrecursor)
{
    // Selected ion: m/z is always written; charge state and intensity only when present.
    var selectedIon = new Generated.ParamGroupType() { cvParam = new Generated.CVParamType[3] };
    selectedIon.cvParam[0] = new Generated.CVParamType()
    {
        name = "selected ion m/z",
        value = scanWithPrecursor.SelectedIonMZ.ToString(CultureInfo.InvariantCulture),
        accession = "MS:1000744"
    };
    if (scanWithPrecursor.SelectedIonChargeStateGuess.HasValue)
    {
        selectedIon.cvParam[1] = new Generated.CVParamType()
        {
            name = "charge state",
            value = scanWithPrecursor.SelectedIonChargeStateGuess.Value.ToString(CultureInfo.InvariantCulture),
            accession = "MS:1000041"
        };
    }
    if (scanWithPrecursor.SelectedIonIntensity.HasValue)
    {
        selectedIon.cvParam[2] = new Generated.CVParamType()
        {
            name = "peak intensity",
            value = scanWithPrecursor.SelectedIonIntensity.Value.ToString(CultureInfo.InvariantCulture),
            accession = "MS:1000042"
        };
    }

    // Isolation window: target is the range mean; both offsets are half the range width.
    MzRange isolationRange = scanWithPrecursor.IsolationRange;
    string halfWidth = (isolationRange.Width / 2).ToString(CultureInfo.InvariantCulture);
    var isolationWindow = new Generated.ParamGroupType() { cvParam = new Generated.CVParamType[3] };
    isolationWindow.cvParam[0] = new Generated.CVParamType()
    {
        accession = "MS:1000827",
        name = "isolation window target m/z",
        value = isolationRange.Mean.ToString(CultureInfo.InvariantCulture)
    };
    isolationWindow.cvParam[1] = new Generated.CVParamType()
    {
        accession = "MS:1000828",
        name = "isolation window lower offset",
        value = halfWidth
    };
    isolationWindow.cvParam[2] = new Generated.CVParamType()
    {
        accession = "MS:1000829",
        name = "isolation window upper offset",
        value = halfWidth
    };

    // Activation: accession and name are looked up from the dissociation-type tables.
    DissociationType dissociationType = scanWithPrecursor.DissociationType;
    var activation = new Generated.ParamGroupType() { cvParam = new Generated.CVParamType[1] };
    activation.cvParam[0] = new Generated.CVParamType()
    {
        accession = DissociationTypeAccessions[dissociationType],
        name = DissociationTypeNames[dissociationType]
    };

    var precursor = new Generated.PrecursorType()
    {
        spectrumRef = scanWithPrecursor.OneBasedPrecursorScanNumber.ToString(),
        selectedIonList = new Generated.SelectedIonListType()
        {
            count = "1",
            selectedIon = new Generated.ParamGroupType[] { selectedIon }
        },
        isolationWindow = isolationWindow,
        activation = activation
    };

    return new Generated.PrecursorListType()
    {
        count = "1",
        precursor = new Generated.PrecursorType[] { precursor }
    };
}

// Wraps raw bytes as an uncompressed 64-bit-float binary data array; customType (optional) adds the "kelleherCustomType" user parameter.
private static Generated.BinaryDataArrayType BuildBinaryDataArray(byte[] data, string arrayAccession, string arrayName, string customType)
{
    var dataArray = new Generated.BinaryDataArrayType()
    {
        binary = data,
        // Length of the base64 text: every group of up to 3 bytes becomes 4 characters.
        encodedLength = (4 * Math.Ceiling((double)data.Length / 3)).ToString(CultureInfo.InvariantCulture),
        cvParam = new Generated.CVParamType[3]
    };
    dataArray.cvParam[0] = new Generated.CVParamType() { accession = arrayAccession, name = arrayName };
    dataArray.cvParam[1] = new Generated.CVParamType() { accession = "MS:1000523", name = "64-bit float" };
    dataArray.cvParam[2] = new Generated.CVParamType() { accession = "MS:1000576", name = "no compression" };

    if (customType != null)
    {
        dataArray.userParam = new Generated.UserParamType[]
        {
            new Generated.UserParamType() { name = "kelleherCustomType", value = customType }
        };
    }

    return dataArray;
}
protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList) { myTaskResults = new MyTaskResults(this); List <PsmCross> allPsms = new List <PsmCross>(); var compactPeptideToProteinPeptideMatch = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >(); Status("Loading modifications...", taskId); #region Load modifications List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList(); List <ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList(); List <string> localizeableModificationTypes = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList(); #endregion Load modifications Status("Loading proteins...", new List <string> { taskId }); var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, XlSearchParameters.DecoyType, localizeableModificationTypes, b.IsContaminant, out Dictionary <string, Modification> unknownModifications)).ToList(); List <ProductType> ionTypes = new List <ProductType>(); if (CommonParameters.BIons) { ionTypes.Add(ProductType.BnoB1ions); } if (CommonParameters.YIons) { ionTypes.Add(ProductType.Y); } if (CommonParameters.ZdotIons) { ionTypes.Add(ProductType.Zdot); } if (CommonParameters.CIons) { ionTypes.Add(ProductType.C); } TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(ionTypes); var crosslinker = new CrosslinkerTypeClass(); crosslinker.SelectCrosslinker(XlSearchParameters.CrosslinkerType); if (XlSearchParameters.CrosslinkerType == CrosslinkerType.UserDefined) { crosslinker.CrosslinkerName = 
XlSearchParameters.UdXLkerName; crosslinker.Cleavable = XlSearchParameters.UdXLkerCleavable; crosslinker.TotalMass = XlSearchParameters.UdXLkerTotalMass.HasValue ? (double)XlSearchParameters.UdXLkerTotalMass : 9999; crosslinker.CleaveMassShort = XlSearchParameters.UdXLkerShortMass.HasValue ? (double)XlSearchParameters.UdXLkerShortMass : 9999; crosslinker.CleaveMassLong = XlSearchParameters.UdXLkerShortMass.HasValue ? (double)XlSearchParameters.UdXLkerLongMass : 9999; crosslinker.CrosslinkerModSite = XlSearchParameters.UdXLkerResidue; crosslinker.LoopMass = XlSearchParameters.UdXLkerLoopMass.HasValue ? (double)XlSearchParameters.UdXLkerLoopMass : 9999; crosslinker.DeadendMassH2O = XlSearchParameters.UdXLkerDeadendMassH2O.HasValue ? (double)XlSearchParameters.UdXLkerDeadendMassH2O : 9999; crosslinker.DeadendMassNH2 = XlSearchParameters.UdXLkerDeadendMassNH2.HasValue ? (double)XlSearchParameters.UdXLkerDeadendMassNH2 : 9999; crosslinker.DeadendMassTris = XlSearchParameters.UdXLkerDeadendMassTris.HasValue ? 
(double)XlSearchParameters.UdXLkerDeadendMassTris : 9999; } ParallelOptions parallelOptions = new ParallelOptions(); if (CommonParameters.MaxParallelFilesToAnalyze.HasValue) { parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value; } MyFileManager myFileManager = new MyFileManager(XlSearchParameters.DisposeOfFileWhenDone); HashSet <IDigestionParams> ListOfDigestionParams = GetListOfDistinctDigestionParams(CommonParameters, fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b))); int completedFiles = 0; object indexLock = new object(); object psmLock = new object(); Status("Searching files...", taskId); #region proseCreatedWhileRunning proseCreatedWhileRunning.Append("The following crosslink discovery were used: "); proseCreatedWhileRunning.Append("crosslinker name = " + crosslinker.CrosslinkerName + "; "); proseCreatedWhileRunning.Append("crosslinker type = " + crosslinker.Cleavable + "; "); proseCreatedWhileRunning.Append("crosslinker mass = " + crosslinker.TotalMass + "; "); proseCreatedWhileRunning.Append("crosslinker modification site(s) = " + crosslinker.CrosslinkerModSite + "; "); proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; "); proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; "); proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; "); if (CommonParameters.DigestionParams.MaxPeptideLength == null) { proseCreatedWhileRunning.Append("maximum peptide length = unspecified; "); } else { proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; "); } proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; "); proseCreatedWhileRunning.Append("max modification isoforms = " + 
CommonParameters.DigestionParams.MaxModificationIsoforms + "; "); proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; "); proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; "); proseCreatedWhileRunning.Append("parent mass tolerance(s) = " + XlSearchParameters.XlPrecusorMsTl + "; "); proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; "); proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. "); #endregion proseCreatedWhileRunning Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex => { var origDataFile = currentRawFileList[spectraFileIndex]; ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]); List <PsmCross> newPsms = new List <PsmCross>(); var thisId = new List <string> { taskId, "Individual Spectra Files", origDataFile }; NewCollection(Path.GetFileName(origDataFile), thisId); Status("Loading spectra file...", thisId); IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks); Status("Getting ms2 scans...", thisId); Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray(); //List<Ms2ScanWithSpecificMass> arrayOfMs2ScansSortedByMass = new List<Ms2ScanWithSpecificMass>(); 
//arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToList(); //Code to resolve MS3 data //if (XlSearchParameters.FragmentationType == FragmentaionType.MS2_HCD || XlSearchParameters.FragmentationType == FragmentaionType.MS2_EthCD) //{ // arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToList(); //} //else //{ // arrayOfMs2ScansSortedByMass = GetCombinedMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToList(); //} for (int currentPartition = 0; currentPartition < CommonParameters.TotalPartitions; currentPartition++) { List <CompactPeptide> peptideIndex = null; List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions)); #region Generate indices for modern search Status("Getting fragment dictionary...", new List <string> { taskId }); var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, ionTypes, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, ListOfDigestionParams, combinedParams, 30000.0, new List <string> { taskId }); 
List <int>[] fragmentIndex = null; lock (indexLock) GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId); #endregion Generate indices for modern search Status("Searching files...", taskId); new TwoPassCrosslinkSearchEngine(newPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, ionTypes, currentPartition, combinedParams, false, XlSearchParameters.XlPrecusorMsTl, crosslinker, XlSearchParameters.CrosslinkSearchTop, XlSearchParameters.CrosslinkSearchTopNum, XlSearchParameters.XlQuench_H2O, XlSearchParameters.XlQuench_NH2, XlSearchParameters.XlQuench_Tris, XlSearchParameters.XlCharge_2_3, XlSearchParameters.XlCharge_2_3_PrimeFragment, thisId).Run(); ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + CommonParameters.TotalPartitions + "!", thisId)); } lock (psmLock) { allPsms.AddRange(newPsms); } completedFiles++; ReportProgress(new ProgressEventArgs(completedFiles / currentRawFileList.Count, "Searching...", new List <string> { taskId, "Individual Spectra Files" })); }); ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> { taskId, "Individual Spectra Files" })); Status("Crosslink analysis engine", taskId); MetaMorpheusEngineResults allcrosslinkanalysisResults; allcrosslinkanalysisResults = new CrosslinkAnalysisEngine(allPsms, compactPeptideToProteinPeptideMatch, proteinList, variableModifications, fixedModifications, ionTypes, OutputFolder, crosslinker, terminusType, CommonParameters, new List <string> { taskId }).Run(); allPsms = allPsms.Where(p => p != null).ToList(); if (XlSearchParameters.XlOutAll) { try { WriteAllToTsv(allPsms, OutputFolder, "allPsms", new List <string> { taskId }); } catch (Exception) { throw; } } var allPsmsXL = allPsms.Where(p => p.CrossType == PsmCrossType.Cross).Where(p => p.XLBestScore >= CommonParameters.ScoreCutoff && p.BetaPsmCross.XLBestScore >= CommonParameters.ScoreCutoff).ToList(); foreach (var item in 
allPsmsXL) { if (item.OneBasedStartResidueInProtein.HasValue) { item.XlProteinPos = item.OneBasedStartResidueInProtein.Value + item.XlPos - 1; } if (item.BetaPsmCross.OneBasedStartResidueInProtein.HasValue) { item.BetaPsmCross.XlProteinPos = item.BetaPsmCross.OneBasedStartResidueInProtein.Value + item.BetaPsmCross.XlPos - 1; } } #region Inter Crosslink //Write Inter Psms FDR var interPsmsXL = allPsmsXL.Where(p => !p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First().Contains(p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First()) && !p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First().Contains(p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First())).OrderByDescending(p => p.XLQvalueTotalScore).ToList(); foreach (var item in interPsmsXL) { item.CrossType = PsmCrossType.Inter; } var interPsmsXLFDR = CrosslinkDoFalseDiscoveryRateAnalysis(interPsmsXL).ToList(); //var interPsmsXLFDR = CrosslinkFDRAnalysis(interPsmsXL).ToList(); if (XlSearchParameters.XlOutCrosslink) { WriteCrosslinkToTsv(interPsmsXLFDR, OutputFolder, "xl_inter_fdr", new List <string> { taskId }); } if (XlSearchParameters.XlOutPercolator) { try { var interPsmsXLPercolator = interPsmsXL.Where(p => p.XLBestScore >= 2 && p.BetaPsmCross.XLBestScore >= 2).OrderBy(p => p.ScanNumber).ToList(); WriteCrosslinkToTxtForPercolator(interPsmsXLPercolator, OutputFolder, "xl_inter_perc", crosslinker, new List <string> { taskId }); } catch (Exception) { throw; } } #endregion Inter Crosslink #region Intra Cross-link //Write Intra Psms FDR var intraPsmsXL = allPsmsXL.Where(p => p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First() == p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First() || p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First().Contains(p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b 
=> b.Protein.Accession).First()) || p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First().Contains(p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First())).OrderByDescending(p => p.XLQvalueTotalScore).ToList(); foreach (var item in intraPsmsXL) { item.CrossType = PsmCrossType.Intra; } var intraPsmsXLFDR = CrosslinkDoFalseDiscoveryRateAnalysis(intraPsmsXL).ToList(); //var intraPsmsXLFDR = CrosslinkFDRAnalysis(intraPsmsXL).ToList(); if (XlSearchParameters.XlOutCrosslink) { WriteCrosslinkToTsv(intraPsmsXLFDR, OutputFolder, "xl_intra_fdr", new List <string> { taskId }); } if (XlSearchParameters.XlOutPercolator) { try { var intraPsmsXLPercolator = intraPsmsXL.Where(p => p.XLBestScore >= 2 && p.BetaPsmCross.XLBestScore >= 2).OrderBy(p => p.ScanNumber).ToList(); WriteCrosslinkToTxtForPercolator(intraPsmsXLPercolator, OutputFolder, "xl_intra_perc", crosslinker, new List <string> { taskId }); } catch (Exception) { throw; } } #endregion Intra Cross-link #region Single peptide var singlePsms = allPsms.Where(p => p.CrossType == PsmCrossType.Singe && !p.FullSequence.Contains("Crosslink")).OrderByDescending(p => p.Score).ToList(); var singlePsmsFDR = SingleFDRAnalysis(singlePsms).ToList(); if (XlSearchParameters.XlOutAll) { WriteSingleToTsv(singlePsmsFDR, OutputFolder, "single_fdr", new List <string> { taskId }); } #endregion Single peptide #region Loop peptide var loopPsms = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).OrderByDescending(p => p.XLTotalScore).ToList(); var loopPsmsFDR = SingleFDRAnalysis(loopPsms).ToList(); if (XlSearchParameters.XlOutAll) { WriteSingleToTsv(loopPsmsFDR, OutputFolder, "loop_fdr", new List <string> { taskId }); } #endregion Loop peptide #region deadend peptide var deadendPsms = allPsms.Where(p => p.CrossType == PsmCrossType.DeadEnd || p.CrossType == PsmCrossType.DeadEndH2O || p.CrossType == PsmCrossType.DeadEndNH2 || p.CrossType == PsmCrossType.DeadEndTris).OrderByDescending(p 
=> p.XLTotalScore).ToList(); deadendPsms.AddRange(allPsms.Where(p => p.CrossType == PsmCrossType.Singe && p.FullSequence.Contains("Crosslink")).ToList()); var deadendPsmsFDR = SingleFDRAnalysis(deadendPsms).ToList(); if (XlSearchParameters.XlOutAll) { WriteSingleToTsv(deadendPsmsFDR, OutputFolder, "deadend_fdr", new List <string> { taskId }); } #endregion deadend peptide if (XlSearchParameters.XlOutPepXML) { List <PsmCross> allPsmsFDR = new List <PsmCross>(); allPsmsFDR.AddRange(intraPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList()); allPsmsFDR.AddRange(interPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList()); allPsmsFDR.AddRange(singlePsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList()); allPsmsFDR.AddRange(loopPsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList()); allPsmsFDR.AddRange(deadendPsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList()); allPsmsFDR = allPsmsFDR.OrderBy(p => p.ScanNumber).ToList(); foreach (var fullFilePath in currentRawFileList) { string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath); WritePepXML_xl(allPsmsFDR.Where(p => p.FullFilePath == fullFilePath).ToList(), dbFilenameList, variableModifications, fixedModifications, localizeableModificationTypes, OutputFolder, fileNameNoExtension, new List <string> { taskId }); } } if (XlSearchParameters.XlOutAll) { List <PsmCross> allPsmsXLFDR = new List <PsmCross>(); allPsmsXLFDR.AddRange(intraPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList()); allPsmsXLFDR.AddRange(interPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList()); try { allPsmsXLFDR = allPsmsXLFDR.OrderByDescending(p => p.XLQvalueTotalScore).ToList(); var allPsmsXLFDRGroup = FindCrosslinks(allPsmsXLFDR); 
WriteCrosslinkToTsv(allPsmsXLFDRGroup, OutputFolder, "allPsmsXLFDRGroup", new List <string> { taskId }); } catch (Exception) { throw; } } return(myTaskResults); }
/// <summary>
/// Loads the file at <paramref name="path"/> in the manner selected by <paramref name="fileType"/>.
/// </summary>
/// <remarks>
/// DeconvolutionTSV: the first line is skipped; for every other line, column 0 is parsed as the mass,
/// column 10 as the apex retention time and column 17 as a list of bracket-delimited envelopes.
/// A feature is appended to DeconvolutedFeatures when at least one charge state is represented by more
/// than one envelope and the feature's signal-to-baseline is above 2.0.
/// MetaMorpheusPsmTsv: the file is opened and read to the end, but nothing is extracted from it.
/// RawFile: ".raw" files are loaded through ThermoStaticData and ".mzML" files through Mzml
/// (extension comparison is case-insensitive); the result is stored in rawFile.
/// </remarks>
/// <param name="path">Path of the file to load.</param>
/// <param name="fileType">Kind of file found at <paramref name="path"/>.</param>
/// <exception cref="System.Exception">
/// The file type is not handled, or a raw file has an extension other than .raw / .mzML.
/// </exception>
private void LoadData(string path, FileType fileType)
{
    if (fileType == FileType.DeconvolutionTSV)
    {
        // "using" releases the file handle even when a malformed row makes parsing throw.
        using (System.IO.StreamReader reader = new System.IO.StreamReader(path))
        {
            string line;
            int lineNum = 1;

            while ((line = reader.ReadLine()) != null)
            {
                // Line 1 is skipped (presumably a column header row).
                if (lineNum != 1)
                {
                    var parsedLine = line.Split('\t');
                    var mass = double.Parse(parsedLine[0]);
                    var apexRt = double.Parse(parsedLine[10]);
                    var envelopes = parsedLine[17].Split(new string[] { "[", "]" }, StringSplitOptions.RemoveEmptyEntries);

                    List<IsotopicEnvelope> envs = new List<IsotopicEnvelope>();

                    foreach (var envelope in envelopes)
                    {
                        // Envelope fields: charge, retention time, scan number, then one "a,b" pair per peak.
                        var split = envelope.Split(new string[] { "|", "(", ")" }, StringSplitOptions.RemoveEmptyEntries);
                        int charge = int.Parse(split[0]);
                        double rt = double.Parse(split[1]);

                        // The scan number is not used further; it is still parsed so that a
                        // malformed field keeps failing exactly as before.
                        int.Parse(split[2]);

                        List<MassSpectralPeak> peaks = new List<MassSpectralPeak>();
                        for (int i = 3; i < split.Length; i++)
                        {
                            string[] sp = split[i].Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
                            peaks.Add(new MassSpectralPeak(double.Parse(sp[0]), double.Parse(sp[1])));
                        }

                        envs.Add(new IsotopicEnvelope(rt, charge, peaks));
                    }

                    // Keep the feature only if some charge state was observed more than once.
                    if (envs.GroupBy(p => p.charge).Any(v => v.Count() > 1))
                    {
                        var deconvolutedFeature = new ChromatographicPeak(envs, mass, apexRt);
                        if (deconvolutedFeature.GetSignalToBaseline() > 2.0)
                        {
                            DeconvolutedFeatures.Add(deconvolutedFeature);
                        }
                    }
                }

                lineNum++;
            }
        }
    }
    else if (fileType == FileType.MetaMorpheusPsmTsv)
    {
        // NOTE(review): PSM rows are not interpreted yet. The file is still opened and consumed so
        // that a missing or unreadable file fails here as it did before; the reader is now disposed.
        using (System.IO.StreamReader reader = new System.IO.StreamReader(path))
        {
            while (reader.ReadLine() != null)
            {
            }
        }
    }
    else if (fileType == FileType.RawFile)
    {
        string ext = System.IO.Path.GetExtension(path).ToUpperInvariant();

        // These tests must form a single if / else-if chain: with two independent "if"s a .RAW file
        // was loaded and then immediately rejected by the else branch of the .MZML test.
        if (ext.Equals(".RAW"))
        {
            rawFile = ThermoStaticData.LoadAllStaticData(path);
        }
        else if (ext.Equals(".MZML"))
        {
            rawFile = Mzml.LoadAllStaticData(path);
        }
        else
        {
            throw new Exception("Cannot read file format: " + ext);
        }
    }
    else
    {
        throw new Exception("Cannot read file " + path);
    }
}
/// <summary>
/// Searches one spectra file with the classic search engine, runs FDR analysis on the best match per
/// (file, scan, peptide mass), annotates every retained match with its matched fragment ions, and
/// feeds the matches passing the 1% FDR filter to the data point acquisition engine.
/// </summary>
/// <param name="myMsDataFile">Loaded spectra file the scans are taken from.</param>
/// <param name="currentDataFile">Path of the spectra file; its name without extension labels log output.</param>
/// <param name="variableModifications">Variable modifications handed to the search.</param>
/// <param name="fixedModifications">Fixed modifications handed to the search.</param>
/// <param name="proteinList">Proteins to search against.</param>
/// <param name="taskId">Identifier of the running task, used as the first nested id.</param>
/// <param name="combinedParameters">Parameters supplying ion types, deconvolution and digestion settings.</param>
/// <param name="initPrecTol">Precursor tolerance; a PpmTolerance selects the ppm search mode, anything else the absolute one.</param>
/// <param name="initProdTol">Product tolerance used for searching and ion matching.</param>
/// <returns>
/// The matches with notch q-value below 0.01 that are not decoys and have a full sequence, together with
/// the acquisition result; an empty list and <c>null</c> when no match passes that filter.
/// </returns>
private (List<PeptideSpectralMatch>, DataPointAquisitionResults) GetDataAcquisitionResults(IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile, string currentDataFile, List<ModificationWithMass> variableModifications, List<ModificationWithMass> fixedModifications, List<Protein> proteinList, string taskId, ICommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
{
    string spectraFileName = Path.GetFileNameWithoutExtension(currentDataFile);

    // Every consumer receives its own list instance, exactly as when the list was spelled out inline.
    List<string> NestedIds() => new List<string> { taskId, "Individual Spectra Files", spectraFileName };

    MassDiffAcceptor searchMode = initPrecTol is PpmTolerance
        ? (MassDiffAcceptor)new SinglePpmAroundZeroSearchMode(initPrecTol.Value)
        : new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);

    // Fragment types, as flags, for the calibration data points.
    FragmentTypes calibrationFragments = FragmentTypes.None;
    if (combinedParameters.BIons)
    {
        calibrationFragments |= FragmentTypes.b;
    }
    if (combinedParameters.YIons)
    {
        calibrationFragments |= FragmentTypes.y;
    }
    if (combinedParameters.CIons)
    {
        calibrationFragments |= FragmentTypes.c;
    }
    if (combinedParameters.ZdotIons)
    {
        calibrationFragments |= FragmentTypes.zdot;
    }

    var unsortedScans = GetMs2Scans(
        myMsDataFile,
        currentDataFile,
        combinedParameters.DoPrecursorDeconvolution,
        combinedParameters.UseProvidedPrecursorInfo,
        combinedParameters.DeconvolutionIntensityRatio,
        combinedParameters.DeconvolutionMaxAssumedChargeState,
        combinedParameters.DeconvolutionMassTolerance);
    var scansByPrecursorMass = unsortedScans.OrderBy(scan => scan.PrecursorMass).ToArray();

    // One slot per scan, handed to the search engine.
    PeptideSpectralMatch[] psmSlots = new PeptideSpectralMatch[scansByPrecursorMass.Length];

    // The same ion selection again, this time as product types for searching and ion matching.
    List<ProductType> productTypes = new List<ProductType>();
    if (combinedParameters.BIons)
    {
        productTypes.Add(ProductType.B);
    }
    if (combinedParameters.YIons)
    {
        productTypes.Add(ProductType.Y);
    }
    if (combinedParameters.CIons)
    {
        productTypes.Add(ProductType.C);
    }
    if (combinedParameters.ZdotIons)
    {
        productTypes.Add(ProductType.Zdot);
    }

    Log("Searching with searchMode: " + searchMode, NestedIds());
    Log("Searching with productMassTolerance: " + initProdTol, NestedIds());

    var classicSearch = new ClassicSearchEngine(psmSlots, scansByPrecursorMass, variableModifications, fixedModifications, proteinList, productTypes, searchMode, false, combinedParameters, initProdTol, NestedIds());
    classicSearch.Run();

    List<PeptideSpectralMatch> candidatePsms = psmSlots.ToList();

    var sequenceEngine = new SequencesToActualProteinPeptidesEngine(candidatePsms, proteinList, fixedModifications, variableModifications, productTypes, new List<IDigestionParams> { combinedParameters.DigestionParams }, combinedParameters.ReportAllAmbiguity, NestedIds());
    var sequenceResults = (SequencesToActualProteinPeptidesEngineResults)sequenceEngine.Run();
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> peptideLookup = sequenceResults.CompactPeptideToProteinPeptideMatching;

    foreach (var match in candidatePsms.Where(m => m != null))
    {
        match.MatchToProteinLinkedPeptides(peptideLookup);
    }

    // Highest score first, ties broken by smallest precursor mass error; then keep the first
    // entry of every (file, scan, peptide mass) group.
    var rankedPsms = candidatePsms
        .Where(m => m != null)
        .OrderByDescending(m => m.Score)
        .ThenBy(m => m.PeptideMonisotopicMass.HasValue ? Math.Abs(m.ScanPrecursorMass - m.PeptideMonisotopicMass.Value) : double.MaxValue);
    List<PeptideSpectralMatch> bestPsms = rankedPsms
        .GroupBy(m => (m.FullFilePath, m.ScanNumber, m.PeptideMonisotopicMass))
        .Select(group => group.First())
        .ToList();

    new FdrAnalysisEngine(bestPsms, searchMode.NumNotches, false, NestedIds()).Run();

    List<PeptideSpectralMatch> goodIdentifications = bestPsms
        .Where(m => m.FdrInfo.QValueNotch < 0.01 && !m.IsDecoy && m.FullSequence != null)
        .ToList();

    if (goodIdentifications.Count == 0)
    {
        Warn("No PSMs below 1% FDR observed!");
        return (new List<PeptideSpectralMatch>(), null);
    }

    var dissociationTypes = MetaMorpheusEngine.DetermineDissociationType(productTypes);

    // Record matched fragment masses and their errors for every retained match, one entry per product type.
    foreach (var match in bestPsms)
    {
        var scan = myMsDataFile.GetOneBasedScan(match.ScanNumber);
        double precursorMass = match.ScanPrecursorMass;

        foreach (var productType in productTypes)
        {
            var theoreticalMasses = match.CompactPeptides.First().Key.ProductMassesMightHaveDuplicatesAndNaNs(new List<ProductType> { productType });
            Array.Sort(theoreticalMasses);

            List<double> matchedMasses = new List<double>();
            List<double> errorsDa = new List<double>();
            List<double> errorsPpm = new List<double>();

            LocalizationEngine.MatchIons(scan, initProdTol, theoreticalMasses, matchedMasses, errorsDa, errorsPpm, precursorMass, dissociationTypes, false);

            match.MatchedIonDictOnlyMatches.Add(productType, matchedMasses.ToArray());
            match.ProductMassErrorDa.Add(productType, errorsDa.ToArray());
            match.ProductMassErrorPpm.Add(productType, errorsPpm.ToArray());
        }
    }

    var acquisitionEngine = new DataPointAcquisitionEngine(
        goodIdentifications,
        myMsDataFile,
        initPrecTol,
        initProdTol,
        CalibrationParameters.NumFragmentsNeededForEveryIdentification,
        CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
        CalibrationParameters.MinMS2IsotopicPeaksNeededForConfirmedIdentification,
        calibrationFragments,
        NestedIds());
    DataPointAquisitionResults acquisitionResults = (DataPointAquisitionResults)acquisitionEngine.Run();

    return (goodIdentifications, acquisitionResults);
}
protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList) { myTaskResults = new MyTaskResults(this); if (NeoType.Equals(NeoTaskType.AggregateTargetDecoyFiles)) { //getfolders if (NeoParameters.DecoyFilePath == null) { NeoParameters.DecoyFilePath = new DirectoryInfo(OutputFolder).Name; string taskString = NeoParameters.DecoyFilePath.Split('-')[0]; int taskNum = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4)); taskNum--; NeoParameters.DecoyFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.DecoyFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; if (NeoParameters.TargetFilePath == null) { NeoParameters.TargetFilePath = new DirectoryInfo(OutputFolder).Name; taskNum--; NeoParameters.TargetFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.TargetFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; } } if (NeoParameters.TargetFilePath == null) { NeoParameters.TargetFilePath = new DirectoryInfo(OutputFolder).Name; string taskString = NeoParameters.TargetFilePath.Split('-')[0]; int taskNum = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4)); taskNum--; NeoParameters.TargetFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.TargetFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; } AggregateSearchFiles.Combine(NeoParameters.TargetFilePath, NeoParameters.DecoyFilePath, OutputFolder + "\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0])); } else if (NeoType.Equals(NeoTaskType.AggregateNormalSplicedFiles)) { //reset database dbFilenameList = StoredDatabases; string normalPath = ""; string cisPath = new 
DirectoryInfo(OutputFolder).Name; string taskString = cisPath.Split('-')[0]; int taskNum = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4)); taskNum -= 2; string transPath = OutputFolder.Substring(0, OutputFolder.Length - cisPath.Length) + "Task" + (taskNum + 1) + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; cisPath = OutputFolder.Substring(0, OutputFolder.Length - cisPath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; AggregateSearchFiles.RecursiveNeoAggregation(normalPath, cisPath, OutputFolder, "CisResults.psmtsv"); AggregateSearchFiles.RecursiveNeoAggregation(normalPath, transPath, OutputFolder, "TransResults.psmtsv"); } else if (NeoType.Equals(NeoTaskType.GenerateSplicedPeptides)) { NeoMassCalculator.ImportMasses(); ParallelOptions parallelOptions = new ParallelOptions(); if (CommonParameters.MaxParallelFilesToAnalyze.HasValue) { parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value; } MyFileManager myFileManager = new MyFileManager(true); //Import Spectra Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex => { var origDataFile = currentRawFileList[spectraFileIndex]; ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]); var thisId = new List <string> { taskId, "Individual Spectra Files", origDataFile }; NewCollection(Path.GetFileName(origDataFile), thisId); Status("Loading spectra file...", thisId); IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks); Status("Getting ms2 scans...", thisId); Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, 
combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray(); //Import Database Status("Loading modifications...", taskId); #region Load modifications List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList(); List <ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList(); List <string> localizeableModificationTypes = CommonParameters.ListOfModTypesLocalize == null ? new List <string>() : CommonParameters.ListOfModTypesLocalize.ToList(); if (CommonParameters.LocalizeAll) { localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList(); } else { localizeableModificationTypes = GlobalVariables.AllModTypesKnown.Where(b => localizeableModificationTypes.Contains(b)).ToList(); } #endregion Load modifications var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, DecoyType.None, localizeableModificationTypes, b.IsContaminant, out Dictionary <string, Modification> unknownModifications)).ToList(); //Read N and C files string nPath = NeoParameters.NFilePath; string cPath = NeoParameters.CFilePath; //if termini input if (nPath == null || cPath == null) { //if no termini input string taskHeader = "Task"; string[] pathArray = OutputFolder.Split('\\'); string basePath = ""; for (int i = 0; i < pathArray.Length - 1; i++) { basePath += pathArray[i] + '\\'; } string currentTaskNumber = pathArray[pathArray.Length - 1].Split('-')[0]; currentTaskNumber = currentTaskNumber.Substring(taskHeader.Length, currentTaskNumber.Length - taskHeader.Length); string NHeader = ""; string CHeader = ""; if (cPath == null) { 
CHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 1); if (nPath == null) { NHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 2); } } else { NHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 1); } foreach (string s in Directory.GetDirectories(basePath)) { if (s.Contains(NHeader)) { nPath = s; } else if (s.Contains(CHeader)) { cPath = s; } } string fileName = Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; nPath += "\\" + fileName; cPath += "\\" + fileName; } Status("Importing Search Results...", taskId); List <NeoPsm> psms = ImportPsmtsv.ImportNeoPsms(nPath, cPath); //Splice Status("Splicing Fragments...", taskId); List <NeoPsm> candidates = NeoSplicePeptides.SplicePeptides(psms); //Find Ambiguity Status("Identifying Ambiguity...", taskId); NeoFindAmbiguity.FindAmbiguity(candidates, proteinList, arrayOfMs2ScansSortedByMass, dbFilenameList[0].FilePath); //Export Results Status("Exporting Results...", taskId); NeoExport.ExportAll(candidates, arrayOfMs2ScansSortedByMass, OutputFolder); //Switch databases string outputFolder = NeoExport.path + NeoExport.folder + @"\" + NeoExport.folder + "FusionDatabaseAppendixNC.fasta"; dbFilenameList = new List <DbForTask>() { new DbForTask(outputFolder, false) }; });
/// <summary>
/// Creates a calibration engine for one spectra file.
/// </summary>
/// <param name="myMSDataFile">Spectra file stored on the engine.</param>
/// <param name="datapoints">Acquired data points stored on the engine.</param>
/// <param name="nestedIds">Nested identifiers forwarded to the base engine constructor.</param>
public CalibrationEngine(
    IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMSDataFile,
    DataPointAquisitionResults datapoints,
    List<string> nestedIds)
    : base(nestedIds)
{
    // The field (myMsDataFile) and the parameter (myMSDataFile) differ only in letter case,
    // so the explicit "this." keeps the assignment unambiguous to readers.
    this.datapoints = datapoints;
    this.myMsDataFile = myMSDataFile;
}
/// <summary>
/// Runs the G-PTM-D task: loads modifications and proteins, searches every spectra file with
/// <c>ClassicSearchEngine</c>, runs FDR analysis and <c>GptmdEngine</c> on the pooled PSMs, and
/// writes the candidate PSMs plus one XML database for the non-contaminant inputs and one for
/// the contaminant inputs (each only when such inputs exist).
/// </summary>
/// <param name="OutputFolder">Folder that receives "GPTMD_Candidates.psmtsv" and the "*GPTMD.xml" databases.</param>
/// <param name="dbFilenameList">Protein databases to load; their contaminant flag decides which output database they feed.</param>
/// <param name="currentRawFileList">Spectra files to search, one parallel iteration per file.</param>
/// <param name="taskId">Identifier used as the root of every status/progress id list.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The task results, including the list of newly written databases.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
{
    myTaskResults = new MyTaskResults(this)
    {
        newDatabases = new List<DbForTask>()
    };

    Status("Loading modifications...", new List<string> { taskId });

    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();

    // ListOfModTypesLocalize may be null (other tasks in this file guard against it);
    // treat that as "nothing to localize" instead of throwing a NullReferenceException.
    List<string> localizeableModificationTypes;
    if (CommonParameters.LocalizeAll)
    {
        localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList();
    }
    else if (CommonParameters.ListOfModTypesLocalize == null)
    {
        localizeableModificationTypes = new List<string>();
    }
    else
    {
        localizeableModificationTypes = CommonParameters.ListOfModTypesLocalize.ToList();
    }

    List<ModificationWithMass> gptmdModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.modificationType, b.id))).ToList();
    IEnumerable<Tuple<double, double>> combos = LoadCombos(gptmdModifications).ToList();

    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

    List<ProductType> ionTypes = new List<ProductType>();
    if (CommonParameters.BIons)
    {
        ionTypes.Add(ProductType.B);
    }
    if (CommonParameters.YIons)
    {
        ionTypes.Add(ProductType.Y);
    }
    if (CommonParameters.CIons)
    {
        ionTypes.Add(ProductType.C);
    }
    if (CommonParameters.ZdotIons)
    {
        ionTypes.Add(ProductType.Zdot);
    }

    Status("Loading proteins...", new List<string> { taskId });

    // Decoys are currently not being searched with DecoyType.None, so reversed decoys are requested here.
    // The unknown-modification dictionary returned through the out parameter is not used by this task.
    var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, DecoyType.Reverse, localizeableModificationTypes, b.IsContaminant, out Dictionary<string, Modification> unknownModifications)).ToList();

    proseCreatedWhileRunning.Append("The following G-PTM-D settings were used: ");
    proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    if (CommonParameters.DigestionParams.MaxPeptideLength == null)
    {
        proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
    }
    else
    {
        proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    }
    proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");

    // Puppet search mode used for the prose and the FDR notch count; the search mode
    // actually used for searching is built per file inside the parallel loop.
    MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);
    proseCreatedWhileRunning.Append("parent mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");
    proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + " Da. ");
    proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    Status("Running G-PTM-D...", new List<string> { taskId });

    HashSet<IDigestionParams> listOfDigestionParams = GetListOfDistinctDigestionParams(CommonParameters, fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b)));

    MyFileManager myFileManager = new MyFileManager(true);

    // Guards allPsms, which is appended to from every parallel iteration.
    object psmsLock = new object();

    ParallelOptions parallelOptions = new ParallelOptions();
    if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
    {
        parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
    }

    Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
    {
        var origDataFile = currentRawFileList[spectraFileIndex];
        ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
        MassDiffAcceptor searchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

        NewCollection(Path.GetFileName(origDataFile), new List<string> { taskId, "Individual Spectra Files", origDataFile });
        StartingDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
        IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks);

        Status("Getting ms2 scans...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(
            myMsDataFile,
            origDataFile,
            combinedParams.DoPrecursorDeconvolution,
            combinedParams.UseProvidedPrecursorInfo,
            combinedParams.DeconvolutionIntensityRatio,
            combinedParams.DeconvolutionMaxAssumedChargeState,
            combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

        myFileManager.DoneWithFile(origDataFile);

        PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
        new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, searchMode, false, combinedParams, combinedParams.ProductMassTolerance, new List<string> { taskId, "Individual Spectra Files", origDataFile }).Run();

        lock (psmsLock)
        {
            allPsms.AddRange(allPsmsArray);
        }

        FinishedDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });
        ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", origDataFile }));
    });
    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    // Group and order psms
    SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngineTest = new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, ionTypes, listOfDigestionParams, CommonParameters.ReportAllAmbiguity, new List<string> { taskId });
    var resTest = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngineTest.Run();
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> compactPeptideToProteinPeptideMatchingTest = resTest.CompactPeptideToProteinPeptideMatching;

    // Entries are null for scans that produced no match, so skip them.
    foreach (var psm in allPsms)
    {
        if (psm != null)
        {
            psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatchingTest);
        }
    }

    // Keep one PSM per (file, scan number, peptide mass): the highest score, ties broken by
    // the smallest absolute precursor mass error (PSMs without a peptide mass sort last).
    allPsms = allPsms
        .Where(b => b != null)
        .OrderByDescending(b => b.Score)
        .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
        .GroupBy(b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
        .Select(b => b.First())
        .ToList();

    new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, false, new List<string> { taskId }).Run();

    var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");
    WritePsmsToTsv(allPsms, writtenFile, new Dictionary<string, int>());
    SucessfullyFinishedWritingFile(writtenFile, new List<string> { taskId });

    var gptmdResults = (GptmdResults)new GptmdEngine(allPsms, gptmdModifications, combos, CommonParameters.PrecursorMassTolerance, new List<string> { taskId }).Run();

    // Writes the GPTMD XML database for either the contaminant or the non-contaminant input
    // databases and records it (plus a per-modification summary) in the task results.
    // Does nothing when no input database of the requested kind exists.
    void WriteGptmdDatabase(bool forContaminants, string summaryPrefix)
    {
        if (!dbFilenameList.Any(b => b.IsContaminant == forContaminants))
        {
            return;
        }

        List<string> databaseNames = dbFilenameList
            .Where(p => p.IsContaminant == forContaminants)
            .Select(p => GetDatabaseNameBeforeFirstDot(p.FilePath))
            .ToList();

        string outputXmlDbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

        var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant == forContaminants).ToList(), outputXmlDbFullName);

        SucessfullyFinishedWritingFile(outputXmlDbFullName, new List<string> { taskId });

        myTaskResults.newDatabases.Add(new DbForTask(outputXmlDbFullName, forContaminants));
        myTaskResults.AddNiceText(summaryPrefix + newModsActuallyWritten.Sum(b => b.Value));
        myTaskResults.AddNiceText("Mods types and counts:");
        myTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
    }

    WriteGptmdDatabase(false, "Modifications added: ");
    WriteGptmdDatabase(true, "Contaminant modifications added: ");

    return myTaskResults;
}

/// <summary>
/// Returns the file name of <paramref name="filePath"/> truncated at its first dot.
/// Path.GetFileNameWithoutExtension is deliberately not used: it strips only the last
/// extension, so running GPTMD on "db.xml.gz" would produce a ".xml.xml" output name.
/// A file name without any dot is returned unchanged.
/// </summary>
private static string GetDatabaseNameBeforeFirstDot(string filePath)
{
    string dbName = Path.GetFileName(filePath);
    int indexOfFirstDot = dbName.IndexOf('.');

    // IndexOf returns -1 when there is no dot; Substring(0, -1) would throw.
    return indexOfFirstDot < 0 ? dbName : dbName.Substring(0, indexOfFirstDot);
}