Beispiel #1
0
        private void CalibrateSpectra(RegressionForestModel ms1predictor, RegressionForestModel ms2predictor)
        {
            Parallel.ForEach(Partitioner.Create(1, myMsDataFile.NumSpectra + 1), fff =>
            {
                for (int i = fff.Item1; i < fff.Item2; i++)
                {
                    var scan = myMsDataFile.GetOneBasedScan(i);

                    if (scan is IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> > ms2Scan)
                    {
                        var precursorScan = myMsDataFile.GetOneBasedScan(ms2Scan.OneBasedPrecursorScanNumber.Value);

                        if (!ms2Scan.SelectedIonMonoisotopicGuessIntensity.HasValue && ms2Scan.SelectedIonMonoisotopicGuessMz.HasValue)
                        {
                            ms2Scan.ComputeMonoisotopicPeakIntensity(precursorScan.MassSpectrum);
                        }

                        double theFunc(IPeak x) => x.X - ms2predictor.Predict(new double[] { x.X, scan.RetentionTime, Math.Log(scan.TotalIonCurrent), scan.InjectionTime.HasValue ? Math.Log(scan.InjectionTime.Value) : double.NaN, Math.Log(x.Y) });

                        double theFuncForPrecursor(IPeak x) => x.X - ms1predictor.Predict(new double[] { x.X, precursorScan.RetentionTime, Math.Log(precursorScan.TotalIonCurrent), precursorScan.InjectionTime.HasValue ? Math.Log(precursorScan.InjectionTime.Value) : double.NaN, Math.Log(x.Y) });

                        ms2Scan.TransformMzs(theFunc, theFuncForPrecursor);
                    }
                    else
                    {
                        Func <IPeak, double> theFunc = x => x.X - ms1predictor.Predict(new double[] { x.X, scan.RetentionTime, Math.Log(scan.TotalIonCurrent), scan.InjectionTime.HasValue ? Math.Log(scan.InjectionTime.Value) : double.NaN, Math.Log(x.Y) });
                        scan.MassSpectrum.ReplaceXbyApplyingFunction(theFunc);
                    }
                }
            }
                             );
        }
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(lp);

            foreach (var ok in allResultingIdentifications)
            {
                ok.MatchedIonDictOnlyMatches = new Dictionary <ProductType, double[]>();
                ok.ProductMassErrorDa        = new Dictionary <ProductType, double[]>();
                ok.ProductMassErrorPpm       = new Dictionary <ProductType, double[]>();
                var    theScan          = myMsDataFile.GetOneBasedScan(ok.ScanNumber);
                double thePrecursorMass = ok.ScanPrecursorMass;
                foreach (var huh in lp)
                {
                    var ionMasses = ok.CompactPeptides.First().Key.ProductMassesMightHaveDuplicatesAndNaNs(new List <ProductType> {
                        huh
                    });
                    Array.Sort(ionMasses);
                    List <double> matchedIonMassesList    = new List <double>();
                    List <double> productMassErrorDaList  = new List <double>();
                    List <double> productMassErrorPpmList = new List <double>();
                    MatchIons(theScan, fragmentTolerance, ionMasses, matchedIonMassesList, productMassErrorDaList, productMassErrorPpmList, thePrecursorMass, dissociationTypes, addCompIons);
                    double[] matchedIonMassesOnlyMatches = matchedIonMassesList.ToArray();
                    ok.MatchedIonDictOnlyMatches.Add(huh, matchedIonMassesOnlyMatches);
                    ok.ProductMassErrorDa.Add(huh, productMassErrorDaList.ToArray());
                    ok.ProductMassErrorPpm.Add(huh, productMassErrorPpmList.ToArray());
                }
            }

            foreach (var ok in allResultingIdentifications.Where(b => b.NumDifferentCompactPeptides == 1))
            {
                var    theScan          = myMsDataFile.GetOneBasedScan(ok.ScanNumber);
                double thePrecursorMass = ok.ScanPrecursorMass;

                if (ok.FullSequence == null)
                {
                    continue;
                }

                var representative = ok.CompactPeptides.First().Value.Item2.First();

                var localizedScores = new List <double>();
                for (int indexToLocalize = 0; indexToLocalize < representative.Length; indexToLocalize++)
                {
                    PeptideWithSetModifications localizedPeptide = representative.Localize(indexToLocalize, ok.ScanPrecursorMass - representative.MonoisotopicMass);

                    var gg = localizedPeptide.CompactPeptide(terminusType).ProductMassesMightHaveDuplicatesAndNaNs(lp);
                    Array.Sort(gg);
                    var score = CalculatePeptideScore(theScan, fragmentTolerance, gg, thePrecursorMass, dissociationTypes, addCompIons, 0);
                    localizedScores.Add(score);
                }

                ok.LocalizedScores = localizedScores;
            }
            return(new LocalizationEngineResults(this));
        }
Beispiel #3
0
        public override IMsDataScan <IMzSpectrum <IMzPeak> > GetOneBasedScan(int oneBasedScanNumber)
        {
            if (Scans[oneBasedScanNumber - 1] == null)
            {
                var representativeScanNumber = oneBasedScanNumber + (numScansToAverage - 1) / 2;
                var representative           = raw.GetOneBasedScan(representativeScanNumber);
                if (representative.MsnOrder != 1)
                {
                    throw new MzLibException("Scan " + representativeScanNumber + " is not MS1 scan");
                }
                int      msnOrder = 1;
                Polarity polarity = representative.Polarity;
                if (!representative.IsCentroid)
                {
                    throw new MzLibException("Scan " + representativeScanNumber + " is not centroid scan");
                }
                bool           isCentroid    = true;
                double         retentionTime = representative.RetentionTime;
                MZAnalyzerType mzAnalyzer    = representative.MzAnalyzer;

                IMzSpectrum <IMzPeak> peaks = CombinePeaks(raw.Where(b => b.OneBasedScanNumber >= oneBasedScanNumber && b.OneBasedScanNumber <= oneBasedScanNumber + numScansToAverage - 1).Select(b => b.MassSpectrum).ToList(), ppmToleranceForPeakCombination);

                MzRange scanWindowRange = representative.ScanWindowRange;

                double totalIonCurrent = peaks.SumOfAllY;
                double injectionTime   = double.NaN;
                double[,] noiseData = null;

                Scans[oneBasedScanNumber - 1] = new MsDataScan <IMzSpectrum <IMzPeak> >(peaks, oneBasedScanNumber, msnOrder, isCentroid, polarity, retentionTime, scanWindowRange, null, mzAnalyzer, totalIonCurrent, injectionTime, noiseData, "scan=" + oneBasedScanNumber);
            }
            return(Scans[oneBasedScanNumber - 1]);
        }
Beispiel #4
0
        public static IEnumerable <Ms2ScanWithSpecificMass> GetMs2Scans(
            IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMSDataFile,
            string fullFilePath,
            bool doPrecursorDeconvolution,
            bool useProvidedPrecursorInfo,
            double deconvolutionIntensityRatio,
            int deconvolutionMaxAssumedChargeState,
            Tolerance deconvolutionMassTolerance)
        {
            foreach (var ms2scan in myMSDataFile.OfType <IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> > >())
            {
                List <(double, int)> isolatedStuff = new List <(double, int)>();
                if (ms2scan.OneBasedPrecursorScanNumber.HasValue)
                {
                    var precursorSpectrum = myMSDataFile.GetOneBasedScan(ms2scan.OneBasedPrecursorScanNumber.Value);
                    ms2scan.RefineSelectedMzAndIntensity(precursorSpectrum.MassSpectrum);
                    if (ms2scan.SelectedIonMonoisotopicGuessMz.HasValue)
                    {
                        ms2scan.ComputeMonoisotopicPeakIntensity(precursorSpectrum.MassSpectrum);
                    }
                    if (doPrecursorDeconvolution)
                    {
                        foreach (var envelope in ms2scan.GetIsolatedMassesAndCharges(precursorSpectrum.MassSpectrum, 1, deconvolutionMaxAssumedChargeState, deconvolutionMassTolerance.Value, deconvolutionIntensityRatio))
                        {
                            var monoPeakMz = envelope.monoisotopicMass.ToMz(envelope.charge);
                            isolatedStuff.Add((monoPeakMz, envelope.charge));
                        }
                    }
                }

                if (useProvidedPrecursorInfo && ms2scan.SelectedIonChargeStateGuess.HasValue)
                {
                    var precursorCharge = ms2scan.SelectedIonChargeStateGuess.Value;
                    if (ms2scan.SelectedIonMonoisotopicGuessMz.HasValue)
                    {
                        var precursorMZ = ms2scan.SelectedIonMonoisotopicGuessMz.Value;
                        if (!isolatedStuff.Any(b => deconvolutionMassTolerance.Within(precursorMZ.ToMass(precursorCharge), b.Item1.ToMass(b.Item2))))
                        {
                            isolatedStuff.Add((precursorMZ, precursorCharge));
                        }
                    }
                    else
                    {
                        var precursorMZ = ms2scan.SelectedIonMZ;
                        if (!isolatedStuff.Any(b => deconvolutionMassTolerance.Within(precursorMZ.ToMass(precursorCharge), b.Item1.ToMass(b.Item2))))
                        {
                            isolatedStuff.Add((precursorMZ, precursorCharge));
                        }
                    }
                }

                foreach (var heh in isolatedStuff)
                {
                    yield return(new Ms2ScanWithSpecificMass(ms2scan, heh.Item1, heh.Item2, fullFilePath));
                }
            }
        }
Beispiel #5
0
        public static void CreateAndWriteMyMzmlWithCalibratedSpectra(IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile, string outputFile, bool writeIndexed)
        {
            var mzML = new Generated.mzMLType()
            {
                version = "1",
                cvList  = new Generated.CVListType()
            };

            mzML.cvList.count = "1";
            mzML.cvList.cv    = new Generated.CVType[1];
            mzML.cvList.cv[0] = new Generated.CVType()
            {
                URI      = @"https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo",
                fullName = "Proteomics Standards Initiative Mass Spectrometry Ontology",
                id       = "MS"
            };
            mzML.fileDescription = new Generated.FileDescriptionType()
            {
                fileContent = new Generated.ParamGroupType()
            };
            mzML.fileDescription.fileContent.cvParam    = new Generated.CVParamType[2];
            mzML.fileDescription.fileContent.cvParam[0] = new Generated.CVParamType()
            {
                accession = "MS:1000579" // MS1 Data
            };
            mzML.fileDescription.fileContent.cvParam[1] = new Generated.CVParamType()
            {
                accession = "MS:1000580" // MSn Data
            };
            mzML.softwareList = new Generated.SoftwareListType()
            {
                count    = "1",
                software = new Generated.SoftwareType[1]
            };

            // TODO: add the raw file fields
            mzML.softwareList.software[0] = new Generated.SoftwareType()
            {
                id      = "mzLib",
                version = "1",
                cvParam = new Generated.CVParamType[1]
            };
            mzML.softwareList.software[0].cvParam[0] = new Generated.CVParamType()
            {
                accession = "MS:1000799",
                value     = "mzLib"
            };

            // Leaving empty. Can't figure out the configurations.
            // ToDo: read instrumentConfigurationList from mzML file
            mzML.instrumentConfigurationList = new Generated.InstrumentConfigurationListType();

            mzML.dataProcessingList = new Generated.DataProcessingListType()
            {
                count          = "1",
                dataProcessing = new Generated.DataProcessingType[1]
            };
            // Only writing mine! Might have had some other data processing (but not if it is a raw file)
            // ToDo: read dataProcessingList from mzML file
            mzML.dataProcessingList.dataProcessing[0] = new Generated.DataProcessingType()
            {
                id = "mzLibProcessing"
            };
            mzML.run = new Generated.RunType()
            {
                chromatogramList = new Generated.ChromatogramListType()
                {
                    count        = "1",
                    chromatogram = new Generated.ChromatogramType[1]
                }
            };
            // ToDo: Finish the chromatogram writing!
            mzML.run.chromatogramList.chromatogram[0] = new Generated.ChromatogramType();

            mzML.run.spectrumList = new Generated.SpectrumListType()
            {
                count = (myMsDataFile.NumSpectra).ToString(CultureInfo.InvariantCulture),
                defaultDataProcessingRef = "mzLibProcessing",
                spectrum = new Generated.SpectrumType[myMsDataFile.NumSpectra]
            };

            // Loop over all spectra
            for (int i = 1; i <= myMsDataFile.NumSpectra; i++)
            {
                mzML.run.spectrumList.spectrum[i - 1] = new Generated.SpectrumType()
                {
                    defaultArrayLength = myMsDataFile.GetOneBasedScan(i).MassSpectrum.Size,

                    index = i.ToString(CultureInfo.InvariantCulture),
                    id    = myMsDataFile.GetOneBasedScan(i).OneBasedScanNumber.ToString(),

                    cvParam = new Generated.CVParamType[8]
                };
                mzML.run.spectrumList.spectrum[i - 1].cvParam[0] = new Generated.CVParamType();

                if (myMsDataFile.GetOneBasedScan(i).MsnOrder == 1)
                {
                    mzML.run.spectrumList.spectrum[i - 1].cvParam[0].accession = "MS:1000579";
                }
                else if (myMsDataFile.GetOneBasedScan(i) is IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> > )
                {
                    var scanWithPrecursor = myMsDataFile.GetOneBasedScan(i) as IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> >;
                    mzML.run.spectrumList.spectrum[i - 1].cvParam[0].accession = "MS:1000580";

                    // So needs a precursor!
                    mzML.run.spectrumList.spectrum[i - 1].precursorList = new Generated.PrecursorListType()
                    {
                        count     = 1.ToString(),
                        precursor = new Generated.PrecursorType[1]
                    };
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0] = new Generated.PrecursorType();
                    string precursorID = scanWithPrecursor.OneBasedPrecursorScanNumber.ToString();
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].spectrumRef     = precursorID;
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].selectedIonList = new Generated.SelectedIonListType()
                    {
                        count       = 1.ToString(),
                        selectedIon = new Generated.ParamGroupType[1]
                    };
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].selectedIonList.selectedIon[0] = new Generated.ParamGroupType()
                    {
                        cvParam = new Generated.CVParamType[3]
                    };
                    // Selected ion MZ
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].selectedIonList.selectedIon[0].cvParam[0] = new Generated.CVParamType()
                    {
                        name      = "selected ion m/z",
                        value     = scanWithPrecursor.SelectedIonMZ.ToString(CultureInfo.InvariantCulture),
                        accession = "MS:1000744"
                    };

                    // Charge State
                    if (scanWithPrecursor.SelectedIonChargeStateGuess.HasValue)
                    {
                        mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].selectedIonList.selectedIon[0].cvParam[1] = new Generated.CVParamType()
                        {
                            name      = "charge state",
                            value     = scanWithPrecursor.SelectedIonChargeStateGuess.Value.ToString(CultureInfo.InvariantCulture),
                            accession = "MS:1000041"
                        };
                    }

                    // Selected ion intensity
                    if (scanWithPrecursor.SelectedIonIntensity.HasValue)
                    {
                        mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].selectedIonList.selectedIon[0].cvParam[2] = new Generated.CVParamType()
                        {
                            name      = "peak intensity",
                            value     = scanWithPrecursor.SelectedIonIntensity.Value.ToString(CultureInfo.InvariantCulture),
                            accession = "MS:1000042"
                        };
                    }

                    MzRange isolationRange = scanWithPrecursor.IsolationRange;
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].isolationWindow = new Generated.ParamGroupType()
                    {
                        cvParam = new Generated.CVParamType[3]
                    };
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].isolationWindow.cvParam[0] = new Generated.CVParamType()
                    {
                        accession = "MS:1000827",
                        name      = "isolation window target m/z",
                        value     = isolationRange.Mean.ToString(CultureInfo.InvariantCulture)
                    };
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].isolationWindow.cvParam[1] = new Generated.CVParamType()
                    {
                        accession = "MS:1000828",
                        name      = "isolation window lower offset",
                        value     = (isolationRange.Width / 2).ToString(CultureInfo.InvariantCulture)
                    };
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].isolationWindow.cvParam[2] = new Generated.CVParamType()
                    {
                        accession = "MS:1000829",
                        name      = "isolation window upper offset",
                        value     = (isolationRange.Width / 2).ToString(CultureInfo.InvariantCulture)
                    };
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].activation = new Generated.ParamGroupType()
                    {
                        cvParam = new Generated.CVParamType[1]
                    };
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].activation.cvParam[0] = new Generated.CVParamType();

                    DissociationType dissociationType = scanWithPrecursor.DissociationType;

                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].activation.cvParam[0].accession = DissociationTypeAccessions[dissociationType];
                    mzML.run.spectrumList.spectrum[i - 1].precursorList.precursor[0].activation.cvParam[0].name      = DissociationTypeNames[dissociationType];
                }

                mzML.run.spectrumList.spectrum[i - 1].cvParam[1] = new Generated.CVParamType()
                {
                    name      = "ms level",
                    accession = "MS:1000511",
                    value     = myMsDataFile.GetOneBasedScan(i).MsnOrder.ToString(CultureInfo.InvariantCulture)
                };
                mzML.run.spectrumList.spectrum[i - 1].cvParam[2] = new Generated.CVParamType()
                {
                    name      = CentroidNames[myMsDataFile.GetOneBasedScan(i).IsCentroid],
                    accession = CentroidAccessions[myMsDataFile.GetOneBasedScan(i).IsCentroid]
                };
                if (PolarityNames.TryGetValue(myMsDataFile.GetOneBasedScan(i).Polarity, out string polarityName) && PolarityAccessions.TryGetValue(myMsDataFile.GetOneBasedScan(i).Polarity, out string polarityAccession))
                {
                    mzML.run.spectrumList.spectrum[i - 1].cvParam[3] = new Generated.CVParamType()
                    {
                        name      = polarityName,
                        accession = polarityAccession
                    };
                }
                // Spectrum title
                mzML.run.spectrumList.spectrum[i - 1].cvParam[4] = new Generated.CVParamType()
                {
                    name      = "spectrum title",
                    accession = "MS:1000796",
                    value     = myMsDataFile.GetOneBasedScan(i).OneBasedScanNumber.ToString()
                };
                if ((myMsDataFile.GetOneBasedScan(i).MassSpectrum.Size) > 0)
                {
                    // Lowest observed mz
                    mzML.run.spectrumList.spectrum[i - 1].cvParam[5] = new Generated.CVParamType()
                    {
                        name      = "lowest observed m/z",
                        accession = "MS:1000528",
                        value     = myMsDataFile.GetOneBasedScan(i).MassSpectrum.FirstX.ToString(CultureInfo.InvariantCulture)
                    };

                    // Highest observed mz
                    mzML.run.spectrumList.spectrum[i - 1].cvParam[6] = new Generated.CVParamType()
                    {
                        name      = "highest observed m/z",
                        accession = "MS:1000527",
                        value     = myMsDataFile.GetOneBasedScan(i).MassSpectrum.LastX.ToString(CultureInfo.InvariantCulture)
                    };
                }

                // Total ion current
                mzML.run.spectrumList.spectrum[i - 1].cvParam[7] = new Generated.CVParamType()
                {
                    name      = "total ion current",
                    accession = "MS:1000285",
                    value     = myMsDataFile.GetOneBasedScan(i).TotalIonCurrent.ToString(CultureInfo.InvariantCulture)
                };

                // Retention time
                mzML.run.spectrumList.spectrum[i - 1].scanList = new Generated.ScanListType()
                {
                    count = "1",
                    scan  = new Generated.ScanType[1]
                };
                mzML.run.spectrumList.spectrum[i - 1].scanList.scan[0] = new Generated.ScanType()
                {
                    cvParam = new Generated.CVParamType[3]
                };
                mzML.run.spectrumList.spectrum[i - 1].scanList.scan[0].cvParam[0] = new Generated.CVParamType()
                {
                    name          = "scan start time",
                    accession     = "MS:1000016",
                    value         = myMsDataFile.GetOneBasedScan(i).RetentionTime.ToString(CultureInfo.InvariantCulture),
                    unitCvRef     = "UO",
                    unitAccession = "UO:0000031",
                    unitName      = "minute"
                };
                mzML.run.spectrumList.spectrum[i - 1].scanList.scan[0].cvParam[1] = new Generated.CVParamType()
                {
                    name      = "filter string",
                    accession = "MS:1000512",
                    value     = myMsDataFile.GetOneBasedScan(i).ScanFilter
                };
                if (myMsDataFile.GetOneBasedScan(i).InjectionTime.HasValue)
                {
                    mzML.run.spectrumList.spectrum[i - 1].scanList.scan[0].cvParam[2] = new Generated.CVParamType()
                    {
                        name      = "ion injection time",
                        accession = "MS:1000927",
                        value     = myMsDataFile.GetOneBasedScan(i).InjectionTime.Value.ToString(CultureInfo.InvariantCulture)
                    };
                }
                if (myMsDataFile.GetOneBasedScan(i) is IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> > )
                {
                    var scanWithPrecursor = myMsDataFile.GetOneBasedScan(i) as IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> >;
                    if (scanWithPrecursor.SelectedIonMonoisotopicGuessMz.HasValue)
                    {
                        mzML.run.spectrumList.spectrum[i - 1].scanList.scan[0].userParam    = new Generated.UserParamType[1];
                        mzML.run.spectrumList.spectrum[i - 1].scanList.scan[0].userParam[0] = new Generated.UserParamType()
                        {
                            name  = "[mzLib]Monoisotopic M/Z:",
                            value = scanWithPrecursor.SelectedIonMonoisotopicGuessMz.Value.ToString(CultureInfo.InvariantCulture)
                        };
                    }
                }

                mzML.run.spectrumList.spectrum[i - 1].scanList.scan[0].scanWindowList = new Generated.ScanWindowListType()
                {
                    count      = 1,
                    scanWindow = new Generated.ParamGroupType[1]
                };
                mzML.run.spectrumList.spectrum[i - 1].scanList.scan[0].scanWindowList.scanWindow[0] = new Generated.ParamGroupType()
                {
                    cvParam = new Generated.CVParamType[2]
                };
                mzML.run.spectrumList.spectrum[i - 1].scanList.scan[0].scanWindowList.scanWindow[0].cvParam[0] = new Generated.CVParamType()
                {
                    name      = "scan window lower limit",
                    accession = "MS:1000501",
                    value     = myMsDataFile.GetOneBasedScan(i).ScanWindowRange.Minimum.ToString(CultureInfo.InvariantCulture)
                };
                mzML.run.spectrumList.spectrum[i - 1].scanList.scan[0].scanWindowList.scanWindow[0].cvParam[1] = new Generated.CVParamType()
                {
                    name      = "scan window upper limit",
                    accession = "MS:1000500",
                    value     = myMsDataFile.GetOneBasedScan(i).ScanWindowRange.Maximum.ToString(CultureInfo.InvariantCulture)
                };
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList = new Generated.BinaryDataArrayListType()
                {
                    // ONLY WRITING M/Z AND INTENSITY DATA, NOT THE CHARGE! (but can add charge info later)
                    // CHARGE (and other stuff) CAN BE IMPORTANT IN ML APPLICATIONS!!!!!
                    count           = 2.ToString(),
                    binaryDataArray = new Generated.BinaryDataArrayType[5]
                };

                // M/Z Data
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[0] = new Generated.BinaryDataArrayType()
                {
                    binary = myMsDataFile.GetOneBasedScan(i).MassSpectrum.Get64BitXarray()
                };
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[0].encodedLength = (4 * Math.Ceiling(((double)mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[0].binary.Length / 3))).ToString(CultureInfo.InvariantCulture);
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[0].cvParam       = new Generated.CVParamType[3];
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[0].cvParam[0]    = new Generated.CVParamType()
                {
                    accession = "MS:1000514",
                    name      = "m/z array"
                };
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[0].cvParam[1] = new Generated.CVParamType()
                {
                    accession = "MS:1000523",
                    name      = "64-bit float"
                };
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[0].cvParam[2] = new Generated.CVParamType()
                {
                    accession = "MS:1000576",
                    name      = "no compression"
                };

                // Intensity Data
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[1] = new Generated.BinaryDataArrayType()
                {
                    binary = myMsDataFile.GetOneBasedScan(i).MassSpectrum.Get64BitYarray()
                };
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[1].encodedLength = (4 * Math.Ceiling(((double)mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[1].binary.Length / 3))).ToString(CultureInfo.InvariantCulture);
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[1].cvParam       = new Generated.CVParamType[3];
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[1].cvParam[0]    = new Generated.CVParamType()
                {
                    accession = "MS:1000515",
                    name      = "intensity array"
                };
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[1].cvParam[1] = new Generated.CVParamType()
                {
                    accession = "MS:1000523",
                    name      = "64-bit float"
                };
                mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[1].cvParam[2] = new Generated.CVParamType()
                {
                    accession = "MS:1000576",
                    name      = "no compression"
                };
                if (myMsDataFile.GetOneBasedScan(i).NoiseData != null)
                {
                    // mass
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[2] = new Generated.BinaryDataArrayType()
                    {
                        binary = myMsDataFile.GetOneBasedScan(i).Get64BitNoiseDataMass()
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[2].encodedLength = (4 * Math.Ceiling(((double)mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[2].binary.Length / 3))).ToString(CultureInfo.InvariantCulture);
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[2].cvParam       = new Generated.CVParamType[3];
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[2].cvParam[0]    = new Generated.CVParamType()
                    {
                        accession = "MS:1000786",
                        name      = "non-standard data array"
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[2].cvParam[1] = new Generated.CVParamType()
                    {
                        accession = "MS:1000523",
                        name      = "64-bit float"
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[2].cvParam[2] = new Generated.CVParamType()
                    {
                        accession = "MS:1000576",
                        name      = "no compression"
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[2].userParam    = new Generated.UserParamType[1];
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[2].userParam[0] = new Generated.UserParamType()
                    {
                        name  = "kelleherCustomType",
                        value = "noise m/z"
                    };

                    // noise
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[3] = new Generated.BinaryDataArrayType()
                    {
                        binary = myMsDataFile.GetOneBasedScan(i).Get64BitNoiseDataNoise()
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[3].encodedLength = (4 * Math.Ceiling(((double)mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[3].binary.Length / 3))).ToString(CultureInfo.InvariantCulture);
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[3].cvParam       = new Generated.CVParamType[3];
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[3].cvParam[0]    = new Generated.CVParamType()
                    {
                        accession = "MS:1000786",
                        name      = "non-standard data array"
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[3].cvParam[1] = new Generated.CVParamType()
                    {
                        accession = "MS:1000523",
                        name      = "64-bit float"
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[3].cvParam[2] = new Generated.CVParamType()
                    {
                        accession = "MS:1000576",
                        name      = "no compression"
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[3].userParam    = new Generated.UserParamType[1];
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[3].userParam[0] = new Generated.UserParamType()
                    {
                        name  = "kelleherCustomType",
                        value = "noise baseline"
                    };

                    // baseline
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[4] = new Generated.BinaryDataArrayType()
                    {
                        binary = myMsDataFile.GetOneBasedScan(i).Get64BitNoiseDataBaseline()
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[4].encodedLength = (4 * Math.Ceiling(((double)mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[4].binary.Length / 3))).ToString(CultureInfo.InvariantCulture);
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[4].cvParam       = new Generated.CVParamType[3];
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[4].cvParam[0]    = new Generated.CVParamType()
                    {
                        accession = "MS:1000786",
                        name      = "non-standard data array"
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[4].cvParam[1] = new Generated.CVParamType()
                    {
                        accession = "MS:1000523",
                        name      = "64-bit float"
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[4].cvParam[2] = new Generated.CVParamType()
                    {
                        accession = "MS:1000576",
                        name      = "no compression"
                    };
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[4].userParam    = new Generated.UserParamType[1];
                    mzML.run.spectrumList.spectrum[i - 1].binaryDataArrayList.binaryDataArray[4].userParam[0] = new Generated.UserParamType()
                    {
                        name  = "kelleherCustomType",
                        value = "noise intensity"
                    };
                }
            }

            if (writeIndexed)
            {
                throw new NotImplementedException("Writing indexed mzMLs not yet supported");
            }
            else
            {
                using (TextWriter writer = new StreamWriter(outputFile))
                {
                    mzmlSerializer.Serialize(writer, mzML);
                }
            }
        }
        protected override MetaMorpheusEngineResults RunSpecific()
        {
            Status("Extracting data points:");
            // The final training point list

            int numMs1MassChargeCombinationsConsidered = 0;
            int numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            int numMs2MassChargeCombinationsConsidered = 0;
            int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
            List <LabeledMs1DataPoint> Ms1List = new List <LabeledMs1DataPoint>();
            List <LabeledMs2DataPoint> Ms2List = new List <LabeledMs2DataPoint>();

            int numIdentifications = goodIdentifications.Count;

            // Loop over identifications

            HashSet <string> sequences = new HashSet <string>();

            object lockObj  = new object();
            object lockObj2 = new object();

            Parallel.ForEach(Partitioner.Create(0, numIdentifications), fff =>
            {
                for (int matchIndex = fff.Item1; matchIndex < fff.Item2; matchIndex++)
                {
                    PeptideSpectralMatch identification = goodIdentifications[matchIndex];

                    // Each identification has an MS2 spectrum attached to it.
                    int ms2scanNumber = identification.ScanNumber;
                    int peptideCharge = identification.ScanPrecursorCharge;
                    if (identification.FullSequence == null)
                    {
                        continue;
                    }

                    var representativeSinglePeptide = identification.CompactPeptides.First().Value.Item2.First();

                    // Get the peptide, don't forget to add the modifications!!!!
                    var SequenceWithChemicalFormulas = representativeSinglePeptide.SequenceWithChemicalFormulas;
                    if (SequenceWithChemicalFormulas == null || representativeSinglePeptide.allModsOneIsNterminus.Any(b => b.Value.neutralLosses.Count != 1 || b.Value.neutralLosses.First() != 0))
                    {
                        continue;
                    }
                    Proteomics.Peptide coolPeptide = new Proteomics.Peptide(SequenceWithChemicalFormulas);

                    var ms2tuple = SearchMS2Spectrum(myMsDataFile.GetOneBasedScan(ms2scanNumber) as IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> >, coolPeptide, peptideCharge, identification);

                    // If MS2 has low evidence for peptide, skip and go to next one
                    if (ms2tuple.Item4 < numFragmentsNeededForEveryIdentification)
                    {
                        continue;
                    }

                    lock (lockObj2)
                    {
                        Ms2List.AddRange(ms2tuple.Item1);
                        numMs2MassChargeCombinationsConsidered += ms2tuple.Item2;
                        numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms2tuple.Item3;
                        if (sequences.Contains(identification.FullSequence))
                        {
                            continue; // Do not search same sequence multiple times in MS1 scans
                        }
                        sequences.Add(identification.FullSequence);
                    }

                    // Calculate isotopic distribution of the full peptide
                    var dist = IsotopicDistribution.GetDistribution(coolPeptide.GetChemicalFormula(), fineResolutionForIsotopeDistCalculation, 0.001);

                    double[] theoreticalMasses      = dist.Masses.ToArray();
                    double[] theoreticalIntensities = dist.Intensities.ToArray();

                    Array.Sort(theoreticalIntensities, theoreticalMasses, Comparer <double> .Create((x, y) => y.CompareTo(x)));

                    var ms1tupleBack = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, -1, peptideCharge, identification);

                    var ms1tupleForward = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, 1, peptideCharge, identification);

                    lock (lockObj)
                    {
                        Ms1List.AddRange(ms1tupleBack.Item1);
                        numMs1MassChargeCombinationsConsidered += ms1tupleBack.Item2;
                        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleBack.Item3;
                        Ms1List.AddRange(ms1tupleForward.Item1);
                        numMs1MassChargeCombinationsConsidered += ms1tupleForward.Item2;
                        numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleForward.Item3;
                    }
                }
            });

            return(new DataPointAquisitionResults(this,
                                                  Ms1List,
                                                  Ms2List,
                                                  numMs1MassChargeCombinationsConsidered,
                                                  numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks,
                                                  numMs2MassChargeCombinationsConsidered,
                                                  numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks
                                                  ));
        }
Beispiel #7
0
        private (List <PeptideSpectralMatch>, DataPointAquisitionResults) GetDataAcquisitionResults(IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile, string currentDataFile, List <ModificationWithMass> variableModifications, List <ModificationWithMass> fixedModifications, List <Protein> proteinList, string taskId, ICommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
        {
            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);
            MassDiffAcceptor searchMode;

            if (initPrecTol is PpmTolerance)
            {
                searchMode = new SinglePpmAroundZeroSearchMode(initPrecTol.Value);
            }
            else
            {
                searchMode = new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);
            }

            FragmentTypes fragmentTypesForCalibration = FragmentTypes.None;

            if (combinedParameters.BIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.b;
            }
            if (combinedParameters.YIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.y;
            }
            if (combinedParameters.CIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.c;
            }
            if (combinedParameters.ZdotIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.zdot;
            }

            var listOfSortedms2Scans = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters.DoPrecursorDeconvolution, combinedParameters.UseProvidedPrecursorInfo, combinedParameters.DeconvolutionIntensityRatio, combinedParameters.DeconvolutionMaxAssumedChargeState, combinedParameters.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            List <ProductType> lp = new List <ProductType>();

            if (combinedParameters.BIons)
            {
                lp.Add(ProductType.B);
            }
            if (combinedParameters.YIons)
            {
                lp.Add(ProductType.Y);
            }
            if (combinedParameters.CIons)
            {
                lp.Add(ProductType.C);
            }
            if (combinedParameters.ZdotIons)
            {
                lp.Add(ProductType.Zdot);
            }

            Log("Searching with searchMode: " + searchMode, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });
            Log("Searching with productMassTolerance: " + initProdTol, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, lp, searchMode, false, combinedParameters, initProdTol, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> allPsms = allPsmsArray.ToList();

            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatching = ((SequencesToActualProteinPeptidesEngineResults) new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, lp, new List <IDigestionParams> {
                combinedParameters.DigestionParams
            }, combinedParameters.ReportAllAmbiguity, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run()).CompactPeptideToProteinPeptideMatching;

            foreach (var huh in allPsms)
            {
                if (huh != null)
                {
                    huh.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
                }
            }

            allPsms = allPsms.Where(b => b != null).OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, searchMode.NumNotches, false, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> goodIdentifications = allPsms.Where(b => b.FdrInfo.QValueNotch < 0.01 && !b.IsDecoy && b.FullSequence != null).ToList();

            if (!goodIdentifications.Any())
            {
                Warn("No PSMs below 1% FDR observed!");
                return(new List <PeptideSpectralMatch>(), null);
            }

            var dissociationTypes = MetaMorpheusEngine.DetermineDissociationType(lp);

            foreach (var psm in allPsms)
            {
                var    theScan          = myMsDataFile.GetOneBasedScan(psm.ScanNumber);
                double thePrecursorMass = psm.ScanPrecursorMass;

                foreach (var huh in lp)
                {
                    var ionMasses = psm.CompactPeptides.First().Key.ProductMassesMightHaveDuplicatesAndNaNs(new List <ProductType> {
                        huh
                    });
                    Array.Sort(ionMasses);
                    List <double> matchedIonMassesList    = new List <double>();
                    List <double> productMassErrorDaList  = new List <double>();
                    List <double> productMassErrorPpmList = new List <double>();
                    LocalizationEngine.MatchIons(theScan, initProdTol, ionMasses, matchedIonMassesList, productMassErrorDaList, productMassErrorPpmList, thePrecursorMass, dissociationTypes, false);
                    double[] matchedIonMassesOnlyMatches = matchedIonMassesList.ToArray();
                    psm.MatchedIonDictOnlyMatches.Add(huh, matchedIonMassesOnlyMatches);
                    psm.ProductMassErrorDa.Add(huh, productMassErrorDaList.ToArray());
                    psm.ProductMassErrorPpm.Add(huh, productMassErrorPpmList.ToArray());
                }
            }

            DataPointAquisitionResults currentResult = (DataPointAquisitionResults) new DataPointAcquisitionEngine(
                goodIdentifications,
                myMsDataFile,
                initPrecTol,
                initProdTol,
                CalibrationParameters.NumFragmentsNeededForEveryIdentification,
                CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
                CalibrationParameters.MinMS2IsotopicPeaksNeededForConfirmedIdentification,
                fragmentTypesForCalibration,
                new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            return(goodIdentifications, currentResult);
        }