/// <summary>
/// Creates a scorer bound to one MS/MS spectrum and picks the base ion types
/// that correspond to the spectrum's activation method.
/// </summary>
/// <param name="ms2Spec">Product spectrum the scorer works on.</param>
/// <param name="tolerance">Tolerance kept for later use by the scorer.</param>
/// <param name="minCharge">Minimum charge kept for later use by the scorer.</param>
/// <param name="maxCharge">Maximum charge kept for later use by the scorer.</param>
public FitBasedLogLikelihoodRatioScorer(ProductSpectrum ms2Spec, Tolerance tolerance, int minCharge, int maxCharge)
{
    _ms2Spec = ms2Spec;
    _tolerance = tolerance;
    _minCharge = minCharge;
    _maxCharge = maxCharge;

    // Only ETD spectra use the ETD ion types; every other activation method falls back to the CID set.
    _baseIonTypes = ms2Spec.ActivationMethod == ActivationMethod.ETD
        ? BaseIonTypesETD
        : BaseIonTypesCID;
}
/// <summary>
/// Creates a likelihood scorer for one MS/MS spectrum.
/// </summary>
/// <param name="model">
/// Scoring model. NOTE(review): this constructor does not store or read it
/// (the assignment is disabled) - confirm whether that is intentional.
/// </param>
/// <param name="ms2Spec">Product spectrum; forwarded to the base constructor and used to derive the reference intensity.</param>
/// <param name="tolerance">Forwarded to the base constructor.</param>
/// <param name="minCharge">Forwarded to the base constructor.</param>
/// <param name="maxCharge">Forwarded to the base constructor.</param>
/// <param name="massErrorScore">Stored in <c>_includeMassErrorScore</c>.</param>
public LikelihoodScorer(
    LikelihoodScoringModel model,
    ProductSpectrum ms2Spec,
    Tolerance tolerance,
    int minCharge,
    int maxCharge,
    bool massErrorScore = true)
    : base(ms2Spec, tolerance, minCharge, maxCharge, 0.1) // 0.1: presumably the relative isotope intensity threshold - TODO confirm against the base class
{
    // The reference intensity is delegated entirely to GetRefIntensity.
    var referenceIntensity = GetRefIntensity(ms2Spec.Peaks);
    _refIntensity = referenceIntensity;

    _includeMassErrorScore = massErrorScore;
}
/// <summary>
/// Walks every internal cleavage of <paramref name="sequence"/>, and for each base ion type and
/// charge looks up the isotope peaks in <paramref name="spectrum"/>, then computes the
/// representative peak intensity, the Pearson correlation against the theoretical envelope and
/// the m/z error (ppm) of the most abundant isotope.
/// </summary>
/// <remarks>
/// NOTE(review): none of the computed values is written or returned by this method; they are
/// still evaluated here so that the calls (and any exception they raise) are unchanged.
/// </remarks>
/// <param name="spectrum">Product spectrum to search for fragment peaks.</param>
/// <param name="sequence">Sequence whose internal cleavages are examined.</param>
public void OutputStatistics(ProductSpectrum spectrum, Sequence sequence)
{
    var ionTypes = spectrum.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCid : BaseIonTypesEtd;
    var cleavageArr = sequence.GetInternalCleavages().ToArray();
    var peakTolerance = new Tolerance(10);
    var highestIntensity = spectrum.Peaks.Max(p => p.Intensity);

    foreach (var cleavage in cleavageArr)
    {
        foreach (var ionType in ionTypes)
        {
            var fragmentComposition = ionType.IsPrefix
                ? cleavage.PrefixComposition + ionType.OffsetComposition
                : cleavage.SuffixComposition + ionType.OffsetComposition;

            for (var z = MinCharge; z <= MaxCharge; z++)
            {
                var fragmentIon = new Ion(fragmentComposition, z);
                var matchedPeaks = spectrum.GetAllIsotopePeaks(fragmentIon, peakTolerance, RelativeIsotopeIntensityThreshold);
                if (matchedPeaks == null)
                {
                    continue;
                }

                var apexIndex = fragmentIon.Composition.GetMostAbundantIsotopeZeroBasedIndex();

                // Representative peak intensity
                var apexIntensity = matchedPeaks[apexIndex].Intensity;

                // Correlation between the theoretical envelope and the observed intensities
                // (isotopes without an observed peak count as 0).
                var theoreticalEnvelope = fragmentIon.Composition.GetIsotopomerEnvelopeRelativeIntensities();
                var observed = new double[matchedPeaks.Length];
                var slot = 0;
                foreach (var peak in matchedPeaks)
                {
                    observed[slot] = peak != null ? (float)peak.Intensity : 0.0;
                    slot++;
                }
                var pearson = FitScoreCalculator.GetPearsonCorrelation(theoreticalEnvelope, observed);

                // m/z error of the most abundant isotope, in ppm
                var theoreticalApexMz = fragmentIon.GetIsotopeMz(apexIndex);
                var ppmError = ((matchedPeaks[apexIndex].Mz - theoreticalApexMz) / theoreticalApexMz) * 1e6;
            }
        }
    }
}
/// <summary>
/// Sums the fragment score of every prefix/suffix composition pair of
/// <paramref name="sequence"/> using a <c>CorrMatchedPeakCounter</c> built with a
/// <c>Tolerance(10)</c> and charges 1 to 20.
/// </summary>
/// <remarks>
/// Prints "found zero" to the console for each prefix composition equal to <c>Composition.Zero</c>.
/// Prefix and suffix compositions are paired by index, so both enumerations are assumed to
/// have the same length.
/// </remarks>
/// <param name="sequence">Sequence providing the prefix and suffix compositions.</param>
/// <param name="spectrum">Product spectrum the counter is built on.</param>
/// <returns>The summed fragment score.</returns>
public double GetScoreTest(Sequence sequence, ProductSpectrum spectrum)
{
    var fragmentTolerance = new Tolerance(10);
    var peakCounter = new CorrMatchedPeakCounter(spectrum, fragmentTolerance, 1, 20);

    var prefixes = sequence.GetPrefixCompositions().ToArray();
    foreach (var prefix in prefixes)
    {
        if (prefix.Equals(Composition.Zero))
        {
            Console.WriteLine("found zero");
        }
    }

    var suffixes = sequence.GetSuffixCompositions().ToArray();

    var total = 0d;
    for (var index = 0; index < prefixes.Length; index++)
    {
        total += peakCounter.GetFragmentScore(prefixes[index], suffixes[index]);
    }

    return total;
}
/// <summary>
/// Creates a tag match finder. Every argument is stored unchanged in the matching private field.
/// </summary>
/// <param name="spec">Product spectrum.</param>
/// <param name="ms2Scorer">Scorer for MS/MS fragments.</param>
/// <param name="featureFinder">LC-MS peak matrix.</param>
/// <param name="proteinSequence">Protein sequence text.</param>
/// <param name="tolerance">Tolerance.</param>
/// <param name="aaSet">Amino acid set.</param>
/// <param name="maxSequenceMass">Maximum sequence mass.</param>
public TagMatchFinder(
    ProductSpectrum spec,
    IScorer ms2Scorer,
    LcMsPeakMatrix featureFinder,
    string proteinSequence,
    Tolerance tolerance,
    AminoAcidSet aaSet,
    double maxSequenceMass)
{
    // Spectrum and scoring inputs
    _spec = spec;
    _ms2Scorer = ms2Scorer;
    _featureFinder = featureFinder;

    // Sequence and search settings
    _proteinSequence = proteinSequence;
    _aaSet = aaSet;
    _tolerance = tolerance;
    _maxSequenceMass = maxSequenceMass;
}
/// <summary>
/// Creates a scorer over a deconvoluted spectrum and registers every deconvoluted peak in each
/// mass bin its tolerance window touches.
/// </summary>
/// <param name="deconvolutedSpectrum">Deconvoluted spectrum; forwarded to the base constructor and iterated to fill the bin map.</param>
/// <param name="spec">Original product spectrum; only its peaks are used, to derive <c>ReferencePeakIntensity</c>.</param>
/// <param name="productTolerance">Tolerance used to compute the window around each peak.</param>
/// <param name="comparer">Mass binning used to translate masses into bin numbers.</param>
public CompositeScorerBasedOnDeconvolutedSpectrum(DeconvolutedSpectrum deconvolutedSpectrum, ProductSpectrum spec, Tolerance productTolerance, IMassBinning comparer)
    : base(deconvolutedSpectrum, productTolerance)
{
    ReferencePeakIntensity = GetRefIntensity(spec.Peaks);
    _comparer = comparer;
    _massBinToPeakMap = new Dictionary<int, DeconvolutedPeak>();

    foreach (var p in deconvolutedSpectrum.Peaks)
    {
        // The peak's Mz value is used as its mass here.
        var mass = p.Mz;
        var deltaMass = productTolerance.GetToleranceAsDa(mass, 1);
        var minMass = mass - deltaMass;
        var maxMass = mass + deltaMass;

        // A negative bin number means "no bin"; fall back to the window edges before giving up.
        var binNum = comparer.GetBinNumber(mass);
        if (binNum < 0)
        {
            binNum = comparer.GetBinNumber(minMass);
            if (binNum < 0)
            {
                binNum = comparer.GetBinNumber(maxMass);
            }
        }

        // Filter out peaks that do not fall into any bin.
        if (binNum < 0)
        {
            continue;
        }

        var deconvPeak = p as DeconvolutedPeak;
        UpdateDeconvPeak(binNum, deconvPeak);

        // Going up: register the peak in higher bins while their start mass lies inside the window.
        for (var nextBinNum = binNum + 1; nextBinNum < comparer.NumberOfBins; nextBinNum++)
        {
            var nextBinMass = comparer.GetMassStart(nextBinNum);
            if (minMass < nextBinMass && nextBinMass < maxMass)
            {
                UpdateDeconvPeak(nextBinNum, deconvPeak);
            }
            else
            {
                break;
            }
        }

        // Going down: register the peak in lower bins while their end mass lies inside the window.
        // The loop must stop at bin 0; the previous upper-bound test never terminated the
        // descent and could query bin -1.
        for (var prevBinNum = binNum - 1; prevBinNum >= 0; prevBinNum--)
        {
            var prevBinMass = comparer.GetMassEnd(prevBinNum);
            if (minMass < prevBinMass && prevBinMass < maxMass)
            {
                UpdateDeconvPeak(prevBinNum, deconvPeak);
            }
            else
            {
                break;
            }
        }
    }
}
/// <summary>
/// Computes the rank-sum score of <paramref name="sequence"/> against <paramref name="ms2Spec"/>.
/// </summary>
/// <remarks>
/// Stores the spectrum, the sequence and the activation-dependent base ion types in instance
/// fields, so the scorer's state is overwritten on every call. Peak rankings are cached in
/// <c>_rankingInfo</c>, keyed by scan number.
/// </remarks>
/// <param name="ms2Spec">Product spectrum to score against.</param>
/// <param name="sequence">Sequence to score.</param>
/// <returns>The value returned by <c>GetRankSumScore</c>.</returns>
public double ComputeScore(ProductSpectrum ms2Spec, Sequence sequence)
{
    _baseIonTypes = ms2Spec.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCID : BaseIonTypesETD;
    _sequence = sequence;
    _ms2Spec = ms2Spec;

    // Reuse the rankings of a scan that was already seen; otherwise compute and remember them.
    var rankingIsCached = _rankingInfo.TryGetValue(ms2Spec.ScanNum, out _peakRanking);
    if (!rankingIsCached)
    {
        var intensities = ms2Spec.Peaks.Select(peak => peak.Intensity);
        _peakRanking = ArrayUtil.GetRankings(intensities);
        _rankingInfo.Add(ms2Spec.ScanNum, _peakRanking);
    }

    FindMatchedPeaks();
    return GetRankSumScore();
}
/// <summary>
/// Scores <paramref name="seqStr"/> against <paramref name="spec"/>: finds the modification
/// combination whose composition (with H2O) equals <paramref name="composition"/> and has the
/// highest fragment score, then computes the composite scores for that proteoform.
/// </summary>
/// <param name="spec">Product spectrum; <c>null</c> yields <c>null</c>.</param>
/// <param name="seqStr">Sequence text used to build the sequence graph.</param>
/// <param name="composition">Composition a candidate must match exactly.</param>
/// <param name="charge">Precursor charge; also caps the maximum product charge.</param>
/// <param name="ms2ScanNum">Scan number passed to <c>GetCompositeScores</c>.</param>
/// <returns>
/// The scores of the best candidate, or <c>null</c> when the spectrum is missing, the graph
/// cannot be built, or no candidate matches the composition with a score above negative infinity.
/// </returns>
public IcScores GetScores(ProductSpectrum spec, string seqStr, Composition composition, int charge, int ms2ScanNum)
{
    if (spec == null)
    {
        return null;
    }

    var maxProductCharge = Math.Min(MaxProductCharge, charge);
    var scorer = new CompositeScorer(spec, Tolerance, MinProductCharge, maxProductCharge);

    var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm);
    if (seqGraph == null)
    {
        return null;
    }

    Tuple<double, string> best = null;
    var bestScore = double.NegativeInfinity;

    var protCompositions = seqGraph.GetSequenceCompositions();
    for (var modIndex = 0; modIndex < protCompositions.Length; modIndex++)
    {
        seqGraph.SetSink(modIndex);

        // Only modification combinations that reproduce the requested composition are scored.
        var sinkComposition = seqGraph.GetSinkSequenceCompositionWithH2O();
        if (sinkComposition.Equals(composition))
        {
            var candidate = seqGraph.GetFragmentScoreAndModifications(scorer);
            if (candidate.Item1 > bestScore)
            {
                best = candidate;
                bestScore = candidate.Item1;
            }
        }
    }

    if (best == null)
    {
        return null;
    }

    var modifications = best.Item2;
    var seqObj = Sequence.CreateSequence(seqStr, modifications, AminoAcidSet);

    double score;
    int nMatchedFragments;
    GetCompositeScores(seqObj, charge, ms2ScanNum, out score, out nMatchedFragments);

    return new IcScores(nMatchedFragments, score, modifications);
}
/// <summary>
/// Handle a single spectrum element and child nodes
/// Called by ReadSpectrumList (xml hierarchy)
/// </summary>
/// <param name="reader">XmlReader that is only valid for the scope of the single spectrum element</param>
/// <param name="includePeaks">Whether to read binary data arrays</param>
/// <returns>A ProductSpectrum when the element carries the "MSn spectrum" term, otherwise a plain Spectrum</returns>
private Spectrum ReadSpectrum(XmlReader reader, bool includePeaks = true)
{
    // Numeric attribute values in the XML use '.' as the decimal separator, so they must be
    // parsed with the invariant culture instead of the thread's current culture.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    reader.MoveToContent();

    // This is correct for Thermo files converted by msConvert, but need to implement for others as well
    string spectrumId = reader.GetAttribute("id"); // Native ID in mzML_1.1.0; unique identifier in mzML_1.0.0, often same as nativeID
    string nativeId = spectrumId;
    if (_version == MzML_Version.mzML1_0_0)
    {
        nativeId = reader.GetAttribute("nativeID"); // Native ID in mzML_1.0.0
    }

    int scanNum = -1;
    // If a random access reader, there is already a scan number stored, based on the order of the index. Use it instead.
    if (_randomAccess)
    {
        scanNum = (int)(_spectrumOffsets.NativeToIdMap[nativeId]);
    }
    else
    {
        scanNum = (int)(_artificialScanNum++);
        // Interpret the NativeID (if the format has an interpreter) and use it instead of the artificial number.
        // TODO: Better handling than the artificial ID for other nativeIDs (ones currently not supported)
        int num = 0;
        if (NativeIdConversion.TryGetScanNumberInt(nativeId, out num))
        {
            scanNum = num;
        }
    }

    // A missing attribute (null) converts to 0, exactly as with the culture-less overload.
    int defaultArraySize = Convert.ToInt32(reader.GetAttribute("defaultArrayLength"), invariantCulture);
    reader.ReadStartElement("spectrum"); // Throws exception if we are not at the "spectrum" tag.

    bool is_ms_ms = false;
    int msLevel = 0;
    bool centroided = false;
    double tic = 0;
    List<Precursor> precursors = new List<Precursor>();
    List<ScanData> scans = new List<ScanData>();
    List<BinaryDataArray> bdas = new List<BinaryDataArray>();

    while (reader.ReadState == ReadState.Interactive)
    {
        // Handle exiting out properly at EndElement tags
        if (reader.NodeType != XmlNodeType.Element)
        {
            reader.Read();
            continue;
        }

        // MS1 Spectra: only need Spectrum data: scanNum, MSLevel, ElutionTime, mzArray, IntensityArray
        // MS2 Spectra: use ProductSpectrum; adds ActivationMethod and IsolationWindow
        switch (reader.Name)
        {
            case "referenceableParamGroupRef":
                // Schema requirements: zero to many instances of this element
                reader.Skip();
                break;
            case "cvParam":
                // Schema requirements: zero to many instances of this element
                /* MAY supply a *child* term of MS:1000465 (scan polarity) only once
                 * MUST supply a *child* term of MS:1000559 (spectrum type) only once
                 *   e.g.: MS:1000579 (MS1 spectrum)
                 *   e.g.: MS:1000580 (MSn spectrum)
                 *   et al.
                 * MUST supply term MS:1000525 (spectrum representation) or any of its children only once
                 *   e.g.: MS:1000127 (centroid spectrum)
                 *   e.g.: MS:1000128 (profile spectrum)
                 * MAY supply a *child* term of MS:1000499 (spectrum attribute) one or more times
                 *   e.g.: MS:1000285 (total ion current)
                 *   e.g.: MS:1000511 (ms level)
                 *   et al.
                 * Only the terms handled below are interpreted; all others are ignored.
                 */
                switch (reader.GetAttribute("accession"))
                {
                    case "MS:1000127":
                        // name="centroid spectrum"
                        centroided = true;
                        break;
                    case "MS:1000128":
                        // name="profile spectrum"
                        centroided = false;
                        break;
                    case "MS:1000511":
                        // name="ms level"
                        msLevel = Convert.ToInt32(reader.GetAttribute("value"), invariantCulture);
                        break;
                    case "MS:1000579":
                        // name="MS1 spectrum"
                        is_ms_ms = false;
                        break;
                    case "MS:1000580":
                        // name="MSn spectrum"
                        is_ms_ms = true;
                        break;
                    case "MS:1000285":
                        // name="total ion current"
                        tic = Convert.ToDouble(reader.GetAttribute("value"), invariantCulture);
                        break;
                }
                reader.Read(); // Consume the cvParam element (no child nodes)
                break;
            case "userParam":
                // Schema requirements: zero to many instances of this element
                reader.Skip();
                break;
            case "spectrumDescription":
                // mzML_1.0.0 compatibility
                // Schema requirements: one instance of this element
                ReadSpectrumDescription(reader.ReadSubtree(), ref scans, ref precursors, out centroided);
                reader.ReadEndElement(); // "spectrumDescription" must have child nodes
                break;
            case "scanList":
                // Schema requirements: zero to one instances of this element
                scans.AddRange(ReadScanList(reader.ReadSubtree()));
                reader.ReadEndElement(); // "scanList" must have child nodes
                break;
            case "precursorList":
                // Schema requirements: zero to one instances of this element
                precursors.AddRange(ReadPrecursorList(reader.ReadSubtree()));
                reader.ReadEndElement(); // "precursorList" must have child nodes
                break;
            case "productList":
                // Schema requirements: zero to one instances of this element
                reader.Skip();
                break;
            case "binaryDataArrayList":
                // Schema requirements: zero to one instances of this element
                if (includePeaks)
                {
                    bdas.AddRange(ReadBinaryDataArrayList(reader.ReadSubtree(), defaultArraySize));
                    reader.ReadEndElement(); // "binaryDataArrayList" must have child nodes
                }
                else
                {
                    reader.Skip();
                }
                break;
            default:
                reader.Skip();
                break;
        }
    }
    reader.Close();

    // Process the spectrum data
    ScanData scan = new ScanData();
    Spectrum spectrum;
    BinaryDataArray mzs = new BinaryDataArray();
    BinaryDataArray intensities = new BinaryDataArray();
    foreach (var bda in bdas)
    {
        if (bda.ArrayType == ArrayType.m_z_array)
        {
            mzs = bda;
        }
        else if (bda.ArrayType == ArrayType.intensity_array)
        {
            intensities = bda;
        }
    }

    if (!centroided && includePeaks)
    {
        // Centroid the spectrum
        var centroider = new Centroider(mzs.Data, intensities.Data);
        double[] centroidedMzs, centroidedIntensities;
        centroider.GetCentroidedData(out centroidedMzs, out centroidedIntensities);
        mzs.Data = centroidedMzs;
        intensities.Data = centroidedIntensities;
    }

    if (scans.Count >= 1)
    {
        // TODO: Should do something else to appropriately handle combinations (more than one scan)...
        scan = scans[0];
    }

    if (is_ms_ms)
    {
        Precursor precursor = new Precursor();
        if (precursors.Count >= 1)
        {
            // TODO: Should do something else to appropriately handle multiple precursors...
            precursor = precursors[0];
        }

        SelectedIon ion = new SelectedIon();
        if (precursor.Ions.Count >= 1)
        {
            // TODO: Should do something else to appropriately handle multiple selected ions...
            ion = precursor.Ions[0];
        }

        var pspectrum = new ProductSpectrum(mzs.Data, intensities.Data, scanNum);
        pspectrum.ActivationMethod = precursor.Activation;
        // Select mz value to use based on presence of a Thermo-specific user param.
        // The user param has a slightly higher precision, if that matters.
        double mz = scan.MonoisotopicMz == 0.0 ? ion.SelectedIonMz : scan.MonoisotopicMz;
        pspectrum.IsolationWindow = new IsolationWindow(precursor.IsolationWindowTargetMz, precursor.IsolationWindowLowerOffset, precursor.IsolationWindowUpperOffset, mz, ion.Charge);
        spectrum = pspectrum;
    }
    else
    {
        spectrum = new Spectrum(mzs.Data, intensities.Data, scanNum);
    }

    spectrum.MsLevel = msLevel;
    spectrum.ElutionTime = scan.StartTime;
    spectrum.NativeId = nativeId;
    spectrum.TotalIonCurrent = tic;
    return spectrum;
}
/// <summary>
/// For every internal cleavage and base ion type of <paramref name="sequence"/>, finds the
/// best-matching (highest relative summed intensity) fragment ion over charges 1 to 20 in
/// <paramref name="ms2Spec"/> and writes one tab-separated line per matched fragment.
/// </summary>
/// <remarks>
/// Columns written: activation flag (1 = ETD, 2 = anything else), peak type (1 = prefix,
/// 2 = suffix), theoretical fragment mass, observed charge, distance, correlation, relative
/// intensity and mass error in ppm. After all cleavages are processed, the number of cleavages
/// for which every base ion type was observed and the sequence length are printed to the console.
/// Does nothing when the spectrum or the sequence is <c>null</c>.
/// </remarks>
/// <param name="isDecoy">Not used by this method.</param>
/// <param name="ms2Spec">Product spectrum to search.</param>
/// <param name="sequence">Sequence whose fragments are looked up.</param>
/// <param name="writer">Destination of the per-fragment statistics lines.</param>
private void GetNodeStatistics(bool isDecoy, ProductSpectrum ms2Spec, Sequence sequence, StreamWriter writer)
{
    if (ms2Spec == null) return;
    if (sequence == null) return;

    var baseIonTypesCid = new[] { BaseIonType.B, BaseIonType.Y };
    var baseIonTypesEtd = new[] { BaseIonType.C, BaseIonType.Z };
    var tolerance = new Tolerance(15);
    var minCharge = 1;
    var maxCharge = 20;
    var baseIonTypes = ms2Spec.ActivationMethod != ActivationMethod.ETD ? baseIonTypesCid : baseIonTypesEtd;

    // Intensities are reported relative to the most intense peak of the spectrum.
    var refIntensity = ms2Spec.Peaks.Max(p => p.Intensity);
    var activationMethodFlag = ms2Spec.ActivationMethod == ActivationMethod.ETD ? 1 : 2;
    var cleavages = sequence.GetInternalCleavages();

    var nComplementaryFrags = 0;

    foreach (var c in cleavages)
    {
        var bothObs = true;

        foreach (var baseIonType in baseIonTypes)
        {
            var peakType = baseIonType.IsPrefix ? 1 : 2;
            var fragmentComposition = baseIonType.IsPrefix
                ? c.PrefixComposition + baseIonType.OffsetComposition
                : c.SuffixComposition + baseIonType.OffsetComposition;

            var curFragMass = fragmentComposition.Mass;

            var curObsIonMass = 0d;
            var curObsIonCharge = 0;
            var curObsIonDist = 1.0d;
            var curObsIonCorr = 0d;
            var curObsIonIntensity = 0d;

            var ionMatch = false;
            for (var charge = minCharge; charge <= maxCharge; charge++)
            {
                var ion = new Ion(fragmentComposition, charge);

                var isotopePeaks = ms2Spec.GetAllIsotopePeaks(ion, tolerance, 0.1);
                if (isotopePeaks == null) continue;

                // Reject envelopes that are both poorly correlated and far from the theoretical shape.
                var distCorr = AbstractFragmentScorer.GetDistCorr(ion, isotopePeaks);
                if (distCorr.Item2 < 0.7 && distCorr.Item1 > 0.07) continue;

                var mostAbundantIsotopeIndex = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
                var mostAbuPeak = isotopePeaks[mostAbundantIsotopeIndex];

                var summedIntensity = isotopePeaks.Where(p => p != null).Sum(p => p.Intensity);
                var intScore = summedIntensity / refIntensity;

                // Keep the charge state with the highest relative intensity.
                if (ionMatch == false || curObsIonIntensity < intScore)
                {
                    curObsIonMass = Ion.GetMonoIsotopicMass(mostAbuPeak.Mz, charge, mostAbundantIsotopeIndex);
                    curObsIonCharge = charge;
                    curObsIonCorr = distCorr.Item2;
                    curObsIonDist = distCorr.Item1;
                    curObsIonIntensity = intScore;
                }
                ionMatch = true;
            }

            if (!ionMatch)
            {
                bothObs = false;
                continue;
            }

            writer.Write(activationMethodFlag);
            writer.Write("\t");
            writer.Write(peakType);
            writer.Write("\t");
            writer.Write("{0:0.000}", curFragMass);
            writer.Write("\t");
            writer.Write("{0}", curObsIonCharge);
            writer.Write("\t");
            writer.Write("{0:0.000}", curObsIonDist);
            writer.Write("\t");
            writer.Write("{0:0.000}", curObsIonCorr);
            writer.Write("\t");
            writer.Write("{0:0.000}", curObsIonIntensity);
            writer.Write("\t");
            writer.Write("{0:0.000}", (Math.Abs(curFragMass - curObsIonMass) / curFragMass) * 1e6);
            writer.Write("\n");
        }

        if (bothObs)
        {
            nComplementaryFrags++;
        }
    }

    Console.WriteLine("{0}\t{1}", nComplementaryFrags, sequence.Count);
}
/// <summary>
/// Accumulates match statistics of all prefix and suffix fragment ions of
/// <paramref name="sequence"/> found in <paramref name="ms2Spec"/> and writes them as a single
/// tab-separated line.
/// </summary>
/// <remarks>
/// Columns written: activation flag (2 = ETD, 1 = anything else), sequence mass, sequence
/// length, number of cleavages with both a prefix and a suffix hit, then for prefix and suffix
/// ions each: match count, number of adjacent-cleavage hit pairs, summed capped intensity,
/// summed correlation, summed distance and summed mass error (ppm).
/// Does nothing when the spectrum or the sequence is <c>null</c>.
/// </remarks>
/// <param name="ms2Spec">Product spectrum to search.</param>
/// <param name="sequence">Sequence whose fragments are looked up.</param>
/// <param name="parentIonCharge">Precursor charge; the maximum product charge is min(parentIonCharge + 2, 20).</param>
/// <param name="writer">Destination of the statistics line.</param>
private void GetMatchStatistics(ProductSpectrum ms2Spec, Sequence sequence, int parentIonCharge, StreamWriter writer)
{
    if (ms2Spec == null || sequence == null)
    {
        return;
    }

    var cidIonTypes = new[] { BaseIonType.B, BaseIonType.Y };
    var etdIonTypes = new[] { BaseIonType.C, BaseIonType.Z };
    var tolerance = new Tolerance(12);
    var minProductCharge = 1;
    var maxProductCharge = Math.Min(parentIonCharge + 2, 20);
    var ionTypes = ms2Spec.ActivationMethod != ActivationMethod.ETD ? cidIonTypes : etdIonTypes;
    var refIntensity = CompositeScorer.GetRefIntensity(ms2Spec.Peaks);
    var activationMethodFlag = ms2Spec.ActivationMethod == ActivationMethod.ETD ? 2 : 1;
    var cleavages = sequence.GetInternalCleavages();

    var complementaryCount = 0;
    var prefixStat = new FragmentStat();
    var suffixStat = new FragmentStat();

    // First/last peak m/z bound the charge states worth testing for each fragment.
    var minMz = ms2Spec.Peaks.First().Mz;
    var maxMz = ms2Spec.Peaks.Last().Mz;

    var cleavageIndex = 0;
    var prefixObserved = new bool[sequence.Count + 1];
    var suffixObserved = new bool[sequence.Count + 1];

    foreach (var cleavage in cleavages)
    {
        var prefixHit = false;
        var suffixHit = false;

        foreach (var ionType in ionTypes)
        {
            var stat = ionType.IsPrefix ? prefixStat : suffixStat;
            var fragmentComposition = ionType.IsPrefix
                ? cleavage.PrefixComposition + ionType.OffsetComposition
                : cleavage.SuffixComposition + ionType.OffsetComposition;

            // Fragments lighter than the first peak's m/z are skipped.
            if (fragmentComposition.Mass < ms2Spec.Peaks[0].Mz)
            {
                continue;
            }

            var fragmentMass = fragmentComposition.Mass;
            var apexIndex = fragmentComposition.GetMostAbundantIsotopeZeroBasedIndex();
            var apexMass = fragmentComposition.Mass + Constants.C13MinusC12 * apexIndex;

            // Charge range whose apex m/z can fall inside [minMz, maxMz], clamped to the allowed product charges.
            var upperCharge = (int)Math.Floor(apexMass / (minMz - Constants.Proton));
            var lowerCharge = (int)Math.Ceiling(apexMass / (maxMz - Constants.Proton));
            if (upperCharge < 1 || upperCharge > maxProductCharge)
            {
                upperCharge = maxProductCharge;
            }
            if (lowerCharge < 1 || lowerCharge < minProductCharge)
            {
                lowerCharge = minProductCharge;
            }

            for (var z = lowerCharge; z <= upperCharge; z++)
            {
                var ion = new Ion(fragmentComposition, z);

                var isotopePeaks = ms2Spec.GetAllIsotopePeaks(ion, tolerance, 0.1);
                if (isotopePeaks == null)
                {
                    continue;
                }

                // Reject envelopes that are both poorly correlated and far from the theoretical shape.
                var distCorr = CompositeScorer.GetDistCorr(ion, isotopePeaks);
                var poorCorrelation = distCorr.Item2 < 0.7;
                if (poorCorrelation && distCorr.Item1 > 0.03)
                {
                    continue;
                }

                var apexPeak = isotopePeaks[apexIndex];
                var relativeIntensity = apexPeak.Intensity / refIntensity;

                var theoreticalApexMz = Ion.GetIsotopeMz(fragmentMass, z, apexIndex);
                var massErrorPpm = (Math.Abs(apexPeak.Mz - theoreticalApexMz) / theoreticalApexMz) * 1e6;

                stat.Count++;
                stat.Intensity += Math.Min(relativeIntensity, 1.0);
                stat.Corr += distCorr.Item2;
                stat.Dist += distCorr.Item1;
                stat.MassError += massErrorPpm;

                if (ionType.IsPrefix)
                {
                    prefixHit = true;
                }
                else
                {
                    suffixHit = true;
                }
            }
        }

        if (prefixHit)
        {
            prefixObserved[cleavageIndex] = true;
        }
        if (suffixHit)
        {
            suffixObserved[cleavageIndex] = true;
        }
        if (prefixHit && suffixHit)
        {
            complementaryCount++;
        }

        cleavageIndex++;
    }

    // Count adjacent cleavage pairs that were both observed.
    var prefixConsecutive = 0;
    var suffixConsecutive = 0;
    for (var pos = 0; pos < prefixObserved.Length - 1; pos++)
    {
        if (prefixObserved[pos] && prefixObserved[pos + 1])
        {
            prefixConsecutive++;
        }
        if (suffixObserved[pos] && suffixObserved[pos + 1])
        {
            suffixConsecutive++;
        }
    }

    writer.Write(activationMethodFlag);
    writer.Write("\t");
    writer.Write(sequence.Composition.Mass);
    writer.Write("\t");
    writer.Write(sequence.Count);
    writer.Write("\t");
    writer.Write(complementaryCount);
    writer.Write("\t");
    writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}", prefixStat.Count, prefixConsecutive, prefixStat.Intensity, prefixStat.Corr, prefixStat.Dist, prefixStat.MassError);
    writer.Write("\t");
    writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}", suffixStat.Count, suffixConsecutive, suffixStat.Intensity, suffixStat.Corr, suffixStat.Dist, suffixStat.MassError);
    writer.Write("\n");
}
/// <summary>
/// Creates a matched-peak counter with a configurable correlation score threshold.
/// </summary>
/// <param name="ms2Spec">Product spectrum; forwarded to the base constructor.</param>
/// <param name="tolerance">Forwarded to the base constructor.</param>
/// <param name="minCharge">Forwarded to the base constructor.</param>
/// <param name="maxCharge">Forwarded to the base constructor.</param>
/// <param name="corrScoreThreshold">Stored in <c>_corrScoreThreshold</c>; defaults to 0.7.</param>
public CorrMatchedPeakCounter(
    ProductSpectrum ms2Spec,
    Tolerance tolerance,
    int minCharge,
    int maxCharge,
    double corrScoreThreshold = 0.7)
    : base(ms2Spec, tolerance, minCharge, maxCharge, 0.1) // 0.1: presumably the relative isotope intensity threshold - TODO confirm against the base class
{
    _corrScoreThreshold = corrScoreThreshold;
}
/// <summary>
/// Reads the mass spectrum with the specified scanNum from the raw file
/// </summary>
/// <param name="scanNum">scan number</param>
/// <param name="includePeaks">whether to include peak data; when false the spectrum is built from empty arrays</param>
/// <returns>
/// A plain Spectrum when ReadMsLevel reports level 1, otherwise a ProductSpectrum that also
/// carries the MS level, activation method and isolation window
/// </returns>
public Spectrum ReadMassSpectrum(int scanNum, bool includePeaks = true)
{
    var scanInfo = GetScanInfo(scanNum);

    // Start from empty arrays; they are only replaced when peak data is requested.
    double[] mzValues = new double[0];
    double[] intensityValues = new double[0];
    if (includePeaks)
    {
        _msfileReader.GetScanData(scanNum, out mzValues, out intensityValues, 0, true);
    }

    var retentionTime = RtFromScanNum(scanNum);
    var nativeId = "controllerType=0 controllerNumber=1 scan=" + scanNum;

    var msLevel = ReadMsLevel(scanNum);
    if (msLevel == 1)
    {
        var ms1Spectrum = new Spectrum(mzValues, intensityValues, scanNum);
        ms1Spectrum.ElutionTime = retentionTime;
        ms1Spectrum.TotalIonCurrent = scanInfo.TotalIonCurrent;
        ms1Spectrum.NativeId = nativeId;
        return ms1Spectrum;
    }

    var isolationWindow = ReadPrecursorInfo(scanNum);

    var productSpectrum = new ProductSpectrum(mzValues, intensityValues, scanNum);
    productSpectrum.MsLevel = scanInfo.MSLevel;
    productSpectrum.ElutionTime = retentionTime;
    productSpectrum.TotalIonCurrent = scanInfo.TotalIonCurrent;
    productSpectrum.NativeId = nativeId;
    productSpectrum.ActivationMethod = GetActivationMethod(scanNum);
    productSpectrum.IsolationWindow = isolationWindow;
    return productSpectrum;
}
/// <summary>
/// Parses the MGF file at <c>_fileName</c> into spectrum matches.
/// </summary>
/// <remarks>
/// Each entry must start with "BEGIN IONS", provide SEQ, SCANS and CHARGE parameters
/// (CHARGE last, since it switches the parser to peak lines) and end with "END IONS".
/// Peak lines are tab-separated "m/z&lt;TAB&gt;intensity". Only the first entry of each
/// distinct sequence string is kept. Numbers are parsed with the invariant culture so the
/// result does not depend on the machine's regional settings.
/// </remarks>
/// <returns>One <c>SpectrumMatch</c> per retained entry, in file order.</returns>
/// <exception cref="FormatException">
/// The file does not follow the expected layout, an entry is incomplete or has no peaks,
/// or a number cannot be parsed.
/// </exception>
public IList<SpectrumMatch> Read()
{
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    var specMatches = new List<SpectrumMatch>();
    var file = File.ReadLines(_fileName);
    var mgfState = MgfState.Label;

    var sequence = "";
    int scanNum = 0, charge = 0;
    var peaks = new List<Peak>();

    var peptideSet = new HashSet<string>();

    foreach (var line in file)
    {
        switch (mgfState)
        {
            case MgfState.Label:
                if (line == "BEGIN IONS") mgfState = MgfState.Parameter;
                else throw new FormatException("Invalid MGF file.");
                break;

            case MgfState.Parameter:
                var parameter = line.Split('=');
                if (parameter.Length < 2) throw new FormatException("Invalid line in MGF file: " + line);

                if (parameter[0] == "SEQ")
                {
                    sequence = parameter[1];
                }
                else if (parameter[0] == "SCANS")
                {
                    scanNum = Convert.ToInt32(parameter[1], invariantCulture);
                }
                else if (parameter[0] == "CHARGE")
                {
                    // The last character of the value is dropped before parsing
                    // (presumably the trailing sign, e.g. "2+" - TODO confirm).
                    var chargeStr = parameter[1].Substring(0, parameter[1].Length - 1);
                    charge = Convert.ToInt32(chargeStr, invariantCulture);
                    mgfState = MgfState.Peak;
                    if (sequence == "" || scanNum == 0 || charge == 0)
                        throw new FormatException("Incomplete spectrum entry.");
                }
                break;

            case MgfState.Peak:
                if (line == "END IONS")
                {
                    if (peaks.Count == 0) throw new FormatException("Empty peak list.");
                    mgfState = MgfState.Label;

                    // Keep only the first spectrum seen for each sequence string.
                    if (!peptideSet.Contains(sequence))
                    {
                        peptideSet.Add(sequence);

                        // NOTE(review): the peak list is cleared and reused below, which assumes
                        // ProductSpectrum copies the peaks it is given - confirm.
                        var spectrum = new ProductSpectrum(peaks, scanNum) { MsLevel = 2 };
                        var sequenceReader = new MgfSequenceReader();
                        var seq = sequenceReader.GetSequence(sequence);
                        specMatches.Add(new SpectrumMatch(seq, spectrum, scanNum, charge, _decoy));
                    }

                    // Reset the per-entry state for the next spectrum.
                    sequence = "";
                    scanNum = 0;
                    charge = 0;
                    peaks.Clear();
                }
                else
                {
                    var parts = line.Split('\t');
                    if (parts.Length < 2) throw new FormatException("Invalid line in MGF file: " + line);

                    var mz = Convert.ToDouble(parts[0], invariantCulture);
                    var intensity = Convert.ToDouble(parts[1], invariantCulture);
                    peaks.Add(new Peak(mz, intensity));
                }
                break;
        }
    }

    return specMatches;
}
/// <summary>
/// Matches sequence tags against the searchable database, extends every matched tag into
/// sequence matches, re-scores each match with <paramref name="scorer"/> and yields the results.
/// </summary>
/// <remarks>
/// This is an iterator: nothing is evaluated until the result is enumerated. Tags shorter than
/// <c>_minMatchedTagLength</c> or with neither flanking mass are skipped. Each match's
/// <c>Score</c> is overwritten with the sum of the fragment scores over all internal cleavages
/// of the matched sequence.
/// </remarks>
/// <param name="tags">Sequence tags to look up.</param>
/// <param name="spec">Product spectrum handed to the tag match finder.</param>
/// <param name="scorer">Scorer used both by the finder and for re-scoring.</param>
/// <returns>One <c>TagSequenceMatch</c> per extended match, with its flanking residues ('-' at a protein terminus).</returns>
private IEnumerable<TagSequenceMatch> GetMatches(IEnumerable<SequenceTag> tags, ProductSpectrum spec, IScorer scorer)
{
    // Match tags against the database
    var proteinsToTags = GetProteinToMatchedTagsMap(tags, _searchableDb, _aaSet, _tolerance, _tolerance);

    // Extend matches
    foreach (var proteinEntry in proteinsToTags)
    {
        var proteinName = proteinEntry.Key;
        var tagSet = proteinEntry.Value;
        var proteinSequence = tagSet.Sequence;

        var finder = new TagMatchFinder(spec, scorer, _featureFinder, proteinSequence, _tolerance, _aaSet, _maxSequenceMass);

        foreach (var tag in tagSet.Tags)
        {
            var unusable = tag.Length < _minMatchedTagLength
                           || (tag.NTermFlankingMass == null && tag.CTermFlankingMass == null);
            if (unusable)
            {
                continue;
            }

            var extendedMatches = finder.FindMatches(tag).ToArray();
            foreach (var match in extendedMatches)
            {
                var length = match.EndIndex - match.StartIndex;
                var sequence = proteinSequence.Substring(match.StartIndex, length);

                // Re-scoring
                var sequenceObj = Sequence.CreateSequence(sequence, match.ModificationText, _aaSet);
                match.Score = sequenceObj.GetInternalCleavages().Sum(c => scorer.GetFragmentScore(c.PrefixComposition, c.SuffixComposition));

                // StartIndex is inclusive, EndIndex is exclusive.
                char pre;
                if (match.StartIndex == 0)
                {
                    pre = '-';
                }
                else
                {
                    pre = proteinSequence[match.StartIndex - 1];
                }

                char post;
                if (match.EndIndex >= proteinSequence.Length)
                {
                    post = '-';
                }
                else
                {
                    post = proteinSequence[match.EndIndex];
                }

                yield return new TagSequenceMatch(sequence, proteinName, match, pre, post);
            }
        }
    }
}
/// <summary>
/// Evaluates every modification combination (sink) of <paramref name="seqGraph"/> and returns
/// the best-scoring one that is compatible with the spectrum's isolation window.
/// </summary>
/// <remarks>
/// A candidate is skipped when <paramref name="featureMass"/> is given and the sequence mass is
/// outside the tolerance, when the m/z of its most abundant isotope is not contained in the
/// isolation window, or when a feature finder is present and the MS1 evidence score is below
/// <c>Ms1CorrThreshold</c>. The charge is estimated from the sequence mass and the isolation
/// window target m/z.
/// </remarks>
/// <param name="seqGraph">Shifted sequence graph to evaluate.</param>
/// <param name="spec">Product spectrum providing the isolation window and scan number.</param>
/// <param name="featureMass">Optional feature mass the sequence mass must agree with.</param>
/// <returns>The best match, or <c>null</c> when no candidate qualifies.</returns>
private FlankingMassMatch GetBestMatchInTheGraph(ShiftedSequenceGraph seqGraph, ProductSpectrum spec, double? featureMass)
{
    FlankingMassMatch bestMatch = null;
    var bestScore = double.NegativeInfinity;

    var protCompositions = seqGraph.GetSequenceCompositions();
    for (var sinkIndex = 0; sinkIndex < protCompositions.Length; sinkIndex++)
    {
        seqGraph.SetSink(sinkIndex);
        var sequenceMass = seqGraph.GetSinkSequenceCompositionWithH2O().Mass;

        if (featureMass.HasValue && !_tolerance.IsWithin(sequenceMass, featureMass.Value))
        {
            continue;
        }

        // Estimate the charge from the isolation window target and require the most abundant
        // isotope to fall inside the window.
        var charge = (int)Math.Round(sequenceMass / (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));
        var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(sequenceMass).MostAbundantIsotopeIndex;
        var apexIsotopeMz = Ion.GetIsotopeMz(sequenceMass, charge, apexIsotopeIndex);
        if (!spec.IsolationWindow.Contains(apexIsotopeMz))
        {
            continue;
        }

        if (_featureFinder != null)
        {
            var ms1Evidence = _featureFinder.GetMs1EvidenceScore(spec.ScanNum, sequenceMass, charge);
            if (ms1Evidence < Ms1CorrThreshold)
            {
                continue;
            }
        }

        var scoreAndModifications = seqGraph.GetScoreAndModifications(_ms2Scorer);
        var candidateScore = scoreAndModifications.Item1;
        if (candidateScore > bestScore)
        {
            var flankingMass = sequenceMass - Composition.H2O.Mass - seqGraph.ShiftMass;
            bestMatch = new FlankingMassMatch(candidateScore, flankingMass, charge, scoreAndModifications.Item2);
            bestScore = candidateScore;
        }
    }

    return bestMatch;
}
/// <summary>
/// Creates a scorer over a deconvoluted spectrum: selects the prefix/suffix offset masses for
/// the spectrum's activation method and records every mass bin covered by the tolerance window
/// of each peak.
/// </summary>
/// <param name="deconvolutedSpectrum">Spectrum whose peaks are binned; each peak's Mz value is used as its mass.</param>
/// <param name="productTolerance">Tolerance that defines the window around each peak.</param>
internal DeconvScorer(ProductSpectrum deconvolutedSpectrum, Tolerance productTolerance)
{
    // ETD spectra use c/z offsets; every other activation method uses b/y offsets.
    var isEtd = deconvolutedSpectrum.ActivationMethod == ActivationMethod.ETD;
    if (isEtd)
    {
        _prefixOffsetMass = BaseIonType.C.OffsetComposition.Mass;
        _suffixOffsetMass = BaseIonType.Z.OffsetComposition.Mass;
    }
    else
    {
        _prefixOffsetMass = BaseIonType.B.OffsetComposition.Mass;
        _suffixOffsetMass = BaseIonType.Y.OffsetComposition.Mass;
    }

    _ionMassBins = new HashSet<int>();
    foreach (var peak in deconvolutedSpectrum.Peaks)
    {
        var peakMass = peak.Mz;
        var halfWidth = productTolerance.GetToleranceAsDa(peakMass, 1);

        var firstBin = GetBinNumber(peakMass - halfWidth);
        var lastBin = GetBinNumber(peakMass + halfWidth);

        // Mark every bin from the lower to the upper edge of the window (inclusive).
        var bin = firstBin;
        while (bin <= lastBin)
        {
            _ionMassBins.Add(bin);
            bin++;
        }
    }
}
/// <summary>
/// Finds the longest run of consecutive cleavages supported by prefix ions and, separately,
/// by suffix ions, and reports each run's position and sub-sequence.
/// </summary>
/// <remarks>
/// A cleavage counts as supported for an ion direction when at least one base ion type of that
/// direction, at some charge in [_minCharge, _maxCharge], has a correlation score of at least 0.7.
/// Stores <paramref name="spectrum"/>, <paramref name="sequence"/> and the selected base ion types
/// in instance fields as a side effect.
/// </remarks>
/// <param name="spectrum">Product spectrum to match fragment ions against.</param>
/// <param name="sequence">Sequence whose internal cleavages are evaluated.</param>
/// <returns>
/// (prefixStart, prefixEnd, suffixStart, suffixEnd, prefixSequence, suffixSequence).
/// Indices are -1 and the sequence string is empty for a direction with no run, and for
/// both directions when the sequence has no internal cleavages.
/// </returns>
public Tuple<int, int, int, int, string, string> GetLongestSequence(ProductSpectrum spectrum, Sequence sequence)
{
    _spectrum = spectrum;
    _sequence = sequence;
    _baseIonTypes = _spectrum.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCid : BaseIonTypesEtd;

    var cleavages = _sequence.GetInternalCleavages().ToArray();

    // Without any internal cleavage the flag arrays are empty and indexing them would throw;
    // report "nothing found" instead.
    if (cleavages.Length == 0)
    {
        return new Tuple<int, int, int, int, string, string>(-1, -1, -1, -1, "", "");
    }

    // 1 = cleavage is supported by at least one ion of that direction, 0 = not supported.
    var prefixValueArr = new int[cleavages.Length];
    var suffixValueArr = new int[cleavages.Length];

    for (var cleavageIndex = 0; cleavageIndex < cleavages.Length; cleavageIndex++)
    {
        var c = cleavages[cleavageIndex];
        foreach (var baseIonType in _baseIonTypes)
        {
            var fragmentComposition = baseIonType.IsPrefix
                ? c.PrefixComposition + baseIonType.OffsetComposition
                : c.SuffixComposition + baseIonType.OffsetComposition;

            for (var charge = _minCharge; charge <= _maxCharge; charge++)
            {
                var ion = new Ion(fragmentComposition, charge);
                if (_spectrum.GetCorrScore(ion, _tolerance, RelativeIsotopeIntensityThreshold) < .7)
                {
                    continue;
                }

                if (baseIonType.IsPrefix)
                {
                    prefixValueArr[cleavageIndex] = 1;
                }
                else
                {
                    suffixValueArr[cleavageIndex] = 1;
                }
            }
        }
    }

    // Per-residue flags: a position is set when two adjacent cleavages are both supported
    // and the residue between them is not a modified amino acid.
    var prefixSequenceArr = new int[_sequence.Count];
    var suffixSequenceArr = new int[_sequence.Count];
    prefixSequenceArr[0] = prefixValueArr[0];
    suffixSequenceArr[suffixSequenceArr.Length - 1] = suffixValueArr[suffixValueArr.Length - 1];

    for (var i = 1; i < prefixValueArr.Length; i++)
    {
        if (prefixValueArr[i] == 1 && prefixValueArr[i - 1] == 1)
        {
            if (_sequence[i] is ModifiedAminoAcid)
            {
                continue;
            }

            prefixSequenceArr[i] = 1;
        }
    }

    for (var i = suffixValueArr.Length - 2; i >= 0; i--)
    {
        if (suffixValueArr[i] == 1 && suffixValueArr[i + 1] == 1)
        {
            if (_sequence[i + 1] is ModifiedAminoAcid)
            {
                continue;
            }

            suffixSequenceArr[i + 1] = 1;
        }
    }

    var prefixRun = LocateLongestRun(prefixSequenceArr);
    var suffixRun = LocateLongestRun(suffixSequenceArr);

    return new Tuple<int, int, int, int, string, string>(
        prefixRun.Item1, prefixRun.Item2,
        suffixRun.Item1, suffixRun.Item2,
        prefixRun.Item3, suffixRun.Item3);
}

/// <summary>
/// Locates the longest run reported by FindLongestSubstring within the given 0/1 flags and
/// returns its 1-based start index, end index and the matching sub-sequence of _sequence.
/// Returns (-1, -1, "") when there is no run.
/// </summary>
private Tuple<int, int, string> LocateLongestRun(int[] sequenceFlags)
{
    var longestRun = FindLongestSubstring(sequenceFlags);
    if (longestRun == "")
    {
        return new Tuple<int, int, string>(-1, -1, "");
    }

    // Flags are rendered as a digit string so the run can be located by substring search;
    // ordinal comparison is sufficient for digits and avoids culture-sensitive matching.
    var flagString = string.Concat(sequenceFlags);
    var startIndex = flagString.IndexOf(longestRun, StringComparison.Ordinal) + 1;

    // NOTE(review): a run starting at position 1 always gets end index 1, regardless of its
    // length. Preserved from the original implementation; confirm this is intended.
    var endIndex = (startIndex == 1) ? 1 : startIndex + longestRun.Length - 1;

    var subSequence = GetStringSubSequence(_sequence, startIndex, endIndex);
    return new Tuple<int, int, string>(startIndex, endIndex, subSequence);
}
/// <summary>
/// Creates a matched-peak counter over the given MS2 spectrum, forwarding all arguments to the
/// base scorer and then setting the relative isotope intensity threshold to 0.7.
/// </summary>
/// <param name="ms2Spec">Product spectrum passed to the base scorer.</param>
/// <param name="tolerance">Tolerance passed to the base scorer.</param>
/// <param name="minCharge">Minimum charge passed to the base scorer.</param>
/// <param name="maxCharge">Maximum charge passed to the base scorer.</param>
public MatchedPeakCounter(ProductSpectrum ms2Spec, Tolerance tolerance, int minCharge, int maxCharge)
    : base(ms2Spec, tolerance, minCharge, maxCharge)
{
    // Assigned after the base constructor has run, so this value replaces whatever the base set.
    RelativeIsotopeIntensityThreshold = 0.7;
}