public FitBasedLogLikelihoodRatioScorer(ProductSpectrum ms2Spec, Tolerance tolerance, int minCharge, int maxCharge)
  {
      _ms2Spec = ms2Spec;
      _tolerance = tolerance;
      _minCharge = minCharge;
      _maxCharge = maxCharge;
      _baseIonTypes = ms2Spec.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCID : BaseIonTypesETD;
  }
Exemple #2
0
        public LikelihoodScorer(LikelihoodScoringModel model, ProductSpectrum ms2Spec, Tolerance tolerance, int minCharge, int maxCharge, bool massErrorScore = true)
            : base(ms2Spec, tolerance, minCharge, maxCharge, 0.1)
        {
            //_model = model;

            //var refIntensity = ms2Spec.Peaks.Max(p => p.Intensity) * 0.1;
            //var medIntensity = ms2Spec.Peaks.Select(p => p.Intensity).Median();
            //_refIntensity = Math.Min(medIntensity, refIntensity);

            _refIntensity = GetRefIntensity(ms2Spec.Peaks);

            _includeMassErrorScore = massErrorScore;
        }
        public void OutputStatistics(ProductSpectrum spectrum, Sequence sequence)
        {
            var baseIonTypes = spectrum.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCid : BaseIonTypesEtd;
            var cleavages = sequence.GetInternalCleavages().ToArray();
            var tolerance = new Tolerance(10);

            var maxIntensity = spectrum.Peaks.Max(p => p.Intensity);

            foreach (var c in cleavages)
            {
                foreach (var baseIonType in baseIonTypes)
                {
                    var fragmentComposition = baseIonType.IsPrefix
                        ? c.PrefixComposition + baseIonType.OffsetComposition
                        : c.SuffixComposition + baseIonType.OffsetComposition;

                    for (int charge = MinCharge; charge <= MaxCharge; charge++)
                    {
                        var ion = new Ion(fragmentComposition, charge);
                        var observedPeaks = spectrum.GetAllIsotopePeaks(ion, tolerance, RelativeIsotopeIntensityThreshold);

                        if (observedPeaks == null) continue;

                        var mostAbundantIsotopeIndex = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();


                        // representative peak intensity
                        var ionPeakIntensity = observedPeaks[mostAbundantIsotopeIndex].Intensity;

                        // calc. correlation
                        var isotopomerEnvelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
                        var observedIntensities = new double[observedPeaks.Length];
                        for (var i = 0; i < observedPeaks.Length; i++)
                        {
                            var observedPeak = observedPeaks[i];
                            observedIntensities[i] = observedPeak != null ? (float)observedPeak.Intensity : 0.0;
                        }
                        var corrCoeff = FitScoreCalculator.GetPearsonCorrelation(isotopomerEnvelope, observedIntensities);

                        // mz error
                        var mostAbundantIsotopeMz = ion.GetIsotopeMz(mostAbundantIsotopeIndex);
                        var errorPpm = ((observedPeaks[mostAbundantIsotopeIndex].Mz - mostAbundantIsotopeMz)/
                                        mostAbundantIsotopeMz)*1e6;


                    }
                }

            }

        }
 public double GetScoreTest(Sequence sequence, ProductSpectrum spectrum)
 {
     var score = 0d;
     var tol = new Tolerance(10);
     var matchCounter = new CorrMatchedPeakCounter(spectrum,tol,1,20);
     var prefixCompArr = sequence.GetPrefixCompositions().ToArray();
     foreach (var c in prefixCompArr)
     {
         if(c.Equals(Composition.Zero)) Console.WriteLine("found zero");
     }
     var suffixCompArr = sequence.GetSuffixCompositions().ToArray();
     for (int i = 0; i < prefixCompArr.Length; i++)
     {
         score += matchCounter.GetFragmentScore(prefixCompArr[i], suffixCompArr[i]);
     }
     return score;
 }
Exemple #5
0
 public TagMatchFinder(
     ProductSpectrum spec,
     IScorer ms2Scorer,
     LcMsPeakMatrix featureFinder,
     string proteinSequence, 
     Tolerance tolerance, 
     AminoAcidSet aaSet, 
     double maxSequenceMass)
 {
     _spec = spec;
     _ms2Scorer = ms2Scorer;
     _featureFinder = featureFinder;
     _proteinSequence = proteinSequence;
     _tolerance = tolerance;
     _aaSet = aaSet;
     _maxSequenceMass = maxSequenceMass;
 }
        public CompositeScorerBasedOnDeconvolutedSpectrum(DeconvolutedSpectrum deconvolutedSpectrum, ProductSpectrum spec, Tolerance productTolerance, IMassBinning comparer)
            : base(deconvolutedSpectrum, productTolerance)
        {
            ReferencePeakIntensity = GetRefIntensity(spec.Peaks);
            _comparer = comparer;
            _massBinToPeakMap = new Dictionary<int, DeconvolutedPeak>();

            foreach (var p in deconvolutedSpectrum.Peaks)
            {
                var mass = p.Mz;
                var deltaMass = productTolerance.GetToleranceAsDa(mass, 1);
                var minMass = mass - deltaMass;
                var maxMass = mass + deltaMass;

                var binNum = comparer.GetBinNumber(mass);

                if (binNum < 0)
                {
                    binNum = comparer.GetBinNumber(minMass);
                    if (binNum < 0) binNum = comparer.GetBinNumber(maxMass);
                }

                // filter out
                if (binNum < 0) continue;

                UpdateDeconvPeak(binNum, p as DeconvolutedPeak);
                // going up
                for (var nextBinNum = binNum + 1; nextBinNum < comparer.NumberOfBins; nextBinNum++)
                {
                    var nextBinMass = comparer.GetMassStart(nextBinNum);
                    if (minMass < nextBinMass && nextBinMass < maxMass) UpdateDeconvPeak(nextBinNum, p as DeconvolutedPeak); //_ionMassChkBins[nextBinNum] = true;
                    else break;
                }

                // going down
                for (var prevBinNum = binNum - 1; prevBinNum < comparer.NumberOfBins; prevBinNum--)
                {
                    var prevBinMass = comparer.GetMassEnd(prevBinNum);
                    if (minMass < prevBinMass && prevBinMass < maxMass) UpdateDeconvPeak(prevBinNum, p as DeconvolutedPeak); //_ionMassChkBins[prevBinNum] = true;
                    else break;
                }

            }
        }
        public double ComputeScore(ProductSpectrum ms2Spec, Sequence sequence)
        {
            
            _baseIonTypes = ms2Spec.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCID : BaseIonTypesETD;
            _sequence = sequence;
            _ms2Spec = ms2Spec;

            if (!_rankingInfo.TryGetValue(ms2Spec.ScanNum, out _peakRanking))
            {
                _peakRanking = ArrayUtil.GetRankings(ms2Spec.Peaks.Select(peak => peak.Intensity));
                _rankingInfo.Add(ms2Spec.ScanNum, _peakRanking);
            }

            FindMatchedPeaks();

            var score = GetRankSumScore();

            return score;
        }
        public IcScores GetScores(ProductSpectrum spec, string seqStr, Composition composition, int charge, int ms2ScanNum)
        {
            if (spec == null) return null;
            var scorer = new CompositeScorer(spec, Tolerance, MinProductCharge, Math.Min(MaxProductCharge, charge));
            var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, seqStr, AminoAcid.ProteinCTerm);
            if (seqGraph == null) return null;

            var bestScore = double.NegativeInfinity;
            Tuple<double, string> bestScoreAndModifications = null;
            var protCompositions = seqGraph.GetSequenceCompositions();

            for (var modIndex = 0; modIndex < protCompositions.Length; modIndex++)
            {
                seqGraph.SetSink(modIndex);
                var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();

                if (!protCompositionWithH2O.Equals(composition)) continue;

                var curScoreAndModifications = seqGraph.GetFragmentScoreAndModifications(scorer);
                var curScore = curScoreAndModifications.Item1;

                if (!(curScore > bestScore)) continue;

                bestScoreAndModifications = curScoreAndModifications;
                bestScore = curScore;
            }

            if (bestScoreAndModifications == null) return null;

            var modifications = bestScoreAndModifications.Item2;
            var seqObj = Sequence.CreateSequence(seqStr, modifications, AminoAcidSet);

            double score;
            int nMatchedFragments;

            GetCompositeScores(seqObj, charge, ms2ScanNum, out score, out nMatchedFragments);
            return new IcScores(nMatchedFragments, score, modifications);
        }
Exemple #9
0
        /// <summary>
        /// Handle a single spectrum element and child nodes
        /// Called by ReadSpectrumList (xml hierarchy)
        /// </summary>
        /// <param name="reader">XmlReader that is only valid for the scope of the single spectrum element</param>
        /// <param name="includePeaks">Whether to read binary data arrays</param>
        private Spectrum ReadSpectrum(XmlReader reader, bool includePeaks = true)
        {
            reader.MoveToContent();
            string index = reader.GetAttribute("index");
            //Console.WriteLine("Reading spectrum indexed by " + index);
            // This is correct for Thermo files converted by msConvert, but need to implement for others as well
            string spectrumId = reader.GetAttribute("id"); // Native ID in mzML_1.1.0; unique identifier in mzML_1.0.0, often same as nativeID
            string nativeId = spectrumId;
            if (_version == MzML_Version.mzML1_0_0)
            {
                nativeId = reader.GetAttribute("nativeID"); // Native ID in mzML_1.0.0
            }

            int scanNum = -1;
            // If a random access reader, there is already a scan number stored, based on the order of the index. Use it instead.
            if (_randomAccess)
            {
                scanNum = (int) (_spectrumOffsets.NativeToIdMap[nativeId]);
            }
            else
            {
                scanNum = (int)(_artificialScanNum++);
                // Interpret the NativeID (if the format has an interpreter) and use it instead of the artificial number.
                // TODO: Better handling than the artificial ID for other nativeIDs (ones currently not supported)
                int num = 0;
                if (NativeIdConversion.TryGetScanNumberInt(nativeId, out num))
                {
                    scanNum = num;
                }
            }

            int defaultArraySize = Convert.ToInt32(reader.GetAttribute("defaultArrayLength"));
            reader.ReadStartElement("spectrum"); // Throws exception if we are not at the "spectrum" tag.
            bool is_ms_ms = false;
            int msLevel = 0;
            bool centroided = false;
            double tic = 0;
            List<Precursor> precursors = new List<Precursor>();
            List<ScanData> scans = new List<ScanData>();
            List<BinaryDataArray> bdas = new List<BinaryDataArray>();
            while (reader.ReadState == ReadState.Interactive)
            {
                // Handle exiting out properly at EndElement tags
                if (reader.NodeType != XmlNodeType.Element)
                {
                    reader.Read();
                    continue;
                }
                //////////////////////////////////////////////////////////////////////////////////////
                /// 
                /// MS1 Spectra: only need Spectrum data: scanNum, MSLevel, ElutionTime, mzArray, IntensityArray
                /// 
                /// MS2 Spectra: use ProductSpectrum; adds ActivationMethod and IsolationWindow
                /// 
                //////////////////////////////////////////////////////////////////////////////////////
                switch (reader.Name)
                {
                    case "referenceableParamGroupRef":
                        // Schema requirements: zero to many instances of this element
                        reader.Skip();
                        break;
                    case "cvParam":
                        // Schema requirements: zero to many instances of this element
                        /* MAY supply a *child* term of MS:1000465 (scan polarity) only once
                         *   e.g.: MS:1000129 (negative scan)
                         *   e.g.: MS:1000130 (positive scan)
                         * MUST supply a *child* term of MS:1000559 (spectrum type) only once
                         *   e.g.: MS:1000322 (charge inversion mass spectrum)
                         *   e.g.: MS:1000325 (constant neutral gain spectrum)
                         *   e.g.: MS:1000326 (constant neutral loss spectrum)
                         *   e.g.: MS:1000328 (e/2 mass spectrum)
                         *   e.g.: MS:1000341 (precursor ion spectrum)
                         *   e.g.: MS:1000579 (MS1 spectrum)
                         *   e.g.: MS:1000580 (MSn spectrum)
                         *   e.g.: MS:1000581 (CRM spectrum)
                         *   e.g.: MS:1000582 (SIM spectrum)
                         *   e.g.: MS:1000583 (SRM spectrum)
                         *   e.g.: MS:1000620 (PDA spectrum)
                         *   e.g.: MS:1000627 (selected ion current chromatogram)
                         *   e.g.: MS:1000789 (enhanced multiply charged spectrum)
                         *   e.g.: MS:1000790 (time-delayed fragmentation spectrum)
                         *   et al.
                         * MUST supply term MS:1000525 (spectrum representation) or any of its children only once
                         *   e.g.: MS:1000127 (centroid spectrum)
                         *   e.g.: MS:1000128 (profile spectrum)
                         * MAY supply a *child* term of MS:1000499 (spectrum attribute) one or more times
                         *   e.g.: MS:1000285 (total ion current)
                         *   e.g.: MS:1000497 (zoom scan)
                         *   e.g.: MS:1000504 (base peak m/z)
                         *   e.g.: MS:1000505 (base peak intensity)
                         *   e.g.: MS:1000511 (ms level)
                         *   e.g.: MS:1000527 (highest observed m/z)
                         *   e.g.: MS:1000528 (lowest observed m/z)
                         *   e.g.: MS:1000618 (highest observed wavelength)
                         *   e.g.: MS:1000619 (lowest observed wavelength)
                         *   e.g.: MS:1000796 (spectrum title)
                         *   et al.
                         */
                        switch (reader.GetAttribute("accession"))
                        {
                            case "MS:1000127":
                                // name="centroid spectrum"
                                centroided = true;
                                break;
                            case "MS:1000128":
                                // name="profile spectrum"
                                centroided = false;
                                break;
                            case "MS:1000511":
                                // name="ms level"
                                msLevel = Convert.ToInt32(reader.GetAttribute("value"));
                                break;
                            case "MS:1000579":
                                // name="MS1 spectrum"
                                is_ms_ms = false;
                                break;
                            case "MS:1000580":
                                // name="MSn spectrum"
                                is_ms_ms = true;
                                break;
                            case "MS:1000285":
                                // name="total ion current"
                                tic = Convert.ToDouble(reader.GetAttribute("value"));
                                break;
                        }
                        reader.Read(); // Consume the cvParam element (no child nodes)
                        break;
                    case "userParam":
                        // Schema requirements: zero to many instances of this element
                        reader.Skip();
                        break;
                    case "spectrumDescription": // mzML_1.0.0 compatibility
                        // Schema requirements: one instance of this element
                        ReadSpectrumDescription(reader.ReadSubtree(), ref scans, ref precursors, out centroided);
                        reader.ReadEndElement(); // "spectrumDescription" must have child nodes
                        break;
                    case "scanList":
                        // Schema requirements: zero to one instances of this element
                        scans.AddRange(ReadScanList(reader.ReadSubtree()));
                        reader.ReadEndElement(); // "scanList" must have child nodes
                        break;
                    case "precursorList":
                        // Schema requirements: zero to one instances of this element
                        precursors.AddRange(ReadPrecursorList(reader.ReadSubtree()));
                        reader.ReadEndElement(); // "precursorList" must have child nodes
                        break;
                    case "productList":
                        // Schema requirements: zero to one instances of this element
                        reader.Skip();
                        break;
                    case "binaryDataArrayList":
                        // Schema requirements: zero to one instances of this element
                        if (includePeaks)
                        {
                            bdas.AddRange(ReadBinaryDataArrayList(reader.ReadSubtree(), defaultArraySize));
                            reader.ReadEndElement(); // "binaryDataArrayList" must have child nodes
                        }
                        else
                        {
                            reader.Skip();
                        }
                        break;
                    default:
                        reader.Skip();
                        break;
                }
            }
            reader.Close();
            // Process the spectrum data
            ScanData scan = new ScanData();
            Spectrum spectrum;
            BinaryDataArray mzs = new BinaryDataArray();
            BinaryDataArray intensities = new BinaryDataArray();
            foreach (var bda in bdas)
            {
                if (bda.ArrayType == ArrayType.m_z_array)
                {
                    mzs = bda;
                }
                else if (bda.ArrayType == ArrayType.intensity_array)
                {
                    intensities = bda;
                }
            }
            
            if (!centroided && includePeaks)
            {
                // Centroid spectrum
                // ProteoWizard
                var centroider = new Centroider(mzs.Data, intensities.Data);
                double[] centroidedMzs, centroidedIntensities;
                centroider.GetCentroidedData(out centroidedMzs, out centroidedIntensities);
                mzs.Data = centroidedMzs;
                intensities.Data = centroidedIntensities;
            }
            if (scans.Count == 1)
            {
                scan = scans[0];
            }
            else if (scans.Count > 1)
            {
                // TODO: Should do something else to appropriately handle combinations...
                scan = scans[0];
            }

            if (is_ms_ms)
            {
                Precursor precursor = new Precursor();
                if (precursors.Count == 1)
                {
                    precursor = precursors[0];
                }
                else if (precursors.Count > 1)
                {
                    // TODO: Should do something else to appropriately handle multiple precursors...
                    precursor = precursors[0];
                }
                SelectedIon ion = new SelectedIon();
                if (precursor.Ions.Count == 1)
                {
                    ion = precursor.Ions[0];
                }
                else if (precursor.Ions.Count > 1)
                {
                    // TODO: Should do something else to appropriately handle multiple selected ions...
                    ion = precursor.Ions[0];
                }

                var pspectrum = new ProductSpectrum(mzs.Data, intensities.Data, scanNum);
                pspectrum.ActivationMethod = precursor.Activation;
                // Select mz value to use based on presence of a Thermo-specific user param.
                // The user param has a slightly higher precision, if that matters.
                double mz = scan.MonoisotopicMz == 0.0 ? ion.SelectedIonMz : scan.MonoisotopicMz;
                pspectrum.IsolationWindow = new IsolationWindow(precursor.IsolationWindowTargetMz, precursor.IsolationWindowLowerOffset, precursor.IsolationWindowUpperOffset, mz, ion.Charge);
                //pspectrum.IsolationWindow.OldCharge = ion.OldCharge;
                //pspectrum.IsolationWindow.SelectedIonMz = ion.SelectedIonMz;
                spectrum = pspectrum;
            }
            else
            {
                spectrum = new Spectrum(mzs.Data, intensities.Data, scanNum);
            }
            spectrum.MsLevel = msLevel;
            spectrum.ElutionTime = scan.StartTime;
            spectrum.NativeId = nativeId;
            spectrum.TotalIonCurrent = tic;
            
            return spectrum;
        }
        private void GetNodeStatistics(bool isDecoy, ProductSpectrum ms2Spec, Sequence sequence, StreamWriter writer) //, StreamWriter mzErrorWriter)
        {
            if (ms2Spec == null) return;
            if (sequence == null) return;
            //var refIntensity = ms2Spec.Peaks.Max(p => p.Intensity) * 0.01;
            //refIntensity = Math.Min(ms2Spec.Peaks.Select(p => p.Intensity).Median(), refIntensity);
            
            var BaseIonTypesCID = new[] { BaseIonType.B, BaseIonType.Y };
            var BaseIonTypesETD = new[] { BaseIonType.C, BaseIonType.Z };
            var tolerance = new Tolerance(15);
            var minCharge = 1;
            var maxCharge = 20;
            var baseIonTypes = ms2Spec.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCID : BaseIonTypesETD;
            
            var refIntensity = ms2Spec.Peaks.Max(p => p.Intensity);

            var activationMethodFlag = ms2Spec.ActivationMethod == ActivationMethod.ETD ? 1 : 2;
            var cleavages = sequence.GetInternalCleavages();

            var prevPrefixFragMass = 0d;
            var prevPrefixObsIonMass = 0d;
            var prevPrefixObsIonCharge = 0;
            var prevPrefixObsIonIntensity = 0d;

            var prevSuffixFragMass = 0d;
            var prevSuffixObsIonMass = 0d;
            var prevSuffixObsIonCharge = 0;
            var prevSuffixObsIonIntensity = 0d;

            var nComplementaryFrags = 0;
            
            var cleavageIndex = 0;

            foreach (var c in cleavages)
            {
                var bothObs = true;
                foreach (var baseIonType in baseIonTypes)
                {
                    var peakType = baseIonType.IsPrefix ? 1 : 2; // unexplained
                    var fragmentComposition = baseIonType.IsPrefix
                                  ? c.PrefixComposition + baseIonType.OffsetComposition
                                  : c.SuffixComposition + baseIonType.OffsetComposition;

                    var curFragMass = fragmentComposition.Mass;
                    var curObsIonMass = 0d;
                    var curObsIonCharge = 0;
                    var curObsIonDist = 1.0d;
                    var curObsIonCorr = 0d;
                    var curObsIonIntensity = 0d;

                    var ionMatch = false;
                    for (var charge = minCharge; charge <= maxCharge; charge++)
                    {
                        var ion = new Ion(fragmentComposition, charge);

                        var isotopePeaks = ms2Spec.GetAllIsotopePeaks(ion, tolerance, 0.1);

                        if (isotopePeaks == null) continue;

                        var distCorr = AbstractFragmentScorer.GetDistCorr(ion, isotopePeaks);
                        
                        if (distCorr.Item2 < 0.7 && distCorr.Item1 > 0.07) continue;
                        var mostAbundantIsotopeIndex = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
                        var mostAbuPeak = isotopePeaks[mostAbundantIsotopeIndex];
                        
                        var summedIntensity = isotopePeaks.Where(p => p != null).Sum(p => p.Intensity);
                        var intScore = summedIntensity/refIntensity;
                        //var intScore = mostAbuPeak.Intensity / medIntensity;
                        //var intScore = summedIntensity / refIntensity;

                        if (ionMatch == false || curObsIonIntensity < intScore)
                        {
                            curObsIonMass = Ion.GetMonoIsotopicMass(mostAbuPeak.Mz, charge, mostAbundantIsotopeIndex);
                            curObsIonCharge = charge;
                            curObsIonCorr = distCorr.Item2;
                            curObsIonDist = distCorr.Item1;
                            curObsIonIntensity = intScore;
                        }
                        ionMatch = true;
                    }

                    if (!ionMatch)
                    {
                        bothObs = false;
                        continue;
                    }


                    writer.Write(activationMethodFlag);
                    writer.Write("\t");

                    writer.Write(peakType);
                    writer.Write("\t");

                    writer.Write("{0:0.000}", curFragMass);
                    writer.Write("\t");

                    writer.Write("{0}", curObsIonCharge);
                    writer.Write("\t");

                    writer.Write("{0:0.000}", curObsIonDist);
                    writer.Write("\t");

                    writer.Write("{0:0.000}", curObsIonCorr);
                    writer.Write("\t");

                    writer.Write("{0:0.000}", curObsIonIntensity);
                    writer.Write("\t");

                    writer.Write("{0:0.000}", (Math.Abs(curFragMass - curObsIonMass)/curFragMass)*1e6);
                    writer.Write("\n");
                    
                    // mz error output 
                    /*
                    if (baseIonType.IsPrefix && prevPrefixFragMass > 0 & prevPrefixObsIonMass > 0)
                    {
                        var aaMass = Math.Abs(prevPrefixFragMass - curFragMass);
                        var massError = Math.Abs(Math.Abs(prevPrefixObsIonMass - curObsIonMass) - aaMass);
                        var massErrorPpm = (massError / curObsIonMass) * 1e6;
                         mzErrorWriter.WriteLine("{0}\t{1:0.000}\t{2}", activationMethodFlag, massErrorPpm, Math.Abs(prevPrefixObsIonCharge - curObsIonCharge));
                    }
                    else if (prevSuffixFragMass > 0 & prevSuffixObsIonMass > 0)
                    {
                        var aaMass = Math.Abs(prevSuffixFragMass - curFragMass);
                        var massError = Math.Abs(Math.Abs(prevSuffixObsIonMass - curObsIonMass) - aaMass);
                        var massErrorPpm = (massError / curObsIonMass) * 1e6;
                        mzErrorWriter.WriteLine("{0}\t{1:0.000}\t{2}", activationMethodFlag, massErrorPpm, Math.Abs(prevSuffixObsIonCharge - curObsIonCharge));
                    }
                    */
                    if (baseIonType.IsPrefix)
                    {
                        prevPrefixFragMass = curFragMass;
                        prevPrefixObsIonMass = curObsIonMass;
                        prevPrefixObsIonCharge = curObsIonCharge;
                        prevPrefixObsIonIntensity = curObsIonIntensity;
                        //Array.Copy(curObsIonMass, prevPrefixObsIonMass, curObsIonMass.Length);    
                    }
                    else
                    {
                        prevSuffixFragMass = curFragMass;
                        prevSuffixObsIonMass = curObsIonMass;
                        prevSuffixObsIonCharge = curObsIonCharge;
                        prevSuffixObsIonIntensity = curObsIonIntensity;
                        //Array.Copy(curObsIonMass, prevSuffixObsIonMass, curObsIonMass.Length);    
                    }
                    
                }
                
                if (bothObs)
                {

                    //pairWriter.Write("{0}\t{1}\t", prevPrefixObsIonIntensity, prevSuffixObsIonIntensity);
                    nComplementaryFrags++;
                }
                cleavageIndex++;
            }

            Console.WriteLine("{0}\t{1}", nComplementaryFrags, sequence.Count);

            //if (!isDecoy) Console.WriteLine("{0}", totalExplainedAbundanceRatio);
        }
        private void GetMatchStatistics(ProductSpectrum ms2Spec, Sequence sequence, int parentIonCharge, StreamWriter writer)
        {
            if (ms2Spec == null) return;
            if (sequence == null) return;

            var BaseIonTypesCID = new[] { BaseIonType.B, BaseIonType.Y };
            var BaseIonTypesETD = new[] { BaseIonType.C, BaseIonType.Z };
            var tolerance = new Tolerance(12);
            var MinProductCharge = 1;
            var MaxProductCharge = Math.Min(parentIonCharge+2, 20);

            var baseIonTypes = ms2Spec.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCID : BaseIonTypesETD;
            var refIntensity = CompositeScorer.GetRefIntensity(ms2Spec.Peaks);

            var activationMethodFlag = ms2Spec.ActivationMethod == ActivationMethod.ETD ? 2 : 1;
            var cleavages = sequence.GetInternalCleavages();
            var nComplementaryFrags = 0;

            var prefixStat = new FragmentStat();
            var suffixStat = new FragmentStat();

            var minMz = ms2Spec.Peaks.First().Mz;
            var maxMz = ms2Spec.Peaks.Last().Mz;


            var cleavageIndex = 0;

            var preFixIonCheck = new bool[sequence.Count + 1];
            var sufFixIonCheck = new bool[sequence.Count + 1];
            
            foreach (var c in cleavages)
            {
                var prefixHit = false;
                var suffixHit = false;

                foreach (var baseIonType in baseIonTypes)
                {
                    var stat = baseIonType.IsPrefix ? prefixStat : suffixStat;

                    var fragmentComposition = baseIonType.IsPrefix
                                  ? c.PrefixComposition + baseIonType.OffsetComposition
                                  : c.SuffixComposition + baseIonType.OffsetComposition;

                    if (fragmentComposition.Mass < ms2Spec.Peaks[0].Mz) continue;

                    var curFragMass = fragmentComposition.Mass;
                    /*var curObsIonCharge = 0;
                    var curObsIonDist = 1.0d;
                    var curObsIonCorr = 0d;
                    var curObsIonIntensity = 0d;
                    var curObsIonMassError = 0d;*/

                    var mostAbundantIsotopeIndex = fragmentComposition.GetMostAbundantIsotopeZeroBasedIndex();
                    var fragmentIonMostAbuMass = fragmentComposition.Mass + Constants.C13MinusC12 * mostAbundantIsotopeIndex;

                    var maxCharge = (int)Math.Floor(fragmentIonMostAbuMass / (minMz - Constants.Proton));
                    var minCharge = (int)Math.Ceiling(fragmentIonMostAbuMass / (maxMz - Constants.Proton));
                    if (maxCharge < 1 || maxCharge > MaxProductCharge) maxCharge = MaxProductCharge;
                    if (minCharge < 1 || minCharge < MinProductCharge) minCharge = MinProductCharge;

                    //var ionMatch = false;
                    for (var charge = minCharge; charge <= maxCharge; charge++)
                    {
                        var ion = new Ion(fragmentComposition, charge);

                        var isotopePeaks = ms2Spec.GetAllIsotopePeaks(ion, tolerance, 0.1);

                        if (isotopePeaks == null) continue;

                        var distCorr = CompositeScorer.GetDistCorr(ion, isotopePeaks);
                        if (distCorr.Item2 < 0.7 && distCorr.Item1 > 0.03) continue;
                        var mostAbuPeak = isotopePeaks[mostAbundantIsotopeIndex];
                        var intScore = mostAbuPeak.Intensity / refIntensity;
                        /*
                        if (ionMatch == false || curObsIonIntensity < intScore)
                        {
                            curObsIonCharge = charge;
                            curObsIonCorr = distCorr.Item2;
                            curObsIonDist = distCorr.Item1;
                            curObsIonIntensity = intScore;

                            var mostAbuPeakMz = Ion.GetIsotopeMz(curFragMass, charge, mostAbundantIsotopeIndex);
                            curObsIonMassError = (Math.Abs(mostAbuPeak.Mz - mostAbuPeakMz) / mostAbuPeakMz) * 1e6;
                            
                            //var curObsIonMass = Ion.GetMonoIsotopicMass(mostAbuPeak.Mz, charge, mostAbundantIsotopeIndex);
                            //curObsIonMassError = (Math.Abs(curFragMass - curObsIonMass) / curFragMass) * 1e6;
                        }
                        ionMatch = true;
                        */
                        var mostAbuPeakMz = Ion.GetIsotopeMz(curFragMass, charge, mostAbundantIsotopeIndex);
                        var curObsIonMassError = (Math.Abs(mostAbuPeak.Mz - mostAbuPeakMz) / mostAbuPeakMz) * 1e6;

                        stat.Count++;
                        stat.Intensity += Math.Min(intScore, 1.0);
                        stat.Corr += distCorr.Item2;
                        stat.Dist += distCorr.Item1;
                        stat.MassError += curObsIonMassError;

                        if (baseIonType.IsPrefix) prefixHit = true;
                        else suffixHit = true;
                    }
                    
                    //if (!ionMatch) continue;
                }

                if (prefixHit) preFixIonCheck[cleavageIndex] = true;
                if (suffixHit) sufFixIonCheck[cleavageIndex] = true;

                if (prefixHit && suffixHit)
                {
                    nComplementaryFrags++;
                }
                cleavageIndex++;
            }

            var preContCount = 0;
            var sufContCount = 0;
            for (var i = 0; i < preFixIonCheck.Length - 1; i++)
            {
                if (preFixIonCheck[i] && preFixIonCheck[i + 1]) preContCount++;
                if (sufFixIonCheck[i] && sufFixIonCheck[i + 1]) sufContCount++;
            }

            writer.Write(activationMethodFlag);
            writer.Write("\t");
            writer.Write(sequence.Composition.Mass);
            writer.Write("\t");
            writer.Write(sequence.Count);
            writer.Write("\t");
            writer.Write(nComplementaryFrags);
            writer.Write("\t");
            
            writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}", prefixStat.Count, preContCount, prefixStat.Intensity, prefixStat.Corr, prefixStat.Dist, prefixStat.MassError);
            writer.Write("\t");
            writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}", suffixStat.Count, sufContCount, suffixStat.Intensity, suffixStat.Corr, suffixStat.Dist, suffixStat.MassError);
            writer.Write("\n");
        }
 public CorrMatchedPeakCounter(ProductSpectrum ms2Spec, Tolerance tolerance, int minCharge, int maxCharge, double corrScoreThreshold = 0.7):
     base(ms2Spec, tolerance, minCharge, maxCharge, 0.1)
 {
     _corrScoreThreshold = corrScoreThreshold;
 }
Exemple #13
0
        /// <summary>
        /// Reads the mass spectrum with the specified scanNum from the raw file
        /// </summary>
        /// <param name="scanNum">scan number</param>
        /// <param name="includePeaks">whether to include peak data</param>
        /// <returns>mass spectrum</returns>
        public Spectrum ReadMassSpectrum(int scanNum, bool includePeaks = true)
        {

            var scanInfo = GetScanInfo(scanNum);

            // default empty arrays, if peak data not requested.
            double[] mzArr = new double[]{};
            double[] intensityArr = new double[]{};

            if (includePeaks)
            {
                _msfileReader.GetScanData(scanNum, out mzArr, out intensityArr, 0, true);
                
            }

            var elutionTime = RtFromScanNum(scanNum);
            var nativeId = "controllerType=0 controllerNumber=1 scan=" + scanNum;

            // Call scanInfo.MSLevel in order to update dictionary _msLevel
            var msLevel = ReadMsLevel(scanNum);

            if (msLevel == 1)
                return new Spectrum(mzArr, intensityArr, scanNum)
                {
                    ElutionTime = elutionTime,
                    TotalIonCurrent = scanInfo.TotalIonCurrent,
                    NativeId = nativeId,
                };

            var isolationWindow = ReadPrecursorInfo(scanNum);

            var productSpec = new ProductSpectrum(mzArr, intensityArr, scanNum)
            {
                MsLevel = scanInfo.MSLevel,
                ElutionTime = elutionTime,
                TotalIonCurrent = scanInfo.TotalIonCurrent,
                NativeId = nativeId,
                ActivationMethod = GetActivationMethod(scanNum),
                IsolationWindow = isolationWindow
            };
            return productSpec;
        }
Exemple #14
0
        public IList<SpectrumMatch> Read() 
        {
            var specMatches = new List<SpectrumMatch>();
            var file = File.ReadLines(_fileName);
            var mgfState = MgfState.Label;

            var sequence = "";
            int scanNum = 0, charge = 0;
            var peaks = new List<Peak>();

            var peptideSet = new HashSet<string>();
            foreach (var line in file)
            {
                switch (mgfState)
                {
                    case MgfState.Label:
                        if (line == "BEGIN IONS") mgfState = MgfState.Parameter;
                        else throw new FormatException("Invalid MGF file.");
                        break;
                    case MgfState.Parameter:
                        var parameter = line.Split('=');
                        if (parameter.Length < 2) throw new FormatException("Invalid line in MGF file: " + line);
                        if (parameter[0] == "SEQ") sequence = parameter[1];
                        else if (parameter[0] == "SCANS") scanNum = Convert.ToInt32(parameter[1]);
                        else if (parameter[0] == "CHARGE")
                        {
                            var chargeStr = parameter[1].Substring(0, parameter[1].Length - 1);
                            charge = Convert.ToInt32(chargeStr);
                            mgfState = MgfState.Peak;
                            if (sequence == "" || scanNum == 0 || charge == 0)
                                throw new FormatException("Incomplete spectrum entry.");
                        }
                        break;
                    case MgfState.Peak:
                        if (line == "END IONS")
                        {
                            if (peaks.Count == 0) throw new FormatException("Empty peak list.");
                            mgfState = MgfState.Label;
                            if (peptideSet.Contains(sequence))
                            {
                                sequence = "";
                                scanNum = 0;
                                charge = 0;
                                peaks.Clear();
                                continue;
                            }
                            peptideSet.Add(sequence);
                            var spectrum = new ProductSpectrum(peaks, scanNum) { MsLevel = 2 };
                            var sequenceReader = new MgfSequenceReader();
                            var seq = sequenceReader.GetSequence(sequence);
                            var specMatch = new SpectrumMatch(seq, spectrum, scanNum, charge, _decoy);
                            sequence = "";
                            scanNum = 0;
                            charge = 0;
                            specMatches.Add(specMatch);
                            peaks.Clear();
                        }
                        else
                        {
                            var parts = line.Split('\t');
                            if (parts.Length < 2) throw new FormatException("Invalid line in MGF file: " + line);
                            var mz = Convert.ToDouble(parts[0]);
                            var intensity = Convert.ToDouble(parts[1]);
                            peaks.Add(new Peak(mz, intensity));
                        }
                        break;
                }
            }
            return specMatches;
        }
        private IEnumerable<TagSequenceMatch> GetMatches(IEnumerable<SequenceTag> tags, ProductSpectrum spec, IScorer scorer)
        {
            // Match tags against the database
            var proteinsToTags = GetProteinToMatchedTagsMap(tags, _searchableDb, _aaSet, _tolerance, _tolerance);
            //var tagSequenceMatchList = new List<TagSequenceMatch>();

            // Extend matches
            foreach (var entry in proteinsToTags)
            {
                var proteinName = entry.Key;
                var matchedTagSet = entry.Value;
                var proteinSequence = matchedTagSet.Sequence;

                var tagFinder = new TagMatchFinder(spec, scorer, _featureFinder, proteinSequence, _tolerance, _aaSet, _maxSequenceMass);

                foreach (var matchedTag in matchedTagSet.Tags)
                {
                    if (matchedTag.Length < _minMatchedTagLength) continue;
                    if (matchedTag.NTermFlankingMass == null && matchedTag.CTermFlankingMass == null) continue;

                    var matches = tagFinder.FindMatches(matchedTag).ToArray();
                    //var prevScore = double.NegativeInfinity;
                    //foreach (var match in matches.OrderByDescending(m => m.Score))
                    foreach(var match in matches)
                    {
                        var sequence = proteinSequence.Substring(match.StartIndex, match.EndIndex - match.StartIndex);
                        //re-scoring
                        var sequenceObj = Sequence.CreateSequence(sequence, match.ModificationText, _aaSet);
                        match.Score = sequenceObj.GetInternalCleavages().Sum(c => scorer.GetFragmentScore(c.PrefixComposition, c.SuffixComposition));

                        //var numMatches = matchedTag.Length * 2 + match.NTermScore + match.CTermScore;
                        //var score = match.NTermScore + match.CTermScore;
                        //score += (matchedTag.NumReliableNTermFlankingMasses > 0)
                          //  ? matchedTag.Length*CompositeScorer.ScoreParam.Prefix.ConsecutiveMatch
                            //: matchedTag.Length*CompositeScorer.ScoreParam.Suffix.ConsecutiveMatch;
                        

                        // Poisson p-value score
                        //var n = (match.EndIndex - match.StartIndex - 1)*2;
                        //var lambda = numMatches / n;
                        //var pValue = 1 - Poisson.CDF(lambda, numMatches);
                        //var pScore = (pValue > 0) ? - Math.Log(pValue, 2) : 50.0;
                        //if (numMatches < 5) break;
                        //if (prevScore - numMatches > 2) break;
                        //prevScore = numMatches;

                        var pre = match.StartIndex == 0 ? '-' : proteinSequence[match.StartIndex - 1]; // startIndex is inclusive
                        var post = match.EndIndex >= proteinSequence.Length ? '-' : proteinSequence[match.EndIndex]; // endIndex is Exclusive

                        yield return new TagSequenceMatch(sequence, proteinName, match, pre, post);

                        //tagSequenceMatchList.Add(new TagSequenceMatch(sequence, proteinName, match, pre, post));
                    }
                }
            }
            //return tagSequenceMatchList;
        }
Exemple #16
0
        private FlankingMassMatch GetBestMatchInTheGraph(ShiftedSequenceGraph seqGraph, ProductSpectrum spec, double? featureMass)
        {
            FlankingMassMatch match = null;
            var bestScore = double.NegativeInfinity;
            var protCompositions = seqGraph.GetSequenceCompositions();
            for (var modIndex = 0; modIndex < protCompositions.Length; modIndex++)
            {
                seqGraph.SetSink(modIndex);
                var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
                var sequenceMass = protCompositionWithH2O.Mass;
                if (featureMass != null && !_tolerance.IsWithin(sequenceMass, (double)featureMass)) continue;

                var charge =
                    (int)
                        Math.Round(sequenceMass /
                                   (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));

                var mostAbundantIsotopeMz = Ion.GetIsotopeMz(sequenceMass, charge,
                    Averagine.GetIsotopomerEnvelope(sequenceMass).MostAbundantIsotopeIndex);

                if (!spec.IsolationWindow.Contains(mostAbundantIsotopeMz)) continue;

                //var feature = new TargetFeature(sequenceMass, charge, spec.ScanNum);
                
                if (_featureFinder != null)
                {
                    var ms1Corr = _featureFinder.GetMs1EvidenceScore(spec.ScanNum, sequenceMass, charge);
                    if (ms1Corr < Ms1CorrThreshold) continue;
                }

                var curScoreAndModifications = seqGraph.GetScoreAndModifications(_ms2Scorer);
                var curScore = curScoreAndModifications.Item1;
//                var curScore = seqGraph.GetFragmentScore(_ms2Scorer);
                if (curScore > bestScore)
                {
                    match = new FlankingMassMatch(curScore,
                        sequenceMass - Composition.H2O.Mass - seqGraph.ShiftMass, charge, curScoreAndModifications.Item2);
                    //match = new FlankingMassMatch(curScore,
                    //    sequenceMass - Composition.H2O.Mass - seqGraph.ShiftMass, charge, new ModificationInstance[0]);
                    bestScore = curScore;
                }
            }

            return match;
        }
            internal DeconvScorer(ProductSpectrum deconvolutedSpectrum, Tolerance productTolerance)
            {
                if (deconvolutedSpectrum.ActivationMethod != ActivationMethod.ETD)
                {
                    _prefixOffsetMass = BaseIonType.B.OffsetComposition.Mass;
                    _suffixOffsetMass = BaseIonType.Y.OffsetComposition.Mass;
                }
                else
                {
                    _prefixOffsetMass = BaseIonType.C.OffsetComposition.Mass;
                    _suffixOffsetMass = BaseIonType.Z.OffsetComposition.Mass;
                }

                _ionMassBins = new HashSet<int>();
                foreach (var p in deconvolutedSpectrum.Peaks)
                {
                    var mass = p.Mz;
                    var deltaMass = productTolerance.GetToleranceAsDa(mass, 1);
                    var minMass = mass - deltaMass;
                    var maxMass = mass + deltaMass;

                    var minBinNum = GetBinNumber(minMass);
                    var maxBinNum = GetBinNumber(maxMass);
                    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
                    {
                        _ionMassBins.Add(binNum);
                    }
                }
            }
        public Tuple<int, int, int, int, string, string> GetLongestSequence(ProductSpectrum spectrum, Sequence sequence)
        {
            _spectrum = spectrum;
            _sequence = sequence;
            _baseIonTypes = _spectrum.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCid : BaseIonTypesEtd;

            var cleavages = _sequence.GetInternalCleavages().ToArray();
            var prefixValueArr = new int[cleavages.Length];
            var suffixValueArr = new int[cleavages.Length];
            var prefixPeakArr = new Peak[cleavages.Length];
            var suffixPeakArr = new Peak[cleavages.Length];
            //var peakList = new double[_spectrum.Peaks.Length];

            int cleavageIndex = 0;

            /*
            for (int i = 0; i < peakList.Length; i++)
            {
                peakList[i] = _spectrum.Peaks[i].Intensity;
            }*/

            //var rankings = ArrayUtil.GetRankings(peakList);

            foreach (var c in cleavages)
            {
                foreach (var baseIonType in _baseIonTypes)
                {
                    var fragmentComposition = baseIonType.IsPrefix
                                 ? c.PrefixComposition + baseIonType.OffsetComposition
                                 : c.SuffixComposition + baseIonType.OffsetComposition;
                    for (var charge = _minCharge; charge <= _maxCharge; charge++)
                    {

                        var ion = new Ion(fragmentComposition, charge);
                        if (_spectrum.GetCorrScore(ion, _tolerance, RelativeIsotopeIntensityThreshold) < .7) continue;
                        if (baseIonType.IsPrefix) prefixValueArr[cleavageIndex] = 1;
                        else suffixValueArr[cleavageIndex] = 1;


                    }
                }
                cleavageIndex++;
            }

            var prefixSequenceArr = new int[_sequence.Count];
            var suffixSequenceArr = new int[_sequence.Count];
            prefixSequenceArr[0] = prefixValueArr[0];
            suffixSequenceArr[suffixSequenceArr.Length - 1] = suffixValueArr[suffixValueArr.Length - 1];

            for (int i = 1; i < prefixValueArr.Length; i++)
            {
                if (prefixValueArr[i] == 1 && prefixValueArr[i - 1] == 1)
                {
                    if (_sequence[i] is ModifiedAminoAcid) continue;
                    prefixSequenceArr[i] = 1;

                }
            }

            for (int i = suffixValueArr.Length - 2; i >= 0; i--)
            {
                if (suffixValueArr[i] == 1 && suffixValueArr[i + 1] == 1)
                {
                    if (_sequence[i + 1] is ModifiedAminoAcid) continue;
                    suffixSequenceArr[i + 1] = 1;
                }
            }



            var prefixSubString = FindLongestSubstring(prefixSequenceArr);
            var prefixStartIndex = -1;
            var prefixEndIndex = -1;
            //var prefixSequencePeaks = new List<Peak>();
            //var prefixPval = -1.0;
            var prefixSequence = "";
            if (prefixSubString != "")
            {
                var prefixIndex = string.Concat(prefixSequenceArr);
                prefixStartIndex = prefixIndex.IndexOf(prefixSubString) + 1;
                prefixEndIndex = (prefixStartIndex == 1) ? 1 : prefixStartIndex + prefixSubString.Length - 1;
                //prefixSequencePeaks = GetPrefixSequencePeaks(prefixPeakArr, prefixStartIndex, prefixEndIndex);
                //var prefixRankSum = GetSequenceRankSum(prefixSequencePeaks, rankings, peakList);
                //prefixPval = FitScoreCalculator.GetRankSumPvalue(peakList.Length, prefixSequencePeaks.Count, prefixRankSum);
                prefixSequence = GetStringSubSequence(_sequence, prefixStartIndex, prefixEndIndex);
            }

            var suffixSubString = FindLongestSubstring(suffixSequenceArr);
            var suffixStartIndex = -1;
            var suffixEndIndex = -1;
            //var suffixSequencePeaks = new List<Peak>();
            //var suffixPval = -1.0;
            var suffixSequence = "";

            if (suffixSubString != "")
            {
                var suffixIndex = string.Concat(suffixSequenceArr);
                suffixStartIndex = suffixIndex.IndexOf(suffixSubString) + 1;
                suffixEndIndex = (suffixStartIndex == 1) ? 1 : suffixStartIndex + suffixSubString.Length - 1;
                //suffixSequencePeaks = GetSuffixSequencePeaks(suffixPeakArr, suffixStartIndex, suffixEndIndex);
                //var suffixRankSum = GetSequenceRankSum(suffixSequencePeaks, rankings, peakList);
                //suffixPval = FitScoreCalculator.GetRankSumPvalue(peakList.Length, suffixSequencePeaks.Count, suffixRankSum);
                suffixSequence = GetStringSubSequence(_sequence, suffixStartIndex, suffixEndIndex);
            }

            return new Tuple<int, int, int, int, string, string>(prefixStartIndex, prefixEndIndex, suffixStartIndex, suffixEndIndex, prefixSequence, suffixSequence);
        }
Exemple #19
0
 public MatchedPeakCounter(ProductSpectrum ms2Spec, Tolerance tolerance, int minCharge, int maxCharge)
     : base(ms2Spec, tolerance, minCharge, maxCharge)
 {
     RelativeIsotopeIntensityThreshold = 0.7;
 }