Esempio n. 1
0
        /// <summary>
        /// Decides whether two LC-MS features should be placed in the same cluster.
        /// </summary>
        /// <param name="f1">First feature.</param>
        /// <param name="f2">Second feature.</param>
        /// <returns>
        /// true when the features have different dataset ids, agree in mass (optionally allowing a
        /// 1 Da offset, or a 2 Da offset when both masses exceed 10000) and <c>CoElutedByNet</c>
        /// reports co-elution with a tolerance of 0.01; otherwise false.
        /// </returns>
        public bool SameCluster(LcMsFeature f1, LcMsFeature f2)
        {
            // Features that share a dataset id are never merged.
            if (f1.DataSetId == f2.DataSetId)
            {
                return false;
            }

            // The stricter of the two per-feature mass tolerances applies.
            var massTol  = Math.Min(_tolerance.GetToleranceAsTh(f1.Mass), _tolerance.GetToleranceAsTh(f2.Mass));
            var massDiff = Math.Abs(f1.Mass - f2.Mass);

            if (massDiff > massTol)
            {
                // No direct mass match: without the shift option the decision is final.
                if (!_oneDaltonShift)
                {
                    return false;
                }

                // A 1 Da offset is always acceptable; a 2 Da offset only when both masses exceed 10000.
                var bothHeavy   = f1.Mass > 10000 && f2.Mass > 10000;
                var shiftedByOne = !(Math.Abs(massDiff - 1) > massTol);
                var shiftedByTwo = bothHeavy && !(Math.Abs(massDiff - 2) > massTol);

                if (!shiftedByOne && !shiftedByTwo)
                {
                    return false;
                }
            }

            // Mass is compatible; the elution (NET) check decides.
            return f1.CoElutedByNet(f2, 0.01);
        }
Esempio n. 2
0
            /// <summary>
            /// Decides whether two protein-spectrum matches belong to the same cluster,
            /// based on their mass difference and the distance between their elution times.
            /// </summary>
            /// <param name="prsm1">First match; its mass determines the mass tolerance.</param>
            /// <param name="prsm2">Second match.</param>
            /// <returns>true when both the mass and the elution-time criteria are met.</returns>
            public bool SameCluster(ProteinSpectrumMatch prsm1, ProteinSpectrumMatch prsm2)
            {
                var massTolerance = new Tolerance(10);

                if (Math.Abs(prsm1.Mass - prsm2.Mass) > massTolerance.GetToleranceAsTh(prsm1.Mass))
                {
                    return false;
                }

                var elutionTime1 = _run.GetElutionTime(prsm1.ScanNum);
                var elutionTime2 = _run.GetElutionTime(prsm2.ScanNum);
                var elutionGap   = Math.Abs(elutionTime1 - elutionTime2);

                // Matches with identical sequence text may lie further apart (2% of the elution
                // length) than matches with differing sequence text (0.5%).
                var allowedFraction = prsm1.SequenceText.Equals(prsm2.SequenceText) ? 0.02 : 0.005;

                return !(elutionGap > _elutionLength * allowedFraction);
            }
Esempio n. 3
0
        /// <summary>
        /// Loads ProMex features and MSPathFinder identifications (from two result folders) for every
        /// dataset, tags the features with matching identifications, aligns the features across
        /// datasets, fills in missing features and writes the cross-tab output file.
        /// </summary>
        public void TestFeatureAlignment()
        {
            const string outFilePath = @"\\protoapps\UserData\Jungkap\Lewy\aligned\promex_crosstab_temp.tsv";

            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(10);
            var comparer   = new AnalysisCompRef.CompRefFeatureComparer(tolerance);
            var alignment  = new LcMsFeatureAlignment(comparer);

            for (var dataSetIndex = 0; dataSetIndex < NdataSet; dataSetIndex++)
            {
                var dataSetName = GetDataSetNames(dataSetIndex);

                var rawFile   = string.Format(@"{0}\{1}.pbf", PbfPath, dataSetName);
                var mspFile   = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, dataSetName);
                var mspFile2  = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder2, dataSetName);
                var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, dataSetName);
                Console.WriteLine(rawFile);

                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                // Combine the identifications from both result folders before merging.
                var combinedPrsms = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
                combinedPrsms.AddRange(prsmReader.LoadIdentificationResult(mspFile2, ProteinSpectrumMatch.SearchTool.MsPathFinder));

                var prsmList = MergePrsm(combinedPrsms);
                var features = LcMsFeatureAlignment.LoadProMexResult(dataSetIndex, ms1FtFile, run);

                // The protein name doubles as the protein id.
                foreach (var prsm in prsmList)
                {
                    prsm.ProteinId = prsm.ProteinName;
                }

                // Tag each feature with the PrSMs whose scan number lies strictly inside the
                // feature's scan range and whose mass is within tolerance of the feature mass.
                for (var featureIndex = 0; featureIndex < features.Count; featureIndex++)
                {
                    var feature = features[featureIndex];
                    var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                    foreach (var prsm in prsmList)
                    {
                        var insideScanRange = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                        if (insideScanRange && Math.Abs(feature.Mass - prsm.Mass) < massTol)
                        {
                            feature.ProteinSpectrumMatches.Add(prsm);
                        }
                    }
                }

                alignment.AddDataSet(dataSetIndex, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var dataSetIndex = 0; dataSetIndex < NdataSet; dataSetIndex++)
            {
                alignment.FillMissingFeatures(dataSetIndex);
                Console.WriteLine("{0} has been processed", GetDataSetNames(dataSetIndex));
            }

            OutputCrossTabWithId(outFilePath, alignment);
        }
Esempio n. 4
0
        /// <summary>
        /// Checks whether the charge assigned to an observed isotope envelope is plausible by
        /// counting peak spacings, within the envelope's m/z span, that would be consistent with
        /// higher charge states.
        /// </summary>
        /// <param name="envelope">Observed isotope envelope to check.</param>
        /// <returns>
        /// true when the charge is above 20, when the envelope accounts for more than 70% of the
        /// peaks in its span, or when no higher charge gathers enough supporting spacings;
        /// false when the span holds fewer than 10 peaks or a higher charge is supported.
        /// </returns>
        public bool CheckChargeState(ObservedIsotopeEnvelope envelope)
        {
            var checkCharge = envelope.Charge;

            if (checkCharge > 20)
            {
                return true; // high charge (> +20), just pass
            }

            // Number of spectrum peaks between the envelope's lowest and highest m/z peaks (inclusive).
            var peakStartIndex = envelope.MinMzPeak.IndexInSpectrum;
            var peakEndIndex   = envelope.MaxMzPeak.IndexInSpectrum;
            var nPeaks         = peakEndIndex - peakStartIndex + 1;

            if (nPeaks < 10)
            {
                return false;
            }
            if (envelope.NumberOfPeaks > nPeaks * 0.7)
            {
                return true;
            }

            var tolerance = new Tolerance(5);
            var threshold = nPeaks * 0.5;
            var mzTol     = tolerance.GetToleranceAsTh(Spectrum.Peaks[peakStartIndex].Mz);

            // Range of alternative (higher) charges whose spacing counts are tracked.
            var minCheckCharge = Math.Max(checkCharge * 2 - 1, 4);
            var maxCheckCharge = Math.Min(checkCharge * 5 + 1, 60);
            var maxDeltaMz     = Constants.C13MinusC12 / minCheckCharge + mzTol;
            var nChargeGaps    = new int[maxCheckCharge - minCheckCharge + 1];

            for (var i = peakStartIndex; i <= peakEndIndex; i++)
            {
                for (var j = i + 1; j <= peakEndIndex; j++)
                {
                    var deltaMz = Spectrum.Peaks[j].Mz - Spectrum.Peaks[i].Mz;

                    // Spacing is already wider than the widest spacing of interest.
                    if (deltaMz > maxDeltaMz)
                    {
                        break;
                    }

                    // Charges compatible with this spacing, clamped to the tracked range.
                    // Without the clamp a spacing close to mzTol makes the upper bound huge
                    // (or +Infinity), and the loop would spin over charges that are ignored anyway.
                    var firstCharge = Math.Max(Math.Round(1 / (deltaMz + mzTol)), minCheckCharge);
                    var lastCharge  = Math.Min(Math.Round(1 / (deltaMz - mzTol)), maxCheckCharge);

                    for (var c = firstCharge; c <= lastCharge; c++)
                    {
                        var k = (int)c - minCheckCharge;
                        nChargeGaps[k]++;

                        if (nChargeGaps[k] + 1 > threshold && nChargeGaps[k] + 1 > 1.25 * envelope.NumberOfPeaks)
                        {
                            return false;
                        }
                    }
                }
            }

            return true;
        }
        /// <summary>
        /// Gets the extracted ion chromatogram of the specified m/z (using only MS1 spectra)
        /// </summary>
        /// <param name="mz">target m/z</param>
        /// <param name="tolerance">tolerance used to widen the target into an m/z window</param>
        /// <returns>XIC as a list of XICPeaks</returns>
        public IList <XicPeak> GetExtractedIonChromatogram(double mz, Tolerance tolerance)
        {
            // Symmetric window of +/- the tolerance (in Th) around the target m/z.
            var halfWidth = tolerance.GetToleranceAsTh(mz);

            return GetExtractedIonChromatogram(mz - halfWidth, mz + halfWidth);
        }
Esempio n. 6
0
        /// <summary>
        /// Gets the extracted ion chromatogram of the specified m/z (using only MS2 spectra),
        /// delegating to the overload that takes an explicit m/z range.
        /// </summary>
        /// <param name="mz">target m/z</param>
        /// <param name="tolerance">tolerance used to widen the target into an m/z window</param>
        /// <param name="precursorIonMz">m/z of the precursor ion</param>
        /// <param name="minScanNum">minimum scan number (inclusive)</param>
        /// <param name="maxScanNum">maximum scan number (inclusive)</param>
        /// <returns>XIC as an Xic object</returns>
        public Xic GetProductExtractedIonChromatogram(double mz, Tolerance tolerance, double precursorIonMz, int minScanNum, int maxScanNum)
        {
            // Symmetric window of +/- the tolerance (in Th) around the target m/z.
            var halfWidth = tolerance.GetToleranceAsTh(mz);

            return GetProductExtractedIonChromatogram(mz - halfWidth, mz + halfWidth, precursorIonMz, minScanNum, maxScanNum);
        }
Esempio n. 7
0
        /// <summary>
        /// Finds all peaks of <paramref name="peakList"/> in the tolerance window around the given m/z
        /// by delegating to the overload that takes an explicit m/z range.
        /// </summary>
        /// <param name="peakList">peaks to search</param>
        /// <param name="mz">target m/z</param>
        /// <param name="tolerance">tolerance used to widen the target into an m/z window</param>
        /// <returns>the result of the range-based overload</returns>
        public static IList <Peak> FindAllPeaks(List <Peak> peakList, double mz, Tolerance tolerance)
        {
            // Symmetric window of +/- the tolerance (in Th) around the target m/z.
            var halfWidth = tolerance.GetToleranceAsTh(mz);

            return FindAllPeaks(peakList, mz - halfWidth, mz + halfWidth);
        }
Esempio n. 8
0
        /// <summary>
        /// Gets the full product extracted ion chromatogram for the tolerance window around the
        /// given m/z by delegating to the overload that takes an explicit m/z range.
        /// Declared with <c>new</c>, i.e. it hides an inherited member with the same signature.
        /// </summary>
        /// <param name="mz">target m/z</param>
        /// <param name="tolerance">tolerance used to widen the target into an m/z window</param>
        /// <param name="precursorIonMz">m/z of the precursor ion</param>
        /// <returns>XIC as an Xic object</returns>
        public new Xic GetFullProductExtractedIonChromatogram(double mz, Tolerance tolerance, double precursorIonMz)
        {
            // Symmetric window of +/- the tolerance (in Th) around the target m/z.
            var halfWidth = tolerance.GetToleranceAsTh(mz);

            return GetFullProductExtractedIonChromatogram(mz - halfWidth, mz + halfWidth, precursorIonMz);
        }
Esempio n. 9
0
        /// <summary>
        /// Gets the extracted ion chromatogram of the specified m/z (using only MS1 spectra),
        /// delegating to the overload that takes an explicit m/z range.
        /// </summary>
        /// <param name="mz">target m/z</param>
        /// <param name="tolerance">tolerance used to widen the target into an m/z window</param>
        /// <returns>XIC as an Xic object</returns>
        public Xic GetPrecursorExtractedIonChromatogram(double mz, Tolerance tolerance)
        {
            // Symmetric window of +/- the tolerance (in Th) around the target m/z.
            var halfWidth = tolerance.GetToleranceAsTh(mz);

            return GetPrecursorExtractedIonChromatogram(mz - halfWidth, mz + halfWidth);
        }
Esempio n. 10
0
        /// <summary>
        /// Loads ProMex features and MSPathFinder identifications for every run, tags the features
        /// with matching identifications, aligns the features across runs and writes the
        /// cross-tab output file labelled with the run labels.
        /// </summary>
        public void TestFeatureAlignment()
        {
            const string outFilePath = @"\\protoapps\UserData\Jungkap\Quant\aligned\promex_crosstab.tsv";

            var runLabels = new[] { "1x1", "1x2", "1x3", "1x4", "1x5", "5x1", "5x2", "5x3", "5x4", "5x5", "10x1", "10x2", "10x3", "10x4", "10x5" };
            var nDataset  = runLabels.Length;

            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(10);
            var comparer   = new SpikeInFeatureComparer(tolerance);
            var alignment  = new LcMsFeatureAlignment(comparer);

            // NOTE(review): the loop bound comes from runLabels while the file names are taken
            // from datasets - confirm that both have the same length.
            for (var dataSetIndex = 0; dataSetIndex < nDataset; dataSetIndex++)
            {
                var dataSetName = datasets[dataSetIndex];

                var rawFile   = string.Format(@"{0}\{1}.pbf", RawFolder, dataSetName);
                var mspFile   = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, dataSetName);
                var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, dataSetName);

                var run      = PbfLcMsRun.GetLcMsRun(rawFile);
                var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
                var features = LcMsFeatureAlignment.LoadProMexResult(dataSetIndex, ms1FtFile, run);

                // The protein name doubles as the protein id.
                foreach (var prsm in prsmList)
                {
                    prsm.ProteinId = prsm.ProteinName;
                }

                // Tag each feature with the PrSMs whose scan number lies strictly inside the
                // feature's scan range and whose mass is within tolerance of the feature mass.
                for (var featureIndex = 0; featureIndex < features.Count; featureIndex++)
                {
                    var feature = features[featureIndex];
                    var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                    foreach (var prsm in prsmList)
                    {
                        var insideScanRange = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                        if (insideScanRange && Math.Abs(feature.Mass - prsm.Mass) < massTol)
                        {
                            feature.ProteinSpectrumMatches.Add(prsm);
                        }
                    }
                }

                alignment.AddDataSet(dataSetIndex, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            // FillMissingFeatures is not invoked in this test.
            OutputCrossTabWithId(outFilePath, alignment, runLabels);
        }
Esempio n. 11
0
        /// <summary>
        /// Gets the extracted ion chromatogram of the specified m/z (using only MS1 spectra).
        /// When <paramref name="targetScanNum"/> is non-negative, only XicPeaks around that scan
        /// are requested; a negative value falls back to the plain m/z-range overload.
        /// </summary>
        /// <param name="mz">target m/z</param>
        /// <param name="tolerance">tolerance used to widen the target into an m/z window</param>
        /// <param name="targetScanNum">target scan number to generate xic; negative to ignore</param>
        /// <param name="maxNumConsecutiveScansWithoutPeak">maximum number of consecutive scans without a peak</param>
        /// <returns>XIC around targetScanNum, or the XIC of the m/z range when targetScanNum is negative</returns>
        public Xic GetPrecursorExtractedIonChromatogram(double mz, Tolerance tolerance, int targetScanNum, int maxNumConsecutiveScansWithoutPeak = 3)
        {
            // Symmetric window of +/- the tolerance (in Th) around the target m/z.
            var halfWidth = tolerance.GetToleranceAsTh(mz);
            var lowerMz   = mz - halfWidth;
            var upperMz   = mz + halfWidth;

            if (targetScanNum >= 0)
            {
                return GetPrecursorExtractedIonChromatogram(lowerMz, upperMz, targetScanNum, maxNumConsecutiveScansWithoutPeak);
            }

            return GetPrecursorExtractedIonChromatogram(lowerMz, upperMz);
        }
Esempio n. 12
0
        /// <summary>
        /// Examines every pair of deconvoluted peaks and adds a graph edge for each pair whose
        /// mass gap matches, within tolerance, the mass of at least one amino acid in
        /// <c>_aminoAcidsArray</c>.
        /// </summary>
        private void CollectSequenceTagGraphEdges()
        {
            for (var i = 0; i < _deconvolutedPeaks.Count; i++)
            {
                // Tolerance (in Th) is derived from the mass of the lower-indexed peak of the pair.
                var massTh = _tolerance.GetToleranceAsTh(_deconvolutedPeaks[i].Mass);

                for (var j = i + 1; j < _deconvolutedPeaks.Count; j++)
                {
                    var massGap    = _deconvolutedPeaks[j].Mass - _deconvolutedPeaks[i].Mass;
                    var maxMassGap = massGap + massTh;
                    var minMassGap = massGap - massTh;

                    // Gap exceeds the heaviest amino acid: stop scanning partners for peak i.
                    // NOTE(review): presumably _deconvolutedPeaks is sorted by ascending mass - confirm.
                    if (minMassGap > _maxAminoAcidMass)
                    {
                        break;
                    }

                    // Gap is still below the lightest amino acid: try the next partner.
                    if (maxMassGap < _minAminoAcidMass)
                    {
                        continue;
                    }

                    // Create the edge only once the gap is known to be in the useful range.
                    var peakGap = new SequenceTagGraphEdge(i, j, massGap);

                    foreach (var aa in _aminoAcidsArray)
                    {
                        var aaMass = aa.Composition.Mass;
                        if (minMassGap < aaMass && aaMass < maxMassGap)
                        {
                            peakGap.AddMatchedAminoAcid(aa, Math.Abs(peakGap.Mass - aaMass));
                        }
                    }

                    if (peakGap.AminoAcidList.Count > 0)
                    {
                        AddEdge(peakGap);
                    }
                }
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Collects all peaks whose m/z lies in the tolerance window around the given m/z.
        /// </summary>
        /// <param name="mz">target m/z</param>
        /// <param name="tolerance">tolerance used to widen the target into an m/z window</param>
        /// <returns>peaks with minMz &lt;= Mz &lt;= maxMz, in collection order; empty when none match</returns>
        public List <MSMSSpectrumPeak> GetPeaks(double mz, Tolerance tolerance)
        {
            var tolTh     = tolerance.GetToleranceAsTh(mz);
            var minMz     = mz - tolTh;
            var maxMz     = mz + tolTh;
            var matchList = new List <MSMSSpectrumPeak>();

            // A negative result is the bitwise complement of the insertion index.
            var start = BinarySearch(new MSMSSpectrumPeak(minMz, 1));
            if (start < 0)
            {
                start = ~start;
            }

            // A binary search may land on any one of several equal elements, so step back
            // to the first peak that is still inside the window.
            // NOTE(review): assumes the collection is ordered by ascending Mz - confirm.
            while (start > 0 && this[start - 1].Mz >= minMz)
            {
                start--;
            }

            for (var i = start; i < Count; i++)
            {
                var p = this[i];
                if (p.Mz > maxMz)
                {
                    break;
                }
                matchList.Add(p);
            }

            return matchList;
        }
Esempio n. 14
0
        /// <summary>
        /// Repeatedly takes the first feature of <paramref name="featureSet"/>, keeps it, and
        /// resolves the features it overlaps with: overlapped features that are 1 or 2 Da away and
        /// have a similar score are kept as well, the rest are re-scored and returned to the set
        /// only if they still pass the score threshold.
        /// </summary>
        /// <param name="featureSet">candidate features; emptied by this method</param>
        /// <returns>the kept features in the order they were selected</returns>
        private IList <LcMsPeakCluster> RemoveOverlappedFeatures(SortedSet <LcMsPeakCluster> featureSet)
        {
            var selected  = new List <LcMsPeakCluster>();
            var tolerance = new Tolerance(5);

            while (featureSet.Count >= 1)
            {
                var best = featureSet.First();
                featureSet.Remove(best);
                selected.Add(best);
                var massTol = tolerance.GetToleranceAsTh(best.RepresentativeMass);

                var toRescore = new List <LcMsPeakCluster>();
                foreach (var overlapped in best.OverlappedFeatures)
                {
                    // Only features that are still candidates are considered.
                    if (!featureSet.Remove(overlapped))
                    {
                        continue;
                    }

                    var massDiff      = Math.Abs(best.RepresentativeMass - overlapped.RepresentativeMass);
                    var shiftedByOneOrTwo = Math.Abs(massDiff - 1.0) < massTol || Math.Abs(massDiff - 2.0) < massTol;

                    if (shiftedByOneOrTwo && SimilarScore(best, overlapped))
                    {
                        selected.Add(overlapped);
                    }
                    else
                    {
                        toRescore.Add(overlapped);
                    }
                }

                // Re-score the remaining overlapped features after the best feature's major
                // peaks have been inactivated; survivors go back into the candidate set.
                best.InActivateMajorPeaks();
                foreach (var candidate in toRescore)
                {
                    candidate.UpdateScore(_spectra);
                    candidate.Score = _scorer.GetScore(candidate);

                    if (candidate.Score > _scorer.ScoreThreshold && candidate.GoodEnougth)
                    {
                        featureSet.Add(candidate);
                    }
                }
            }

            return selected;
        }
Esempio n. 15
0
        /// <summary>
        /// Finds the most intense peak strictly inside the tolerance window around the given m/z.
        /// </summary>
        /// <param name="mz">target m/z</param>
        /// <param name="tolerance">tolerance used to widen the target into an m/z window</param>
        /// <returns>
        /// the peak with the highest intensity among those with minMz &lt; Mz &lt; maxMz,
        /// or null when no peak with an intensity above zero lies in that window
        /// </returns>
        internal RankedPeak FindPeak(double mz, Tolerance tolerance)
        {
            var halfWidth = tolerance.GetToleranceAsTh(mz);
            var lowerMz   = mz - halfWidth;
            var upperMz   = mz + halfWidth;

            // Locate the window centre; a negative result is the complement of the insertion index.
            var pivot = Array.BinarySearch(Peaks, new RankedPeak((lowerMz + upperMz) / 2, 0, 0));
            if (pivot < 0)
            {
                pivot = ~pivot;
            }

            RankedPeak strongest          = null;
            var        strongestIntensity = 0.0;

            // Walk downwards from just below the pivot until the lower bound is reached.
            for (var down = pivot - 1; down >= 0; down--)
            {
                var candidate = Peaks[down];
                if (candidate.Mz <= lowerMz)
                {
                    break;
                }
                if (candidate.Intensity > strongestIntensity)
                {
                    strongestIntensity = candidate.Intensity;
                    strongest          = candidate;
                }
            }

            // Walk upwards from the pivot until the upper bound is reached.
            for (var up = pivot; up < Peaks.Length; up++)
            {
                var candidate = Peaks[up];
                if (candidate.Mz >= upperMz)
                {
                    break;
                }
                if (candidate.Intensity > strongestIntensity)
                {
                    strongestIntensity = candidate.Intensity;
                    strongest          = candidate;
                }
            }

            return strongest;
        }
Esempio n. 16
0
        /// <summary>
        /// Finds the observed peaks that correspond to the isotopes of a theoretical profile.
        /// </summary>
        /// <param name="spectrum">Observed spectrum.</param>
        /// <param name="isotopomerEnvelope">The theoretical isotopic profile; its size fixes the size of the result.</param>
        /// <param name="mass">
        /// Value used as the m/z of the first isotope when searching the spectrum
        /// (NOTE(review): presumably the monoisotopic m/z of the lipid - confirm).
        /// </param>
        /// <param name="tolerance">Tolerance applied around each expected isotope m/z.</param>
        /// <returns>
        /// Array with one slot per isotope holding the most intense peak found within tolerance
        /// (a slot stays null when nothing was found); null when the first isotope has no matching peak.
        /// </returns>
        /// <remarks>
        /// This differs from the GetAllIsotopePeaks in <see cref="LipidUtil" /> in that it accepts the isotopomer envelope
        /// as an argument rather than calculating it on its own. This way we only calculate it once.
        /// </remarks>
        private Peak[] GetAllIsotopePeaks(Spectrum spectrum, IReadOnlyCollection <double> isotopomerEnvelope, double mass, Tolerance tolerance)
        {
            var spectrumPeaks = spectrum.Peaks;
            var anchorIndex   = spectrum.FindPeakIndex(mass, tolerance);

            if (anchorIndex < 0)
            {
                return null;
            }

            var matched = new Peak[isotopomerEnvelope.Count];
            matched[0] = spectrumPeaks[anchorIndex];

            // Walk upwards through the spectrum, one expected isotope at a time.
            var scanStart = anchorIndex + 1;

            for (var isotope = 1; isotope < matched.Length; isotope++)
            {
                var expectedMz = mass + isotope * Constants.C13MinusC12;
                var halfWidth  = tolerance.GetToleranceAsTh(expectedMz);
                var lowerMz    = expectedMz - halfWidth;
                var upperMz    = expectedMz + halfWidth;

                var cursor = scanStart;
                while (cursor < spectrumPeaks.Length)
                {
                    var candidate = spectrumPeaks[cursor];

                    // Past the window: remember where to resume for the next isotope.
                    if (candidate.Mz > upperMz)
                    {
                        scanStart = cursor;
                        break;
                    }

                    // Inside the window: keep the most intense candidate for this isotope.
                    if (candidate.Mz >= lowerMz &&
                        (matched[isotope] == null || candidate.Intensity > matched[isotope].Intensity))
                    {
                        matched[isotope] = candidate;
                    }

                    cursor++;
                }
            }

            return matched;
        }
Esempio n. 17
0
        /// <summary>
        /// Compares this match with another one. Matches produced by the same search tool are
        /// equal when their sequence text is equal; matches from different tools are equal when
        /// their masses agree within tolerance and their first and last residues coincide.
        /// </summary>
        /// <param name="other">The match to compare with; may be null.</param>
        /// <returns>true when the matches are considered equal; false otherwise or when <paramref name="other"/> is null.</returns>
        public bool Equals(ProteinSpectrumMatch other)
        {
            // Nothing is equal to null; avoids a NullReferenceException below.
            if (ReferenceEquals(other, null))
            {
                return false;
            }

            if (SearchToolType == other.SearchToolType)
            {
                return SequenceText.Equals(other.SequenceText);
            }

            var massDiff = Math.Abs(Mass - other.Mass);
            var tol      = new Tolerance(10);

            return massDiff < tol.GetToleranceAsTh(Mass) && FirstResidue == other.FirstResidue && LastResidue == other.LastResidue;
        }
Esempio n. 18
0
        /// <summary>
        /// Loads ProMex features for every CompRef run, tags them with MSPathFinder identifications
        /// when the identification file exists, aligns the features across runs, fills in missing
        /// features and writes the cross-tab output file labelled with the run labels.
        /// </summary>
        public void TestFeatureAlignment()
        {
            const string outFilePath = @"\\protoapps\UserData\Jungkap\CompRef\aligned\promex_crosstab_temp.tsv";

            var runLabels = new string[] { "32A", "32B", "32C", "32D", "32E", "32F", "32G", "33A", "33B", "33C", "33D", "33E", "33F", "33G" };
            var nDataset  = runLabels.Length;

            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(10);
            var alignment  = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

            for (var i = 0; i < nDataset; i++)
            {
                var rawFile   = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ.pbf", RawFolder, runLabels[i]);
                var mspFile   = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ_IcTda.tsv", MsPfFolder, runLabels[i]);
                var ms1FtFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ.ms1ft", Ms1FtFolder, runLabels[i]);

                var run      = PbfLcMsRun.GetLcMsRun(rawFile);
                var features = LcMsFeatureAlignment.LoadProMexResult(i, ms1FtFile, run);

                // Identifications are optional: runs without a result file are aligned untagged.
                if (File.Exists(mspFile))
                {
                    var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                    // The protein name doubles as the protein id.
                    foreach (var match in prsmList)
                    {
                        match.ProteinId = match.ProteinName;
                    }

                    // Tag each feature with the PrSMs whose scan number lies strictly inside the
                    // feature's scan range and whose mass is within tolerance of the feature mass.
                    for (var j = 0; j < features.Count; j++)
                    {
                        var feature = features[j];
                        var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                        foreach (var match in prsmList)
                        {
                            if (feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum && Math.Abs(feature.Mass - match.Mass) < massTol)
                            {
                                feature.ProteinSpectrumMatches.Add(match);
                            }
                        }
                    }
                }

                alignment.AddDataSet(i, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var i = 0; i < nDataset; i++)
            {
                alignment.FillMissingFeatures(i);
                Console.WriteLine("{0} has been processed", runLabels[i]);
            }

            OutputCrossTabWithId(outFilePath, alignment, runLabels);
        }
Esempio n. 19
0
        /// <summary>
        /// Times product-ion XIC extraction for 100000 random (product m/z, precursor m/z) queries,
        /// first against a run obtained via <c>InMemoryLcMsRun.GetLcMsRun</c> and then against a
        /// <c>PbfLcMsRun</c> opened on a .raf file, printing the elapsed seconds of each pass.
        /// </summary>
        /// <remarks>
        /// The test is ignored (via <c>Assert.Ignore</c>) when either input file is missing.
        /// The extracted chromatograms themselves are discarded; only the timings are reported.
        /// </remarks>
        public void TestGeneratingProductXics()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            if (!File.Exists(TestRawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, TestRawFilePath);
            }

            var inMemoryRun = InMemoryLcMsRun.GetLcMsRun(TestRawFilePath);

            const string rafFilePath = @"H:\Research\Jarret\10mz\raw\Q_2014_0523_50_10_fmol_uL_10mz.raf";
            if (!File.Exists(rafFilePath))
            {
                Assert.Ignore(@"Skipping raf portion of test {0} since file not found: {1}", testName, rafFilePath);
            }

            var rafRun       = new PbfLcMsRun(rafFilePath);
            var ppmTolerance = new Tolerance(10);

            // Random query set: product m/z drawn from [50, 1500), precursor m/z from [390, 810).
            const int queryCount = 100000;
            var productMzs   = new double[queryCount];
            var precursorMzs = new double[queryCount];
            var random       = new Random();

            for (var q = 0; q < queryCount; q++)
            {
                productMzs[q]   = random.NextDouble() * 1450.0 + 50.0;
                precursorMzs[q] = random.NextDouble() * (810.0 - 390.0) + 390.0;
            }

            // method 1: in-memory run
            var timer = System.Diagnostics.Stopwatch.StartNew();
            for (var q = 0; q < queryCount; q++)
            {
                var halfWidth = ppmTolerance.GetToleranceAsTh(productMzs[q]);
                inMemoryRun.GetFullProductExtractedIonChromatogram(productMzs[q] - halfWidth, productMzs[q] + halfWidth, precursorMzs[q]);
            }
            timer.Stop();

            Console.WriteLine(@"Method 1: {0:f4} sec", timer.Elapsed.TotalSeconds);

            // method 2: run backed by the .raf file
            timer.Restart();
            for (var q = 0; q < queryCount; q++)
            {
                var halfWidth = ppmTolerance.GetToleranceAsTh(productMzs[q]);
                rafRun.GetFullProductExtractedIonChromatogram(productMzs[q] - halfWidth, productMzs[q] + halfWidth, precursorMzs[q]);
            }
            timer.Stop();

            Console.WriteLine(@"Method 2: {0:f4} sec", timer.Elapsed.TotalSeconds);

            Console.WriteLine(@"Done");
        }
Esempio n. 20
0
        /// <summary>
        /// Finds, for each isotope of <paramref name="ion"/>, the most intense peak of <paramref name="spec"/>
        /// whose m/z lies within <paramref name="tolerance"/> of that isotope's m/z. The search is anchored on
        /// the most abundant isotope and then walks outwards to lighter and heavier isotopes.
        /// </summary>
        /// <param name="spec">Spectrum whose <c>Peaks</c> array is searched.</param>
        /// <param name="ion">Ion supplying the isotopomer envelope and per-isotope m/z values.</param>
        /// <param name="tolerance">Tolerance converted to an m/z half-window per isotope via <c>GetToleranceAsTh</c>.</param>
        /// <param name="relativeIntensityThreshold">
        /// The walk in a given direction stops at the first isotope whose envelope intensity is below this value.
        /// </param>
        /// <param name="peakIndexList">
        /// Receives, per isotope index, the index into <c>spec.Peaks</c> of the chosen peak. Entries for isotopes
        /// with no chosen peak keep the array default of 0, so consult the returned array to tell them apart.
        /// </param>
        /// <returns>
        /// An array indexed by isotope index holding the chosen peak (or null where none was found);
        /// null when <c>spec.FindPeakIndex</c> returns a negative index for the most abundant isotope.
        /// </returns>
        public Peak[] GetAllIsotopePeaks(Spectrum spec, Ion ion, Tolerance tolerance, double relativeIntensityThreshold, out int[] peakIndexList)
        {
            var apexIsotope = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
            var envelope    = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();

            peakIndexList = new int[envelope.Length];

            var apexPeakIndex = spec.FindPeakIndex(ion.GetIsotopeMz(apexIsotope), tolerance);
            if (apexPeakIndex < 0)
            {
                return null;
            }

            var matched = new Peak[envelope.Length];
            matched[apexIsotope]       = spec.Peaks[apexPeakIndex];
            peakIndexList[apexIsotope] = apexPeakIndex;

            // Lighter isotopes: scan the peak array downwards, starting just below the anchor peak.
            var scanStart = apexPeakIndex - 1;
            for (var iso = apexIsotope - 1; iso >= 0; iso--)
            {
                if (envelope[iso] < relativeIntensityThreshold)
                {
                    break;
                }

                var targetMz   = ion.GetIsotopeMz(iso);
                var halfWidth  = tolerance.GetToleranceAsTh(targetMz);
                var windowLow  = targetMz - halfWidth;
                var windowHigh = targetMz + halfWidth;

                var cursor = scanStart;
                while (cursor >= 0)
                {
                    var candidate = spec.Peaks[cursor];
                    if (candidate.Mz < windowLow)
                    {
                        // Passed below the window: the next (lighter) isotope resumes from here.
                        scanStart = cursor;
                        break;
                    }

                    if (candidate.Mz <= windowHigh)
                    {
                        // Inside the window: keep the most intense candidate seen so far.
                        var best = matched[iso];
                        if (best == null || candidate.Intensity > best.Intensity)
                        {
                            matched[iso]       = candidate;
                            peakIndexList[iso] = cursor;
                        }
                    }

                    cursor--;
                }
            }

            // Heavier isotopes: scan the peak array upwards, starting just above the anchor peak.
            scanStart = apexPeakIndex + 1;
            for (var iso = apexIsotope + 1; iso < envelope.Length; iso++)
            {
                if (envelope[iso] < relativeIntensityThreshold)
                {
                    break;
                }

                var targetMz   = ion.GetIsotopeMz(iso);
                var halfWidth  = tolerance.GetToleranceAsTh(targetMz);
                var windowLow  = targetMz - halfWidth;
                var windowHigh = targetMz + halfWidth;

                var cursor = scanStart;
                while (cursor < spec.Peaks.Length)
                {
                    var candidate = spec.Peaks[cursor];
                    if (candidate.Mz > windowHigh)
                    {
                        // Passed above the window: the next (heavier) isotope resumes from here.
                        scanStart = cursor;
                        break;
                    }

                    if (candidate.Mz >= windowLow)
                    {
                        // Inside the window: keep the most intense candidate seen so far.
                        var best = matched[iso];
                        if (best == null || candidate.Intensity > best.Intensity)
                        {
                            matched[iso]       = candidate;
                            peakIndexList[iso] = cursor;
                        }
                    }

                    cursor++;
                }
            }

            return matched;
        }
Esempio n. 21
0
        /// <summary>
        /// For each of 32 UTEX datasets: loads MSAlign identifications, builds one LC-MS feature per
        /// not-yet-assigned PrSM, attaches further PrSMs with the same protein id that fall inside the
        /// feature's elution window and mass tolerance, merges overlapping features that share a protein id,
        /// and writes the surviving features to a <c>.ms1ft</c> file in the ProMex output folder.
        /// </summary>
        /// <remarks>
        /// The test is ignored when the raw-data folder is missing; individual datasets are skipped with a
        /// console warning when their .pbf or MSAlign result file is missing.
        /// </remarks>
        public void TestQuantifyIdedProteoforms()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string rawFolder           = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
            const string promexOutFolder     = @"D:\MassSpecFiles\UTEX\MSAlign";
            const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

            if (!Directory.Exists(rawFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
            }

            var nDataset = 32;
            var dataset  = new string[nDataset];

            for (var i = 0; i < nDataset; i++)
            {
                dataset[i] = String.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
            }

            var prsmReader = new ProteinSpectrumMatchReader(0.01);

            var tolerance = new Tolerance(10);

            for (var i = 0; i < dataset.Length; i++)
            {
                var rawFile = String.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
                if (!File.Exists(rawFile))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
                    continue;
                }
                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                var path = String.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
                if (!File.Exists(path))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
                    continue;
                }

                var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

                // Protein id = the 5 characters that follow ProteinNamePrefix inside the protein name.
                for (var j = 0; j < prsmList.Count; j++)
                {
                    var match = prsmList[j];
                    match.ProteinId = match.ProteinName.Substring(match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
                }

                // PrSM To Feature: -1 marks a PrSM that has not been assigned to any feature yet.
                var prsmToFeatureIdMap = new int[prsmList.Count];
                for (var k = 0; k < prsmToFeatureIdMap.Length; k++)
                {
                    prsmToFeatureIdMap[k] = -1;
                }

                // Feature To PrSM: featureToPrsm[n] holds the PrSMs attached to featureList[n].
                var featureToPrsm = new List <ProteinSpectrumMatchSet>();

                var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
                var featureList   = new List <LcMsPeakCluster>();
                var featureId     = 0;
                for (var j = 0; j < prsmList.Count; j++)
                {
                    if (prsmToFeatureIdMap[j] >= 0)
                    {
                        continue;
                    }

                    var match      = prsmList[j];
                    var minScanNum = match.ScanNum;
                    var maxScanNum = match.ScanNum;
                    var mass       = match.Mass;
                    var charge     = match.Charge;
                    var massTh     = tolerance.GetToleranceAsTh(mass);
                    var id1        = match.ProteinId;

                    var feature = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
                    var prsmSet = new ProteinSpectrumMatchSet(i)
                    {
                        match
                    };

                    prsmToFeatureIdMap[j] = featureId;

                    if (feature == null)
                    {
                        // No regular cluster found: fall back to a cluster built from noise peaks.
                        feature = featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, charge, charge);
                    }
                    else
                    {
                        var etTol = Math.Max(run.GetElutionTime(run.MaxLcScan) * 0.005, feature.ElutionLength * 0.2);

                        for (var k = j + 1; k < prsmList.Count; k++)
                        {
                            var otherMatch = prsmList[k];
                            var id2        = otherMatch.ProteinId;
                            var et2        = run.GetElutionTime(otherMatch.ScanNum);

                            // Accept PrSMs eluting within etTol of the feature on EITHER side.
                            // (The upper bound previously subtracted etTol, which shrank the window at the
                            // trailing edge instead of widening it.)
                            if (id1.Equals(id2) &&
                                feature.MinElutionTime - etTol < et2 && et2 < feature.MaxElutionTime + etTol &&
                                Math.Abs(otherMatch.Mass - mass) < massTh)
                            {
                                prsmToFeatureIdMap[k] = featureId;
                                prsmSet.Add(otherMatch);
                            }
                        }
                    }
                    featureId++;

                    // Flag == 1 marks a live feature; merged-away features are set to 0 below.
                    feature.Flag = 1;
                    featureList.Add(feature);
                    featureToPrsm.Add(prsmSet);
                }

                // Merge pairs of live features that agree in mass, co-elute and share a protein id.
                for (var j = 0; j < featureList.Count; j++)
                {
                    var f1 = featureList[j];
                    if (f1.Flag < 1)
                    {
                        continue;
                    }
                    var prsm1 = featureToPrsm[j];

                    for (var k = j + 1; k < featureList.Count; k++)
                    {
                        var f2 = featureList[k];
                        if (f2.Flag < 1)
                        {
                            continue;
                        }

                        var prsm2 = featureToPrsm[k];
                        if (Math.Abs(f1.Mass - f2.Mass) > tolerance.GetToleranceAsTh(f1.Mass))
                        {
                            continue;
                        }
                        if (!f1.CoElutedByNet(f2, 0.005))
                        {
                            continue;
                        }
                        if (!prsm1.ShareProteinId(prsm2))
                        {
                            continue;
                        }

                        // The feature spanning more scans survives and inherits the other's PrSMs.
                        if (f1.ScanLength > f2.ScanLength)
                        {
                            prsm1.AddRange(prsm2);
                            prsm2.Clear();
                            f2.Flag = 0;
                        }
                        else
                        {
                            prsm2.AddRange(prsm1);
                            prsm1.Clear();
                            f1.Flag = 0;

                            // f1 has been absorbed and its PrSM set emptied; stop comparing it so a retired
                            // feature can never swallow another live one.
                            break;
                        }
                    }
                }

                // Output the surviving features; the writer is disposed even if a write throws.
                var ms1ftFilePath = String.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
                using (var writer = new StreamWriter(ms1ftFilePath))
                {
                    writer.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

                    for (var j = 0; j < featureList.Count; j++)
                    {
                        var f1 = featureList[j];
                        if (f1.Flag < 1)
                        {
                            continue;
                        }
                        var prsm1 = featureToPrsm[j];

                        // Widen the feature to one scan before/after the outermost attached PrSM scans.
                        var minScanNum = run.GetPrevScanNum(prsm1.MinScanNum, 1);
                        var maxScanNum = run.GetNextScanNum(prsm1.MaxScanNum, 1);
                        f1.ExpandScanRange(minScanNum, maxScanNum);

                        writer.Write("{0}\t", j + 1);
                        writer.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
                    }
                }

                Console.WriteLine(ms1ftFilePath);
            }
        }
Esempio n. 22
0
        /// <summary>
        /// Aligns ProMex features across 32 UTEX datasets. For every dataset whose .pbf, MSAlign result and
        /// .ms1ft files all exist, features are loaded, tagged with the PrSMs that fall inside their scan range
        /// and mass tolerance, and added to the alignment. After alignment and abundance refinement, one
        /// <c>_aligned.ms1ft</c> file per dataset is written with an extra <c>IdedMs2ScanNums</c> column.
        /// </summary>
        /// <remarks>
        /// The test is ignored when the raw-data folder is missing or when no dataset had all three input files.
        /// </remarks>
        public void TestAlignFeatures()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string rawFolder           = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
            const string promexOutFolder     = @"D:\MassSpecFiles\UTEX\MSAlign";
            const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

            if (!Directory.Exists(rawFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
            }

            var nDataset = 32;
            var dataset  = new string[nDataset];

            for (var i = 0; i < nDataset; i++)
            {
                dataset[i] = String.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
            }

            var tolerance  = new Tolerance(10);
            var ftComparer = new UtexFeatureComparer(tolerance);
            var align      = new LcMsFeatureAlignment(ftComparer);
            var prsmReader = new ProteinSpectrumMatchReader(0.01);
            var validCount = 0;

            for (var i = 0; i < dataset.Length; i++)
            {
                var rawFile = String.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
                if (!File.Exists(rawFile))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
                    continue;
                }
                var run = PbfLcMsRun.GetLcMsRun(rawFile);

                var path = String.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
                if (!File.Exists(path))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
                    continue;
                }

                var ms1ftPath = String.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
                if (!File.Exists(ms1ftPath))
                {
                    Console.WriteLine(@"Warning: Skipping file not found: {0}", ms1ftPath);
                    continue;
                }

                validCount++;

                var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

                // Protein id = the 5 characters that follow ProteinNamePrefix inside the protein name.
                for (var j = 0; j < prsmList.Count; j++)
                {
                    var match = prsmList[j];
                    match.ProteinId =
                        match.ProteinName.Substring(
                            match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
                }

                var features = LcMsFeatureAlignment.LoadProMexResult(i, ms1ftPath, run);

                // tag features by PrSMs: scan strictly inside the feature's scan range and mass within tolerance
                for (var j = 0; j < features.Count; j++)
                {
                    var massTol = tolerance.GetToleranceAsTh(features[j].Mass);
                    foreach (var match in prsmList)
                    {
                        if (features[j].MinScanNum < match.ScanNum && match.ScanNum < features[j].MaxScanNum && Math.Abs(features[j].Mass - match.Mass) < massTol)
                        {
                            features[j].ProteinSpectrumMatches.Add(match);
                        }
                    }
                }

                align.AddDataSet(i, features, run);
            }

            if (validCount == 0)
            {
                Assert.Ignore("No files found!");
            }

            align.AlignFeatures();
            Console.WriteLine("{0} alignments ", align.CountAlignedFeatures);
            align.RefineAbundance();

            var alignedFeatureList = align.GetAlignedFeatures();

            for (var i = 0; i < nDataset; i++)
            {
                var ms1ftPath = String.Format(@"{0}\{1}_aligned.ms1ft", promexOutFolder, dataset[i]);

                // using guarantees the file handle is released even if a write throws part-way through.
                using (var writer = new StreamWriter(ms1ftPath))
                {
                    writer.Write(LcMsFeatureFinderLauncher.GetHeaderString());
                    writer.WriteLine("\tIdedMs2ScanNums");

                    for (var j = 0; j < alignedFeatureList.Count; j++)
                    {
                        writer.Write(j + 1);
                        writer.Write("\t");

                        var alignedFeature = alignedFeatureList[j][i];
                        if (alignedFeature == null)
                        {
                            // No feature for this dataset in this alignment group: emit a row of 15 zeros.
                            for (var k = 0; k < 14; k++)
                            {
                                writer.Write("0\t");
                            }
                            writer.Write("0\n");
                            continue;
                        }

                        writer.Write(LcMsFeatureFinderLauncher.GetString(alignedFeature));
                        writer.Write("\t");

                        // Last column: semicolon-separated scan numbers of the attached PrSMs (empty if none set).
                        if (alignedFeature.ProteinSpectrumMatches != null)
                        {
                            var scanNums = string.Join(";", alignedFeature.ProteinSpectrumMatches.Select(prsm => prsm.ScanNum));
                            writer.Write(scanNums);
                        }

                        writer.Write("\n");
                    }
                }
            }
        }
Esempio n. 23
0
        /// <summary>
        /// Times two identical passes of product-ion XIC extraction for 100000 random
        /// (product m/z, precursor m/z) queries against a run obtained via
        /// <c>InMemoryLcMsRun.GetLcMsRun</c>, printing the elapsed seconds of each pass.
        /// </summary>
        /// <remarks>
        /// The test is ignored when the raw file is missing. Both passes call the same method on the same
        /// run; the extracted chromatograms are discarded.
        /// </remarks>
        public void TestGeneratingProductManyXics()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            const string inputPath = TestRawFilePath;
            if (!File.Exists(inputPath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, inputPath);
            }

            var lcmsRun      = InMemoryLcMsRun.GetLcMsRun(inputPath);
            var ppmTolerance = new Tolerance(10);

            // Random query set: product m/z drawn from [50, 1500), precursor m/z from [390, 810).
            const int queryCount = 100000;
            var productMzs   = new double[queryCount];
            var precursorMzs = new double[queryCount];
            var random       = new Random();

            for (var q = 0; q < queryCount; q++)
            {
                productMzs[q]   = random.NextDouble() * 1450.0 + 50.0;
                precursorMzs[q] = random.NextDouble() * (810.0 - 390.0) + 390.0;
            }

            // method 1
            var timer = System.Diagnostics.Stopwatch.StartNew();
            for (var q = 0; q < queryCount; q++)
            {
                var halfWidth = ppmTolerance.GetToleranceAsTh(productMzs[q]);
                lcmsRun.GetFullProductExtractedIonChromatogram(productMzs[q] - halfWidth, productMzs[q] + halfWidth, precursorMzs[q]);
            }
            timer.Stop();

            Console.WriteLine(@"Method 1: {0:f4} sec", timer.Elapsed.TotalSeconds);

            // method 2 (same call, timed again from zero)
            timer.Restart();
            for (var q = 0; q < queryCount; q++)
            {
                var halfWidth = ppmTolerance.GetToleranceAsTh(productMzs[q]);
                lcmsRun.GetFullProductExtractedIonChromatogram(productMzs[q] - halfWidth, productMzs[q] + halfWidth, precursorMzs[q]);
            }
            timer.Stop();

            Console.WriteLine(@"Method 2: {0:f4} sec", timer.Elapsed.TotalSeconds);

            Console.WriteLine("Done");
        }
        /// <summary>
        /// Loads ProMex features (<c>.ms1ft</c> plus <c>.seqtag.ms1ft</c>) for every dataset, tags them with
        /// MSPathFinder identifications when an <c>_IcTda.tsv</c> file exists, aligns the features across
        /// datasets, fills in missing features, and writes a cross-tab to <paramref name="outFilePath"/>.
        /// </summary>
        /// <param name="datasets">Dataset names; each is combined with the folders below to form file paths.</param>
        /// <param name="mspfFolder">Folder holding the <c>{dataset}_IcTda.tsv</c> identification files.</param>
        /// <param name="ms1ftFolder">Folder holding the <c>{dataset}.ms1ft</c> and <c>{dataset}.seqtag.ms1ft</c> files.</param>
        /// <param name="outFilePath">Path passed to <c>AnalysisCompRef.OutputCrossTabWithId</c>.</param>
        public void AlignFeatures(List <string> datasets, string mspfFolder, string ms1ftFolder, string outFilePath)
        {
            var datasetCount = datasets.Count;
            var idReader     = new ProteinSpectrumMatchReader();
            var ppmTolerance = new Tolerance(12);
            var aligner      = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(ppmTolerance));

            for (var d = 0; d < datasetCount; d++)
            {
                var pbfPath         = string.Format(@"{0}\{1}.pbf", PbfPath, datasets[d]);
                var idPath          = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, datasets[d]);
                var ms1FtPath       = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, datasets[d]);
                var seqTagMs1FtPath = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, datasets[d]);

                var run = PbfLcMsRun.GetLcMsRun(pbfPath);

                // Pool the regular and the sequence-tag feature lists for this dataset.
                var pooledFeatures = LcMsFeatureAlignment.LoadProMexResult(d, ms1FtPath, run);
                var seqTagFeatures = LcMsFeatureAlignment.LoadProMexResult(d, seqTagMs1FtPath, run);
                pooledFeatures.AddRange(seqTagFeatures);

                if (File.Exists(idPath))
                {
                    var prsms = idReader.LoadIdentificationResult(idPath, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                    // The protein name doubles as the protein id for these results.
                    foreach (var prsm in prsms)
                    {
                        prsm.ProteinId = prsm.ProteinName;
                    }

                    // Attach each PrSM to every feature whose scan range strictly contains its scan
                    // and whose mass is within tolerance.
                    foreach (var feature in pooledFeatures)
                    {
                        var massWindow = ppmTolerance.GetToleranceAsTh(feature.Mass);
                        foreach (var prsm in prsms)
                        {
                            var scanInsideFeature = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                            if (!scanInsideFeature)
                            {
                                continue;
                            }

                            if (Math.Abs(feature.Mass - prsm.Mass) < massWindow)
                            {
                                feature.ProteinSpectrumMatches.Add(prsm);
                            }
                        }
                    }
                }

                aligner.AddDataSet(d, pooledFeatures, run);
            }

            aligner.AlignFeatures();

            Console.WriteLine("{0} alignments ", aligner.CountAlignedFeatures);

            for (var d = 0; d < datasetCount; d++)
            {
                aligner.FillMissingFeatures(d);
                Console.WriteLine("{0} has been processed", datasets[d]);
            }

            AnalysisCompRef.OutputCrossTabWithId(outFilePath, aligner, datasets.ToArray());
        }
Esempio n. 25
0
        /// <summary>
        /// For every dataset in <c>TrainSetFileLists</c>, reads the filtered identification rows
        /// (<c>.trainset.tsv</c>), looks up the best matching LC-MS feature for each row, and writes:
        /// a <c>.ms1ft</c> table pairing each identification with the chosen feature (or zeros when none is
        /// found), plus target and decoy envelope statistics files via <c>OutputEnvelopPeakStat</c>.
        /// </summary>
        /// <remarks>
        /// The test is ignored when the identification folder is missing; a dataset is skipped with a console
        /// warning when its data file or filtered identification file is missing. All three output writers
        /// are disposed even if processing throws.
        /// </remarks>
        public void ExtractLcMsFeaturesForTrainingSet()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string idFileFolder = @"D:\MassSpecFiles\training\FilteredIdResult";

            if (!Directory.Exists(idFileFolder))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, idFileFolder);
            }

            var tolerance  = new Tolerance(10);
            var tolerance2 = new Tolerance(20);
            var id         = 1; // running id passed to OutputEnvelopPeakStat, shared across datasets

            for (var d = 0; d < TrainSetFileLists.Length; d++)
            {
                var dataset            = TrainSetFileLists[d];
                var dataname           = Path.GetFileNameWithoutExtension(dataset);
                var filtedIdResultFile = string.Format(@"{0}\{1}.trainset.tsv", idFileFolder, dataname);
                var featureResult      = string.Format(@"{0}\{1}.ms1ft", idFileFolder, dataname);

                if (!File.Exists(dataset))
                {
                    Console.WriteLine(@"Warning: Skipping since file not found: {0}", dataset);
                    continue;
                }
                if (!File.Exists(filtedIdResultFile))
                {
                    Console.WriteLine(@"Warning: Skipping since file not found: {0}", filtedIdResultFile);
                    continue;
                }

                var run = PbfLcMsRun.GetLcMsRun(dataset);

                using (var targetStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}.tsv", dataname)))
                using (var decoyStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}_decoy.tsv", dataname)))
                using (var writer = new StreamWriter(featureResult))
                {
                    // First five columns echo the identification row; the remaining nine describe the feature.
                    writer.Write("Ms2MinScan\tMs2MaxScan\tMs2MinCharge\tMs2MaxCharge\tMs2Mass\t");
                    writer.Write("Mass\tMinScan\tMaxScan\tMinCharge\tMaxCharge\tMinTime\tMaxTime\tElution\tGood\n");
                    var tsvParser = new TsvFileParser(filtedIdResultFile);

                    var featureFinder = new LcMsPeakMatrix(run);

                    // Fetch each column once instead of once per row.
                    var minScanColumn   = tsvParser.GetData("MinScan");
                    var maxScanColumn   = tsvParser.GetData("MaxScan");
                    var minChargeColumn = tsvParser.GetData("MinCharge");
                    var maxChargeColumn = tsvParser.GetData("MaxCharge");
                    var massColumn      = tsvParser.GetData("Mass");

                    for (var i = 0; i < tsvParser.NumData; i++)
                    {
                        var minScan   = int.Parse(minScanColumn[i]);
                        var maxScan   = int.Parse(maxScanColumn[i]);
                        var minCharge = int.Parse(minChargeColumn[i]);
                        var maxCharge = int.Parse(maxChargeColumn[i]);
                        var mass      = double.Parse(massColumn[i]);

                        writer.Write(minScan);
                        writer.Write("\t");
                        writer.Write(maxScan);
                        writer.Write("\t");
                        writer.Write(minCharge);
                        writer.Write("\t");
                        writer.Write(maxCharge);
                        writer.Write("\t");
                        writer.Write(mass);
                        writer.Write("\t");

                        var binNum = featureFinder.Comparer.GetBinNumber(mass);

                        var binMass = featureFinder.Comparer.GetMzAverage(binNum);

                        // Try the mass's own bin first, then the neighbour on the side the mass leans toward,
                        // then the other neighbour.
                        var binNumList = (mass < binMass) ? new int[] { binNum, binNum - 1, binNum + 1 } : new int[] { binNum, binNum + 1, binNum - 1 };
                        LcMsPeakCluster refinedFeature = null;

                        // 20 ppm below 2000 Da, 10 ppm otherwise.
                        var massTh  = (mass < 2000) ? tolerance2.GetToleranceAsTh(mass) : tolerance.GetToleranceAsTh(mass);
                        var midScan = 0.5 * (minScan + maxScan);

                        foreach (var bi in binNumList)
                        {
                            // Among features within mass tolerance whose scan range strictly contains the
                            // midpoint of the identified scan range, keep the most abundant one.
                            var highestAbu = 0d;
                            foreach (var feature in featureFinder.FindFeatures(bi))
                            {
                                if (!(Math.Abs(mass - feature.Mass) < massTh))
                                {
                                    continue;
                                }

                                if (feature.MinScanNum < midScan && midScan < feature.MaxScanNum &&
                                    feature.Abundance > highestAbu)
                                {
                                    refinedFeature = feature;
                                    highestAbu     = feature.Abundance;
                                }
                            }

                            if (refinedFeature != null)
                            {
                                break;
                            }
                        }

                        if (refinedFeature != null)
                        {
                            writer.Write(refinedFeature.Mass);
                            writer.Write("\t");
                            writer.Write(refinedFeature.MinScanNum);
                            writer.Write("\t");
                            writer.Write(refinedFeature.MaxScanNum);
                            writer.Write("\t");
                            writer.Write(refinedFeature.MinCharge);
                            writer.Write("\t");
                            writer.Write(refinedFeature.MaxCharge);
                            writer.Write("\t");
                            writer.Write(refinedFeature.MinElutionTime);
                            writer.Write("\t");
                            writer.Write(refinedFeature.MaxElutionTime);
                            writer.Write("\t");
                            writer.Write(refinedFeature.MaxElutionTime - refinedFeature.MinElutionTime);
                            writer.Write("\t");

                            // "Good" = the feature's scan range covers the whole identified scan range.
                            var good = (refinedFeature.MinScanNum <= minScan && refinedFeature.MaxScanNum >= maxScan);
                            writer.Write(good ? 1 : 0);
                            writer.Write("\n");

                            OutputEnvelopPeakStat(id, refinedFeature, targetStatWriter);

                            // Re-score the same feature as a decoy and record its statistics under the same id.
                            var chargeRange = featureFinder.GetDetectableMinMaxCharge(refinedFeature.RepresentativeMass, run.MinMs1Mz, run.MaxMs1Mz);
                            refinedFeature.UpdateWithDecoyScore(featureFinder.Ms1Spectra, chargeRange.Item1, chargeRange.Item2);
                            OutputEnvelopPeakStat(id, refinedFeature, decoyStatWriter);
                            id++;
                        }
                        else
                        {
                            // No feature found: fill the nine feature columns with zeros.
                            const int featureColumnCount = 9;
                            for (var col = 0; col < featureColumnCount; col++)
                            {
                                writer.Write(0);
                                writer.Write(col < featureColumnCount - 1 ? "\t" : "\n");
                            }
                        }
                    }
                }

                Console.WriteLine(dataname);
            }
        }
        /// <summary>
        /// Walks the Study3 datasets (fraction indices 1..3 by 1..5), determines which PrSMs of each
        /// dataset are not covered by any loaded ProMex feature, and hands those PrSMs to
        /// <c>FeatureFind</c> together with the run and the ".seqtag.ms1ft" output path.
        /// A dataset is skipped when its output file already exists.
        /// </summary>
        /// <remarks>
        /// A PrSM counts as covered when its scan number lies strictly between a feature's
        /// MinScanNum and MaxScanNum and its mass differs from the feature mass by less than the
        /// tolerance computed for that feature mass.
        /// </remarks>
        public void FindMissingLcMsFeatures()
        {
            const int fraction1Count = 3;
            const int fraction2Count = 5;

            var ms1ftFolder = @"D:\MassSpecFiles\CompRef_Kelleher\Study3";
            var mspfFolder  = @"D:\MassSpecFiles\CompRef_Kelleher\Study3";

            for (var frac1 = 1; frac1 <= fraction1Count; frac1++)
            {
                for (var frac2 = 1; frac2 <= fraction2Count; frac2++)
                {
                    var datasets   = GetDataSetNamesStudy3(frac1, frac2);
                    var nDataset   = datasets.Count;
                    var prsmReader = new ProteinSpectrumMatchReader();
                    var tolerance  = new Tolerance(12);

                    for (var dataIndex = 0; dataIndex < nDataset; dataIndex++)
                    {
                        var dataName = datasets[dataIndex];

                        // Nothing to do when the result for this dataset was already produced.
                        var outPath = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, dataName);
                        if (File.Exists(outPath))
                        {
                            continue;
                        }

                        var rawFile   = string.Format(@"{0}\{1}.pbf", PbfPath, dataName);
                        var mspFile   = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, dataName);
                        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, dataName);

                        var run      = PbfLcMsRun.GetLcMsRun(rawFile);
                        var features = LcMsFeatureAlignment.LoadProMexResult(dataIndex, ms1FtFile, run);
                        var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                        // covered[p] becomes true once PrSM p has been attached to at least one feature.
                        var covered = new bool[prsmList.Count];

                        for (var f = 0; f < features.Count; f++)
                        {
                            var feature = features[f];
                            var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                            for (var p = 0; p < prsmList.Count; p++)
                            {
                                var prsm = prsmList[p];

                                var scanInside = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                                if (!scanInside)
                                {
                                    continue;
                                }

                                var massMatches = Math.Abs(feature.Mass - prsm.Mass) < massTol;
                                if (!massMatches)
                                {
                                    continue;
                                }

                                feature.ProteinSpectrumMatches.Add(prsm);
                                covered[p] = true;
                            }
                        }

                        // Everything that was never attached to a feature is "missing".
                        var missingPrsm = new List <ProteinSpectrumMatch>();
                        for (var p = 0; p < covered.Length; p++)
                        {
                            if (covered[p])
                            {
                                continue;
                            }

                            missingPrsm.Add(prsmList[p]);
                        }

                        FeatureFind(missingPrsm, run, outPath);
                        Console.WriteLine(outPath);
                    }
                }
            }
        }
Esempio n. 27
0
        /// <summary>
        /// Loads the ProMex features of the six IMER runs, tags them with MS-GF+ PrSMs when the
        /// identification file exists, aligns the features across runs, fills in missing features
        /// per run, and writes the cross-tab to <c>promex_crosstab.tsv</c>.
        /// </summary>
        public void TestIMERFeatureAlignment()
        {
            const string rawFolder   = @"D:\MassSpecFiles\IMER";
            const string outFilePath = @"D:\MassSpecFiles\IMER\promex_crosstab.tsv";

            var runLabels = new string[] { "1", "2", "3", "4", "5", "6" };
            var nDataset  = runLabels.Length;

            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance  = new Tolerance(10);
            var alignment  = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

            for (var runIndex = 0; runIndex < nDataset; runIndex++)
            {
                var label = runLabels[runIndex];

                // Runs "2" and "3" carry day 14 in their file names, all other runs day 13.
                int day;
                if (label == "2" || label == "3")
                {
                    day = 14;
                }
                else
                {
                    day = 13;
                }

                // All three files of a run share the same path stem and differ only in the suffix.
                var pathStem  = string.Format(@"{0}\Diabetes_iPSC_Beta_{1}_IMER_{2}May14_Alder_14-01-33", rawFolder, label, day);
                var rawFile   = pathStem + ".pbf";
                var mspFile   = pathStem + "_msgfdb_syn.txt";
                var ms1FtFile = pathStem + ".ms1ft";

                Console.WriteLine(rawFile);
                var rawFileExists = File.Exists(rawFile);
                Console.WriteLine(rawFileExists);

                var run      = PbfLcMsRun.GetLcMsRun(rawFile);
                var features = LcMsFeatureAlignment.LoadProMexResult(runIndex, ms1FtFile, run, 500, 15000);

                if (File.Exists(mspFile))
                {
                    var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsGfPlus);

                    // Use the protein name as the protein id for every PrSM before tagging.
                    foreach (var prsm in prsmList)
                    {
                        prsm.ProteinId = prsm.ProteinName;
                    }

                    // Tag each feature with the PrSMs whose scan lies strictly inside the feature's
                    // scan range and whose mass is within tolerance of the feature mass.
                    for (var f = 0; f < features.Count; f++)
                    {
                        var feature = features[f];
                        var massTol = tolerance.GetToleranceAsTh(feature.Mass);

                        foreach (var prsm in prsmList)
                        {
                            var scanInside = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                            if (!scanInside)
                            {
                                continue;
                            }

                            var massMatches = Math.Abs(feature.Mass - prsm.Mass) < massTol;
                            if (massMatches)
                            {
                                feature.ProteinSpectrumMatches.Add(prsm);
                            }
                        }
                    }
                }

                alignment.AddDataSet(runIndex, features, run);
            }

            alignment.AlignFeatures();

            Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

            for (var runIndex = 0; runIndex < nDataset; runIndex++)
            {
                alignment.FillMissingFeatures(runIndex);
                Console.WriteLine("{0} has been processed", runLabels[runIndex]);
            }

            OutputCrossTabWithId(outFilePath, alignment, runLabels);
        }
Esempio n. 28
0
        /// <summary>
        /// Searches <c>_ms2Spec</c> for the isotope peaks of <paramref name="ion"/>. The search starts
        /// at the most abundant isotope and then walks toward lower and toward higher isotope
        /// indices, stopping in each direction at the first isotope whose relative intensity is
        /// below <paramref name="relativeIntensityThreshold"/>.
        /// </summary>
        /// <param name="ion">Ion whose isotopomer envelope is looked up in the spectrum.</param>
        /// <param name="tolerance">Tolerance used to locate the base peak and to build the m/z window of every other isotope.</param>
        /// <param name="relativeIntensityThreshold">Minimum relative envelope intensity for an isotope to be considered.</param>
        /// <param name="baseIsotopePeakIndex">Index returned by <c>_ms2Spec.FindPeakIndex</c> for the most abundant isotope; negative when no peak was found.</param>
        /// <param name="nIsotopes">Number of envelope entries whose relative intensity is at least <paramref name="relativeIntensityThreshold"/>.</param>
        /// <param name="nMatchedIsotopes">Number of isotopes (including the base isotope) for which a peak was found before the method returned.</param>
        /// <returns>
        /// <c>false</c> when the base isotope peak is missing, or when the walk toward a neighboring
        /// isotope passes its m/z window without finding a peak inside it; <c>true</c> otherwise.
        /// </returns>
        private bool FindIon(Ion ion, Tolerance tolerance, double relativeIntensityThreshold, out int baseIsotopePeakIndex, out int nIsotopes, out int nMatchedIsotopes)
        {
            var baseIsotopeIndex   = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
            var isotopomerEnvelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
            var baseIsotopeMz      = ion.GetIsotopeMz(baseIsotopeIndex);

            baseIsotopePeakIndex = _ms2Spec.FindPeakIndex(baseIsotopeMz, tolerance);

            // Count only the isotopes at or above the threshold. The previous
            // Select(predicate).Count() merely projected every entry to a bool and therefore
            // always returned the full envelope length.
            nIsotopes        = isotopomerEnvelope.Count(x => x >= relativeIntensityThreshold);
            nMatchedIsotopes = 0;

            if (baseIsotopePeakIndex < 0)
            {
                return(false);
            }

            // The base isotope itself is the first match.
            nMatchedIsotopes++;

            // NOTE(review): in both walks below, running off the end of the peak list without
            // finding a peak does NOT fail the match (the loop simply ends), whereas passing the
            // m/z window does. Confirm whether that asymmetry is intended.

            // go down: lower isotope indices, scanning peaks toward lower m/z
            var peakIndex = baseIsotopePeakIndex;

            for (var isotopeIndex = baseIsotopeIndex - 1; isotopeIndex >= 0; isotopeIndex--)
            {
                if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
                {
                    break;
                }

                var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
                var tolTh     = tolerance.GetToleranceAsTh(isotopeMz);
                var minMz     = isotopeMz - tolTh;
                var maxMz     = isotopeMz + tolTh;
                for (var i = peakIndex - 1; i >= 0; i--)
                {
                    var peakMz = _ms2Spec.Peaks[i].Mz;
                    if (peakMz < minMz)
                    {
                        // Already below the window: this isotope has no peak.
                        return(false);
                    }
                    if (peakMz <= maxMz)    // found a match, move to the previous isotope
                    {
                        peakIndex = i;
                        nMatchedIsotopes++;
                        break;
                    }
                }
            }

            // go up: higher isotope indices, scanning peaks toward higher m/z
            peakIndex = baseIsotopePeakIndex;
            for (var isotopeIndex = baseIsotopeIndex + 1; isotopeIndex < isotopomerEnvelope.Length; isotopeIndex++)
            {
                if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
                {
                    break;
                }

                var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
                var tolTh     = tolerance.GetToleranceAsTh(isotopeMz);
                var minMz     = isotopeMz - tolTh;
                var maxMz     = isotopeMz + tolTh;
                for (var i = peakIndex + 1; i < _ms2Spec.Peaks.Length; i++)
                {
                    var peakMz = _ms2Spec.Peaks[i].Mz;
                    if (peakMz > maxMz)
                    {
                        // Already above the window: this isotope has no peak.
                        return(false);
                    }
                    if (peakMz >= minMz)    // found a match, move to the next isotope
                    {
                        peakIndex = i;
                        nMatchedIsotopes++;
                        break;
                    }
                }
            }

            return(true);
        }
Esempio n. 29
0
        /// <summary>
        /// Tags the rows of the aligned feature table (<c>aligned_features.tsv</c>) with
        /// identifications read from the per-dataset <c>*_IcTda.tsv</c> files and writes the tagged
        /// rows to <c>aligned_ids.tsv</c>.
        /// </summary>
        /// <remarks>
        /// For each dataset the identification rows are read in file order until the first row with
        /// QValue above 0.01. An identification is assigned to the first aligned feature whose
        /// MonoMass is within the tolerance (from <c>new Tolerance(12)</c>) of the identification
        /// mass and whose scan range for that dataset strictly contains the identification scan.
        /// Only the first identification assigned to a feature is kept.
        /// The neighbor search uses <c>BinarySearch</c> and so assumes the MonoMass column is sorted
        /// ascending — NOTE(review): confirm against the code that writes the aligned feature table.
        /// The test is ignored when the feature or MSP directory does not exist.
        /// </remarks>
        public void TestTagAlignedFeatures()
        {
            // Number of columns named "0", "1", ... that are copied from the aligned feature table.
            const int nDataColumns = 32;

            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            var featureDir = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Output";
            var mspDir     = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Output\MSP";
            var outFile    = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Output\aligned_features.tsv";
            var resultFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Output\aligned_ids.tsv";

            if (!Directory.Exists(featureDir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, featureDir);
            }

            if (!Directory.Exists(mspDir))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, mspDir);
            }

            var dataset = GetDataList(featureDir);

            var tsvParser      = new TsvFileParser(outFile);
            var monoMassColumn = tsvParser.GetData("MonoMass");
            var massList       = new List <double>();

            for (var i = 0; i < tsvParser.NumData; i++)
            {
                massList.Add(Double.Parse(monoMassColumn[i]));
            }

            var featureIdMap = new Dictionary <int, string>();
            var tolerance    = new Tolerance(12);
            var headers      = new List <string>();

            for (var d = 0; d < dataset.Count; d++)
            {
                var data = dataset[d];

                // Scan-range columns of this dataset in the aligned feature table; looked up once
                // per dataset instead of once per candidate feature.
                var minScanColumn = tsvParser.GetData(String.Format("{0}_minScan", d));
                var maxScanColumn = tsvParser.GetData(String.Format("{0}_maxScan", d));

                var fname    = String.Format(@"{0}\{1}_IcTda.tsv", mspDir, data);
                var idParser = new TsvFileParser(fname);
                var idRows   = idParser.GetRows();
                if (headers.Count < 1)
                {
                    headers.AddRange(idParser.GetHeaders());
                }

                var scanColumn   = idParser.GetData("Scan");
                var massColumn   = idParser.GetData("Mass");
                var qValueColumn = idParser.GetData("QValue");

                for (var i = 0; i < idParser.NumData; i++)
                {
                    var scan   = Int32.Parse(scanColumn[i]);
                    var mass   = Double.Parse(massColumn[i]);
                    var qvalue = Double.Parse(qValueColumn[i]);

                    // Stop at the first row above the q-value cut-off (as before, this relies on the
                    // rows being ordered by QValue).
                    if (qvalue > 0.01)
                    {
                        break;
                    }

                    var massTol = tolerance.GetToleranceAsTh(mass);

                    // idx is either an exact hit or the insertion point, i.e. the index of the first
                    // entry heavier than mass. The insertion point can equal massList.Count.
                    var idx = massList.BinarySearch(mass);
                    if (idx < 0)
                    {
                        idx = ~idx;
                    }

                    var found = false;

                    // Scan toward lighter entries. Clamp the start so that an insertion point past
                    // the end of the list (or an empty list) does not index out of range.
                    for (var j = Math.Min(idx, massList.Count - 1); j >= 0; j--)
                    {
                        if (Math.Abs(mass - massList[j]) > massTol)
                        {
                            // The entry at the insertion point is heavier than the query; it being
                            // out of tolerance says nothing about the lighter entries below it, so
                            // keep scanning instead of giving up.
                            if (j == idx)
                            {
                                continue;
                            }
                            break;
                        }

                        // Feature was not observed in this dataset.
                        if (minScanColumn[j].Length < 1)
                        {
                            continue;
                        }

                        if (Int32.Parse(minScanColumn[j]) < scan && scan < Int32.Parse(maxScanColumn[j]))
                        {
                            found = true;
                            if (!featureIdMap.ContainsKey(j))
                            {
                                featureIdMap.Add(j, idRows[i]);
                            }
                            break;
                        }
                    }

                    if (found)
                    {
                        continue;
                    }

                    // Scan toward heavier entries.
                    for (var j = idx + 1; j < massList.Count; j++)
                    {
                        if (Math.Abs(mass - massList[j]) > massTol)
                        {
                            break;
                        }
                        if (minScanColumn[j].Length < 1)
                        {
                            continue;
                        }
                        if (Int32.Parse(minScanColumn[j]) < scan && scan < Int32.Parse(maxScanColumn[j]))
                        {
                            if (!featureIdMap.ContainsKey(j))
                            {
                                featureIdMap.Add(j, idRows[i]);
                            }
                            break;
                        }
                    }
                }
            }

            // Dispose the writer even when writing a row throws.
            using (var writer = new StreamWriter(resultFile))
            {
                writer.Write("AlignedFeatureID"); writer.Write("\t");
                writer.Write(string.Join("\t", headers));
                for (var i = 0; i < nDataColumns; i++)
                {
                    writer.Write("\t");  writer.Write("{0}", i);
                }
                writer.Write("\n");

                var id = 1;

                foreach (var entry in featureIdMap)
                {
                    writer.Write(id); writer.Write("\t");
                    writer.Write(entry.Value);
                    for (var i = 0; i < nDataColumns; i++)
                    {
                        writer.Write("\t"); writer.Write("{0}", tsvParser.GetData(String.Format("{0}", i))[entry.Key]);
                    }
                    writer.Write("\n");
                    id++;
                }
            }
        }