Exemple #1
0
        // Lists every peptide yielded by the indexed FASTA database for a tryptic
        // enumeration and prints, tab-separated, the peptide, its plain-string
        // formula (composition plus one water) and the name of the source protein.
        // The test is ignored when the hard-coded FASTA file is not present.
        public void CreateTargetList()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            const string fastaPath = @"D:\Research\Data\IPRG2014\database\SpikedInPeptides.fasta";
            if (!File.Exists(fastaPath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, fastaPath);
            }

            var fastaDb = new FastaDatabase(fastaPath);
            fastaDb.Read();

            var indexedDb   = new IndexedDatabase(fastaDb);
            var targetCount = 0;
            var aminoAcids  = new AminoAcidSet(Modification.Carbamidomethylation);

            Console.WriteLine("Peptide\tFormula\tProtein");

            // NOTE(review): the numeric arguments are presumably min/max length and
            // tolerated termini / missed cleavages - confirm against IndexedDatabase.
            foreach (var entry in indexedDb.AnnotationsAndOffsets(6, 30, 1, 1, Enzyme.Trypsin))
            {
                var annotated = entry.Annotation;

                // Drop the first two and the last two characters of the annotation.
                var residues      = annotated.Substring(2, annotated.Length - 4);
                var proteinOffset = entry.Offset;

                var formula     = (aminoAcids.GetComposition(residues) + Composition.H2O).ToPlainString();
                var proteinName = fastaDb.GetProteinName(proteinOffset);

                Console.WriteLine("{0}\t{1}\t{2}", residues, formula, proteinName);
                targetCount++;
            }

            Console.WriteLine("NumTargets: {0}", targetCount);
        }
Exemple #2
0
        // Builds a SequenceGraph for the annotation "_.STR._" using an amino acid
        // set with variable phosphorylation (S/T/Y), variable oxidation (M) and
        // fixed carbamidomethylation (C), then checks that the graph's unmodified
        // sequence composition equals the composition the amino acid set computes
        // for the bare sequence. All sequence compositions of the graph are printed.
        public void TestSequenceGraph()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            ShowStarting(methodName);

            var phosPhoS            = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
            var phosPhoT            = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
            var phosPhoY            = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
            var oxM                 = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var fixCarbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);

            var searchModifications = new List<SearchModification> {
                phosPhoS, phosPhoT, phosPhoY, oxM, fixCarbamidomethylC
            };
            //var searchModifications = new List<SearchModification> { phosPhoT, fixCarbamidomethylC };
            const int numMaxModsPerPeptide = 2;

            var          aaSet      = new AminoAcidSet(searchModifications, numMaxModsPerPeptide);
            const string annotation = "_.STR._";

            // Strip the two leading and two trailing characters ("_." and "._").
            var pepSeq = annotation.Substring(2, annotation.Length - 4);

            // Reference value: composition computed directly from the amino acid set.
            var expectedComposition = aaSet.GetComposition(pepSeq);
            Console.WriteLine(expectedComposition);

            var graph = SequenceGraph.CreateGraph(aaSet, annotation);

            // Value under test: composition reported by the graph.
            var graphComposition = graph.GetUnmodifiedSequenceComposition();
            Console.WriteLine(graphComposition);

            // NUnit's signature is AreEqual(expected, actual); keeping that order
            // makes the "Expected ... But was ..." failure message read correctly.
            Assert.AreEqual(expectedComposition, graphComposition);

            Console.WriteLine("Annotation Compositions:");
            var index = 0;

            foreach (var composition in graph.GetSequenceCompositions())
            {
                Console.WriteLine(index + ": " + composition);
                index++;
            }

            //const int seqIndex = 1;
            //Console.WriteLine("Fragment Compositions (" + seqIndex +")");
            //var scoringGraph = graph.GetScoringGraph(seqIndex);
            //foreach (var composition in scoringGraph.GetCompositions())
            //{
            //    Console.WriteLine(composition);
            //}
        }
Exemple #3
0
        // Enumerates every sequence of length 300-400 from the test FASTA file (no
        // enzyme), compares the rounded rescaled mass against the nominal mass of
        // each sequence, and prints a histogram of the integer differences. The
        // histogram has 11 bins centred on zero; differences outside the range are
        // counted in the first or last bin.
        public void TestNominalMassErrors()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            const int minLength = 300;
            const int maxLength = 400;

            var fastaFile = Utils.GetTestFile(testName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\ID_003962_71E1A1D4.fasta"));

            var fastaDb = new FastaDatabase(fastaFile.FullName);
            fastaDb.Read();

            var indexedDb     = new IndexedDatabase(fastaDb);
            var sequenceCount = 0L;

            // Timing starts after the database has been read and indexed.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            var histogram  = new long[11];
            var aminoAcids = new AminoAcidSet();

            foreach (var entry in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
            {
                ++sequenceCount;

                var annotated = entry.Annotation;

                // Drop the first two and the last two characters of the annotation.
                var residues    = annotated.Substring(2, annotated.Length - 4);
                var composition = aminoAcids.GetComposition(residues);

                var exactMass   = composition.Mass;
                var nominalMass = composition.NominalMass;
                var difference  = (int)Math.Round(exactMass * Constants.RescalingConstant) - nominalMass;

                // Shift so that a difference of zero lands in the middle bin, then
                // clamp into the valid index range.
                var bin = Math.Max(0, Math.Min(histogram.Length - 1, difference + histogram.Length / 2));
                histogram[bin]++;
            }

            Console.WriteLine("NumSequences: {0}", sequenceCount);

            var bin0 = 0;
            while (bin0 < histogram.Length)
            {
                Console.WriteLine("{0}\t{1}\t{2}", bin0 - histogram.Length / 2, histogram[bin0], histogram[bin0] / (double)sequenceCount);
                bin0++;
            }

            timer.Stop();

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
        }
Exemple #4
0
        // Enumerates every sequence of length 300-400 (no enzyme) from a hard-coded
        // local FASTA file and prints an 11-bin histogram of the difference between
        // the rounded rescaled mass and the nominal mass of each sequence.
        // Out-of-range differences are counted in the first or last bin.
        public void TestNominalMassErrors()
        {
            const int minLength = 300;
            const int maxLength = 400;

            // Alternative inputs used previously:
            //   \\proto-2\UnitTest_Files\InformedProteomics_TestFiles\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta
            //   C:\cygwin\home\kims336\Data\TopDownJia\database\TargetProteins.fasta
            const string dbFile = @"C:\cygwin\home\kims336\Data\TopDownJia\database\ID_003962_71E1A1D4.fasta";

            var fastaDb = new FastaDatabase(dbFile);
            fastaDb.Read();

            var indexedDb     = new IndexedDatabase(fastaDb);
            var sequenceCount = 0L;

            // Timing starts after the database has been read and indexed.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            var histogram  = new long[11];
            var aminoAcids = new AminoAcidSet();

            foreach (var entry in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
            {
                ++sequenceCount;

                var annotated = entry.Annotation;

                // Drop the first two and the last two characters of the annotation.
                var residues    = annotated.Substring(2, annotated.Length - 4);
                var composition = aminoAcids.GetComposition(residues);

                var exactMass   = composition.Mass;
                var nominalMass = composition.NominalMass;
                var difference  = (int)Math.Round(exactMass * Constants.RescalingConstant) - nominalMass;

                // Centre on the middle bin and clamp into the valid index range.
                var bin = Math.Max(0, Math.Min(histogram.Length - 1, difference + histogram.Length / 2));
                histogram[bin]++;
            }

            Console.WriteLine("NumSequences: {0}", sequenceCount);

            var row = 0;
            while (row < histogram.Length)
            {
                Console.WriteLine("{0}\t{1}\t{2}", row - histogram.Length / 2, histogram[row], histogram[row] / (double)sequenceCount);
                row++;
            }

            timer.Stop();

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
        }
Exemple #5
0
        // Reads peptide sequences (one per line, empty lines skipped) from a fixed
        // local file, computes the monoisotopic m/z of each peptide (composition
        // plus one water) at charge 2, and prints one row per precursor whose m/z
        // lies in [400, 900). A four-bin histogram (125 m/z units per bin) of those
        // precursors is printed at the end, together with each bin's share.
        public void ComputeSpikedInPeptideMzHist()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string pepListFile = @"C:\cygwin\home\kims336\Data\DIA\SpikedPeptides.txt";

            var aaSet   = new AminoAcidSet(Modification.Carbamidomethylation);
            var charges = new[] { 2 };

            // Four bins of width 125 covering [400, 900).
            var hist = new int[4];

            // Number of precursors that fell inside the histogram range.
            var sum = 0;

            // The rows below emit four tab-separated values, so the header names
            // four columns (the histogram bin index was previously unlabeled).
            Console.WriteLine("Peptide\tCharge\tMz\tHistIndex");
            foreach (var line in File.ReadLines(pepListFile))
            {
                if (line.Length == 0)
                {
                    continue;
                }
                var peptide     = line;
                var composition = aaSet.GetComposition(peptide) + Composition.H2O;

                foreach (var charge in charges)
                {
                    var precursorIon   = new Ion(composition, charge);
                    var precursorIonMz = precursorIon.GetMonoIsotopicMz();

                    // Precursors outside the histogram range are not reported.
                    if (precursorIonMz < 400 || precursorIonMz >= 900)
                    {
                        continue;
                    }
                    var histIndex = (int)((precursorIonMz - 400) / 125);
                    hist[histIndex]++;

                    Console.WriteLine("{0}\t{1}\t{2}\t{3}", peptide, charge, precursorIonMz, histIndex);

                    sum++;
                }
            }

            Console.WriteLine("\nRange\tNum\tRatio");
            for (var i = 0; i < hist.Length; i++)
            {
                Console.WriteLine("{0}-{1}\t{2}\t{3}", 400 + i * 125, 525 + i * 125, hist[i], hist[i] / (float)sum);
            }
        }
Exemple #6
0
        /// <summary>
        /// Groups sequence-tag matches by protein. Each tag is looked up in
        /// <paramref name="searchableDb"/>; every matched position becomes a
        /// <c>MatchedTag</c> that is added to the <c>MatchedTagSet</c> of the
        /// protein containing that position.
        /// </summary>
        /// <param name="tags">Sequence tags to search for.</param>
        /// <param name="searchableDb">Database used to find matches and to resolve protein names, positions and sequences.</param>
        /// <param name="aaSet">Amino acid set used to compute each tag's mass; also passed to every new <c>MatchedTagSet</c>.</param>
        /// <param name="tolerance">Passed through to every new <c>MatchedTagSet</c>.</param>
        /// <param name="relaxedTolerance">Passed through to every new <c>MatchedTagSet</c>.</param>
        /// <returns>A dictionary keyed by protein name; proteins without any matched tag are absent.</returns>
        public static Dictionary<string, MatchedTagSet> GetProteinToMatchedTagsMap(
            IEnumerable<SequenceTag.SequenceTag> tags,
            SearchableDatabase searchableDb,
            AminoAcidSet aaSet,
            Tolerance tolerance,
            Tolerance relaxedTolerance)
        {
            var fastaDb        = searchableDb.FastaDatabase;
            var proteinsToTags = new Dictionary<string, MatchedTagSet>();

            foreach (var tag in tags)
            {
                var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();

                // Tags without any match contribute nothing; tags matching more
                // than MaxNumProteinMatchesPerTag positions are skipped entirely.
                if (matchedIndices.Length == 0 || matchedIndices.Length > MaxNumProteinMatchesPerTag)
                {
                    continue;
                }

                // The mass depends only on the tag, so compute it once per tag
                // instead of once per matched position.
                var mass = aaSet.GetComposition(tag.Sequence).Mass;

                foreach (var index in matchedIndices)
                {
                    var proteinName = fastaDb.GetProteinName(index);
                    var startIndex  = fastaDb.GetZeroBasedPositionInProtein(index);
                    var matchedTag  = new MatchedTag(tag, startIndex)
                    {
                        Mass = mass
                    };

                    MatchedTagSet matchedTagSet;
                    if (!proteinsToTags.TryGetValue(proteinName, out matchedTagSet))
                    {
                        // When the database has no sequence for this protein, the
                        // protein name is used in its place (behavior kept as-is).
                        var proteinSequence = fastaDb.GetProteinSequence(proteinName) ?? proteinName;

                        matchedTagSet = new MatchedTagSet(proteinSequence, aaSet, tolerance, relaxedTolerance);
                        proteinsToTags.Add(proteinName, matchedTagSet);
                    }

                    matchedTagSet.Add(matchedTag);
                }
            }

            return proteinsToTags;
        }
Exemple #7
0
        // Prints the isotope profile of a fixed protein sequence at charge 9: for
        // each isotope returned by Ion.GetIsotopes(0.1), its m/z and its ratio,
        // separated by a tab.
        public void TestIsoProfile()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            const string sequence = "MWYMISAQDVENSLEKRLAARPAHLARLQELADEGRLLVAGPHPAIDSENPGDAGFSGSLVVADFDSLATAQAWADADPYFAAGVYQSVVVKPFKRVLP";

            var aminoAcids  = new AminoAcidSet();
            var composition = aminoAcids.GetComposition(sequence) + Composition.H2O;
            var chargedIon  = new Ion(composition, 9);

            foreach (var isotope in chargedIon.GetIsotopes(0.1))
            {
                var isotopeMz = chargedIon.GetIsotopeMz(isotope.Index);
                Console.WriteLine(isotopeMz + "\t" + isotope.Ratio);
            }
        }
Exemple #8
0
        /// <summary>
        /// Parses a single spectrum from a newline-separated text block.
        /// Recognized lines:
        /// lines starting with a digit are peaks (two whitespace-separated numbers);
        /// lines starting with <c>CHARGE</c>, <c>SEQ</c> or <c>PEPMASS</c> carry the
        /// value after <c>=</c>. Any other line is ignored.
        /// </summary>
        /// <param name="s">Text of one spectrum; lines may end in LF or CRLF.</param>
        /// <returns>
        /// The spectrum built from the parsed charge, precursor m/z, annotation
        /// (null when no <c>SEQ</c> line is present) and peak list.
        /// </returns>
        /// <exception cref="FormatException">A numeric field cannot be parsed, or <c>PEPMASS</c> has no value.</exception>
        private static MSMSSpectrum ReadSpectrum(string s)
        {
            // Numbers in spectrum files use '.' as the decimal separator regardless
            // of the machine's regional settings.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var fieldSeparators = new[] { ' ', '\t' };

            var      precursorMz = 0.0;
            var      charge      = 0;
            Sequence annotation  = null;
            var      peaks       = new List<MSMSSpectrumPeak>();

            foreach (var rawLine in s.Split('\n'))
            {
                // Input split on '\n' keeps the '\r' of CRLF line endings; remove it
                // so that it cannot end up inside the SEQ annotation.
                var t = rawLine.TrimEnd('\r');

                if (t.Length == 0)
                {
                    continue;
                }

                if (char.IsDigit(t[0]))
                {
                    // Tolerate runs of spaces/tabs between the two peak values.
                    var p = t.Split(fieldSeparators, StringSplitOptions.RemoveEmptyEntries);
                    peaks.Add(new MSMSSpectrumPeak(double.Parse(p[0], invariant), double.Parse(p[1], invariant)));
                }
                else if (t.StartsWith("CHARGE", StringComparison.Ordinal))
                {
                    // Accept "2", "+2" and "2+".
                    var chargeStr = t.Substring(t.IndexOf('=') + 1).Trim();
                    if (chargeStr.StartsWith("+", StringComparison.Ordinal))
                    {
                        chargeStr = chargeStr.Substring(1);
                    }
                    if (chargeStr.EndsWith("+", StringComparison.Ordinal))
                    {
                        chargeStr = chargeStr.Substring(0, chargeStr.Length - 1);
                    }
                    charge = int.Parse(chargeStr, invariant);
                }
                else if (t.StartsWith("SEQ", StringComparison.Ordinal))
                {
                    var annotationStr = t.Substring(t.LastIndexOf('=') + 1);
                    annotation = new Sequence(annotationStr, AminoAcidSet);
                }
                else if (t.StartsWith("PEPMASS", StringComparison.Ordinal))
                {
                    // The value may be followed by further whitespace-separated
                    // fields; only the first one is the precursor m/z.
                    var p = t.Substring(t.IndexOf('=') + 1).Split(fieldSeparators, StringSplitOptions.RemoveEmptyEntries);
                    if (p.Length == 0)
                    {
                        throw new FormatException("PEPMASS line has no value: " + t);
                    }
                    precursorMz = double.Parse(p[0], invariant);
                }
            }

            return new MSMSSpectrum(charge, precursorMz, annotation, peaks);
        }
Exemple #9
0
        // Loads a raw file, builds the charge-2 precursor ion of a fixed peptide
        // and prints its theoretical isotope profile. It then extracts precursor
        // ion chromatograms around a target scan for isotope indices -1 to 2 and
        // prints, for each, the m/z, the number of XIC peaks, the summed intensity
        // relative to the most abundant isotope's XIC, and the correlation with it.
        public void TestXicGen()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
            var          lcMsRun      = InMemoryLcMsRun.GetLcMsRun(specFilePath);

            var massTolerance = new Tolerance(30);

            const string peptide       = "AIANGQVDGFPTQEECR";
            const int    targetScanNum = 37633;
            const int    charge        = 2;

            // Alternative target used previously:
            //   peptide "IVDTNGAGDAFAGGFMAGLTK", scan 67513, charge 3

            var aminoAcids = new AminoAcidSet(Modification.Carbamidomethylation);
            var precursor  = new Ion(aminoAcids.GetComposition(peptide) + Composition.H2O, charge);

            Console.WriteLine("Theoretical isotopomer profile:");
            foreach (var isotope in precursor.GetIsotopes(0.1))
            {
                Console.WriteLine("{0}\t{1}", precursor.GetIsotopeMz(isotope.Index), isotope.Ratio);
            }

            var xicByIsotope         = new Dictionary<int, Xic>();
            var basePeakIsotopeIndex = precursor.Composition.GetMostAbundantIsotopeZeroBasedIndex();

            // Isotope indices covered: -1, 0, 1, 2.
            const int firstIsotopeIndex = -1;
            const int endIsotopeIndex   = 3; // exclusive

            // All chromatograms are extracted first because the report below
            // compares each of them against the base-peak chromatogram.
            for (var isotopeIndex = firstIsotopeIndex; isotopeIndex < endIsotopeIndex; isotopeIndex++)
            {
                var isotopeMz = precursor.GetIsotopeMz(isotopeIndex);
                xicByIsotope[isotopeIndex] = lcMsRun.GetPrecursorExtractedIonChromatogram(isotopeMz, massTolerance, targetScanNum);
            }

            for (var isotopeIndex = firstIsotopeIndex; isotopeIndex < endIsotopeIndex; isotopeIndex++)
            {
                Console.WriteLine("\nIndex: {0}", isotopeIndex);
                Console.WriteLine("m/z: {0}", precursor.GetIsotopeMz(isotopeIndex));

                var xic = xicByIsotope[isotopeIndex];
                Console.WriteLine("#XicPeaks: {0}", xic.Count);

                var baseXic = xicByIsotope[basePeakIsotopeIndex];
                Console.WriteLine("Intensity: {0}", xic.GetSumIntensities() / baseXic.GetSumIntensities());
                Console.WriteLine("Correlation: {0}", xic.GetCorrelation(baseXic));
            }
        }
Exemple #10
0
        // For every row of a result TSV file, recomputes the composition from the
        // "Sequence" column (plus one water) and the modifications named in the
        // "Modifications" column, and asserts that it equals the parsed value of
        // the "Composition" column. The test is ignored when the file is missing.
        public void ValidateIcResultsWithModifications()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.tsv";
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, resultFilePath);
            }

            var tsv                  = new TsvFileParser(resultFilePath);
            var sequences            = tsv.GetData("Sequence");
            var modifications        = tsv.GetData("Modifications");
            var expectedCompositions = tsv.GetData("Composition").Select(Composition.Parse).ToArray();

            // Not read afterwards; ToArray still forces every ScanNum to be parsed.
            var scanNums = tsv.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();

            var aminoAcids = new AminoAcidSet();

            for (var row = 0; row < tsv.NumData; row++)
            {
                var unmodified = aminoAcids.GetComposition(sequences[row]) + Composition.H2O;
                var modTotal   = Composition.Zero;

                // The column value is wrapped in one leading and one trailing
                // character; the inside is a comma-separated list of entries.
                var rawMods    = modifications[row];
                var modEntries = rawMods.Substring(1, rawMods.Length - 2).Split(',');

                foreach (var entry in modEntries.Where(e => e.Length != 0))
                {
                    // The modification name is the first whitespace-delimited token.
                    var modName = entry.Split()[0];
                    modTotal = modTotal + Modification.Get(modName).Composition;
                }

                Assert.True((unmodified + modTotal).Equals(expectedCompositions[row]));
            }
        }
Exemple #11
0
        // Prints properties of a fixed peptide (sequence composition plus one
        // water): the composition, its mass and nominal mass, the masses returned
        // by GetIsotopeMass for indices 0-2, the isotopomer envelope intensities,
        // and the isotope m/z values and ratios of the peptide at charge 13.
        public void TestPeptide()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            ShowStarting(methodName);

            //const string sequence = "MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG";  // Histone H4
            const string sequence = "IRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG";  // Histone H4
            //const string sequence = "MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRAAVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQEDCRNGFLLDGFPRTIPQADAMKEAGIVVDYVLEFDVPDELIVDRIVGRRVHAASGRVYHVKFNPPKVEGKDDVTGEDLTTRKDDQEETVRKRLVEYHQMTAPLIGYYQKEAEAGNTKYAKVDGTQAVADVRAALEKILG";
            //const string sequence = "MNKTQLIDVIAEKAELSKTQAKAALESTLAAITESLKEGDAVQLVGFGTFKVNHRAERTGRNPQTGKEIKIAAANVPAFVSGKALKDAVK";
            //const string sequence =
            //    "METTKPSFQDVLEFVRLFRRKNKLQREIQDVEKKIRDNQKRVLLLDNLSDYIKPGMSVEAIQGIIASMKGDYEDRVDDYIIKNAELSKERRDISKKLKAMGEMKNGEAK";
            var aaSet = new AminoAcidSet();

            // Residue composition plus one water. This is the complete peptide
            // composition and is used for everything printed below.
            var composition = aaSet.GetComposition(sequence) + Composition.H2O;

            Console.WriteLine(composition);
            Console.WriteLine(composition.Mass);
            Console.WriteLine(composition.NominalMass);

            // Isotope masses for indices 0, 1 and 2.
            Console.WriteLine(composition.GetIsotopeMass(0));
            Console.WriteLine(composition.GetIsotopeMass(1));
            Console.WriteLine(composition.GetIsotopeMass(2));
            //Assert.AreEqual(composition.ToPlainString(), "C34H51N7O14");

            Console.WriteLine("Isotopomer Envelope:");
            foreach (var e in composition.GetIsotopomerEnvelopeRelativeIntensities())
            {
                Console.WriteLine(e);
            }
            Console.WriteLine();

            Console.WriteLine("Isotope ions:");

            // composition already contains the water; adding Composition.H2O a
            // second time (as this test previously did) shifts every printed m/z
            // by one extra water.
            var ion = new Ion(composition, 13);

            foreach (var p in ion.GetIsotopes(0.1))
            {
                Console.WriteLine("{0}\t{1}", ion.GetIsotopeMz(p.Index), p.Ratio);
            }
            Console.WriteLine();
        }
        // Scores one fixed protein sequence against scan 5448 (charge 11) of a
        // hard-coded PBF file with InformedTopDownScorer and prints the total score
        // and the number of matched fragments. Nothing is asserted.
        public void TestRescoring()
        {
            // Raw-file counterpart used previously:
            //   H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw
            const string specFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

            // Alternative target used previously (scan 4084):
            //   SGWYELSKSSNDQFKFVLKAGNGEVILTSELYTGKSGAMNGIESVQTNSPIEARYAKEVAKNDKPYFNLKAANHQIIGTSQMYSSTA
            const string sequence = "SKTKHPLPEQWQKNQEAAKATQVAFDLDEKFQYSIRKAALDAGVSPSDQIRTILGLSVSRRPTRPRLTVSLNADDYVQLAEKYDLNADAQLEIKRRVLEDLVRFVAED";
            const int    scanNum  = 5448;
            const int    charge   = 11;

            // Search modifications: N-terminal acetylation, oxidation on M,
            // dehydro and glutathione on C. The last constructor argument is false
            // for all four.
            var nTermAcetylation = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var oxidationOnM     = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroOnC       = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var glutathioneOnC   = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

            const int numMaxModsPerProtein = 4;

            var modifications = new List<SearchModification>
            {
                dehydroOnC,
                glutathioneOnC,
                oxidationOnM,
                nTermAcetylation,
            };
            var aminoAcids = new AminoAcidSet(modifications, numMaxModsPerProtein);

            // Residue composition plus one water.
            var proteinComposition = aminoAcids.GetComposition(sequence) + Composition.H2O;

            var lcMsRun = PbfLcMsRun.GetLcMsRun(specFilePath, 0, 0);
            var scorer  = new InformedTopDownScorer(lcMsRun, aminoAcids, 1, 15, new Tolerance(10));
            var result  = scorer.GetScores(AminoAcid.ProteinNTerm, sequence, AminoAcid.ProteinCTerm, proteinComposition, charge, scanNum);

            Console.WriteLine("Total Score = " + result.Score);
            Console.WriteLine("#Fragments = " + result.NumMatchedFrags);
        }
Exemple #13
0
        // Times isotope-profile computation over every line of a peptide list file.
        // For each line, both IsotopicDistributionCalculator.GetIsotopePattern (on
        // the plain-string formula) and Composition.GetIsotopomerEnvelopeRelativeIntensities
        // are invoked; their results are discarded. The test is ignored when the
        // file is missing.
        public void TestTimeToComputeIsotopomerProfiles()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            ShowStarting(testName);

            var          aminoAcids = new AminoAcidSet(Modification.Carbamidomethylation);
            const string dbFilePath = @"C:\cygwin\home\kims336\Data\IMS_Sarc\HumanPeptides.txt";

            if (!File.Exists(dbFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, dbFilePath);
            }

            var peptideCount = 0;
            var timer        = System.Diagnostics.Stopwatch.StartNew();

            var isotopeCalculator = IsotopicDistributionCalculator.Instance;

            foreach (var line in File.ReadLines(dbFilePath))
            {
                peptideCount++;

                // Drop the first two and the last two characters of the line.
                var residues    = line.Substring(2, line.Length - 4);
                var composition = aminoAcids.GetComposition(residues);

                isotopeCalculator.GetIsotopePattern(composition.ToPlainString());
                composition.GetIsotopomerEnvelopeRelativeIntensities();
            }

            // The count is written before the timer stops, so the reported time
            // includes this console write.
            Console.WriteLine("NumPeptides: " + peptideCount);
            timer.Stop();

            Console.WriteLine(@"{0:f4} sec", timer.Elapsed.TotalSeconds);
        }
Exemple #14
0
        // Scores the given sequence against the given scan and charge of the PBF
        // test file with InformedTopDownScorer, prints the total score and number
        // of matched fragments, and asserts that the score equals expectedScore
        // within 0.0001.
        public void TestRescoring(int scanNum, int charge, string sequence, double expectedScore)
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            var pbfPath = Utils.GetPbfTestFilePath(false);
            var pbfFile = Utils.GetTestFile(testName, pbfPath);

            // Search modifications: N-terminal acetylation, oxidation on M,
            // dehydro and glutathione on C. The last constructor argument is false
            // for all four.
            var nTermAcetylation = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var oxidationOnM     = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroOnC       = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var glutathioneOnC   = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

            const int numMaxModsPerProtein = 4;

            var modifications = new List<SearchModification>
            {
                dehydroOnC,
                glutathioneOnC,
                oxidationOnM,
                nTermAcetylation,
            };
            var aminoAcids = new AminoAcidSet(modifications, numMaxModsPerProtein);

            // Residue composition plus one water.
            var proteinComposition = aminoAcids.GetComposition(sequence) + Composition.H2O;

            var lcMsRun = PbfLcMsRun.GetLcMsRun(pbfFile.FullName, 0, 0);
            var scorer  = new InformedTopDownScorer(lcMsRun, aminoAcids, 1, 15, new Tolerance(10));
            var result  = scorer.GetScores(AminoAcid.ProteinNTerm, sequence, AminoAcid.ProteinCTerm, proteinComposition, charge, scanNum);

            Console.WriteLine("Total Score = " + result.Score);
            Console.WriteLine("#Fragments = " + result.NumMatchedFrags);

            Assert.AreEqual(expectedScore, result.Score, 0.0001);
        }
Exemple #15
0
        // Measures how well an MS1-based sequence filter retains spectra that were
        // confidently identified in an external result file.
        //
        // Steps, in order:
        //   1. Load the raw file (timed).
        //   2. Construct the Ms1IsotopeAndChargeCorrFilter (timed).
        //   3. Sum, over all mass bins between 3000 and 30000, the number of MS2
        //      scans the filter returns for the bin's mass (timed).
        //   4. For every result row with E-value <= 1E-4 whose precursor lies in
        //      the scan's isolation window, print correlation scores and record
        //      whether the filter kept the scan.
        //   5. Print summary statistics.
        //
        // The test is ignored if either the raw file or the result file is missing.
        public void FilteringEfficiencyQcShew()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            // Timed section 1: reading the run.
            sw.Start();
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            // NOTE(review): the two 1.4826 arguments are presumably precursor/product
            // signal-to-noise thresholds - confirm against InMemoryLcMsRun.GetLcMsRun.
            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm       = 10;
            var       tolerance          = new Tolerance(tolerancePpm);

            // Timed section 2: constructing the MS1 filter.
            sw.Reset();
            sw.Start();
            // NOTE(review): 3000/50000 look like a mass range and the 0.7 values like
            // correlation thresholds - confirm against the filter's constructor.
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);

            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;

            // Timed section 3: counting the filter's matching MS2 scans per mass bin.
            sw.Reset();
            sw.Start();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var          minBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var          maxBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var          numComparisons = 0L;

            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            //const string prot =
            //    "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA";
            //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass;
            //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass)));

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";

            // This check runs only after the raw file was loaded and the filter built.
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            // Columns read from the result file, one entry per result row.
            var tsvReader = new TsvFileParser(resultFilePath);
            var scanNums  = tsvReader.GetData("Scan(s)");
            var charges   = tsvReader.GetData("Charge");
            var scores    = tsvReader.GetData("E-value");
            var sequences = tsvReader.GetData("Peptide");

            //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv";
            //var tsvReader = new TsvFileParser(resultFilePath);
            //var scanNums = tsvReader.GetData("ScanNum");
            //var charges = tsvReader.GetData("Charge");
            //var scores = tsvReader.GetData("Score");
            //var sequences = tsvReader.GetData("Sequence");

            var aaSet = new AminoAcidSet();

            // seqSet: peptide strings of spectra kept by the filter.
            // allSeqSet: peptide strings of all spectra that passed validation.
            var seqSet             = new HashSet <string>();
            var allSeqSet          = new HashSet <string>();
            var numUnfilteredSpecs = 0;
            var totalSpecs         = 0;

            for (var i = 0; i < scores.Count; i++)
            {
                // Keep only rows whose E-value is at most 1E-4.
                var score = Convert.ToDouble(scores[i]);
                if (score > 1E-4)
                {
                    continue;
                }
                //if (score < 10) continue;

                var scanNum = Convert.ToInt32(scanNums[i]);
                var charge  = Convert.ToInt32(charges[i]);

                // Skip rows without a sequence between dots and rows whose sequence
                // contains "(".
                var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
                if (sequence == null || sequence.Contains("("))
                {
                    continue;
                }
                //var sequence = sequences[i];
                var composition = aaSet.GetComposition(sequence) + Composition.H2O;

                // A row counts only if the scan is a product spectrum whose isolation
                // window contains the precursor's most abundant isotope m/z.
                var precursorIon = new Ion(composition, charge);
                var isValid      = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid)
                {
                    continue;
                }
                ++totalSpecs;

                // corr1: correlation score in the precursor scan.
                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec    = run.GetSpectrum(precursorScanNum);
                var corr1            = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                // corr2: correlation score in the scan returned by GetNextScanNum(scanNum, 1).
                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec    = run.GetSpectrum(nextScanNum);
                var corr2       = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                // corr3: 1 when the MS1 filter returns this scan for the sequence mass, else 0.
                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax);
            }

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            // NOTE(review): sw was last reset before the per-bin loop and stopped right
            // after it, so this repeats the per-bin timing rather than a total time.
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Exemple #16
0
        /// <summary>
        /// Reads a tab-separated identification result file, keeps the rows whose q-value is at or
        /// below the FDR threshold, and counts how many target and decoy identifications have a
        /// non-empty precursor XIC for every isotope returned by
        /// <c>GetIsotopes(relativeIntensityThreshold)</c> other than the most abundant one.
        /// The counts, the valid fractions, and the elapsed time are written to the console.
        /// </summary>
        /// <remarks>
        /// Both the spectrum file and the result file are hard-coded local paths.
        /// Rows starting with '#' and rows that do not have exactly 16 columns are skipped.
        /// A protein name starting with "XXX_" marks the row as a decoy.
        /// </remarks>
        public void TestFusionDdaData()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            // Parameters
            const double relativeIntensityThreshold = 0.7;
            const double precursorTolerancePpm      = 20;
            const double fdrThreshold               = 0.01;

            const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
            var          run          = InMemoryLcMsRun.GetLcMsRun(specFilePath);

            // Timing starts after the run has been loaded, so it covers only the validation loop.
            var sw = System.Diagnostics.Stopwatch.StartNew();

            var tolerance = new Tolerance(precursorTolerancePpm);
            var aaSet     = new AminoAcidSet(Modification.Carbamidomethylation);

            // Numbers in the result file are machine-written; parse them independently of the
            // regional settings of the machine running the test.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            const string resultFilePath  = @"D:\Research\Data\UW\Fusion\oldResult\WT_D_DDA_130412065618_10ppm_TI2_SGD_Decoy.tsv";
            var          numTargets      = 0;
            var          numValidTargets = 0;
            var          numDecoys       = 0;
            var          numValidDecoys  = 0;

            foreach (var line in File.ReadLines(resultFilePath))
            {
                if (line.StartsWith("#", StringComparison.Ordinal))
                {
                    continue;
                }
                var token = line.Split('\t');
                if (token.Length != 16)
                {
                    continue;
                }

                var qValue = Convert.ToDouble(token[14], invariant);
                if (qValue > fdrThreshold)
                {
                    continue;
                }

                // The fixed carbamidomethyl mass annotation is stripped because the amino acid set
                // is already constructed with Modification.Carbamidomethylation.
                var peptide = token[8].Replace("C+57.021", "C");
                var scanNum = Convert.ToInt32(token[2], invariant);
                var charge  = Convert.ToInt32(token[7], invariant);
                var protein = token[9];
                var isDecoy = protein.StartsWith("XXX_", StringComparison.Ordinal);
                if (isDecoy)
                {
                    numDecoys++;
                }
                else
                {
                    numTargets++;
                }

                var precursorIon  = new Ion(aaSet.GetComposition(peptide) + Composition.H2O, charge);
                var basePeakIndex = precursorIon.Composition.GetMostAbundantIsotopeZeroBasedIndex();

                // An identification is valid when every non-base isotope has a non-empty XIC.
                // Stricter checks on isotope ratio and on correlation with the base-peak XIC were
                // tried here in the past and are intentionally not applied.
                var isValid = true;
                foreach (var isotope in precursorIon.GetIsotopes(relativeIntensityThreshold))
                {
                    if (isotope.Index == basePeakIndex)
                    {
                        continue;
                    }

                    var isotopeMz = precursorIon.GetIsotopeMz(isotope.Index);
                    var xic       = run.GetPrecursorExtractedIonChromatogram(isotopeMz, tolerance, scanNum);

                    if (xic.Count == 0)
                    {
                        isValid = false;
                        break;
                    }
                }

                if (isValid && !isDecoy)
                {
                    numValidTargets++;
                }
                else if (isValid)
                {
                    numValidDecoys++;
                }
            }

            // The fractions are computed in floating point, so an empty category does not throw.
            Console.WriteLine("#Targets: {0}", numTargets);
            Console.WriteLine("#ValidTargets: {0}\t{1}", numValidTargets, numValidTargets / (double)numTargets);
            Console.WriteLine("#Decoys: {0}", numDecoys);
            Console.WriteLine("#ValidDecoys: {0}\t{1}", numValidDecoys, numValidDecoys / (double)numDecoys);

            sw.Stop();

            Console.WriteLine(@"TimeForPrecursorValidation {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Exemple #17
0
        /// <summary>
        /// Reads a tab-separated identification result file and, for every row whose q-value is at
        /// or below the FDR threshold, writes one tab-separated console line containing the row's
        /// identification fields, up to four theoretical isotope ratios, and for each of the isotope
        /// indices 0, 1, 2, 3, -1 and 0.5 the XIC intensity relative to the most abundant isotope's
        /// XIC together with the correlation against that XIC.
        /// </summary>
        /// <remarks>
        /// Both the spectrum file and the result file are hard-coded local paths.
        /// Rows starting with '#' and rows that do not have exactly 16 columns are skipped.
        /// A protein name starting with "XXX_" is reported as a decoy (1), anything else as 0.
        /// </remarks>
        public void AnalyizeFusionDdaData()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            // Parameters
            const double precursorTolerancePpm = 20;
            const double fdrThreshold          = 0.01;

            // Number of "TheoN" columns announced in the header below.
            const int numTheoColumns = 4;

            const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
            var          run          = InMemoryLcMsRun.GetLcMsRun(specFilePath);

            var tolerance = new Tolerance(precursorTolerancePpm);
            var aaSet     = new AminoAcidSet(Modification.Carbamidomethylation);

            // Numbers in the result file are machine-written; parse them independently of the
            // regional settings of the machine running the test.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Isotope indices probed for every identification, in the same order as the
            // Obs/Corr column pairs of the header.
            var isotopeIndices = new double[] { 0, 1, 2, 3, -1, 0.5 };

            const string resultFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618_10ppm_TI2_SGD_Decoy.tsv";

            Console.WriteLine("IsDecoy\tPeptide\tScanNum\tCharge\tSpecEValue\tQValue\tPrecursorMz" +
                              "\tTheo0\tTheo1\tTheo2\tTheo3" +
                              "\tObs0\tCorr0\tObs1\tCorr1\tObs2\tCorr2\tObs3\tCorr3\tObs-1\tCorr-1\tObs0.5\tCorr0.5");
            foreach (var line in File.ReadLines(resultFilePath))
            {
                if (line.StartsWith("#", StringComparison.Ordinal))
                {
                    continue;
                }
                var token = line.Split('\t');
                if (token.Length != 16)
                {
                    continue;
                }

                var qValue = Convert.ToDouble(token[14], invariant);
                if (qValue > fdrThreshold)
                {
                    continue;
                }

                // The fixed carbamidomethyl mass annotation is stripped because the amino acid set
                // is already constructed with Modification.Carbamidomethylation.
                var peptide    = token[8].Replace("C+57.021", "C");
                var scanNum    = Convert.ToInt32(token[2], invariant);
                var charge     = Convert.ToInt32(token[7], invariant);
                var specEValue = Convert.ToDouble(token[12], invariant);

                var protein = token[9];
                var isDecoy = protein.StartsWith("XXX_", StringComparison.Ordinal);

                var precursorIon  = new Ion(aaSet.GetComposition(peptide) + Composition.H2O, charge);
                var baseXic       = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum);
                var baseIntensity = baseXic.GetSumIntensities();

                Console.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", (isDecoy ? 1 : 0), peptide, scanNum, charge, specEValue, qValue, precursorIon.GetMonoIsotopicMz());

                // Write at most numTheoColumns theoretical ratios ...
                var numIsotopes = 0;
                foreach (var theoIsotope in precursorIon.GetIsotopes(0.01))
                {
                    Console.Write("\t" + theoIsotope.Ratio);
                    if (++numIsotopes == numTheoColumns)
                    {
                        break;
                    }
                }

                // ... and pad with empty fields when fewer isotopes were returned, so that the
                // Obs/Corr values below always line up with their header columns.
                for (; numIsotopes < numTheoColumns; numIsotopes++)
                {
                    Console.Write("\t");
                }

                foreach (var isotopeIndex in isotopeIndices)
                {
                    var isotopeMz = precursorIon.GetIsotopeMz(isotopeIndex);
                    var xic       = run.GetPrecursorExtractedIonChromatogram(isotopeMz, tolerance, scanNum);

                    // NOTE(review): if baseIntensity is zero this ratio is presumably NaN or
                    // Infinity (assuming GetSumIntensities returns a floating-point value) - confirm.
                    var relativeIntensity = xic.GetSumIntensities() / baseIntensity;
                    var correlation       = xic.GetCorrelation(baseXic);
                    Console.Write("\t{0}\t{1}", relativeIntensity, correlation);
                }
                Console.WriteLine();
            }
        }