Ejemplo n.º 1
0
        /// <summary>
        /// For every row of a rescored result file, verifies that the composition of the sequence (plus H2O)
        /// combined with the compositions of the listed modifications equals the reported composition.
        /// The test is ignored when the result file is not present.
        /// </summary>
        public void ValidateIcResultsWithModifications()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var parser = new TsvFileParser(resultFilePath);
            var sequences = parser.GetData("Sequence");
            var modifications = parser.GetData("Modifications");
            var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
            var aaSet = new AminoAcidSet();
            for (var i = 0; i < parser.NumData; i++)
            {
                var sequenceComp = aaSet.GetComposition(sequences[i]) + Composition.H2O;

                // The first and last characters of the field are dropped (presumably enclosing quotes or
                // brackets -- confirm against the file format). Fields too short to carry any content are
                // treated as "no modifications" instead of making Substring throw.
                var modField = modifications[i];
                var modsStr = modField.Length >= 2 ? modField.Substring(1, modField.Length - 2) : string.Empty;

                var modComposition = Composition.Zero;
                foreach (var modStr in modsStr.Split(','))
                {
                    // Trim so that "A 1, B 2" style lists do not yield an empty modification name.
                    var trimmedMod = modStr.Trim();
                    if (trimmedMod.Length == 0) continue;

                    // The modification name is the first whitespace-delimited token of the entry.
                    var modName = trimmedMod.Split()[0];
                    var mod = Modification.Get(modName);
                    Assert.NotNull(mod, "Unknown modification '{0}' in row {1}", modName, i);
                    modComposition += mod.Composition;
                }

                var compFromSeqAndMods = sequenceComp + modComposition;
                Assert.True(compFromSeqAndMods.Equals(compositions[i]),
                    "Composition mismatch in row {0} (sequence {1})", i, sequences[i]);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Scores one fixed protein sequence (charge 11, scan 5448) against a .pbf spectrum file using
        /// <c>InformedTopDownScorer</c> and prints the total score and the number of matched fragments.
        /// </summary>
        public void TestRescoring()
        {
            // Alternative inputs that were used with this test:
            //   specFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw"
            //   sequence = "SGWYELSKSSNDQFKFVLKAGNGEVILTSELYTGKSGAMNGIESVQTNSPIEARYAKEVAKNDKPYFNLKAANHQIIGTSQMYSSTA", scanNum = 4084
            const string specFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            const string sequence = "SKTKHPLPEQWQKNQEAAKATQVAFDLDEKFQYSIRKAALDAGVSPSDQIRTILGLSVSRRPTRPRLTVSLNADDYVQLAEKYDLNADAQLEIKRRVLEDLVRFVAED";
            const int scanNum = 5448;
            const int charge = 11;
            const int numMaxModsPerProtein = 4;

            // Amino acid set with four non-fixed modifications, at most numMaxModsPerProtein per protein
            var nTermAcetylation = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var methionineOxidation = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var cysteineDehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var cysteineGlutathione = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

            var aaSet = new AminoAcidSet(
                new List<SearchModification>
                {
                    cysteineDehydro,
                    cysteineGlutathione,
                    methionineOxidation,
                    nTermAcetylation,
                },
                numMaxModsPerProtein);

            // Composition of the intact sequence: residues plus one water
            var proteinComposition = aaSet.GetComposition(sequence) + Composition.H2O;

            var lcMsRun = PbfLcMsRun.GetLcMsRun(specFilePath, 0, 0);
            var scorer = new InformedTopDownScorer(lcMsRun, aaSet, 1, 15, new Tolerance(10));
            var result = scorer.GetScores(
                AminoAcid.ProteinNTerm,
                sequence,
                AminoAcid.ProteinCTerm,
                proteinComposition,
                charge,
                scanNum);

            Console.WriteLine("Total Score = " + result.Score);
            Console.WriteLine("#Fragments = " + result.NumMatchedFrags);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Evaluates an MS1-based isotope/charge correlation filter on a QC Shewanella top-down run.
        /// Prints the time needed to load the run and build the filter, the number of matching MS2 scans
        /// per mass bin between 3000 and 30000, and, for each identification in the result file with
        /// E-value at most 1E-4, whether the filter retains the identified scan.
        /// The test is ignored when either input file is missing.
        /// </summary>
        public void FilteringEfficiencyQcShew()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";

            // Check both inputs up front so a missing result file is reported before the expensive
            // run loading and filter construction are performed.
            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }
            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var sw = new System.Diagnostics.Stopwatch();
            sw.Start();
            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm = 10;
            var tolerance = new Tolerance(tolerancePpm);
            sw.Reset();
            sw.Start();
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);
            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;

            // Count the matching MS2 scans for every mass bin in the protein mass range
            sw.Reset();
            sw.Start();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var numComparisons = 0L;
            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            //const string prot =
            //    "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA";
            //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass;
            //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass)));

            var tsvReader = new TsvFileParser(resultFilePath);
            var scanNums = tsvReader.GetData("Scan(s)");
            var charges = tsvReader.GetData("Charge");
            var scores = tsvReader.GetData("E-value");
            var sequences = tsvReader.GetData("Peptide");

            //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv";
            //var tsvReader = new TsvFileParser(resultFilePath);
            //var scanNums = tsvReader.GetData("ScanNum");
            //var charges = tsvReader.GetData("Charge");
            //var scores = tsvReader.GetData("Score");
            //var sequences = tsvReader.GetData("Sequence");

            var aaSet = new AminoAcidSet();

            var seqSet = new HashSet<string>();     // sequences with at least one spectrum retained by the filter
            var allSeqSet = new HashSet<string>();  // all sequences that passed the validity checks
            var numUnfilteredSpecs = 0;
            var totalSpecs = 0;

            // Restart the timer so the final "Elapsed Time" line reports this evaluation loop rather than
            // repeating the duration of the per-bin counting above.
            sw.Reset();
            sw.Start();
            for (var i = 0; i < scores.Count; i++)
            {
                var score = Convert.ToDouble(scores[i]);
                if (score > 1E-4) continue;
                //if (score < 10) continue;

                var scanNum = Convert.ToInt32(scanNums[i]);
                var charge = Convert.ToInt32(charges[i]);

                // Skip entries without a sequence between dots and entries containing '('
                var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
                if (sequence == null || sequence.Contains("(")) continue;
                //var sequence = sequences[i];
                var composition = aaSet.GetComposition(sequence) + Composition.H2O;

                // Only consider product spectra whose isolation window contains the most abundant isotope
                var precursorIon = new Ion(composition, charge);
                var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
                var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid) continue;
                ++totalSpecs;

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec = run.GetSpectrum(precursorScanNum);
                var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec = run.GetSpectrum(nextScanNum);
                var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                // corr3 is 1 when the filter keeps this scan for the sequence mass, otherwise 0
                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax);
            }
            sw.Stop();

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Enumerates all sequences of length 300 to 400 from a FASTA database and builds an 11-bin histogram
        /// of the difference between the rescaled, rounded mass and the nominal mass of each sequence.
        /// Prints the sequence count, the histogram (offset, count, fraction) and the elapsed time.
        /// </summary>
        public void TestNominalMassErrors()
        {
            const int minLength = 300;
            const int maxLength = 400;

            // Other databases that were used with this test:
            //   @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta"
            //   @"C:\cygwin\home\kims336\Data\TopDownJia\database\TargetProteins.fasta"
            const string dbFile = @"C:\cygwin\home\kims336\Data\TopDownJia\database\ID_003962_71E1A1D4.fasta";

            var timer = new System.Diagnostics.Stopwatch();

            var fastaDb = new FastaDatabase(dbFile);
            fastaDb.Read();
            var indexedDb = new IndexedDatabase(fastaDb);
            var sequenceCount = 0L;
            timer.Start();

            var errorCounts = new long[11];
            var centerBin = errorCounts.Length/2;   // bin that corresponds to an error of zero
            var lastBin = errorCounts.Length - 1;
            var aminoAcids = new AminoAcidSet();
            foreach (var entry in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
            {
                sequenceCount++;

                // Drop the two leading and two trailing annotation characters to get the bare sequence
                var annotated = entry.Annotation;
                var comp = aminoAcids.GetComposition(annotated.Substring(2, annotated.Length - 4));

                var exactMass = comp.Mass;
                var nominal = comp.NominalMass;
                var massError = (int) Math.Round(exactMass*Constants.RescalingConstant) - nominal;

                // Errors outside the histogram range are accumulated in the first or last bin
                var bin = Math.Min(Math.Max(massError + centerBin, 0), lastBin);
                errorCounts[bin]++;
            }

            Console.WriteLine("NumSequences: {0}", sequenceCount);
            for (var bin = 0; bin < errorCounts.Length; bin++)
            {
                var fraction = errorCounts[bin]/(double)sequenceCount;
                Console.WriteLine("{0}\t{1}\t{2}", bin - centerBin, errorCounts[bin], fraction);
            }

            timer.Stop();

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// For every identification in a tab-separated result file with q-value at most 0.01, prints the
        /// identification, up to four theoretical isotope ratios, and the relative XIC intensity and XIC
        /// correlation (against the most abundant isotope) for isotope indices 0, 1, 2, 3, -1 and 0.5.
        /// </summary>
        public void AnalyizeFusionDdaData()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // Parameters
            //const double relativeIntensityThreshold = 0.7;
            const double precursorTolerancePpm = 20;
            const double fdrThreshold = 0.01;
            const int maxTheoreticalIsotopes = 4;

            const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
            const string resultFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618_10ppm_TI2_SGD_Decoy.tsv";

            var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);
            var tolerance = new Tolerance(precursorTolerancePpm);
            var aaSet = new AminoAcidSet(Modification.Carbamidomethylation);

            // Isotope indices for which an XIC is extracted, in output column order
            var isotopeIndices = new double[] {0, 1, 2, 3, -1, 0.5};

            Console.WriteLine("IsDecoy\tPeptide\tScanNum\tCharge\tSpecEValue\tQValue\tPrecursorMz" +
                              "\tTheo0\tTheo1\tTheo2\tTheo3" +
                              "\tObs0\tCorr0\tObs1\tCorr1\tObs2\tCorr2\tObs3\tCorr3\tObs-1\tCorr-1\tObs0.5\tCorr0.5");
            foreach (var line in File.ReadLines(resultFilePath))
            {
                // Skip comment lines and rows that do not have exactly 16 columns
                if (line.StartsWith("#")) continue;
                var columns = line.Split('\t');
                if (columns.Length != 16) continue;

                var qValue = Convert.ToDouble(columns[14]);
                if (qValue > fdrThreshold) continue;

                var peptide = columns[8].Replace("C+57.021", "C");
                var scanNum = Convert.ToInt32(columns[2]);
                var charge = Convert.ToInt32(columns[7]);
                var specEValue = Convert.ToDouble(columns[12]);

                // Proteins whose name starts with "XXX_" are reported as decoys
                var decoyFlag = columns[9].StartsWith("XXX_") ? 1 : 0;

                var precursorIon = new Ion(aaSet.GetComposition(peptide) + Composition.H2O, charge);
                var baseXic = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum);
                var baseIntensity = baseXic.GetSumIntensities();

                Console.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", decoyFlag, peptide, scanNum, charge, specEValue, qValue, precursorIon.GetMonoIsotopicMz());

                // Theoretical ratios: at most the first four isotopes returned for threshold 0.01
                var numPrinted = 0;
                foreach (var theoIsotope in precursorIon.GetIsotopes(0.01))
                {
                    Console.Write("\t"+theoIsotope.Ratio);
                    numPrinted++;
                    if (numPrinted == maxTheoreticalIsotopes) break;
                }

                // Observed intensity (relative to the base XIC) and correlation for each isotope index
                foreach (var isotopeIndex in isotopeIndices)
                {
                    var xic = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetIsotopeMz(isotopeIndex), tolerance, scanNum);
                    var relativeIntensity = xic.GetSumIntensities() / baseIntensity;
                    Console.Write("\t{0}\t{1}", relativeIntensity, xic.GetCorrelation(baseXic));
                }
                Console.WriteLine();
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Counts, among the identifications of a tab-separated result file with q-value at most 0.01,
        /// how many targets and decoys are "valid": every isotope returned by <c>GetIsotopes(0.7)</c>,
        /// other than the most abundant one, must have a non-empty precursor XIC around the identified scan.
        /// Prints the counts, the valid fractions and the elapsed time.
        /// </summary>
        public void TestFusionDdaData()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // Parameters
            const double relativeIntensityThreshold = 0.7;
            const double precursorTolerancePpm = 20;
            const double fdrThreshold = 0.01;

            const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
            var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

            var sw = new System.Diagnostics.Stopwatch();
            sw.Start();

            var tolerance = new Tolerance(precursorTolerancePpm);
            var aaSet = new AminoAcidSet(Modification.Carbamidomethylation);

            const string resultFilePath = @"D:\Research\Data\UW\Fusion\oldResult\WT_D_DDA_130412065618_10ppm_TI2_SGD_Decoy.tsv";
            var numTargets = 0;
            var numValidTargets = 0;
            var numDecoys = 0;
            var numValidDecoys = 0;

            foreach (var line in File.ReadLines(resultFilePath))
            {
                // Skip comment lines and rows that do not have exactly 16 columns
                if (line.StartsWith("#")) continue;
                var token = line.Split('\t');
                if (token.Length != 16) continue;

                var qValue = Convert.ToDouble(token[14]);
                if (qValue > fdrThreshold) continue;

                var peptide = token[8].Replace("C+57.021", "C");
                var scanNum = Convert.ToInt32(token[2]);
                var charge = Convert.ToInt32(token[7]);
                var protein = token[9];

                // Proteins whose name starts with "XXX_" are counted as decoys
                var isDecoy = protein.StartsWith("XXX_");
                if (isDecoy) numDecoys++;
                else numTargets++;

                var precursorIon = new Ion(aaSet.GetComposition(peptide) + Composition.H2O, charge);
                var basePeakIndex = precursorIon.Composition.GetMostAbundantIsotopeZeroBasedIndex();

                // The base-peak XIC itself is not needed for the presence check below, so it is no longer
                // extracted; only the other abundant isotopes are examined.
                var isValid = true;
                foreach (var isotope in precursorIon.GetIsotopes(relativeIntensityThreshold))
                {
                    if (isotope.Index == basePeakIndex) continue;
                    var isotopeMz = precursorIon.GetIsotopeMz(isotope.Index);
                    var xic = run.GetPrecursorExtractedIonChromatogram(isotopeMz, tolerance, scanNum);

                    // A single isotope without any XIC peak invalidates the identification
                    if (xic.Count == 0)
                    {
                        isValid = false;
                        break;
                    }
                }

                if (!isValid) continue;
                if (isDecoy) numValidDecoys++;
                else numValidTargets++;
            }
            Console.WriteLine("#Targets: {0}", numTargets);
            Console.WriteLine("#ValidTargets: {0}\t{1}", numValidTargets, numValidTargets/(double)numTargets);
            Console.WriteLine("#Decoys: {0}", numDecoys);
            Console.WriteLine("#ValidDecoys: {0}\t{1}", numValidDecoys, numValidDecoys / (double)numDecoys);

            sw.Stop();

            Console.WriteLine(@"TimeForPrecursorValidation {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Prints the theoretical isotopomer profile of a fixed peptide ion and, for isotope indices -1 to 2,
        /// the m/z, number of XIC peaks, intensity relative to the most abundant isotope's XIC and the
        /// correlation with that XIC.
        /// </summary>
        public void TestXicGen()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";
            var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

            // Test
            var tolerance = new Tolerance(30);

            const string peptide = "AIANGQVDGFPTQEECR";
            const int targetScanNum = 37633;
            const int charge = 2;

            //const string peptide = "IVDTNGAGDAFAGGFMAGLTK";
            //const int targetScanNum = 67513;
            //const int charge = 3;

            // Range of isotope indices that is extracted and reported
            const int minIsotopeIndex = -1;
            const int maxIsotopeIndex = 2;

            var aaSet = new AminoAcidSet(Modification.Carbamidomethylation);
            var precursorIon = new Ion(aaSet.GetComposition(peptide) + Composition.H2O, charge);

            Console.WriteLine("Theoretical isotopomer profile:");
            foreach(var p in precursorIon.GetIsotopes(0.1)) Console.WriteLine("{0}\t{1}", precursorIon.GetIsotopeMz(p.Index), p.Ratio);

            var xicArr = new Dictionary<int, Xic>();
            var basePeakIndex = precursorIon.Composition.GetMostAbundantIsotopeZeroBasedIndex();
            for (var i = minIsotopeIndex; i <= maxIsotopeIndex; i++)
            {
                xicArr[i] = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetIsotopeMz(i), tolerance, targetScanNum);
            }

            // The base peak may lie outside the reported index range (e.g. for a heavier peptide); extract
            // its XIC separately in that case instead of failing with KeyNotFoundException.
            Xic baseXic;
            if (!xicArr.TryGetValue(basePeakIndex, out baseXic))
            {
                baseXic = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetIsotopeMz(basePeakIndex), tolerance, targetScanNum);
            }
            var baseIntensity = baseXic.GetSumIntensities();

            for (var i = minIsotopeIndex; i <= maxIsotopeIndex; i++)
            {
                Console.WriteLine("\nIndex: {0}", i);
                Console.WriteLine("m/z: {0}", precursorIon.GetIsotopeMz(i));
                Console.WriteLine("#XicPeaks: {0}", xicArr[i].Count);
                Console.WriteLine("Intensity: {0}", xicArr[i].GetSumIntensities()/baseIntensity);
                Console.WriteLine("Correlation: {0}", xicArr[i].GetCorrelation(baseXic));
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Times the computation of isotope patterns for every annotated peptide listed in a text file,
        /// using both <c>IsotopicDistributionCalculator</c> and
        /// <c>Composition.GetIsotopomerEnvelopeRelativeIntensities</c>. Prints the number of peptides
        /// processed and the elapsed time. The test is ignored when the peptide file is missing.
        /// </summary>
        public void TestTimeToComputeIsotopomerProfiles()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            ShowStarting(methodName);

            var aaSet = new AminoAcidSet(Modification.Carbamidomethylation);
            const string dbFilePath = @"C:\cygwin\home\kims336\Data\IMS_Sarc\HumanPeptides.txt";

            if (!File.Exists(dbFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFilePath);
            }

            // Each line carries two leading and two trailing annotation characters around the peptide
            const int numFlankingChars = 4;

            int numPeptides = 0;
            var sw = new System.Diagnostics.Stopwatch();
            sw.Start();

            var isoCalc = IsotopicDistributionCalculator.Instance;
            foreach (var annotation in File.ReadLines(dbFilePath))
            {
                // Blank or truncated lines cannot hold an annotation; skip them instead of letting
                // Substring throw ArgumentOutOfRangeException.
                if (annotation.Length < numFlankingChars) continue;

                ++numPeptides;
                var peptide = annotation.Substring(2, annotation.Length - numFlankingChars);
                var composition = aaSet.GetComposition(peptide);
                var molFormula = composition.ToPlainString();

                // Results are discarded on purpose: only the computation time is of interest
                isoCalc.GetIsotopePattern(molFormula);
                composition.GetIsotopomerEnvelopeRelativeIntensities();
            }

            Console.WriteLine("NumPeptides: " + numPeptides);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds a sequence graph for the annotation "_.STR._" with phosphorylation (S/T/Y) and oxidation (M)
        /// as non-fixed modifications and carbamidomethylation (C) as a fixed modification, checks that the
        /// graph's unmodified sequence composition equals the composition computed directly from the amino
        /// acid set, and prints all sequence compositions of the graph.
        /// </summary>
        public void TestSequenceGraph()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            ShowStarting(methodName);

            var phosPhoS = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
            var phosPhoT = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
            var phosPhoY = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
            var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var fixCarbamidomethylC = new SearchModification(Modification.Carbamidomethylation, 'C', SequenceLocation.Everywhere, true);

            var searchModifications = new List<SearchModification> { phosPhoS, phosPhoT, phosPhoY, oxM, fixCarbamidomethylC };
            //var searchModifications = new List<SearchModification> { phosPhoT, fixCarbamidomethylC };
            const int numMaxModsPepPeptide = 2;

            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPepPeptide);
            const string annotation = "_.STR._";

            // Strip the two flanking characters on each side to obtain the bare peptide sequence
            var pepSeq = annotation.Substring(2, annotation.Length - 4);
            var expectedComposition = aaSet.GetComposition(pepSeq);
            Console.WriteLine(expectedComposition);

            var graph = SequenceGraph.CreateGraph(aaSet, annotation);
            Assert.NotNull(graph, "No sequence graph was created for annotation {0}", annotation);

            var graphComposition = graph.GetUnmodifiedSequenceComposition();
            Console.WriteLine(graphComposition);

            // NUnit expects (expected, actual); the directly computed composition is the reference value
            Assert.AreEqual(expectedComposition, graphComposition);

            Console.WriteLine("Annotation Compositions:");
            var index = -1;
            foreach (var composition in graph.GetSequenceCompositions())
            {
                Console.WriteLine(++index+": "+composition);
            }

            //const int seqIndex = 1;
            //Console.WriteLine("Fragment Compositions (" + seqIndex +")");
            //var scoringGraph = graph.GetScoringGraph(seqIndex);
            //foreach (var composition in scoringGraph.GetCompositions())
            //{
            //    Console.WriteLine(composition);
            //}
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Prints the composition, mass, nominal mass, the masses of isotopes 0 to 2, the isotopomer envelope
        /// and the isotope m/z values (charge 13, relative intensity threshold 0.1) of a fixed sequence.
        /// </summary>
        public void TestPeptide()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            ShowStarting(methodName);

            //const string sequence = "MSGRGKGGKGLGKGGAKRHRKVLRDNIQGITKPAIRRLARRGGVKRISGLIYEETRGVLKVFLENVIRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG";  // Histone H4
            const string sequence = "IRDAVTYTEHAKRKTVTAMDVVYALKRQGRTLYGFGG";  // C-terminal part of the Histone H4 sequence above
            //const string sequence = "MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRAAVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQEDCRNGFLLDGFPRTIPQADAMKEAGIVVDYVLEFDVPDELIVDRIVGRRVHAASGRVYHVKFNPPKVEGKDDVTGEDLTTRKDDQEETVRKRLVEYHQMTAPLIGYYQKEAEAGNTKYAKVDGTQAVADVRAALEKILG";
            //const string sequence = "MNKTQLIDVIAEKAELSKTQAKAALESTLAAITESLKEGDAVQLVGFGTFKVNHRAERTGRNPQTGKEIKIAAANVPAFVSGKALKDAVK";
            //const string sequence =
            //    "METTKPSFQDVLEFVRLFRRKNKLQREIQDVEKKIRDNQKRVLLLDNLSDYIKPGMSVEAIQGIIASMKGDYEDRVDDYIIKNAELSKERRDISKKLKAMGEMKNGEAK";
            var aaSet = new AminoAcidSet();

            // Residue composition plus one water
            var composition = aaSet.GetComposition(sequence) + Composition.H2O;

            Console.WriteLine(composition);
            Console.WriteLine(composition.Mass);
            Console.WriteLine(composition.NominalMass);
            // Masses of isotope indices 0, 1 and 2
            Console.WriteLine(composition.GetIsotopeMass(0));
            Console.WriteLine(composition.GetIsotopeMass(1));
            Console.WriteLine(composition.GetIsotopeMass(2));
            //Assert.AreEqual(composition.ToPlainString(), "C34H51N7O14");

            Console.WriteLine("Isotopomer Envelope:");
            foreach (var e in composition.GetIsotopomerEnvelopeRelativeIntensities()) Console.WriteLine(e);
            Console.WriteLine();

            Console.WriteLine("Isotope ions:");
            // composition already contains the water; adding Composition.H2O again would shift every
            // isotope m/z by one extra water.
            var ion = new Ion(composition, 13);
            foreach (var p in ion.GetIsotopes(0.1)) Console.WriteLine("{0}\t{1}", ion.GetIsotopeMz(p.Index), p.Ratio);
            Console.WriteLine();
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Prints the m/z and ratio of every isotope returned by <c>GetIsotopes(0.1)</c> for a fixed
        /// sequence (residues plus one water) at charge 9.
        /// </summary>
        public void TestIsoProfile()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string sequence = "MWYMISAQDVENSLEKRLAARPAHLARLQELADEGRLLVAGPHPAIDSENPGDAGFSGSLVVADFDSLATAQAWADADPYFAAGVYQSVVVKPFKRVLP";
            const int charge = 9;

            var proteinComposition = new AminoAcidSet().GetComposition(sequence) + Composition.H2O;
            var precursor = new Ion(proteinComposition, charge);

            // One output line per isotope: m/z, tab, ratio
            foreach (var isotope in precursor.GetIsotopes(0.1))
            {
                var isotopeMz = precursor.GetIsotopeMz(isotope.Index);
                Console.WriteLine(isotopeMz+"\t"+isotope.Ratio);
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Groups sequence tags by the proteins in which they occur.
        /// </summary>
        /// <param name="tags">Sequence tags to look up in the database.</param>
        /// <param name="searchableDb">Database used to find all occurrences of each tag sequence.</param>
        /// <param name="aaSet">Amino acid set used to compute tag masses; also passed to each <c>MatchedTagSet</c>.</param>
        /// <param name="tolerance">Tolerance passed to each newly created <c>MatchedTagSet</c>.</param>
        /// <param name="relaxedTolerance">Relaxed tolerance passed to each newly created <c>MatchedTagSet</c>.</param>
        /// <returns>
        /// A dictionary keyed by protein name whose values hold the tags matched to that protein.
        /// Tags with more than <c>MaxNumProteinMatchesPerTag</c> matches are ignored.
        /// </returns>
        public static Dictionary<string, MatchedTagSet> GetProteinToMatchedTagsMap(
            IEnumerable<SequenceTag> tags, 
            SearchableDatabase searchableDb, 
            AminoAcidSet aaSet, 
            Tolerance tolerance,
            Tolerance relaxedTolerance)
        {
            var fastaDb = searchableDb.FastaDatabase;
            var proteinsToTags = new Dictionary<string, MatchedTagSet>();
            foreach (var tag in tags)
            {
                var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();

                // Nothing to record for unmatched tags; overly ambiguous tags are skipped as well
                if (matchedIndices.Length == 0) continue;
                if (matchedIndices.Length > MaxNumProteinMatchesPerTag) continue;

                // The tag mass depends only on the tag, so compute it once rather than per match
                var mass = aaSet.GetComposition(tag.Sequence).Mass;

                foreach (var index in matchedIndices)
                {
                    var proteinName = fastaDb.GetProteinName(index);
                    var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
                    var matchedTag = new MatchedTag(tag, startIndex) { Mass = mass };

                    MatchedTagSet matchedTagSet;
                    if (!proteinsToTags.TryGetValue(proteinName, out matchedTagSet))
                    {
                        // First tag for this protein: create its tag set
                        var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                        matchedTagSet = new MatchedTagSet(proteinSequence, aaSet, tolerance, relaxedTolerance);
                        proteinsToTags.Add(proteinName, matchedTagSet);
                    }
                    matchedTagSet.Add(matchedTag);
                }
            }

            return proteinsToTags;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Prints one line (peptide, plain-string formula of the peptide plus water, protein name) for every
        /// tryptic peptide enumerated from the spiked-in peptide FASTA database, followed by the total count.
        /// The test is ignored when the database file is missing.
        /// </summary>
        public void CreateTargetList()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string databaseFilePath = @"D:\Research\Data\IPRG2014\database\SpikedInPeptides.fasta";
            if (!File.Exists(databaseFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, databaseFilePath);
            }

            var fastaDb = new FastaDatabase(databaseFilePath);
            fastaDb.Read();
            var indexedDb = new IndexedDatabase(fastaDb);
            var targetCount = 0;

            var aminoAcids = new AminoAcidSet(Modification.Carbamidomethylation);

            Console.WriteLine("Peptide\tFormula\tProtein");
            foreach (var entry in indexedDb.AnnotationsAndOffsets(6, 30, 1, 1, Enzyme.Trypsin))
            {
                // Remove the two flanking characters on each side of the annotation
                var annotated = entry.Annotation;
                var peptide = annotated.Substring(2, annotated.Length - 4);

                var formula = (aminoAcids.GetComposition(peptide) + Composition.H2O).ToPlainString();
                var proteinName = fastaDb.GetProteinName(entry.Offset);

                Console.WriteLine("{0}\t{1}\t{2}", peptide, formula, proteinName);
                targetCount++;
            }
            Console.WriteLine("NumTargets: {0}", targetCount);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Reads peptides (one per line) from a text file, computes the monoisotopic m/z of each peptide
        /// (plus water) at charge 2 and builds a histogram of the m/z values in four 125-wide bins starting
        /// at 400. Prints one row per counted ion and then the histogram with the fraction per bin.
        /// </summary>
        public void ComputeSpikedInPeptideMzHist()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const string pepListFile = @"C:\cygwin\home\kims336\Data\DIA\SpikedPeptides.txt";

            // Histogram layout: bins [400, 525), [525, 650), [650, 775), [775, 900)
            const int minMz = 400;
            const int binWidth = 125;
            const int numBins = 4;
            const int maxMz = minMz + numBins * binWidth;   // exclusive upper bound (900)

            var aaSet = new AminoAcidSet(Modification.Carbamidomethylation);
            var charges = new[] {2};

            var hist = new int[numBins];

            var sum = 0;

            // The header lists all four columns written per row below
            Console.WriteLine("Peptide\tCharge\tMz\tHistIndex");
            foreach (var line in File.ReadLines(pepListFile))
            {
                // Ignore blank lines and stray surrounding whitespace
                var peptide = line.Trim();
                if (peptide.Length == 0) continue;
                var composition = aaSet.GetComposition(peptide) + Composition.H2O;

                foreach (var charge in charges)
                {
                    var precursorIon = new Ion(composition, charge);
                    var precursorIonMz = precursorIon.GetMonoIsotopicMz();

                    // Ions outside the histogram range are not counted
                    if (precursorIonMz < minMz || precursorIonMz >= maxMz) continue;
                    var histIndex = (int)((precursorIonMz - minMz)/binWidth);
                    hist[histIndex]++;

                    Console.WriteLine("{0}\t{1}\t{2}\t{3}", peptide, charge, precursorIonMz, histIndex);

                    sum++;
                }
            }

            Console.WriteLine("\nRange\tNum\tRatio");
            for (var i = 0; i < hist.Length; i++)
            {
                var binStart = minMz + i*binWidth;
                Console.WriteLine("{0}-{1}\t{2}\t{3}", binStart, binStart + binWidth, hist[i], hist[i] / (float)sum);
            }

        }