Exemplo n.º 1
0
 /// <summary>
 /// Static constructor for <see cref="MsgfPlusSequenceReader"/>: assigns the shared
 /// <c>AminoAcidSet</c> member a default-constructed <see cref="AminoAcidSet"/>.
 /// </summary>
 static MsgfPlusSequenceReader() => AminoAcidSet = new AminoAcidSet();
Exemplo n.º 2
0
        /// <summary>
        /// Builds a sequence graph for one annotated protein (with an "AddVal" modification allowed on
        /// every standard residue plus N-terminal acetylation), then prints the fast fragment score and the
        /// refined <see cref="InformedTopDownScorer"/> result for each proteoform composition against a
        /// single MS2 scan. The test is ignored when the raw file is not present.
        /// </summary>
        public void TestPrSm()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            // Alternative data set (disabled):
            //const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownYufeng\raw\yufeng_column_test2.raw";
            //const string annotation =
            //    "_.MKTKLSVLSAAMLAATLTMMPAVSQAAIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVG" +
            //    "LHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTV" +
            //    "TSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVG" +
            //    "IGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGS" +
            //    "AAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEA" +
            //    "NQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDL" +
            //    "KSLKELLKDQEGAVALKIVRGKSMLYLVLR._";
            //var aaSet = new AminoAcidSet();
            //const int charge = 60;
            //const int ms2ScanNum = 46661;

            const string specFilePath = @"D:\Research\Data\Jon\AH_SF_mouseliver_3-1_Intact_2_6Feb14_Bane_PL011402.raw";
            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
            }

            // Target spectrum and the sequence it is scored against.
            const int ms2ScanNum = 19011;
            const int charge = 7;
            const string annotation = "_.SKVSFKITLTSDPRLPYKVLSVPESTPFTAVLKFAAEEFKVPAATSAIITNDGIGINPAQTAGNVFLKHGSELRIIPRDRVGSC._";

            // Search modifications: N-terminal acetylation plus "AddVal" on every standard residue.
            var nTermAcetylation = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, true);
            var addValMod = Modification.RegisterAndGetModification("AddVal", new Composition(5, 9, 1, 1, 0));
            var modifications = AminoAcid.StandardAminoAcidCharacters
                .Select(residue => new SearchModification(addValMod, residue, SequenceLocation.Everywhere, false))
                .ToList();
            modifications.Add(nTermAcetylation);

            const int numMaxModsPerProtein = 1;
            var aaSet = new AminoAcidSet(modifications, numMaxModsPerProtein);

            var graph = SequenceGraph.CreateGraph(aaSet, annotation);
            Console.WriteLine("NumProteoforms: " + graph.GetNumProteoformCompositions());

            var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);
            var scorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 15);

            // GetScorer is invoked before GetMs2Scorer, exactly as in the original call sequence.
            scorerFactory.GetScorer(ms2ScanNum);
            var scorer = scorerFactory.GetMs2Scorer(ms2ScanNum);
            Assert.NotNull(scorer, "Scorer is null!");

            for (var sinkIndex = 0; sinkIndex < graph.GetNumProteoformCompositions(); sinkIndex++)
            {
                graph.SetSink(sinkIndex);

                var modCombination = graph.GetModificationCombinations()[sinkIndex];
                Console.WriteLine("ModComb: " + modCombination);

                var fastScore = graph.GetFragmentScore(scorer);
                Console.WriteLine("Fast search score: " + fastScore);

                var composition = graph.GetSinkSequenceCompositionWithH2O();

                var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 30, new Tolerance(10));
                var bareSequence = SimpleStringProcessing.GetStringBetweenDots(annotation);
                var refinedScore = informedScorer.GetScores(
                    AminoAcid.ProteinNTerm,
                    bareSequence,
                    AminoAcid.ProteinCTerm,
                    composition,
                    charge,
                    ms2ScanNum);

                Console.WriteLine("Modifications: {0}", refinedScore.Modifications);
                Console.WriteLine("Composition: {0}", composition);
                Console.WriteLine("RefinedScores: {0}", refinedScore);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Parse a protein/peptide sequence in the LCMSSpectator style.
        /// </summary>
        /// <remarks>
        /// The sequence is tokenized into single-letter amino acids and bracketed modification names
        /// (for example <c>[Name]</c>). Each modification is attached to the amino acid that precedes it;
        /// modifications appearing before the first amino acid are attached to that first amino acid.
        /// When annotation trimming is enabled, everything up to and including the first '.' and everything
        /// from the last remaining '.' onward is removed before parsing.
        /// </remarks>
        /// <param name="sequence">The sequence as a string.</param>
        /// <returns>
        /// The parsed sequence; an empty sequence when the input is null or empty (before or after trimming);
        /// <c>null</c> when the input contains no recognizable amino acid or modification token.
        /// </returns>
        /// <exception cref="FormatException">
        /// Thrown when an amino acid character or a modification name is not recognized.
        /// </exception>
        public Sequence Read(string sequence)
        {
            // Handle null/empty up front so that a null input behaves the same whether or not
            // annotation trimming is enabled (previously trimming dereferenced the null string).
            if (string.IsNullOrEmpty(sequence))
            {
                return new Sequence(new List<AminoAcid>());
            }

            if (this.trimAnnotations)
            {
                // Drop the prefix annotation: everything up to and including the first '.'.
                // The previous implementation also cut the final character here, which corrupted
                // inputs with a single '.' (e.g. "K.ABC" was parsed as "AB").
                var firstIndex = sequence.IndexOf('.');
                if (firstIndex >= 0)
                {
                    sequence = sequence.Substring(firstIndex + 1);
                }

                // Drop the suffix annotation: the last remaining '.' and everything after it.
                var lastIndex = sequence.LastIndexOf('.');
                if (lastIndex >= 0)
                {
                    sequence = sequence.Substring(0, lastIndex);
                }
            }

            const string AminoAcidRegex = @"[" + AminoAcid.StandardAminoAcidCharacters + "]";
            ////const string modRegex = @"\[([A-Z]|[a-z])+\]";
            const string ModRegex = @"\[([A-Z]|[a-z]|[0-9]|-|>)+\]";
            const string TokenRegex = "(" + AminoAcidRegex + "|" + ModRegex + ")";

            // Trimming may have left nothing to parse.
            if (string.IsNullOrEmpty(sequence))
            {
                return new Sequence(new List<AminoAcid>());
            }

            // NOTE(review): this check is not anchored, so it only requires at least one valid token;
            // characters that match neither pattern are silently skipped by the tokenizer below.
            // Confirm with callers before tightening it.
            if (!Regex.IsMatch(sequence, TokenRegex + "+"))
            {
                return null;
            }

            var stdAaSet = new AminoAcidSet();
            var aminoAcidList = new List<AminoAcid>();

            var matches = Regex.Matches(sequence, TokenRegex);
            AminoAcid aa = null;                  // amino acid currently collecting modifications
            var mods = new List<Modification>();  // modifications pending for the current amino acid

            foreach (Match match in matches)
            {
                var element = match.Value;
                if (element.Length == 0)
                {
                    continue;
                }

                if (element.Length == 1 && char.IsLetter(element[0]))
                {
                    // Amino acid token: first flush the previous residue together with its modifications.
                    if (aa != null)
                    {
                        aa = mods.Aggregate(aa, (current, mod) => new ModifiedAminoAcid(current, mod));
                        aminoAcidList.Add(aa);
                        mods = new List<Modification>();
                    }

                    aa = stdAaSet.GetAminoAcid(element[0]);
                    if (aa == null)
                    {
                        throw new FormatException("Unrecognized amino acid character: " + element[0]);
                    }
                }
                else
                {
                    // Modification token: strip the surrounding brackets and look the name up.
                    var modName = element.Substring(1, element.Length - 2);
                    var mod = Modification.Get(modName);
                    if (mod == null)
                    {
                        throw new FormatException("Unrecognized modification: " + modName);
                    }

                    mods.Add(mod);
                }
            }

            // Flush the final residue.
            if (aa != null)
            {
                aa = mods.Aggregate(aa, (current, mod) => new ModifiedAminoAcid(current, mod));
                aminoAcidList.Add(aa);
            }

            return new Sequence(aminoAcidList);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Configures an <see cref="MsPfParameters"/> instance from the supplied arguments plus a few fixed
        /// settings (cleavage limits and 10 ppm tolerances) and runs a top-down search with it.
        /// </summary>
        private void TestTopDownSearch(string specFilePath, string dbFilePath, string outputDir, AminoAcidSet aaSet,
                                       int minSequenceLength, int maxSequenceLength,
                                       int minPrecursorIonCharge, int maxPrecursorIonCharge,
                                       int minProductIonCharge, int maxProductIonCharge,
                                       double minSequenceMass, double maxSequenceMass,
                                       DatabaseSearchMode tda, InternalCleavageType searchMode)
        {
            Utils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            // Settings that are fixed for every invocation of this helper.
            const int nTermCleavageLimit = 1; // 30
            const int cTermCleavageLimit = 0;
            const int precursorTolerancePpm = 10;
            const int productTolerancePpm = 10;

            var parameters = new MsPfParameters(specFilePath, dbFilePath, outputDir, aaSet, "");
            parameters.MinSequenceLength = minSequenceLength;
            parameters.MaxSequenceLength = maxSequenceLength;
            parameters.MaxNumNTermCleavages = nTermCleavageLimit;
            parameters.MaxNumCTermCleavages = cTermCleavageLimit;
            parameters.MinPrecursorIonCharge = minPrecursorIonCharge;
            parameters.MaxPrecursorIonCharge = maxPrecursorIonCharge;
            parameters.MinProductIonCharge = minProductIonCharge;
            parameters.MaxProductIonCharge = maxProductIonCharge;
            parameters.MinSequenceMass = minSequenceMass;
            parameters.MaxSequenceMass = maxSequenceMass;
            parameters.PrecursorIonTolerancePpm = precursorTolerancePpm;
            parameters.ProductIonTolerancePpm = productTolerancePpm;
            parameters.TargetDecoySearchMode = tda;
            parameters.InternalCleavageMode = searchMode;

            var launcher = new IcTopDownLauncher(parameters);

            // Optional launcher settings (disabled):
            //launcher.ForceParallel = true;
            //launcher.MaxNumThreads = -1;

            launcher.RunSearch(0.7);
            //launcher.RunIntactProteinSearch();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Convenience overload: runs the top-down search with a fixed set of sequence-length, charge and
        /// mass limits, forwarding the file paths, amino acid set, target/decoy setting and search mode.
        /// </summary>
        public void TestTopDownSearch(string specFilePath, string dbFilePath, string outputDir, AminoAcidSet aaSet,
                                      bool? tda, int searchMode)
        {
            TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            // Sequence length limits (trailing comments record alternative values).
            const int shortestSequence = 21;   // 7
            const int longestSequence = 500;   // 1000

            // Precursor ion charge range.
            const int lowestPrecursorCharge = 2;    // 3
            const int highestPrecursorCharge = 60;  // 67

            // Product ion charge range.
            const int lowestProductCharge = 1;    // 1
            const int highestProductCharge = 20;  // 15

            // Sequence mass range.
            const double lightestSequence = 3000.0;
            const double heaviestSequence = 50000.0;

            TestTopDownSearch(
                specFilePath, dbFilePath, outputDir, aaSet,
                shortestSequence, longestSequence,
                lowestPrecursorCharge, highestPrecursorCharge,
                lowestProductCharge, highestProductCharge,
                lightestSequence, heaviestSequence,
                tda, searchMode);
        }
Exemplo n.º 6
0
        /// <summary>
        /// For one MS1 feature (mass and scan range), collects the sequence tags of its matching MS2 scans,
        /// maps them onto proteins of a FASTA database, and prints the tags of the protein with the most
        /// matches before and after merging them in a <see cref="MatchedTagSet"/>.
        /// The test is ignored when the raw file or the FASTA file is missing.
        /// </summary>
        public void TestFeatureId()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            // Base path of the data set, WITHOUT extension; the .raw, .ms1ft and .seqtag paths derive from it.
            const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

            // Check the file that is actually opened. The previous guard tested the extension-less base
            // path, which presumably never exists as a file, so the test would always be skipped.
            var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
            if (!File.Exists(rawFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
            }

            // Checked before loading the run so that a skip does not pay for reading the spectra.
            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            // Feature: 5236-5286	6-12	8480.3681	5
            const int    minScanNum  = 5236;
            const int    maxScanNum  = 5286;
            const double featureMass = 8480.3681;

            //const int minScanNum = 7251;
            //const int maxScanNum = 7326;
            //const double featureMass = 32347.18;

            //const int minScanNum = 4451;
            //const int maxScanNum = 4541;
            //const double featureMass = 31267.95;

            var tolerance        = new Tolerance(10);
            var relaxedTolerance = new Tolerance(20);

            const int minTagLength       = 5;
            const int minMergedTagLength = 7;
            const int minNumTagMatches   = 1;

            var run = PbfLcMsRun.GetLcMsRun(rawFileName);

            var aminoAcidSet    = AminoAcidSet.GetStandardAminoAcidSet();
            var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
            var filter          = new Ms1FtFilter(run, tolerance, featureFileName);

            // MS2 scans matching the feature mass, strictly inside the feature's scan range.
            var ms2ScanNums =
                filter.GetMatchingMs2ScanNums(featureMass)
                .Where(scanNum => scanNum > minScanNum && scanNum < maxScanNum)
                .ToArray();

            const string tagFileName = dataSet + ".seqtag"; //"_MinLength3.seqtag"; //Path.ChangeExtension(dataSet, ".seqtag");

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            var tagParser    = new SequenceTagParser(tagFileName, minTagLength);

            // Protein name -> every tag occurrence found in that protein.
            var proteinsToTags = new Dictionary <string, IList <MatchedTag> >();

            foreach (var ms2ScanNum in ms2ScanNums)
            {
                var tags = tagParser.GetSequenceTags(ms2ScanNum);
                foreach (var tag in tags)
                {
                    var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                    foreach (var index in matchedIndices)
                    {
                        var protein    = fastaDb.GetProteinName(index);
                        var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
                        var matchedTag = new MatchedTag(tag, startIndex, featureMass);
                        IList <MatchedTag> existingTags;
                        if (proteinsToTags.TryGetValue(protein, out existingTags))
                        {
                            existingTags.Add(matchedTag);
                        }
                        else
                        {
                            proteinsToTags.Add(protein, new List <MatchedTag> {
                                matchedTag
                            });
                        }
                    }
                }
            }

            // Only the protein with the most tag matches is reported (note the trailing break).
            foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
            {
                if (entry.Value.Count < minNumTagMatches)
                {
                    break;
                }
                var proteinName     = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein         = new Sequence(proteinSequence, aminoAcidSet);
                Console.WriteLine(proteinName + "\t" + entry.Value.Count);

                var matchedTagSet = new MatchedTagSet(proteinSequence, aminoAcidSet,
                                                      tolerance, relaxedTolerance);

                Console.WriteLine("********** Before merging");
                foreach (var matchedTag in entry.Value)
                {
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

                    // Columns: N-term flanking mass minus sequence mass, tag sequence, the same difference
                    // for the C-term, start index, and the two reliability flags.
                    Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                                      (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);

                    matchedTagSet.Add(matchedTag);
                }

                Console.WriteLine("********** After merging");
                foreach (var matchedTag in matchedTagSet.Tags)
                {
                    // Short merged tags are not reported.
                    if (matchedTag.Length < minMergedTagLength)
                    {
                        continue;
                    }
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);
                    Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                                      (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
                }

                break;
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Measures how well the MS1 isotope/charge correlation filter retains confidently identified
        /// spectra: loads a raw file, builds the filter, counts the MS2 scans matched per mass bin, and then,
        /// for every identification in an MSAlign result file with E-value of at most 1E-4, prints the
        /// precursor correlation scores and whether the filter keeps the scan.
        /// The test is ignored when the raw file or the result file is missing.
        /// </summary>
        public void FilteringEfficiencyQcShew()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();
            const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

            sw.Stop();

            Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

            const int minPrecursorCharge = 3;
            const int maxPrecursorCharge = 30;
            const int tolerancePpm       = 10;
            var       tolerance          = new Tolerance(tolerancePpm);

            sw.Restart();
            var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);

            //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40);

            sw.Stop();

            Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

            ISequenceFilter ms1Filter = ms1BasedFilter;

            // Count how many MS2 scans the filter returns for every mass bin in the protein mass range.
            sw.Restart();
            const double minProteinMass = 3000.0;
            const double maxProteinMass = 30000.0;
            var          minBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
            var          maxBinNum      = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
            var          numComparisons = 0L;

            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
                numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
            }
            sw.Stop();

            Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

            //const string prot =
            //    "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA";
            //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass;
            //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass)));

            const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";

            if (!File.Exists(resultFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
            }

            var tsvReader = new TsvFileParser(resultFilePath);
            var scanNums  = tsvReader.GetData("Scan(s)");
            var charges   = tsvReader.GetData("Charge");
            var scores    = tsvReader.GetData("E-value");
            var sequences = tsvReader.GetData("Peptide");

            //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv";
            //var tsvReader = new TsvFileParser(resultFilePath);
            //var scanNums = tsvReader.GetData("ScanNum");
            //var charges = tsvReader.GetData("Charge");
            //var scores = tsvReader.GetData("Score");
            //var sequences = tsvReader.GetData("Sequence");

            var aaSet = new AminoAcidSet();

            var seqSet             = new HashSet <string>();   // sequences with at least one spectrum kept by the filter
            var allSeqSet          = new HashSet <string>();   // all sequences that passed the validity checks
            var numUnfilteredSpecs = 0;
            var totalSpecs         = 0;

            // The TSV holds machine-written numbers (e.g. "1.5E-10"); parse them independently of the
            // current culture so the test behaves the same on locales that use ',' as decimal separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            for (var i = 0; i < scores.Count; i++)
            {
                // Keep only confident identifications.
                var score = Convert.ToDouble(scores[i], invariant);
                if (score > 1E-4)
                {
                    continue;
                }
                //if (score < 10) continue;

                var scanNum = Convert.ToInt32(scanNums[i], invariant);
                var charge  = Convert.ToInt32(charges[i], invariant);

                // Skip entries without a dot-delimited sequence or containing '(' in the sequence.
                var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
                if (sequence == null || sequence.Contains("("))
                {
                    continue;
                }
                //var sequence = sequences[i];
                var composition = aaSet.GetComposition(sequence) + Composition.H2O;

                // The most abundant isotope of the precursor must fall inside the scan's isolation window.
                var precursorIon = new Ion(composition, charge);
                var isValid      = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
                if (!isValid)
                {
                    continue;
                }
                ++totalSpecs;

                var precursorScanNum = run.GetPrecursorScanNum(scanNum);
                var precursorSpec    = run.GetSpectrum(precursorScanNum);
                var corr1            = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                var nextScanNum = run.GetNextScanNum(scanNum, 1);
                var nextSpec    = run.GetSpectrum(nextScanNum);
                var corr2       = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

                // corr3 is 1 when the MS1 filter keeps this scan for the identified mass, otherwise 0.
                var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
                if (corr3 == 1)
                {
                    numUnfilteredSpecs++;
                    seqSet.Add(sequences[i]);
                }
                allSeqSet.Add(sequences[i]);

                var corrMax = new[] { corr1, corr2, corr3 }.Max();

                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax);
            }

            Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
            Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
            Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
            Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

            // The stopwatch was last restarted for the per-bin counting, so this is that phase's time.
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Exemplo n.º 8
0
 /// <summary>
 /// Initializes a new instance of the <see cref="MsPfParameters"/> class, with the parameters specifying required options for a search
 /// </summary>
 /// <remarks>
 /// Chains to the parameterless constructor first, then stores each argument in the matching property.
 /// </remarks>
 /// <param name="specFilePath">Value stored in <c>SpecFilePath</c>.</param>
 /// <param name="dbFilePath">Value stored in <c>DatabaseFilePath</c>.</param>
 /// <param name="outputDir">Value stored in <c>OutputDir</c>.</param>
 /// <param name="aaSet">Value stored in <c>AminoAcidSet</c>.</param>
 /// <param name="featureFilePath">Value stored in <c>FeatureFilePath</c>; optional, defaults to <c>null</c>.</param>
 public MsPfParameters(string specFilePath, string dbFilePath, string outputDir, AminoAcidSet aaSet, string featureFilePath = null) : this()
 {
     SpecFilePath     = specFilePath;
     DatabaseFilePath = dbFilePath;
     AminoAcidSet     = aaSet;
     OutputDir        = outputDir;
     FeatureFilePath  = featureFilePath;
 }
Exemplo n.º 9
0
        public void Test43KProtein()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            // Configure amino acid set
            var acetylN              = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var oxM                  = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC             = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var glutathioneC         = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
            var dethiomethylM        = new SearchModification(Modification.Dethiomethyl, 'M', SequenceLocation.Everywhere, false);
            var deamidatedN          = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
            var deamidatedQ          = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
            var pyroCarbamidomethylC = new SearchModification(Modification.PyroCarbamidomethyl, 'C',
                                                              SequenceLocation.ProteinNTerm, false);
            var phosphoS         = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
            var phosphoT         = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
            var phosphoY         = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
            var nitrosylC        = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
            var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List <SearchModification>
            {
                dehydroC,
                glutathioneC,
                oxM,
                dethiomethylM,
                acetylN,
                //phosphoS,
                //phosphoT,
                //phosphoY,
                deamidatedN,
//                deamidatedQ,
                glutathioneC,
                pyroCarbamidomethylC,
                nitrosylC,
                nethylmaleimideC
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

//            var aaSet = new AminoAcidSet();

            if (!File.Exists(TestRawFilePath))
            {
                Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
            }

            var          run          = PbfLcMsRun.GetLcMsRun(TestRawFilePath);
            const string protSequence =
                "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
            const string annotation = "_." + protSequence + "._";
            var          seqGraph   = SequenceGraph.CreateGraph(aaSet, AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);

            if (seqGraph == null)
            {
                return;
            }

            var ms1Filter        = new SimpleMs1Filter();
            var ms2ScorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(run);

            foreach (var ms2ScanNum in Ms2ScanNums)
            {
                ms2ScorerFactory.GetScorer(ms2ScanNum);
            }

            for (var numNTermCleavages = 0; numNTermCleavages <= 0; numNTermCleavages++)
            {
                if (numNTermCleavages > 0)
                {
                    seqGraph.CleaveNTerm();
                }
                var numProteoforms = seqGraph.GetNumProteoformCompositions();
                var modCombs       = seqGraph.GetModificationCombinations();
                for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
                {
                    seqGraph.SetSink(modIndex);
                    var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
                    var sequenceMass           = protCompositionWithH2O.Mass;
                    var modCombinations        = modCombs[modIndex];

                    foreach (var ms2ScanNum in ms1Filter.GetMatchingMs2ScanNums(sequenceMass))
                    {
                        var spec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                        if (spec == null)
                        {
                            continue;
                        }
                        var charge =
                            (int)
                            Math.Round(sequenceMass /
                                       (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));
                        var scorer = ms2ScorerFactory.GetMs2Scorer(ms2ScanNum);
                        var score  = seqGraph.GetFragmentScore(scorer);
                        if (score <= 3)
                        {
                            continue;
                        }

                        var precursorIon = new Ion(protCompositionWithH2O, charge);
                        var sequence     = protSequence.Substring(numNTermCleavages);
                        var pre          = numNTermCleavages == 0 ? annotation[0] : annotation[numNTermCleavages + 1];
                        var post         = annotation[annotation.Length - 1];

                        Console.WriteLine("{0}.{1}.{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}", pre, sequence, post, ms2ScanNum, modCombinations,
                                          precursorIon.GetMostAbundantIsotopeMz(), precursorIon.Charge, precursorIon.Composition.Mass, score);
                    }
                }
            }
        }
Exemplo n.º 10
0
 /// <summary>
 /// Initializes a new instance of the <see cref="MsPfParameters"/> class, with the parameters specifying required options for a search.
 /// The parameterless constructor is chained first (<c>this()</c>); each argument is then copied to the matching property.
 /// </summary>
 /// <param name="specFilePath">Value stored in <c>SpecFilePath</c>.</param>
 /// <param name="dbFilePath">Value stored in <c>DatabaseFilePath</c>.</param>
 /// <param name="outputDir">Value stored in <c>OutputDir</c>.</param>
 /// <param name="aaSet">Value stored in <c>AminoAcidSet</c>.</param>
 /// <param name="featureFilePath">Value stored in <c>FeatureFilePath</c>; optional, defaults to <c>null</c>.</param>
 public MsPfParameters(string specFilePath, string dbFilePath, string outputDir, AminoAcidSet aaSet, string featureFilePath = null) : this()
 {
     // ReSharper disable VirtualMemberCallInConstructor
     SpecFilePath     = specFilePath;
     DatabaseFilePath = dbFilePath;
     AminoAcidSet     = aaSet;
     OutputDir        = outputDir;
     FeatureFilePath  = featureFilePath;
     // ReSharper restore VirtualMemberCallInConstructor
 }
Exemplo n.º 11
0
        /// <summary>
        /// Loads the spectrum file, builds the top-down scorer from it, and immediately calls
        /// <c>Rescore</c> with the given result and output paths.
        /// </summary>
        /// <param name="specFilePath">Spectrum file handed to <c>InMemoryLcMsRun.GetLcMsRun</c>.</param>
        /// <param name="icResultFilePath">First argument passed to <c>Rescore</c>.</param>
        /// <param name="outputFilePath">Second argument passed to <c>Rescore</c>.</param>
        /// <param name="aaSet">Amino acid set forwarded to the scorer.</param>
        /// <param name="tolerance">Tolerance forwarded to the scorer.</param>
        /// <param name="ms2CorrThreshold">Forwarded to the scorer; defaults to 0.7.</param>
        /// <param name="minProductIonCharge">Forwarded to the scorer; defaults to 1.</param>
        /// <param name="maxProductIonCharge">Forwarded to the scorer; defaults to 10.</param>
        public IcRescorer(
            string specFilePath,
            string icResultFilePath,
            string outputFilePath,
            AminoAcidSet aaSet,
            Tolerance tolerance,
            double ms2CorrThreshold = 0.7,
            int minProductIonCharge = 1,
            int maxProductIonCharge = 10)
        {
            _topDownScorer = new InformedTopDownScorer(
                InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826),
                aaSet,
                minProductIonCharge,
                maxProductIonCharge,
                tolerance,
                ms2CorrThreshold);

            // The constructor performs the whole rescoring run.
            Rescore(icResultFilePath, outputFilePath);
        }
Exemplo n.º 12
0
        /// <summary>
        /// For each dataset in a fixed list, reads its tab-delimited <c>.seqtag</c> file, looks every tag of
        /// length 6 or more up in the FASTA database, and writes a <c>.dmass</c> file containing one row per
        /// tag occurrence whose (detected - expected) flanking mass lies strictly between -500 and 2000.
        /// </summary>
        /// <remarks>
        /// The test is ignored when the hard-coded data folder does not exist.
        /// Numbers in the tag file are parsed with the invariant culture and tags are located with an
        /// ordinal comparison, so the result does not depend on the machine's regional settings.
        /// </remarks>
        public void FindProteinDeltaMass()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string folderPath = @"D:\MassSpecFiles\Glyco\";

            if (!Directory.Exists(folderPath))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, folderPath);
            }

            var fileSet = new[]
            {
                "User_sample_test_02252015", "User_sample_test_MWCO_02262016", "User_sample_test_SEC_F3_03022105",
                "User_sample_test_SEC_F1_02272015", "User_sample_test_SEC_F2_02282015"
            };
            const string fastaFilePath = folderPath + "ID_003836_DA9CC1E4.fasta";

            // The FASTA file is the same for every dataset, so load and index it once
            // instead of once per dataset.
            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);

            foreach (var datasetName in fileSet)
            {
                var tagFilePath    = folderPath + datasetName + ".seqtag";
                var outputFilePath = folderPath + datasetName + ".dmass";

                using (var writer = new StreamWriter(outputFilePath))
                {
                    var isHeader    = true;
                    var nReadSeqTag = 0;
                    var nColumn     = 0;

                    Console.WriteLine(@"Reading {0} file", tagFilePath);

                    // ReadLines streams the file rather than materializing every line first.
                    foreach (var line in File.ReadLines(tagFilePath))
                    {
                        if (isHeader)
                        {
                            // First line: remember the column count and echo the header with the extra columns.
                            isHeader = false;
                            nColumn  = line.Split('\t').Length;
                            writer.WriteLine(line + "\t" + "Protein" + "\t" + "DetectedFlankingMass" + "\t" + "ExpectedFlankingMass" + "\t" + "DeltaMass");
                            continue;
                        }

                        var token = line.Split('\t');
                        if (token.Length != nColumn)
                        {
                            continue;   // malformed row
                        }

                        var tag = token[1];
                        if (tag.Length < 6)
                        {
                            continue;   // short tags are not searched
                        }

                        var nTerminal = token[2].Equals("1");

                        // Invariant culture: the file uses '.' as the decimal separator regardless of
                        // the regional settings of the machine running the test.
                        var detectedFlankingMass = double.Parse(token[3], System.Globalization.CultureInfo.InvariantCulture);

                        if (!nTerminal)
                        {
                            detectedFlankingMass -= Composition.H2O.Mass;
                        }

                        nReadSeqTag++;

                        var matchedProteins =
                            searchableDb.FindAllMatchedSequenceIndices(tag)
                            .Select(index => fastaDb.GetProteinName(index))
                            .Distinct().ToArray();

                        if (matchedProteins.Length < 1)
                        {
                            continue;
                        }

                        foreach (var protName in matchedProteins)
                        {
                            var seqStr = fastaDb.GetProteinSequence(protName);
                            var oriSeq = new Sequence(seqStr, AminoAcidSet.GetStandardAminoAcidSet());

                            var startIdx = 0;
                            while (true)
                            {
                                // Ordinal search: residue letters are plain characters, not linguistic text.
                                var idx = seqStr.IndexOf(tag, startIdx, StringComparison.Ordinal);

                                if (idx < 0)
                                {
                                    break;          // no further occurrence
                                }

                                // Number of candidate flanks tried per occurrence: two on the N-terminal side, one otherwise.
                                var nClv = nTerminal ? 2 : 1;

                                for (var j = 0; j < nClv; j++)
                                {
                                    var flankComposition = nTerminal
                                        ? oriSeq.GetComposition(j, idx)
                                        : oriSeq.GetComposition(idx + tag.Length, oriSeq.Count - j);

                                    var massDiff = detectedFlankingMass - flankComposition.Mass;
                                    if (massDiff > -500 && massDiff < 2000)
                                    {
                                        writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", line, protName, detectedFlankingMass, flankComposition.Mass, massDiff);
                                    }

                                    // The flank only gets shorter as j grows, so once the difference exceeds
                                    // the upper bound there is no point in trying further.
                                    if (massDiff > 2000)
                                    {
                                        break;
                                    }
                                }

                                // Continue after this occurrence (overlapping occurrences are not reported).
                                startIdx = idx + tag.Length;
                            }
                        }
                    }

                    Console.WriteLine(@"{0} seq tags are processed", nReadSeqTag);
                }
                Console.WriteLine(@"Done");
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Validates the command-line parameter dictionary and copies its values into this object's settings.
        /// </summary>
        /// <param name="parameters">
        /// Option name to raw string value. The options read here are <c>-s</c>, <c>-d</c>, <c>-o</c>, <c>-mod</c>,
        /// <c>-e</c>, <c>-ntt</c>, <c>-t</c>, <c>-f</c>, <c>-tda</c>, <c>-minLength</c>, <c>-maxLength</c>,
        /// <c>-minCharge</c>, <c>-maxCharge</c>, <c>-minFragCharge</c> and <c>-maxFragCharge</c>.
        /// </param>
        /// <returns><c>null</c> on success; otherwise a message describing the first problem found.</returns>
        /// <remarks>
        /// Side effect: creates the output directory when it does not exist yet.
        /// </remarks>
        public string Parse(Dictionary<string, string> parameters)
        {
            var message = CheckIsValid(parameters);

            if (message != null)
            {
                return message;
            }

            // -s may name a single spectrum file or a directory of .raw files.
            var specFilePath = parameters["-s"];

            if (Directory.Exists(specFilePath))
            {
                SpecFilePaths = Directory.GetFiles(specFilePath, "*.raw");
            }
            else
            {
                SpecFilePaths = new[] { specFilePath };
            }

            DatabaseFilePath = parameters["-d"];

            var outputDir = parameters["-o"];

            if (string.IsNullOrEmpty(outputDir))
            {
                // A null value falls back to the current directory; an empty one is treated the same way
                // because indexing its last character below would throw.
                outputDir = Environment.CurrentDirectory;
            }

            if (outputDir[outputDir.Length - 1] == Path.DirectorySeparatorChar)
            {
                outputDir = outputDir.Remove(outputDir.Length - 1);
            }
            if (!Directory.Exists(outputDir))
            {
                if (File.Exists(outputDir) && !File.GetAttributes(outputDir).HasFlag(FileAttributes.Directory))
                {
                    return "OutputDir " + outputDir + " is not a directory!";
                }
                Directory.CreateDirectory(outputDir);
            }
            OutputDir = outputDir;

            var modFilePath = parameters["-mod"];

            if (modFilePath != null)
            {
                var parser = new ModFileParser(modFilePath);
                _searchModifications      = parser.SearchModifications;
                _maxNumDynModsPerSequence = parser.MaxNumDynModsPerSequence;

                if (_searchModifications == null)
                {
                    return "Error while parsing " + modFilePath + "!";
                }

                AminoAcidSet = new AminoAcidSet(_searchModifications, _maxNumDynModsPerSequence);
            }
            else
            {
                // No modification file: default amino acid set and no search modifications.
                AminoAcidSet         = new AminoAcidSet();
                _searchModifications = new SearchModification[0];
            }

            var    enzymeId = Convert.ToInt32(parameters["-e"]);
            Enzyme enzyme;

            switch (enzymeId)
            {
                case 0:
                    enzyme = Enzyme.UnspecificCleavage;
                    break;

                case 1:
                    enzyme = Enzyme.Trypsin;
                    break;

                case 2:
                    enzyme = Enzyme.Chymotrypsin;
                    break;

                case 3:
                    enzyme = Enzyme.LysC;
                    break;

                case 4:
                    enzyme = Enzyme.LysN;
                    break;

                case 5:
                    enzyme = Enzyme.GluC;
                    break;

                case 6:
                    enzyme = Enzyme.ArgC;
                    break;

                case 7:
                    enzyme = Enzyme.AspN;
                    break;

                case 8:
                    enzyme = Enzyme.Alp;
                    break;

                case 9:
                    enzyme = Enzyme.NoCleavage;
                    break;

                default:
                    return "Invalid enzyme ID (" + enzymeId + ") for parameter -e";
            }
            Enzyme = enzyme;

            NumTolerableTermini = Convert.ToInt32(parameters["-ntt"]);
            if (NumTolerableTermini < 0 || NumTolerableTermini > 2)
            {
                // The message names the option that was actually read (-ntt).
                return "Invalid value (" + NumTolerableTermini + ") for parameter -ntt";
            }

            PrecursorIonTolerancePpm = Convert.ToDouble(parameters["-t"]);
            ProductIonTolerancePpm   = Convert.ToDouble(parameters["-f"]);

            var tdaVal = Convert.ToInt32(parameters["-tda"]);

            if (tdaVal != 0 && tdaVal != 1)
            {
                return "Invalid value (" + tdaVal + ") for parameter -tda";
            }
            Tda = (tdaVal == 1);

            MinSequenceLength = Convert.ToInt32(parameters["-minLength"]);
            MaxSequenceLength = Convert.ToInt32(parameters["-maxLength"]);
            if (MinSequenceLength > MaxSequenceLength)
            {
                return "MinSequenceLength (" + MinSequenceLength + ") is larger than MaxSequenceLength (" + MaxSequenceLength + ")!";
            }

            // Each range check compares the pair it reports on.
            MinPrecursorIonCharge = Convert.ToInt32(parameters["-minCharge"]);
            MaxPrecursorIonCharge = Convert.ToInt32(parameters["-maxCharge"]);
            if (MinPrecursorIonCharge > MaxPrecursorIonCharge)
            {
                return "MinPrecursorCharge (" + MinPrecursorIonCharge + ") is larger than MaxPrecursorCharge (" + MaxPrecursorIonCharge + ")!";
            }

            MinProductIonCharge = Convert.ToInt32(parameters["-minFragCharge"]);
            MaxProductIonCharge = Convert.ToInt32(parameters["-maxFragCharge"]);
            if (MinProductIonCharge > MaxProductIonCharge)
            {
                return "MinFragmentCharge (" + MinProductIonCharge + ") is larger than MaxFragmentCharge (" + MaxProductIonCharge + ")!";
            }

            return null;
        }
Exemplo n.º 14
0
        /// <summary>
        /// Builds a scoring graph for one protein sequence against one scan, computes its generating
        /// function, and prints the score range, the spectral E-value for every score from 15 up to the
        /// maximum score, and the elapsed time of each phase.
        /// </summary>
        /// <param name="scanNum">Scan number of the spectrum to read from the test PBF file.</param>
        /// <param name="protSequence">Protein sequence to score (created with an empty modification string).</param>
        public void TestGetScoreDistribution(int scanNum, string protSequence)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var pbfFile = Utils.GetTestFile(methodName, Utils.GetPbfTestFilePath(false));

            if (!pbfFile.Exists)
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pbfFile);
            }

            // Deconvolution settings
            const string noModifications   = "";
            const int    chargeUpperBound  = 20;
            const int    chargeLowerBound  = 1;
            const double windowSize        = 1.1;
            const int    isotopeOffsetTol  = 2;

            var tolerance = new Tolerance(10);
            var lcMsRun   = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);

            // Amino acid set: three dynamic modifications, at most four per protein.
            var oxidizedM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC  = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylNt  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int maxModsPerProtein = 4;
            var       modifications     = new List<SearchModification> { dehydroC, oxidizedM, acetylNt };

            var aaSet      = new AminoAcidSet(modifications, maxModsPerProtein);
            var massBinner = new FilteredProteinMassBinning(aaSet, 50000, 28);

            // Phase 1: edge generation
            var phaseTimer   = Stopwatch.StartNew();
            var graphFactory = new ProteinScoringGraphFactory(massBinner, aaSet);

            phaseTimer.Stop();
            Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", phaseTimer.ElapsedMilliseconds / 1000.0d);

            // Everything from here to the end is covered by the total timer.
            var totalTimer = Stopwatch.StartNew();

            var proteinMass = Sequence.CreateSequence(protSequence, noModifications, aaSet).Mass + Composition.H2O.Mass;

            Console.WriteLine("Mass = {0}", proteinMass);

            var productSpectrum = lcMsRun.GetSpectrum(scanNum) as ProductSpectrum;
            var deconvoluted    = Deconvoluter.GetDeconvolutedSpectrum(
                productSpectrum, chargeLowerBound, chargeUpperBound,
                isotopeOffsetTol, windowSize, tolerance, 0.7);

            // Phase 2: node generation
            phaseTimer.Reset();
            phaseTimer.Start();

            var scorer       = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvoluted, productSpectrum, tolerance, massBinner);
            var scoringGraph = graphFactory.CreateScoringGraph(scorer, proteinMass);

            phaseTimer.Stop();
            Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", phaseTimer.ElapsedMilliseconds / 1000.0d);

            // Phase 3: generating function
            phaseTimer.Restart();

            var gf = new GeneratingFunction(scoringGraph);
            gf.ComputeGeneratingFunction();

            phaseTimer.Stop();
            Console.WriteLine(@"computing generation function = {0:0.000} sec", phaseTimer.ElapsedMilliseconds / 1000.0d);

            var distribution = gf.GetScoreDistribution();

            Console.WriteLine("{0}-{1}", distribution.MinScore, distribution.MaxScore);
            Console.WriteLine("{0} : {1}", "score", "specEValue");

            var score = 15;
            while (score <= gf.MaximumScore)
            {
                Console.WriteLine("{0} : {1}", score, gf.GetSpectralEValue(score));
                score++;
            }

            totalTimer.Stop();
            Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", totalTimer.ElapsedMilliseconds / 1000.0d);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Builds the two lookup tables that translate between the bin numbers of an
        /// <c>MzComparerWithBinning</c> and a compacted ("filtered") bin index. Only bins that contain a
        /// mass reachable by adding up masses from the extended amino acid array (starting from mass zero,
        /// with terminal modifications considered on the first step) receive a filtered index.
        /// </summary>
        /// <param name="aaSet">Amino acid set; source of the extended amino acid array and the terminal modifications.</param>
        /// <param name="maxProteinMass">Upper mass limit, stored in <c>MaxMass</c>; defaults to 50000.</param>
        /// <param name="numBits">Passed to the <c>MzComparerWithBinning</c> constructor; defaults to 27.</param>
        public FilteredProteinMassBinning(AminoAcidSet aaSet, double maxProteinMass = 50000, int numBits = 27)
        {
            _aminoAcidSet = aaSet;
            var terminalModifications  = GetTerminalModifications(aaSet);
            var extendedAminoAcidArray = GetExtendedAminoAcidArray(aaSet);

            // MinMass = the smallest mass among all amino acids and all amino acids carrying a
            // terminal modification (starting from MaxMass as the initial upper bound).
            MaxMass = maxProteinMass;
            MinMass = MaxMass;
            foreach (var aa in extendedAminoAcidArray)
            {
                if (aa.Mass < MinMass)
                {
                    MinMass = aa.Mass;
                }
                foreach (var mod in terminalModifications)
                {
                    var modAa = new ModifiedAminoAcid(aa, mod);
                    if (modAa.Mass < MinMass)
                    {
                        MinMass = modAa.Mass;
                    }
                }
            }

            _mzComparer = new MzComparerWithBinning(numBits);

            _minMzBinIndex = _mzComparer.GetBinNumber(MinMass);
            _maxMzBinIndex = _mzComparer.GetBinNumber(MaxMass);

            // One slot per bin in [_minMzBinIndex, _maxMzBinIndex] plus one extra slot (index 0)
            // reserved for the zero-mass bin; regular bins are stored at (binNum - _minMzBinIndex + 1).
            var numberOfMzBins = _maxMzBinIndex - _minMzBinIndex + 2; // pad zero mass bin

            // -1 marks a bin that has no filtered index (yet).
            _mzBinToFilteredBinMap = new int[numberOfMzBins];
            for (var i = 0; i < numberOfMzBins; i++)
            {
                _mzBinToFilteredBinMap[i] = -1;
            }

            // tempMap collects filtered index -> bin number; it is trimmed to its used length at the end.
            var tempMap   = new int[numberOfMzBins];
            // One flag per high-precision bin up to MaxMass; a set flag means "this mass is reachable".
            var fineNodes = new BitArray(Constants.GetBinNumHighPrecision(MaxMass));

            // Mass zero is the starting point.
            fineNodes[0] = true;

            var effectiveBinCounter = 0;

            // Sweep the fine bins in ascending order. Flags set inside the loop always lie ahead of
            // fineBinIdx (a mass is added), so they are visited later in this same sweep.
            for (var fineBinIdx = 0; fineBinIdx < fineNodes.Length; fineBinIdx++)
            {
                if (!fineNodes[fineBinIdx])
                {
                    continue;
                }

                // Mass represented by this fine bin (presumably the inverse of GetBinNumHighPrecision - TODO confirm).
                var fineNodeMass = fineBinIdx / Constants.RescalingConstantHighPrecision;

                foreach (var aa in extendedAminoAcidArray)
                {
                    var validFineNodeIndex = Constants.GetBinNumHighPrecision(fineNodeMass + aa.Mass);
                    // NOTE(review): leaving the loop here (rather than skipping this entry) looks like it
                    // assumes the array is ordered by ascending mass - confirm against GetExtendedAminoAcidArray.
                    if (validFineNodeIndex >= fineNodes.Length)
                    {
                        break;
                    }
                    fineNodes[validFineNodeIndex] = true;

                    // Only from the zero-mass node, and only for unmodified residues:
                    // also mark the residue combined with each terminal modification.
                    if (fineBinIdx == 0 && !(aa is ModifiedAminoAcid)) // include terminal modifications
                    {
                        foreach (var terminalMod in terminalModifications)
                        {
                            var modifiedAa = new ModifiedAminoAcid(aa, terminalMod);
                            validFineNodeIndex = Constants.GetBinNumHighPrecision(fineNodeMass + modifiedAa.Mass);
                            if (validFineNodeIndex >= fineNodes.Length)
                            {
                                break;
                            }
                            fineNodes[validFineNodeIndex] = true;
                        }
                    }
                }

                /*foreach (var m in massList)
                 * {
                 *  var validFineNodeIndex = Constants.GetBinNumHighPrecision(fineNodeMass + m);
                 *  if (validFineNodeIndex >= fineNodes.Length) break;
                 *  fineNodes[validFineNodeIndex] = true;
                 * }*/

                // Assign the next filtered index to this node's bin, unless the bin is out of range or
                // already has one (several fine bins can fall into the same bin). The zero-mass node is
                // always assigned.
                var binNum = _mzComparer.GetBinNumber(fineNodeMass);
                if (fineBinIdx == 0 || (binNum >= _minMzBinIndex && binNum <= _maxMzBinIndex && _mzBinToFilteredBinMap[binNum - _minMzBinIndex + 1] < 0))
                {
                    _mzBinToFilteredBinMap[binNum == 0 ? 0 : binNum - _minMzBinIndex + 1] = effectiveBinCounter;
                    tempMap[effectiveBinCounter] = binNum;
                    effectiveBinCounter++;
                }
            }
            // Keep only the filtered indices that were actually assigned.
            _filteredBinToMzBinMap = new int[effectiveBinCounter];
            Array.Copy(tempMap, _filteredBinToMzBinMap, effectiveBinCounter);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Runs a bottom-up search with trypsin and a fixed set of length, charge and tolerance settings.
        /// </summary>
        /// <param name="specFilePath">Spectrum file passed to the launcher.</param>
        /// <param name="dbFilePath">Database file passed to the launcher.</param>
        /// <param name="outputDir">Output directory passed to the launcher.</param>
        /// <param name="aaSet">Amino acid set passed to the launcher.</param>
        /// <param name="tda">Assigned to <c>RunTargetDecoyAnalysisBool</c>.</param>
        /// <param name="ntt">Assigned to <c>NumTolerableTermini</c>.</param>
        /// <param name="corrThreshold">Passed to <c>RunSearch</c>; defaults to 0.3.</param>
        public void TestBottomUpSearch(string specFilePath, string dbFilePath, string outputDir, AminoAcidSet aaSet, bool? tda, int ntt, double corrThreshold = 0.3)
        {
            TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

            // Search parameters are fixed for this test; only the inputs, tda and ntt vary.
            var launcher = new IcBottomUpLauncher(specFilePath, dbFilePath, outputDir, aaSet, Enzyme.Trypsin)
            {
                MinSequenceLength          = 6,
                MaxSequenceLength          = 40,
                MinPrecursorIonCharge      = 1,
                MaxPrecursorIonCharge      = 4,
                MinProductIonCharge        = 1,
                MaxProductIonCharge        = 2,
                PrecursorIonTolerancePpm   = 10,
                ProductIonTolerancePpm     = 10,
                RunTargetDecoyAnalysisBool = tda,
                NumTolerableTermini        = ntt
            };

            launcher.RunSearch(corrThreshold);
        }
Exemplo n.º 17
0
        /// <summary>
        /// Finds sequence tags in a single spectrum, matches them against a FASTA database and prints,
        /// protein by protein (most tag matches first), every matched tag together with the difference
        /// between its flanking masses and the masses computed from the protein sequence.
        /// </summary>
        /// <remarks>
        /// The raw file path is an empty string here, so the first <c>File.Exists</c> guard ignores the test.
        /// </remarks>
        public void TestGetProteinsWithTagMatchingSingleSpec()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            // dataSet and minTagLength are not used by the code below.
            const string dataSet          = @"H:\Research\Lewy\raw\Lewy_intact_07";
            const int    minTagLength     = 7;
            const int    minNumTagMatches = 1;

            var standardAaSet = AminoAcidSet.GetStandardAminoAcidSet();

            const int    scanNum       = 2;
            const string rawFilePath   = "";
            const string fastaFilePath = @"H:\Research\Lewy\ID_004858_0EE8CF61.fasta";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);

            // Tags come from the spectrum itself rather than from a tag file.
            var lcMsRun   = PbfLcMsRun.GetLcMsRun(rawFilePath);
            var spectrum  = lcMsRun.GetSpectrum(scanNum) as ProductSpectrum;
            var tagFinder = new SequenceTagFinder(spectrum, new Tolerance(5));
            var tags      = tagFinder.GetAllSequenceTagString();

            // Group every database hit of every tag by protein name.
            var tagsByProtein = new Dictionary<string, IList<MatchedTag>>();

            foreach (var tag in tags)
            {
                foreach (var index in searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray())
                {
                    var proteinKey = fastaDb.GetProteinName(index);
                    var position   = fastaDb.GetOneBasedPositionInProtein(index);
                    var hit        = new MatchedTag(tag, position, 0.0);

                    IList<MatchedTag> bucket;
                    if (!tagsByProtein.TryGetValue(proteinKey, out bucket))
                    {
                        bucket = new List<MatchedTag>();
                        tagsByProtein.Add(proteinKey, bucket);
                    }
                    bucket.Add(hit);
                }
            }

            // Entries are sorted by descending hit count, so stop at the first one below the minimum.
            var reported = tagsByProtein
                .OrderByDescending(e => e.Value.Count)
                .TakeWhile(e => e.Value.Count >= minNumTagMatches);

            foreach (var entry in reported)
            {
                var proteinName     = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein         = new Sequence(proteinSequence, standardAaSet);

                Console.WriteLine(proteinName + "\t" + entry.Value.Count);

                foreach (var matchedTag in entry.Value)
                {
                    var tagLength = matchedTag.EndIndex - matchedTag.StartIndex;
                    var seq       = proteinSequence.Substring(matchedTag.StartIndex, tagLength);

                    // Masses of the protein on either side of the tag, computed from the sequence.
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

                    Console.WriteLine("\t{0} ({1})\t{2}\t{3} ({4})\t{5}\t{6}\t{7}",
                                      matchedTag.NTermFlankingMass, (matchedTag.NTermFlankingMass - nTermMass),
                                      seq,
                                      matchedTag.CTermFlankingMass, (matchedTag.CTermFlankingMass - cTermMass),
                                      matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
                }
            }
        }
Exemplo n.º 18
0
        /// <summary>
        /// Runs a bottom-up search of the given spectrum file against a fixed FASTA database, with no
        /// search modifications, no enzyme (<c>null</c>) and target-decoy analysis enabled.
        /// </summary>
        /// <param name="specFilePath">Spectrum file to search; the test is ignored when it does not exist.</param>
        public void TestChaoChao(string specFilePath)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
            }

            const string dbFilePath = @"D:\Research\Data\ChaoChao\database\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";

            if (!File.Exists(dbFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFilePath);
            }

            const string outputDir = @"D:\Research\Data\ChaoChao\Ic\";

            // Amino acid set without any search modification (empty list, zero modifications allowed).
            const int maxModsPerProtein = 0;
            var       noModifications   = new List<SearchModification>();
            var       aaSet             = new AminoAcidSet(noModifications, maxModsPerProtein);

            // true: target & decoy, false: target, null: decoy
            bool? runTargetDecoy = true;

            var launcher = new IcBottomUpLauncher(specFilePath, dbFilePath, outputDir, aaSet, null)
            {
                MinSequenceLength          = 7,
                MaxSequenceLength          = 150,
                MinPrecursorIonCharge      = 1,
                MaxPrecursorIonCharge      = 30,
                MinProductIonCharge        = 1,
                MaxProductIonCharge        = 15,
                PrecursorIonTolerancePpm   = 10.0,
                ProductIonTolerancePpm     = 10.0,
                RunTargetDecoyAnalysisBool = runTargetDecoy,
                NumTolerableTermini        = 0
            };

            // Correlation threshold for the search.
            launcher.RunSearch(0.7);
        }
Exemplo n.º 19
0
        /// <summary>
        /// Times full precursor-ion XIC extraction from a .raf run for the charge-2 ions of
        /// up to 100,000 tryptic peptides enumerated from a FASTA database, and prints the
        /// elapsed seconds. The test is ignored when either input file is missing.
        /// </summary>
        public void TestRunningTimeChromGen()
        {
            const int maxPeptides = 100000;

            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            const string rafFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raf";
            if (!File.Exists(rafFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rafFilePath);
            }

            var rafRun    = new PbfLcMsRun(rafFilePath);
            var tolerance = new Tolerance(10);

            const string dbFile = @"D:\Research\Data\CommonContaminants\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";
            if (!File.Exists(dbFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, dbFile);
            }

            var indexedDb = new IndexedDatabase(new FastaDatabase(dbFile));
            var aaSet     = new AminoAcidSet(Modification.Carbamidomethylation);

            // Only the peptide enumeration and XIC extraction are timed.
            var timer     = System.Diagnostics.Stopwatch.StartNew();
            var processed = 0;

            foreach (var peptide in indexedDb.AnnotationsAndOffsets(6, 30, 2, 2, Enzyme.Trypsin))
            {
                processed++;

                // Drop the two leading and two trailing characters of the annotation
                // (the flanking residue and its '.' separator on each side).
                var annotation  = peptide.Annotation;
                var bareSeq     = annotation.Substring(2, annotation.Length - 4);
                var composition = new Sequence(bareSeq, aaSet).Composition + Composition.H2O;
                var monoMz      = new Ion(composition, 2).GetMonoIsotopicMz();

                // The chromatogram itself is discarded; only the extraction cost matters here.
                rafRun.GetFullPrecursorIonExtractedIonChromatogram(monoMz, tolerance);

                if (processed == maxPeptides)
                {
                    break;
                }
            }

            timer.Stop();

            Console.WriteLine(@"{0:f4} sec", timer.Elapsed.TotalSeconds);
        }
Exemplo n.º 20
0
        /// <summary>
        /// Reads the IcTarget and IcDecoy result TSV files that sit next to the PBF test file
        /// and, for each, opens a matching "_Rescored.tsv" output whose header is the first
        /// 15 input headers followed by Score and EValue columns.
        /// The test is ignored when the parent directory of the PBF file cannot be determined.
        /// </summary>
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var pbfFilePath = Utils.GetPbfTestFilePath(false);
            var pbfFile     = Utils.GetTestFile(methodName, pbfFilePath);

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List <SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            // aaSet (with the search modifications) feeds the mass binning and the graph factory.
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var          run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance    = new Tolerance(10);
            const int    minCharge    = 1;
            const int    maxCharge    = 20;
            var          graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            // NOTE(review): the scorer and the sequence construction below use this default
            // amino acid set rather than aaSet - confirm that this is intended.
            var          aminoAcidSet = new AminoAcidSet();
            //var scorer = new MatchedPeakPostScorer(tolerance, minCharge, maxCharge);
            var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            if (pbfFile.DirectoryName == null)
            {
                Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName);
            }

            // Suffixes of the result files to rescore: <pbf name>_IcTarget.tsv and <pbf name>_IcDecoy.tsv
            var fileExt = new[] { "IcTarget", "IcDecoy" };

            foreach (var ext in fileExt)
            {
                var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext);
                // Load the result columns up front as parallel arrays
                var parser         = new TsvFileParser(resultFileName);
                var scans          = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
                var charges        = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
                var protSequences  = parser.GetData("Sequence").ToArray();
                var modStrs        = parser.GetData("Modifications").ToArray();
                var compositions   = parser.GetData("Composition").Select(Composition.Parse).ToArray();
                var protMass       = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

                var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    // Header: the first 15 input column names plus the two new score columns
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

                    var lines = new string[parser.NumData];

                    // Only indices 0..29 are processed; the commented-out loop is the full-range form.
                    // NOTE(review): presumably fails with an index error if a result file has
                    // fewer than 30 data rows - confirm against the test data.
                    //for (var i = 0; i < parser.NumData; i++)
                    Parallel.For(0, 30, i =>
                    {
                        var scan         = scans[i];
                        var charge       = charges[i];
                        var protSequence = protSequences[i];
                        var modStr       = modStrs[i];
                        var sequence     = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                        // Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));

                        if (!(run.GetSpectrum(scan) is ProductSpectrum ms2Spec))
                        {
                            Console.WriteLine("Could not get the spectrum datafor scan {0}", scan);
                        }
Exemplo n.º 21
0
        /// <summary>
        /// Builds a histogram of the difference between the rescaled, rounded mass and the
        /// nominal mass for no-enzyme sequences (length 300 to 400) of the test FASTA file,
        /// then prints the count and percentage of each bin. Enumeration stops once the
        /// runtime budget is exceeded; the clock is only consulted every 100 sequences.
        /// </summary>
        public void TestNominalMassErrors()
        {
            const int maxRuntimeSeconds = 60;
            const int minLength         = 300;
            const int maxLength         = 400;
            const int binCount          = 11;

            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            var fastaFile = Utils.GetTestFile(testName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\ID_003962_71E1A1D4.fasta"));

            var db = new FastaDatabase(fastaFile.FullName);
            db.Read();
            var indexedDb = new IndexedDatabase(db);

            // Timing starts after the database has been read and indexed.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            var hist         = new long[binCount];
            var aaSet        = new AminoAcidSet();
            var centerBin    = hist.Length / 2; // bin that holds a zero error
            var lastBin      = hist.Length - 1;
            var numSequences = 0L;

            foreach (var entry in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
            {
                numSequences++;

                // Strip the two flanking characters on each side of the annotation.
                var annotation  = entry.Annotation;
                var composition = aaSet.GetComposition(annotation.Substring(2, annotation.Length - 4));
                var massError   = (int)Math.Round(composition.Mass * Constants.RescalingConstant) - composition.NominalMass;

                // Errors outside the histogram range are accumulated in the edge bins.
                var bin = Math.Min(Math.Max(massError + centerBin, 0), lastBin);
                hist[bin]++;

                var budgetExceeded = numSequences % 100 == 0 && timer.Elapsed.TotalSeconds > maxRuntimeSeconds;
                if (budgetExceeded)
                {
                    break;
                }
            }

            Console.WriteLine("Sequence count: {0:N0}", numSequences);
            Console.WriteLine("{0,10}  {1,10}  {2,10}", "Bin ", "Count", "Fraction");
            for (var bin = 0; bin < hist.Length; bin++)
            {
                var percent = hist[bin] / (double)numSequences * 100;
                Console.WriteLine("{0,10:F1}  {1,10:N0}  {2,10:F1}%", bin - centerBin, hist[bin], percent);
            }

            timer.Stop();

            Console.WriteLine(@"Elapsed Time: {0:F1} sec", timer.Elapsed.TotalSeconds);
        }
Exemplo n.º 22
0
        /// <summary>
        /// Scores a fixed protein annotation against MS2 scan 4448 of the Aragon PBF test file
        /// with <c>MatchedPeakCounter</c>, after asserting that the charge-6 precursor ion passes
        /// the MS1 filter for that scan. Prints the score and the elapsed time.
        /// </summary>
        public void TestMatchedPeakCounter()
        {
            const string protAnnotation = "_.MFQQEVTITAPNGLHTRPAAQFVKEAKGFTSEITVTSNGKSASAKSLFKLQTLGLTQGTVVTISAEGEDEQKAVEHLVKLMAELE._";
            const int    charge         = 6;
            const int    modIndex       = 0;
            const int    ms2ScanNum     = 4448;

            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            // Tolerances used for the precursor filter and for fragment matching
            var precursorIonTolerance = new Tolerance(15);
            var productIonTolerance   = new Tolerance(15);

            var timer = new System.Diagnostics.Stopwatch();

            // Sequence graph over the unmodified amino acid set
            var aaSet    = new AminoAcidSet();
            var seqGraph = SequenceGraph.CreateGraph(aaSet, protAnnotation);

            Assert.NotNull(seqGraph, "Invalid sequence: {0}", protAnnotation);

            var specFilePath = Base.Utils.GetTestFile(testName, Path.Combine(Utils.DEFAULT_SPEC_FILES_FOLDER, "SBEP_STM_001_02272012_Aragon.pbf"));
            var run          = InMemoryLcMsRun.GetLcMsRun(specFilePath.FullName, 1.4826, 1.4826);

            // Loading the run is excluded from the timing.
            timer.Start();
            var precursorFilter = new Ms1ContainsIonFilter(run, precursorIonTolerance);

            var seqCompositionArr = seqGraph.GetSequenceCompositions();

            Console.WriteLine("Length: {0}\tNumCompositions: {1}", protAnnotation.Length - 4, seqCompositionArr.Length);

            var seqComposition     = seqCompositionArr[modIndex];
            var peptideComposition = seqComposition + Composition.H2O;

            // Return value is intentionally unused, exactly as in the original call.
            peptideComposition.GetIsotopomerEnvelopeRelativeIntensities();

            Console.WriteLine("Composition: {0}, AveragineMass: {1}", seqComposition, seqComposition.Mass);
            seqGraph.SetSink(modIndex);

            var precursorIon = new Ion(peptideComposition, charge);
            Assert.True(precursorFilter.IsValid(precursorIon, ms2ScanNum));

            var spec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
            Assert.True(spec != null);

            var peakCounter = new MatchedPeakCounter(spec, productIonTolerance, 1, 10);
            var score       = seqGraph.GetFragmentScore(peakCounter);

            Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", protAnnotation, charge, precursorIon.GetMostAbundantIsotopeMz(), ms2ScanNum, score);

            timer.Stop();

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
        }
Exemplo n.º 23
0
        /// <summary>
        /// Runs a target-only top-down search of Yufeng's column-test raw file against the
        /// truncated SO_3942 FASTA database using the default amino acid set.
        /// The test is ignored when the spectrum file or the database file is missing.
        /// </summary>
        public void TestForYufeng()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            // Input and output locations (QC_Shew)
            const string specFilePath = @"H:\Research\Yufeng\TopDownYufeng\raw\yufeng_column_test2.raw";
            const string dbFilePath   = @"H:\Research\Yufeng\TopDownYufeng\database\SO_3942_Truncated.fasta";
            const string outputDir    = @"H:\Research\Yufeng\TopDownYufeng\Debug";

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
            }

            if (!File.Exists(dbFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, dbFilePath);
            }

            // Sequence limits
            const int    minSequenceLength = 21;
            const int    maxSequenceLength = 500;
            const double minSequenceMass   = 3000.0;
            const double maxSequenceMass   = 50000.0;

            // Charge limits
            const int minPrecursorIonCharge = 2;
            const int maxPrecursorIonCharge = 50;
            const int minProductIonCharge   = 1;
            const int maxProductIonCharge   = 20;

            // Search mode: 0 = all subsequences, 1 = close to N- or C-term, 2 = close to N- and C-term
            const int searchMode = 2;

            // Target/decoy switch: true = target and decoy, false = target only, null = decoy only
            bool? tda = false;

            // Default amino acid set; the modification-aware configuration is not used by this test.
            var aaSet = new AminoAcidSet();

            TestTopDownSearch(
                specFilePath, dbFilePath, outputDir, aaSet,
                minSequenceLength, maxSequenceLength,
                minPrecursorIonCharge, maxPrecursorIonCharge,
                minProductIonCharge, maxProductIonCharge,
                minSequenceMass, maxSequenceMass,
                tda, searchMode);
        }
Exemplo n.º 24
0
        }                                                         // true: target and decoy, false: target only, null: decoy only

        /// <summary>
        /// Quick protein identification by fragment-mass bin lookup.
        /// <para>
        /// Phase 1 deconvolutes every MS2 spectrum of the run and, for each fragment-mass bin
        /// between 200 and 50000, records in a bit vector (indexed by scan number) which scans
        /// contain a deconvoluted peak within tolerance of that bin.
        /// </para>
        /// <para>
        /// Phase 2 walks every intact protein of the FASTA database, once uncleaved and once
        /// after a single N-terminal cleavage, and counts per scan how many of its fragment
        /// compositions (plus the Y-ion offset) fall in a bin flagged for that scan. The
        /// protein with the highest count is kept for each scan; a trailing apostrophe on the
        /// protein name marks the N-terminally cleaved form.
        /// </para>
        /// Finally a tab-separated ScanNum / BestProtein / Score table is written to the console.
        /// </summary>
        public void QuickId()
        {
            const string rawFilePath   = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            const string modFilePath   = @"H:\Research\QCShew_TopDown\Production\Mods.txt";
            const int    numBits       = 29; // max error: 4ppm
            const int    minCharge     = 1;
            const int    maxCharge     = 20;
            var          tolerance     = new Tolerance(10);
            const double corrThreshold = 0.7;

            // Fragment masses outside [minFragmentMass, maxFragmentMass] are ignored in both phases.
            var          comparer        = new MzComparerWithBinning(numBits);
            const double minFragmentMass = 200.0;
            const double maxFragmentMass = 50000.0;
            var          minFragMassBin  = comparer.GetBinNumber(minFragmentMass);
            var          maxFragMassBin  = comparer.GetBinNumber(maxFragmentMass);

            var aminoAcidSet = new AminoAcidSet(modFilePath);

            var run           = PbfLcMsRun.GetLcMsRun(rawFilePath);
            var ms2ScanNumArr = run.GetScanNumbers(2).ToArray();

            var sw = new Stopwatch();

            sw.Start();
            Console.Write("Building Spectrum Arrays...");

            // One bit vector per mass bin; bit [scan] is set when that scan has a peak in the bin.
            var massVectors = new BitArray[maxFragMassBin - minFragMassBin + 1];

            for (var i = minFragMassBin; i <= maxFragMassBin; i++)
            {
                massVectors[i - minFragMassBin] = new BitArray(run.MaxLcScan + 1);
            }

            foreach (var ms2ScanNum in ms2ScanNumArr)
            {
                var productSpec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                if (productSpec == null)
                {
                    continue;
                }

                var deconvolutedPeaks = Deconvoluter.GetDeconvolutedPeaks(productSpec.Peaks, minCharge, maxCharge, 2, 1.1, tolerance, corrThreshold);

                if (deconvolutedPeaks == null)
                {
                    continue;
                }

                foreach (var p in deconvolutedPeaks)
                {
                    // Flag every bin overlapping the tolerance window around the peak mass.
                    var mass      = p.Mass;
                    var deltaMass = tolerance.GetToleranceAsDa(mass, 1);
                    var minMass   = mass - deltaMass;
                    var maxMass   = mass + deltaMass;

                    var minBinNum = comparer.GetBinNumber(minMass);
                    var maxBinNum = comparer.GetBinNumber(maxMass);
                    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
                    {
                        if (binNum >= minFragMassBin && binNum <= maxFragMassBin)
                        {
                            massVectors[binNum - minFragMassBin][ms2ScanNum] = true;
                        }
                    }
                }
            }
            sw.Stop();
            Console.WriteLine(@"{0:f1} sec.", sw.Elapsed.TotalSeconds);

            // Time the database pass separately from the spectrum indexing above.
            sw.Restart();
            var fastaDb = new FastaDatabase(fastaFilePath);

            fastaDb.Read();
            var indexedDb   = new IndexedDatabase(fastaDb);
            var numProteins = 0;
            var intactProteinAnnotationAndOffsets =
                indexedDb.IntactSequenceAnnotationsAndOffsets(0, int.MaxValue);

            // Best match so far per scan number (index = scan number).
            var bestProtein = new string[run.MaxLcScan + 1];
            var bestScore   = new int[run.MaxLcScan + 1];

            foreach (var annotationAndOffset in intactProteinAnnotationAndOffsets)
            {
                if (++numProteins % 10 == 0)
                {
                    Console.WriteLine(@"Processing, {0} proteins done, {1:f1} sec elapsed",
                                      numProteins,
                                      sw.Elapsed.TotalSeconds);
                }
                var annotation = annotationAndOffset.Annotation;
                var offset     = annotationAndOffset.Offset;

                // Strip the two flanking characters on each side of the annotation.
                var protSequence = annotation.Substring(2, annotation.Length - 4);

                // Suffix (Y-ion) fragments only; prefix scoring is not implemented here.
                var seqGraph = SequenceGraph.CreateGraph(aminoAcidSet, AminoAcid.ProteinNTerm, protSequence,
                                                         AminoAcid.ProteinCTerm);
                if (seqGraph == null)
                {
                    continue;
                }

                // Pass 0 scores the intact sequence, pass 1 the sequence after one N-terminal cleavage.
                for (var numNTermCleavage = 0; numNTermCleavage <= 1; numNTermCleavage++)
                {
                    if (numNTermCleavage > 0)
                    {
                        seqGraph.CleaveNTerm();
                    }
                    var allCompositions = seqGraph.GetAllFragmentNodeCompositions();

                    var scoreArr = new int[run.MaxLcScan + 1];
                    foreach (var fragComp in allCompositions)
                    {
                        var suffixMass = fragComp.Mass + BaseIonType.Y.OffsetComposition.Mass;
                        var binNum     = comparer.GetBinNumber(suffixMass);
                        if (binNum < minFragMassBin || binNum > maxFragMassBin)
                        {
                            continue;
                        }

                        var vector = massVectors[binNum - minFragMassBin];
                        foreach (var ms2ScanNum in ms2ScanNumArr)
                        {
                            if (vector[ms2ScanNum])
                            {
                                ++scoreArr[ms2ScanNum];
                            }
                        }
                    }
                    foreach (var ms2ScanNum in ms2ScanNumArr)
                    {
                        if (scoreArr[ms2ScanNum] > bestScore[ms2ScanNum])
                        {
                            bestScore[ms2ScanNum] = scoreArr[ms2ScanNum];
                            var proteinName = fastaDb.GetProteinName(offset);
                            bestProtein[ms2ScanNum] = proteinName + (numNTermCleavage == 1 ? "'" : "");
                        }
                    }
                }
            }

            // Values are written in the same order as the header columns
            // (the protein name precedes the score).
            Console.WriteLine("ScanNum\tBestProtein\tScore");
            foreach (var ms2ScanNum in ms2ScanNumArr)
            {
                Console.WriteLine("{0}\t{1}\t{2}", ms2ScanNum, bestProtein[ms2ScanNum] ?? "", bestScore[ms2ScanNum]);
            }
        }
Exemplo n.º 25
0
        /// <summary>
        /// Configures an <c>IcTopDownLauncher</c> from the supplied limits and runs the search.
        /// The precursor and product ion tolerances are fixed at 10 ppm, at most one N-terminal
        /// and no C-terminal cleavage is allowed, and the search is run with a threshold of 0.7.
        /// </summary>
        /// <param name="specFilePath">Path of the spectrum file to search.</param>
        /// <param name="dbFilePath">Path of the FASTA database.</param>
        /// <param name="outputDir">Directory passed to the launcher for its output.</param>
        /// <param name="aaSet">Amino acid set passed to the launcher.</param>
        /// <param name="minSequenceLength">Minimum sequence length.</param>
        /// <param name="maxSequenceLength">Maximum sequence length.</param>
        /// <param name="minPrecursorIonCharge">Minimum precursor ion charge.</param>
        /// <param name="maxPrecursorIonCharge">Maximum precursor ion charge.</param>
        /// <param name="minProductIonCharge">Minimum product ion charge.</param>
        /// <param name="maxProductIonCharge">Maximum product ion charge.</param>
        /// <param name="minSequenceMass">Minimum sequence mass.</param>
        /// <param name="maxSequenceMass">Maximum sequence mass.</param>
        /// <param name="tda">Target/decoy switch forwarded to <c>RunTargetDecoyAnalysisBool</c>.</param>
        /// <param name="searchMode">Search mode forwarded to <c>SearchModeInt</c>.</param>
        public void TestTopDownSearch(string specFilePath, string dbFilePath, string outputDir, AminoAcidSet aaSet,
                                      int minSequenceLength, int maxSequenceLength,
                                      int minPrecursorIonCharge, int maxPrecursorIonCharge,
                                      int minProductIonCharge, int maxProductIonCharge,
                                      double minSequenceMass, double maxSequenceMass,
                                      bool? tda, int searchMode)
        {
            // Fixed search parameters
            const int    nTermCleavageLimit = 1;
            const int    cTermCleavageLimit = 0;
            const int    precursorPpm       = 10;
            const int    productPpm         = 10;
            const double searchThreshold    = 0.7;

            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            var launcher = new IcTopDownLauncher(specFilePath, dbFilePath, outputDir, aaSet, "");

            // Properties are assigned in the same order as before, in case a setter depends on it.
            launcher.MinSequenceLength          = minSequenceLength;
            launcher.MaxSequenceLength          = maxSequenceLength;
            launcher.MaxNumNTermCleavages       = nTermCleavageLimit;
            launcher.MaxNumCTermCleavages       = cTermCleavageLimit;
            launcher.MinPrecursorIonCharge      = minPrecursorIonCharge;
            launcher.MaxPrecursorIonCharge      = maxPrecursorIonCharge;
            launcher.MinProductIonCharge        = minProductIonCharge;
            launcher.MaxProductIonCharge        = maxProductIonCharge;
            launcher.MinSequenceMass            = minSequenceMass;
            launcher.MaxSequenceMass            = maxSequenceMass;
            launcher.PrecursorIonTolerancePpm   = precursorPpm;
            launcher.ProductIonTolerancePpm     = productPpm;
            launcher.RunTargetDecoyAnalysisBool = tda;
            launcher.SearchModeInt              = searchMode;

            launcher.RunSearch(searchThreshold);
        }
Exemplo n.º 26
0
 /// <summary>
 /// Initializes static members of the <see cref="MgfSequenceReader"/> class.
 /// Sets up the carbamidomethylation-based standard amino acid set and the table that maps
 /// a residue mass string to its amino acid and the modification(s) it carries.
 /// </summary>
 static MgfSequenceReader()
 {
     StandardAminoAcidSet = new AminoAcidSet(Modification.Carbamidomethylation);
     Modifications        = new Dictionary<string, Tuple<AminoAcid, List<Modification>>>();

     // Adds one table entry: mass key -> (amino acid of the residue, its modifications).
     Action<string, char, Modification[]> register = (massKey, residue, mods) =>
         Modifications.Add(
             massKey,
             new Tuple<AminoAcid, List<Modification>>(
                 StandardAminoAcidSet.GetAminoAcid(residue),
                 new List<Modification>(mods)));

     register("99.032", 'G', new Modification[] { Modification.Acetylation });
     register("113.048", 'A', new Modification[] { Modification.Acetylation });
     register("129.043", 'S', new Modification[] { Modification.Acetylation });
     register("141.079", 'V', new Modification[] { Modification.Acetylation });
     register("143.059", 'T', new Modification[] { Modification.Acetylation });
     register("147.035", 'M', new Modification[] { Modification.Oxidation });
     register("157.038", 'D', new Modification[] { Modification.Acetylation });
     register("160.03", 'C', new Modification[] { Modification.Carbamidomethylation });
     register("171.054", 'E', new Modification[] { Modification.Acetylation });
     register("173.051", 'M', new Modification[] { Modification.Acetylation });
     register("189.046", 'F', new Modification[] { Modification.Acetylation });
     register("202.041", 'C', new Modification[] { Modification.Carbamidomethylation, Modification.Acetylation });
 }
Exemplo n.º 27
0
        /// <summary>
        /// Runs a target-only top-down search (search mode 1) of Vlad's Alz_RA raw file with
        /// dehydro-C, oxidation-M, N-terminal acetylation and S/T/Y phosphorylation as search
        /// modifications, allowing up to 4 modifications per protein.
        /// The test is ignored when the spectrum file or the database file is missing.
        /// </summary>
        public void TestForVlad()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            // Input and output locations
            const string specFilePath = @"D:\Research\Data\Vlad\raw\Alz_RA_C1_HCD_11012013_SW_03Nov2013.raw";
            const string dbFilePath   = @"D:\Research\Data\Vlad\database\ID_004221_1C042A1F.fasta";
            const string outputDir    = @"D:\Research\Data\Vlad\Ic\POPSICLETest_M1";

            if (!File.Exists(specFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
            }

            if (!File.Exists(dbFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, dbFilePath);
            }

            // Candidate search modifications. All of them are constructed, but only a subset
            // is added to the search list below.
            var acetylN              = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
            var oxM                  = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC             = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var glutathioneC         = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
            var thrToAla             = new SearchModification(Modification.ThrToAla, 'T', SequenceLocation.Everywhere, false);
            var dethiomethylM        = new SearchModification(Modification.Dethiomethyl, 'M', SequenceLocation.Everywhere, false);
            var deamidatedN          = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
            var deamidatedQ          = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
            var serToAsn             = new SearchModification(Modification.SerToAsn, 'S', SequenceLocation.Everywhere, false);
            var pyroCarbamidomethylC = new SearchModification(Modification.PyroCarbamidomethyl, 'C', SequenceLocation.ProteinNTerm, false);
            var phosphoS             = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
            var phosphoT             = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
            var phosphoY             = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);

            // Active modifications. Left out of the search: glutathioneC, dethiomethylM, thrToAla,
            // serToAsn, deamidatedN, deamidatedQ and pyroCarbamidomethylC.
            const int maxModsPerProtein = 4;
            var activeModifications = new List<SearchModification>
            {
                dehydroC,
                oxM,
                acetylN,
                phosphoS,
                phosphoT,
                phosphoY
            };
            var aaSet = new AminoAcidSet(activeModifications, maxModsPerProtein);

            // Search mode: 0 = all subsequences, 1 = close to N- or C-term, 2 = close to N- and C-term
            const int searchMode = 1;

            // Target/decoy switch: true = target and decoy, false = target only, null = decoy only
            bool? tda = false;

            TestTopDownSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, searchMode);
        }
Exemplo n.º 28
0
        /// <summary>
        /// Annotates the rows of a Skyline transition-results CSV with the probability and
        /// q-value read from the per-sample *.tsv result files, and writes the combined
        /// table as a tab-separated file.
        /// </summary>
        /// <remarks>
        /// The test is ignored when the result folder or the Skyline CSV does not exist.
        /// </remarks>
        public void CreatePeptideAbundanceTableWithSkyline()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            // Pass 1: for every sample/peptide/mass/charge key keep the score pair with the
            // highest probability, and remember every peptide/mass key that was seen at all.
            var knownPeptideKeys = new HashSet<string>();
            var bestScoresByKey = new Dictionary<string, Tuple<double, double>>();
            const string henryResultPath = @"H:\Research\IPRG2015\Henry_results\tsv";

            if (!Directory.Exists(henryResultPath))
            {
                Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, henryResultPath);
            }

            var aaSet = new AminoAcidSet();

            foreach (var resultFile in Directory.GetFiles(henryResultPath, "*.tsv"))
            {
                var fileName = Path.GetFileName(resultFile);
                if (fileName == null)
                {
                    continue;
                }

                // The first two characters of the file name are used as the sample label.
                var sample = fileName.Substring(0, 2);
                Console.WriteLine("Processing {0}", sample);

                var tsvReader = new TsvFileParser(resultFile);
                var annotatedPeptides = tsvReader.GetData("Peptide").ToArray();
                var charges = tsvReader.GetData("Charge").Select(c => Convert.ToInt32(c)).ToArray();
                var probabilities = tsvReader.GetData("Prob").Select(Convert.ToDouble).ToArray();
                var qValues = tsvReader.GetData("QValue").Select(Convert.ToDouble).ToArray();

                for (var row = 0; row < tsvReader.NumData; row++)
                {
                    var annotatedPeptide = annotatedPeptides[row];
                    var nominalMass = GetNominalMass(aaSet, annotatedPeptide);
                    var pepKey = GetPeptide(annotatedPeptide) + ":" + nominalMass;
                    var key = sample + ":" + pepKey + ":" + charges[row];
                    knownPeptideKeys.Add(pepKey);

                    // Store the pair when the key is new or when this row has a higher probability.
                    Tuple<double, double> previous;
                    var isNewKey = !bestScoresByKey.TryGetValue(key, out previous);
                    if (isNewKey || probabilities[row] > previous.Item1)
                    {
                        bestScoresByKey[key] = Tuple.Create(probabilities[row], qValues[row]);
                    }
                }
            }

            // Pass 2: walk the Skyline export and append the scores collected above.
            const string skylineFilePath = @"H:\Research\IPRG2015\MySkyline\TransitionResults.csv";

            if (!File.Exists(skylineFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, skylineFilePath);
            }

            var skylineTable = new TsvFileParser(skylineFilePath, ',');

            const string outputFilePath = @"H:\Research\IPRG2015\MySkyline\SkylineTransitionResultsWithScores3.tsv";

            using (var writer = new StreamWriter(outputFilePath))
            {
                var sequences = skylineTable.GetData("Peptide Sequence").ToArray();
                // Sample label = characters 0 and 2 of the replicate name.
                var sampleLabels = skylineTable.GetData("Replicate Name").Select(name => "" + name[0] + name[2]).ToArray();
                var precursorCharges = skylineTable.GetData("Precursor Charge").Select(c => Convert.ToInt32(c)).ToArray();
                var precursorMzs = skylineTable.GetData("Precursor Mz").Select(Convert.ToDouble).ToArray();

                // The last two Skyline columns are dropped and replaced by the two score columns.
                var headers = skylineTable.GetHeaders();
                var keptHeaders = string.Join("\t", headers.Take(headers.Count - 2));
                writer.WriteLine("{0}\tProbability\tQValue", keptHeaders);

                for (var row = 0; row < skylineTable.NumData; row++)
                {
                    var charge = precursorCharges[row];
                    var nominalMass = (int)Math.Round(((precursorMzs[row] - Constants.Proton) * charge - Composition.H2O.Mass) *
                                                      Constants.RescalingConstant);
                    var pepKey = sequences[row] + ":" + nominalMass;

                    // Rows whose peptide/mass never appeared in any result file are left out.
                    if (!knownPeptideKeys.Contains(pepKey))
                    {
                        continue;
                    }

                    var key = sampleLabels[row] + ":" + pepKey + ":" + charge;
                    Tuple<double, double> scores;
                    var hasScores = bestScoresByKey.TryGetValue(key, out scores);
                    var probabilityText = hasScores ? scores.Item1.ToString() : "NA";
                    var qValueText = hasScores ? scores.Item2.ToString() : "NA";

                    var skylineData = skylineTable.GetRows()[row].Split(',');
                    for (var col = 0; col < skylineData.Length - 2; col++)
                    {
                        // Column 2 is shortened to its characters 0 and 2; all other columns are copied.
                        var cell = col == 2
                            ? "" + skylineData[col][0] + skylineData[col][2]
                            : skylineData[col];
                        writer.Write(cell + "\t");
                    }

                    writer.WriteLine("{0}\t{1}", probabilityText, qValueText);
                }
            }

            Console.WriteLine("Done");
        }
Exemplo n.º 29
0
 /// <summary>
 /// Obtains the LC-MS run for <paramref name="specFilePath"/> through
 /// <c>InMemoryLcMsRun.GetLcMsRun</c>, stores it, and then calls <c>Rescore</c>
 /// with the result and output paths.
 /// </summary>
 /// <param name="specFilePath">Path of the spectrum file handed to <c>InMemoryLcMsRun.GetLcMsRun</c>.</param>
 /// <param name="icResultFilePath">First argument passed to <c>Rescore</c>.</param>
 /// <param name="outputFilePath">Second argument passed to <c>Rescore</c>.</param>
 /// <param name="aaSet">Not used by this constructor.</param>
 /// <param name="tolerance">Not used by this constructor.</param>
 public IcBottomUpRescorer(string specFilePath, string icResultFilePath, string outputFilePath, AminoAcidSet aaSet, Tolerance tolerance)
 {
     // NOTE(review): the meaning of the two numeric arguments is defined by GetLcMsRun,
     // which is not visible here - confirm against its signature.
     var lcMsRun = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 0.0);

     _run = lcMsRun;
     Rescore(icResultFilePath, outputFilePath);
 }
Exemplo n.º 30
0
        /// <summary>
        /// Rescores the leading rows of the IcTarget and IcDecoy result files located next to the
        /// PBF test file. Each row is scored with <see cref="InformedTopDownScorer"/>, and a spectral
        /// E-value is computed from a generating function built on the deconvoluted spectrum.
        /// </summary>
        /// <remarks>
        /// The test is ignored when the parent directory of the PBF file cannot be determined.
        /// </remarks>
        public void TestCompositeScoring()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var pbfFilePath = Utils.GetPbfTestFilePath(false);
            var pbfFile     = Utils.GetTestFile(methodName, pbfFilePath);

            // Configure amino acid set
            var oxM      = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
            var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
            var acetylN  = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

            const int numMaxModsPerProtein = 4;
            var       searchModifications  = new List <SearchModification>
            {
                dehydroC,
                oxM,
                acetylN
            };
            var aaSet    = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
            var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

            var          run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);
            const double filteringWindowSize    = 1.1;
            const int    isotopeOffsetTolerance = 2;
            var          tolerance    = new Tolerance(10);
            const int    minCharge    = 1;
            const int    maxCharge    = 20;
            var          graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
            // Sequences are built and scored with the default amino acid set, while the
            // mass binning and scoring graph use the modification-aware set above.
            var          aminoAcidSet = new AminoAcidSet();
            var          scorer       = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

            if (pbfFile.DirectoryName == null)
            {
                Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName);
            }

            const int maxRowsToRescore = 30; // upper limit on rows rescored per result file
            const int numColumnsToKeep = 15; // leading columns copied from the input rows/headers
            const int maxLinesToPrint  = 20; // rescored rows echoed to the console

            var fileExt = new string[] { "IcTarget", "IcDecoy" };

            foreach (var ext in fileExt)
            {
                var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext);
                var parser         = new TsvFileParser(resultFileName);
                var scans          = parser.GetData("Scan").Select(s => Convert.ToInt32((string)s)).ToArray();
                var charges        = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
                var protSequences  = parser.GetData("Sequence").ToArray();
                var modStrs        = parser.GetData("Modifications").ToArray();
                var protMass       = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

                var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext);

                using (var writer = new StreamWriter(outputFileName))
                {
                    // NOTE(review): only this header row is written to the output file; the rescored
                    // rows are collected in `lines` and echoed to the console - confirm this is intended.
                    writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, numColumnsToKeep) + "\tScore\tEValue");

                    var lines = new string[parser.NumData];

                    // Never iterate past the rows that actually exist: a result file with fewer
                    // than maxRowsToRescore rows would otherwise index outside the arrays above.
                    var numRowsToRescore = Math.Min(maxRowsToRescore, parser.NumData);

                    // Each iteration writes only to its own slot of `lines`.
                    Parallel.For(0, numRowsToRescore, i =>
                    {
                        var scan         = scans[i];
                        var charge       = charges[i];
                        var protSequence = protSequences[i];
                        var modStr       = modStrs[i];
                        var sequence     = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                        var ms2Spec      = run.GetSpectrum(scan) as ProductSpectrum;

                        if (ms2Spec == null)
                        {
                            // Leave lines[i] null; null entries are filtered out below.
                            Console.WriteLine("Could not get the spectrum data for scan {0}", scan);
                        }
                        else
                        {
                            var scores = scorer.GetScores(sequence, charge, scan);

                            var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge,
                                                                                  isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

                            var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance,
                                                                                              comparer);
                            var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                            var gf = new GeneratingFunction(graph);
                            gf.ComputeGeneratingFunction();

                            var specEvalue = gf.GetSpectralEValue(scores.Score);

                            var rowStr    = parser.GetRows()[i];
                            var items     = rowStr.Split('\t').ToArray();
                            var newRowStr = string.Join("\t", items, 0, numColumnsToKeep);

                            lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
                        }
                    });

                    foreach (var line in (from item in lines where !string.IsNullOrWhiteSpace(item) select item).Take(maxLinesToPrint))
                    {
                        Console.WriteLine(line);
                    }
                }
                Console.WriteLine("Done");
            }
        }