/// <summary>
/// Gets the MS2 scan numbers associated with the bin of the given sequence mass.
/// On a cache miss, every precursor charge in [_minCharge, _maxCharge] is tried: the m/z of the
/// most abundant averagine isotope is binned and looked up in
/// _mostAbundantIsotopeMzIndexToChargeAndScanNums, keeping entries whose charge matches.
/// The resulting list is stored in _sequenceMassBinToScanNumsMap before it is returned.
/// </summary>
/// <param name="sequenceMass">Sequence mass to look up.</param>
/// <returns>The (cached) list of scan numbers for the mass bin; may be empty.</returns>
public IEnumerable<int> GetMatchingMs2ScanNums(double sequenceMass)
{
    var massBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(sequenceMass);

    // Cache hit: this mass bin has already been resolved
    IList<int> cachedScanNums;
    if (_sequenceMassBinToScanNumsMap.TryGetValue(massBin, out cachedScanNums))
    {
        return cachedScanNums;
    }

    IList<int> matchedScanNums = new List<int>();
    var isotopeIndex = Averagine.GetIsotopomerEnvelope(sequenceMass).MostAbundantIsotopeIndex;

    for (var precursorCharge = _minCharge; precursorCharge <= _maxCharge; precursorCharge++)
    {
        var isotopeMz = Ion.GetIsotopeMz(sequenceMass, precursorCharge, isotopeIndex);
        var mzBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(isotopeMz);

        IList<ChargeAndScanNum> candidates;
        if (_mostAbundantIsotopeMzIndexToChargeAndScanNums.TryGetValue(mzBin, out candidates))
        {
            foreach (var candidate in candidates)
            {
                // Only entries recorded with the charge being tried are accepted
                if (candidate.Charge != precursorCharge)
                {
                    continue;
                }

                matchedScanNums.Add(candidate.ScanNum);
            }
        }
    }

    // Remember the result (even when empty) so the bin is not recomputed
    _sequenceMassBinToScanNumsMap.Add(massBin, matchedScanNums);
    return matchedScanNums;
}
/// <summary>
/// Scores one hard-coded peptide annotation against one hard-coded MS2 scan, first with the
/// deconvoluted-spectrum scorer via a sequence graph and then with InformedBottomUpScorer,
/// printing both results. The test is ignored when the raw file is not present.
/// </summary>
public void TestPsm()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    // Fixed inputs for the peptide-spectrum match under test
    const int ms2ScanNum = 25534;
    const int charge = 2;
    const string annotation = "R.LENWPPASLADDL._";
    const char pre = 'R';
    const string sequence = "LENWPPASLADDL";
    const char post = 'A';

    var aaSet = new AminoAcidSet();
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 0, 0);

    // Fast score from the deconvoluted product spectra
    var scorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 2, 10, 0, 1.1);
    scorerFactory.DeconvoluteAllProductSpectra();
    var fastScorer = scorerFactory.GetMs2Scorer(ms2ScanNum);

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);
    graph.SetSink(0);
    var fastScore = graph.GetFragmentScore(fastScorer);
    Console.WriteLine("Fast search score: " + fastScore);

    // Refined score for the same sink composition
    var sinkComposition = graph.GetSinkSequenceCompositionWithH2O();
    var bottomUpScorer = new InformedBottomUpScorer(run, aaSet, 1, 15, new Tolerance(10));
    var refinedScore = bottomUpScorer.GetScores(pre, sequence, post, sinkComposition, charge, ms2ScanNum);
    Console.WriteLine("RefinedScores: {0}", refinedScore);
}
/// <summary>
/// Creates a deisotoped peak and derives its mass bin index from the stored monoisotopic mass.
/// </summary>
/// <param name="monoIsotopicMass">Monoisotopic mass of the peak.</param>
/// <param name="charge">Charge assigned to the peak.</param>
/// <param name="score">Score assigned to the peak.</param>
internal DeisotopedPeak(double monoIsotopicMass, int charge, double score)
{
    Charge = charge;
    Score = score;
    MonoIsotopicMass = monoIsotopicMass;

    // The bin index is computed from the property value that was just stored
    MonoIsotopicMassIndex = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(MonoIsotopicMass);
}
/// <summary>
/// Stores the supplied collaborators and search limits, and builds the two helpers derived
/// from them: an Ms1FtFilter (from the run, tolerance and the feature parser's Ms1Ft file name)
/// and a SearchableDatabase (from the FASTA database).
/// </summary>
/// <param name="run">LC-MS run to search.</param>
/// <param name="featureParser">Feature parser; its Ms1FtFileName is read here.</param>
/// <param name="ms2Scorer">Scorer based on deconvoluted product spectra.</param>
/// <param name="tagParser">Sequence tag parser.</param>
/// <param name="fastaDb">FASTA database to search.</param>
/// <param name="tolerance">Tolerance stored on the engine and passed to the Ms1FtFilter.</param>
/// <param name="aaSet">Amino acid set.</param>
/// <param name="maxSequenceMass">Maximum sequence mass (default 50000.0).</param>
/// <param name="minProductIonCharge">Minimum product ion charge (default 1).</param>
/// <param name="maxProductIonCharge">Maximum product ion charge (default 20).</param>
public FeatureBasedTagSearchEngine(
    LcMsRun run,
    Ms1FtParser featureParser,
    ProductScorerBasedOnDeconvolutedSpectra ms2Scorer,
    SequenceTagParser tagParser,
    FastaDatabase fastaDb,
    Tolerance tolerance,
    AminoAcidSet aaSet,
    double maxSequenceMass = 50000.0,
    int minProductIonCharge = 1,
    int maxProductIonCharge = 20)
{
    // Collaborators handed in by the caller
    _run = run;
    _featureParser = featureParser;
    _ms2Scorer = ms2Scorer;
    _tagParser = tagParser;
    _fastaDb = fastaDb;
    _tolerance = tolerance;
    _aaSet = aaSet;

    // Search limits
    _maxSequenceMass = maxSequenceMass;
    _minProductIonCharge = minProductIonCharge;
    _maxProductIonCharge = maxProductIonCharge;

    // Helpers constructed from the arguments
    _ms1FtFilter = new Ms1FtFilter(run, tolerance, featureParser.Ms1FtFileName);
    _searchableDb = new SearchableDatabase(fastaDb);
}
/// <summary>
/// Looks up the scan numbers stored for the bin of the given sequence mass.
/// </summary>
/// <param name="sequenceMass">Sequence mass to look up.</param>
/// <returns>
/// The list stored in _sequenceMassBinToScanNumsMap for the mass bin, or an empty array
/// when the bin has no entry.
/// </returns>
public IEnumerable<int> GetMatchingMs2ScanNums(double sequenceMass)
{
    var massBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(sequenceMass);

    IList<int> storedScanNums;
    if (!_sequenceMassBinToScanNumsMap.TryGetValue(massBin, out storedScanNums))
    {
        // Nothing recorded for this bin
        return new int[0];
    }

    return storedScanNums;
}
// sequence mass bin number -> scan numbers
private readonly Dictionary<int, IList<int>> _sequenceMassBinToScanNumsMap;

/// <summary>
/// Fills _sequenceMassBinToScanNumsMap. For each scan number returned by _run.GetScanNumbers(2),
/// every possible sequence mass is widened by +/- _ppmTolerance (in ppm) and the scan number is
/// recorded under each bin the widened range covers. A scan is recorded at most once per bin.
/// </summary>
private void PrecomputePossibleSequenceMasses()
{
    foreach (var scanNum in _run.GetScanNumbers(2))
    {
        // Bins already credited to this scan, so overlapping mass ranges do not add it twice
        var visitedBins = new HashSet<int>();

        foreach (var sequenceMass in GetPossibleSequenceMasses(scanNum))
        {
            var dMass = sequenceMass * _ppmTolerance * 1e-6;
            var firstBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(sequenceMass - dMass);
            var lastBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(sequenceMass + dMass);

            for (var bin = firstBin; bin <= lastBin; bin++)
            {
                if (visitedBins.Add(bin))
                {
                    IList<int> scansInBin;
                    if (!_sequenceMassBinToScanNumsMap.TryGetValue(bin, out scansInBin))
                    {
                        scansInBin = new List<int>();
                        _sequenceMassBinToScanNumsMap.Add(bin, scansInBin);
                    }

                    scansInBin.Add(scanNum);
                }
            }
        }
    }
}
/// <summary>
/// Rebuilds _mostAbundantIsotopeMzIndexToChargeAndScanNums. For each scan number returned by
/// _run.GetScanNumbers(2), every possible sequence match has its most abundant isotope m/z
/// widened by +/- _ppmTolerance (in ppm), and a (charge, scan number) entry is appended to
/// each bin the widened range covers.
/// </summary>
private void PrecomputeMostAbundantMzToMatches()
{
    var scanNums = _run.GetScanNumbers(2);
    _mostAbundantIsotopeMzIndexToChargeAndScanNums = new Dictionary<int, IList<ChargeAndScanNum>>();

    foreach (var scanNum in scanNums)
    {
        foreach (var match in GetPossibleSequenceMatches(scanNum))
        {
            var centerMz = match.MostAbundantIsotopeMz;
            var matchCharge = match.Charge;

            var deltaMz = centerMz * _ppmTolerance * 1e-6;
            var firstBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(centerMz - deltaMz);
            var lastBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(centerMz + deltaMz);

            for (var bin = firstBin; bin <= lastBin; bin++)
            {
                IList<ChargeAndScanNum> entries;
                if (!_mostAbundantIsotopeMzIndexToChargeAndScanNums.TryGetValue(bin, out entries))
                {
                    entries = new List<ChargeAndScanNum>();
                    _mostAbundantIsotopeMzIndexToChargeAndScanNums.Add(bin, entries);
                }

                // A separate entry object is created for every bin
                entries.Add(new ChargeAndScanNum(matchCharge, scanNum));
            }
        }
    }
}
/// <summary>
/// Runs the search end to end: loads the spectrum file, builds the MS1 filter, deconvolutes the
/// MS2 spectra, then (depending on the flags set in RunTargetDecoyAnalysis) searches the target
/// and/or decoy database, writes the results, and computes FDR when both searches are requested.
/// Progress and per-phase timings are written to the console.
/// </summary>
/// <param name="corrThreshold">Correlation threshold passed to Ms1IsotopeAndChargeCorrFilter.</param>
/// <returns>
/// false when the FDR calculator reports an error (ErrorMessage is set to its message);
/// otherwise true.
/// </returns>
public bool RunSearch(double corrThreshold)
{
    var sw = new Stopwatch();
    ErrorMessage = string.Empty;

    Console.Write(@"Reading raw file...");
    sw.Restart();
    _run = InMemoryLcMsRun.GetLcMsRun(SpecFilePath, 1.4826, 1.4826);
    _bottomUpScorer = new InformedBottomUpScorer(_run, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);

    Console.Write(@"Determining precursor masses...");
    sw.Restart();
    var ms1Filter = new Ms1IsotopeAndChargeCorrFilter(_run, PrecursorIonTolerance, MinPrecursorIonCharge,
        MaxPrecursorIonCharge, 400, 5000, corrThreshold, 0, 0);
    // Stop before reading the elapsed time, like every other phase
    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);

    Console.Write(@"Deconvoluting MS2 spectra...");
    sw.Restart();
    _ms2ScorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(
        _run,
        MinProductIonCharge, MaxProductIonCharge,
        new Tolerance(10),
        0
        );
    _ms2ScorerFactory.DeconvoluteAllProductSpectra();
    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);

    // Target database
    var targetDb = new FastaDatabase(DatabaseFilePath);

    // string dirName = OutputDir ?? Path.GetDirectoryName(SpecFilePath);
    // NOTE(review): the fallback above is disabled, so OutputDir is used as-is below;
    // presumably it is validated before this method is called - confirm.
    var baseName = Path.GetFileNameWithoutExtension(SpecFilePath);
    var targetOutputFilePath = Path.Combine(OutputDir, baseName + TargetFileExtension);
    var decoyOutputFilePath = Path.Combine(OutputDir, baseName + DecoyFileExtension);
    var tdaOutputFilePath = Path.Combine(OutputDir, baseName + TdaFileExtension);

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
    {
        Console.Write(@"Reading the target database...");
        sw.Restart();
        targetDb.Read();
        sw.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);

        Console.WriteLine(@"Searching the target database");
        sw.Restart();
        var targetMatches = RunSearch(GetAnnotationsAndOffsets(targetDb), ms1Filter, false);
        sw.Stop();
        Console.WriteLine(@"Target database search elapsed time: {0:f4} sec", sw.Elapsed.TotalSeconds);

        Console.Write(@"Rescoring and writing target results...");
        sw.Restart();
        WriteResultsToFile(targetMatches, targetOutputFilePath, targetDb);
        sw.Stop();
        Console.WriteLine(@"Elapsed time: {0:f4} sec", sw.Elapsed.TotalSeconds);
    }

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
    {
        // Decoy database
        Console.Write(@"Reading the decoy database...");
        sw.Restart();
        var decoyDb = targetDb.Decoy(Enzyme);
        decoyDb.Read();
        // Stop before reading the elapsed time, like every other phase
        sw.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);

        Console.WriteLine(@"Searching the decoy database");
        sw.Restart();
        var decoyMatches = RunSearch(GetAnnotationsAndOffsets(decoyDb), ms1Filter, true);
        sw.Stop();
        Console.WriteLine(@"Decoy database search elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);

        Console.Write(@"Rescoring and writing decoy results...");
        sw.Restart();
        WriteResultsToFile(decoyMatches, decoyOutputFilePath, decoyDb);
        sw.Stop();
        Console.WriteLine(@"Elapsed time: {0:f4} sec", sw.Elapsed.TotalSeconds);
    }

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
    {
        // FDR is computed from the two result files written above
        var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
        if (fdrCalculator.HasError())
        {
            ErrorMessage = fdrCalculator.ErrorMessage;
            Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
            return false;
        }

        fdrCalculator.WriteTo(tdaOutputFilePath);
    }

    Console.WriteLine(@"Done");
    return true;
}
/// <summary>
/// Times the deconvolution of all product spectra of one hard-coded raw file and prints the
/// elapsed seconds. The test is ignored when the raw file is not present.
/// </summary>
public void TestMs2Caching()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\DataFiles\SBEP_STM_001_02272012_Aragon.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

    //const int minPrecursorIonCharge = 3; // 3
    //const int maxPrecursorIonCharge = 30;// 67
    //const int minProductIonCharge = 1;
    //const int maxProductIonCharge = 10;

    // Only scorer construction and deconvolution are timed
    var timer = System.Diagnostics.Stopwatch.StartNew();
    var runCache = new ProductScorerBasedOnDeconvolutedSpectra(run);
    runCache.DeconvoluteAllProductSpectra();
    timer.Stop();

    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Builds a sequence graph for one hard-coded protein sequence with a set of search
/// modifications, then scores every proteoform composition against the MS2 scans returned by
/// a SimpleMs1Filter and prints a tab-separated line for each match with a score above 3.
/// The test is ignored when TestRawFilePath does not exist and returns early when no graph
/// can be created.
/// </summary>
public void Test43KProtein()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Configure amino acid set
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var dethiomethylM = new SearchModification(Modification.Dethiomethyl, 'M', SequenceLocation.Everywhere, false);
    var deamidatedN = new SearchModification(Modification.Deamidation, 'N', SequenceLocation.Everywhere, false);
    var deamidatedQ = new SearchModification(Modification.Deamidation, 'Q', SequenceLocation.Everywhere, false);
    var pyroCarbamidomethylC = new SearchModification(Modification.PyroCarbamidomethyl, 'C', SequenceLocation.ProteinNTerm, false);
    var phosphoS = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
    var phosphoT = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
    var phosphoY = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);
    var nitrosylC = new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false);
    var nethylmaleimideC = new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false);

    const int numMaxModsPerProtein = 4;

    // NOTE(review): glutathioneC is listed twice below; kept as-is, confirm whether intended.
    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        oxM,
        dethiomethylM,
        acetylN,
        //phosphoS,
        //phosphoT,
        //phosphoY,
        deamidatedN,
        // deamidatedQ,
        glutathioneC,
        pyroCarbamidomethylC,
        nitrosylC,
        nethylmaleimideC
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    // var aaSet = new AminoAcidSet();

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath);

    const string protSequence = "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";
    const string annotation = "_." + protSequence + "._";

    var seqGraph = SequenceGraph.CreateGraph(aaSet, AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (seqGraph == null)
    {
        return;
    }

    var ms1Filter = new SimpleMs1Filter();
    var ms2ScorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(run);

    // Request a scorer for every scan in Ms2ScanNums before the search loop starts
    foreach (var scanToPrepare in Ms2ScanNums)
    {
        ms2ScorerFactory.GetScorer(scanToPrepare);
    }

    // With the bound of 0 this loop body executes exactly once (no N-terminal cleavage)
    for (var numNTermCleavages = 0; numNTermCleavages <= 0; numNTermCleavages++)
    {
        if (numNTermCleavages > 0)
        {
            seqGraph.CleaveNTerm();
        }

        var numProteoforms = seqGraph.GetNumProteoformCompositions();
        var modCombs = seqGraph.GetModificationCombinations();

        for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
        {
            seqGraph.SetSink(modIndex);
            var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
            var sequenceMass = protCompositionWithH2O.Mass;
            var modCombinations = modCombs[modIndex];

            foreach (var ms2ScanNum in ms1Filter.GetMatchingMs2ScanNums(sequenceMass))
            {
                var spec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                if (spec == null)
                {
                    continue;
                }

                // Charge derived from the sequence mass and the isolation window target m/z
                var charge = (int) Math.Round(sequenceMass / (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));

                var scorer = ms2ScorerFactory.GetMs2Scorer(ms2ScanNum);
                var score = seqGraph.GetFragmentScore(scorer);
                if (score <= 3)
                {
                    continue;
                }

                var precursorIon = new Ion(protCompositionWithH2O, charge);
                var sequence = protSequence.Substring(numNTermCleavages);
                var pre = numNTermCleavages == 0 ? annotation[0] : annotation[numNTermCleavages + 1];
                var post = annotation[annotation.Length - 1];

                Console.WriteLine("{0}.{1}.{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}",
                    pre, sequence, post,
                    ms2ScanNum,
                    modCombinations,
                    precursorIon.GetMostAbundantIsotopeMz(),
                    precursorIon.Charge,
                    precursorIon.Composition.Mass,
                    score);
            }
        }
    }
}
/// <summary>
/// Scores one hard-coded protein annotation against one hard-coded MS2 scan for every
/// proteoform composition of its sequence graph, printing the fast score and the refined
/// InformedTopDownScorer result for each. The amino acid set allows one modification per
/// protein: a registered "AddVal" modification on any standard residue, plus protein
/// N-terminal acetylation. The test is ignored when the raw file is not present.
/// </summary>
public void TestPrSm()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    //const string specFilePath = @"C:\cygwin\home\kims336\Data\TopDownYufeng\raw\yufeng_column_test2.raw";
    //const string annotation =
    //    "_.MKTKLSVLSAAMLAATLTMMPAVSQAAIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVG" +
    //    "LHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTV" +
    //    "TSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVG" +
    //    "IGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGS" +
    //    "AAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEA" +
    //    "NQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDL" +
    //    "KSLKELLKDQEGAVALKIVRGKSMLYLVLR._";
    //var aaSet = new AminoAcidSet();
    //const int charge = 60;
    //const int ms2ScanNum = 46661;

    const string specFilePath = @"D:\Research\Data\Jon\AH_SF_mouseliver_3-1_Intact_2_6Feb14_Bane_PL011402.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    // Fixed inputs for the proteoform-spectrum match under test
    const int ms2ScanNum = 19011;
    const int charge = 7;
    const string annotation = "_.SKVSFKITLTSDPRLPYKVLSVPESTPFTAVLKFAAEEFKVPAATSAIITNDGIGINPAQTAGNVFLKHGSELRIIPRDRVGSC._";

    // Modifications: "AddVal" on every standard residue, plus N-terminal acetylation
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, true);
    var modVal = Modification.RegisterAndGetModification("AddVal", new Composition(5, 9, 1, 1, 0));
    var searchMods = AminoAcid.StandardAminoAcidCharacters
        .Select(residue => new SearchModification(modVal, residue, SequenceLocation.Everywhere, false))
        .ToList();
    searchMods.Add(acetylN);

    const int numMaxModsPerProtein = 1;
    var aaSet = new AminoAcidSet(searchMods, numMaxModsPerProtein);

    var graph = SequenceGraph.CreateGraph(aaSet, annotation);
    Console.WriteLine("NumProteoforms: " + graph.GetNumProteoformCompositions());

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);
    var scorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 15);
    scorerFactory.GetScorer(ms2ScanNum);
    var scorer = scorerFactory.GetMs2Scorer(ms2ScanNum);
    Assert.NotNull(scorer, "Scorer is null!");

    for (var sinkIndex = 0; sinkIndex < graph.GetNumProteoformCompositions(); sinkIndex++)
    {
        graph.SetSink(sinkIndex);
        Console.WriteLine("ModComb: " + graph.GetModificationCombinations()[sinkIndex]);

        var fastScore = graph.GetFragmentScore(scorer);
        Console.WriteLine("Fast search score: " + fastScore);

        var composition = graph.GetSinkSequenceCompositionWithH2O();
        var topDownScorer = new InformedTopDownScorer(run, aaSet, 1, 30, new Tolerance(10));
        var refinedScore = topDownScorer.GetScores(
            AminoAcid.ProteinNTerm,
            SimpleStringProcessing.GetStringBetweenDots(annotation),
            AminoAcid.ProteinCTerm,
            composition,
            charge,
            ms2ScanNum);

        Console.WriteLine("Modifications: {0}", refinedScore.Modifications);
        Console.WriteLine("Composition: {0}", composition);
        Console.WriteLine("RefinedScores: {0}", refinedScore);
    }
}