/// <summary>
/// Initializes a top-down search launcher: records the input/output locations and the
/// amino acid set, then assigns a default value to every search setting.
/// </summary>
/// <param name="specFilePath">Value stored in <c>SpecFilePath</c>.</param>
/// <param name="dbFilePath">Value stored in <c>DatabaseFilePath</c>.</param>
/// <param name="outputDir">Value stored in <c>OutputDir</c>; it is not validated or created here.</param>
/// <param name="aaSet">Value stored in <c>AminoAcidSet</c>.</param>
/// <param name="featureFilePath">Value stored in <c>FeatureFilePath</c>; null when omitted.</param>
public IcTopDownLauncher(
    string specFilePath,
    string dbFilePath,
    string outputDir,
    AminoAcidSet aaSet,
    string featureFilePath = null)
{
    // Both tolerances start from the same numeric value, each with its own Tolerance instance.
    const int defaultToleranceValue = 10;

    ErrorMessage = string.Empty;

    // Caller-supplied inputs.
    SpecFilePath = specFilePath;
    DatabaseFilePath = dbFilePath;
    AminoAcidSet = aaSet;
    OutputDir = outputDir;
    FeatureFilePath = featureFilePath;

    // Sequence length and terminal cleavage limits.
    MinSequenceLength = 21;
    MaxSequenceLength = 300;
    MaxNumNTermCleavages = 1;
    MaxNumCTermCleavages = 0;

    // Charge state ranges.
    MinPrecursorIonCharge = 2;
    MaxPrecursorIonCharge = 60;
    MinProductIonCharge = 1;
    MaxProductIonCharge = 20;

    // Sequence mass window.
    MinSequenceMass = 2000.0;
    MaxSequenceMass = 50000.0;

    // Mass tolerances.
    PrecursorIonTolerance = new Tolerance(defaultToleranceValue);
    ProductIonTolerance = new Tolerance(defaultToleranceValue);

    // Search behavior.
    RunTargetDecoyAnalysis = DatabaseSearchMode.Both;
    SearchMode = InternalCleavageType.SingleInternalCleavage;
    MaxNumThreads = 4;
    ScanNumbers = null;
    NumMatchesPerSpectrum = 3;
    TagBasedSearch = true;
}
/// <summary>
/// Loads the spectrum file, builds an <c>InformedTopDownScorer</c> over it and immediately
/// rescores the given result file into the given output file.
/// </summary>
/// <param name="specFilePath">Spectrum file loaded through <c>InMemoryLcMsRun.GetLcMsRun</c>.</param>
/// <param name="icResultFilePath">Result file handed to <c>Rescore</c>.</param>
/// <param name="outputFilePath">Output file handed to <c>Rescore</c>.</param>
/// <param name="aaSet">Amino acid set forwarded to the scorer.</param>
/// <param name="tolerance">Tolerance forwarded to the scorer.</param>
/// <param name="ms2CorrThreshold">Threshold forwarded to the scorer.</param>
/// <param name="minProductIonCharge">Minimum product ion charge forwarded to the scorer.</param>
/// <param name="maxProductIonCharge">Maximum product ion charge forwarded to the scorer.</param>
public IcRescorer(
    string specFilePath,
    string icResultFilePath,
    string outputFilePath,
    AminoAcidSet aaSet,
    Tolerance tolerance,
    double ms2CorrThreshold = 0.7,
    int minProductIonCharge = 1,
    int maxProductIonCharge = 10)
{
    // The same constant is passed for both numeric GetLcMsRun arguments.
    // NOTE(review): their exact meaning is not visible from this block - confirm against GetLcMsRun.
    const double runLoadingThreshold = 1.4826;

    var lcMsRun = InMemoryLcMsRun.GetLcMsRun(specFilePath, runLoadingThreshold, runLoadingThreshold);

    _topDownScorer = new InformedTopDownScorer(
        lcMsRun,
        aaSet,
        minProductIonCharge,
        maxProductIonCharge,
        tolerance,
        ms2CorrThreshold);

    Rescore(icResultFilePath, outputFilePath);
}
/// <summary>
/// Convenience constructor that delegates to the five-argument constructor with a
/// minimum charge of 1 and a maximum charge of 3.
/// </summary>
/// <param name="run">Run forwarded unchanged.</param>
/// <param name="peptideEnumerator">Peptide enumerator forwarded unchanged.</param>
/// <param name="aminoAcidSet">Amino acid set forwarded unchanged.</param>
public InformedProteomicsAnalysis(
    InMemoryLcMsRun run,
    IEnumerable<string> peptideEnumerator,
    AminoAcidSet aminoAcidSet)
    : this(run, peptideEnumerator, aminoAcidSet, minCharge: 1, maxCharge: 3)
{
}
/// <summary>
/// Timing harness: indexes a FASTA database, enumerates its tryptic annotations and builds a
/// sequence graph for each one in parallel, then prints the elapsed wall time in seconds.
/// </summary>
private static void TestCountingPeptides()
{
    // Other FASTA files that have been used with this harness (same directory):
    //   ID_002166_F86E3B2F.fasta, ID_004208_295531A4.fasta
    const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_003456_9B916A8B.fasta";

    var aaSet = new AminoAcidSet();

    var stopwatch = new Stopwatch();
    stopwatch.Start();

    var indexedDb = new IndexedDatabase(new FastaDatabase(dbFile));
    indexedDb.Read();

    var trypticPeptides = indexedDb.AnnotationsAndOffsets(7, 40, 2, 2, Enzyme.Trypsin);

    Parallel.ForEach(trypticPeptides, entry =>
    {
        var peptideAnnotation = entry.Annotation;

        // Read but not used further; kept so the work done per entry is unchanged.
        var peptideOffset = entry.Offset;

        // The graph itself is discarded; only the construction cost is of interest.
        var peptideGraph = SequenceGraph.CreateGraph(aaSet, peptideAnnotation);
    });

    stopwatch.Stop();

    var elapsedSeconds = (double)stopwatch.ElapsedTicks / Stopwatch.Frequency;
    Console.WriteLine(@"{0:f4} sec", elapsedSeconds);
}
/// <summary>
/// Allocates the per-position storage of a shifted sequence graph and seeds position 0 with a
/// pseudo amino acid ("Shift") whose composition carries <paramref name="shiftedMass"/>.
/// </summary>
/// <param name="aminoAcidSet">Amino acid set; also the source of the modification parameters.</param>
/// <param name="shiftedMass">Mass of the pseudo amino acid stored at position 0; also stored in <c>ShiftMass</c>.</param>
/// <param name="isForward">Stored in <c>_isForward</c>.</param>
/// <param name="maxSequenceLength">Maximum sequence length; all arrays get this length plus two slots.</param>
/// <param name="maxSequenceMass">Stored in <c>_maxSequenceMass</c>.</param>
public ShiftedSequenceGraph(
    AminoAcidSet aminoAcidSet,
    double shiftedMass,
    bool isForward,
    int maxSequenceLength,
    double maxSequenceMass = 50000.0)
{
    _aminoAcidSet = aminoAcidSet;
    _modificationParams = aminoAcidSet.GetModificationParams();
    _isForward = isForward;
    _maxSequenceMass = maxSequenceMass;

    _index = 0;
    _maxSeqIndex = maxSequenceLength + 2; // two extra slots on top of the sequence length

    // Position 0 holds the mass shift, modeled as a pseudo amino acid.
    var shiftResidue = new AminoAcid('\0', "Shift", new CompositionWithDeltaMass(shiftedMass));

    _aminoAcidSequence = new AminoAcid[_maxSeqIndex];
    _aminoAcidSequence[0] = shiftResidue;
    ShiftMass = shiftedMass;

    _fragmentComposition = new Composition.Composition[_maxSeqIndex];
    _fragmentComposition[0] = shiftResidue.Composition;

    _graph = new Node[_maxSeqIndex][];
    _graph[0] = new[] { new Node(0) };

    // One composition slot per modification combination at every position.
    var combinationCount = _modificationParams.NumModificationCombinations;
    _nodeComposition = new Composition.Composition[_maxSeqIndex][];
    _compNodeComposition = new Composition.Composition[_maxSeqIndex][];
    for (var position = 0; position < _maxSeqIndex; position++)
    {
        _compNodeComposition[position] = new Composition.Composition[combinationCount];
        _nodeComposition[position] = new Composition.Composition[combinationCount];
    }

    IsValid = true;
}
/// <summary>
/// Creates a feature-based tag search engine, storing its collaborators and building the
/// MS1 feature filter and the searchable database from the supplied inputs.
/// </summary>
/// <param name="run">LC-MS run; also used to build the MS1 feature filter.</param>
/// <param name="featureParser">Feature parser; its <c>Ms1FtFileName</c> is passed to the MS1 feature filter.</param>
/// <param name="ms2Scorer">MS2 scorer stored as given (may be null).</param>
/// <param name="tagParser">Sequence tag parser.</param>
/// <param name="fastaDb">FASTA database; also wrapped in a <c>SearchableDatabase</c>.</param>
/// <param name="tolerance">Tolerance; also used to build the MS1 feature filter.</param>
/// <param name="aaSet">Amino acid set.</param>
/// <param name="maxSequenceMass">Maximum sequence mass.</param>
/// <param name="minProductIonCharge">Minimum product ion charge.</param>
/// <param name="maxProductIonCharge">Maximum product ion charge.</param>
public FeatureBasedTagSearchEngine(
    LcMsRun run,
    Ms1FtParser featureParser,
    ProductScorerBasedOnDeconvolutedSpectra ms2Scorer,
    SequenceTagParser tagParser,
    FastaDatabase fastaDb,
    Tolerance tolerance,
    AminoAcidSet aaSet,
    double maxSequenceMass = 50000.0,
    int minProductIonCharge = 1,
    int maxProductIonCharge = 20)
{
    // Collaborators stored exactly as supplied.
    _run = run;
    _ms2Scorer = ms2Scorer;
    _featureParser = featureParser;
    _tagParser = tagParser;
    _fastaDb = fastaDb;
    _tolerance = tolerance;
    _aaSet = aaSet;

    // Search limits.
    _maxSequenceMass = maxSequenceMass;
    _minProductIonCharge = minProductIonCharge;
    _maxProductIonCharge = maxProductIonCharge;

    // Objects derived from the inputs.
    _ms1FtFilter = new Ms1FtFilter(run, tolerance, featureParser.Ms1FtFileName);
    _searchableDb = new SearchableDatabase(fastaDb);
}
/// <summary>
/// Builds a sequence graph for a short protein annotation with methylated K and oxidized M
/// as variable modifications and prints every sequence composition with its mass.
/// </summary>
public void TestBuildingReverseGraph()
{
    const string annotation = "_.MARTKQTARK._";
    const int numMaxModsPerProtein = 2;

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Configure the amino acid set (PyroGluQ on Q was previously tried here as well).
    var methylK = new SearchModification(Modification.Methylation, 'K', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var searchModifications = new List<SearchModification> { methylK, oxM };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation);
    var sequenceCompositions = seqGraph.GetSequenceCompositions();
    foreach (var sequenceComposition in sequenceCompositions)
    {
        Console.WriteLine("{0}\t{1}", sequenceComposition, sequenceComposition.Mass);
    }
}
/// <summary>
/// Precomputes, for every mass bin, the list of incoming edges of the protein scoring graph.
/// An edge from bin i to bin j is stored in <c>_adjList[j]</c> whenever adding one amino acid
/// mass to the mass of bin i lands in bin j. From bin 0 only, unmodified amino acids are
/// additionally combined with every terminal modification.
/// </summary>
/// <param name="comparer">Mass binning that maps masses to bin numbers and back.</param>
/// <param name="aminoAcidSet">Amino acid set from which the extended amino acid array and terminal modifications are derived.</param>
public ProteinScoringGraphFactory(IMassBinning comparer, AminoAcidSet aminoAcidSet)
{
    _comparer = comparer;

    var binCount = _comparer.NumberOfBins;
    _adjList = new LinkedList<ScoringGraphEdge>[binCount];
    for (var bin = 0; bin < binCount; bin++)
    {
        _adjList[bin] = new LinkedList<ScoringGraphEdge>();
    }

    var terminalModifications = FilteredProteinMassBinning.GetTerminalModifications(aminoAcidSet);
    var aminoAcidArray = FilteredProteinMassBinning.GetExtendedAminoAcidArray(aminoAcidSet);

    for (var sourceBin = 0; sourceBin < binCount; sourceBin++)
    {
        var sourceMass = _comparer.GetMass(sourceBin);

        foreach (var aminoAcid in aminoAcidArray)
        {
            var targetBin = _comparer.GetBinNumber(sourceMass + aminoAcid.Mass);
            if (targetBin < 0 || targetBin >= binCount)
            {
                // An out-of-range target also skips the terminal-modification variants below.
                continue;
            }

            _adjList[targetBin].AddLast(new ScoringGraphEdge(sourceBin));

            // Terminal modifications only apply to unmodified residues leaving the first bin.
            if (sourceBin != 0 || aminoAcid is ModifiedAminoAcid)
            {
                continue;
            }

            foreach (var terminalMod in terminalModifications)
            {
                var modifiedAminoAcid = new ModifiedAminoAcid(aminoAcid, terminalMod);
                var modifiedTargetBin = _comparer.GetBinNumber(sourceMass + modifiedAminoAcid.Mass);
                if (modifiedTargetBin >= 0 && modifiedTargetBin < binCount)
                {
                    _adjList[modifiedTargetBin].AddLast(new ScoringGraphEdge(sourceBin));
                }
            }
        }
    }
}
/// <summary>
/// Scores one peptide-spectrum match twice: first with the fast graph-based fragment score on
/// deconvoluted spectra, then with the informed bottom-up scorer; both results are printed.
/// The test is ignored when the raw file is not available.
/// </summary>
public void TestPsm()
{
    const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";

    // Peptide under test, with its flanking residues, charge and MS2 scan.
    const string annotation = "R.LENWPPASLADDL._";
    const char pre = 'R';
    const string sequence = "LENWPPASLADDL";
    const char post = 'A';
    const int charge = 2;
    const int ms2ScanNum = 25534;

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    var aaSet = new AminoAcidSet();
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 0, 0);

    // Fast score from the sequence graph against the deconvoluted MS2 spectrum.
    var deconvolutedScorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 2, 10, 0, 1.1);
    deconvolutedScorer.DeconvoluteAllProductSpectra();
    var scanScorer = deconvolutedScorer.GetMs2Scorer(ms2ScanNum);

    var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation);
    seqGraph.SetSink(0);
    var fastScore = seqGraph.GetFragmentScore(scanScorer);
    Console.WriteLine("Fast search score: " + fastScore);

    // Refined score from the informed scorer, using the composition of the selected sink.
    var sinkComposition = seqGraph.GetSinkSequenceCompositionWithH2O();
    var informedScorer = new InformedBottomUpScorer(run, aaSet, 1, 15, new Tolerance(10));
    var refinedScore = informedScorer.GetScores(pre, sequence, post, sinkComposition, charge, ms2ScanNum);
    Console.WriteLine("RefinedScores: {0}", refinedScore);
}
/// <summary>
/// Creates a scan-based tag search engine. Stores the collaborators, wraps the FASTA database
/// in a <c>SearchableDatabase</c> and opens the scan range to all scans.
/// </summary>
/// <param name="run">LC-MS run.</param>
/// <param name="seqTagFinder">Sequence tag finder.</param>
/// <param name="featureFinder">Feature finder.</param>
/// <param name="fastaDb">FASTA database; only the derived searchable database is kept.</param>
/// <param name="tolerance">Tolerance.</param>
/// <param name="aaSet">Amino acid set.</param>
/// <param name="ms2ScorerFactory">Optional MS2 scorer factory; null when omitted.</param>
/// <param name="minMatchedTagLength">Minimum matched tag length.</param>
/// <param name="maxSequenceMass">Maximum sequence mass.</param>
/// <param name="minProductIonCharge">Minimum product ion charge.</param>
/// <param name="maxProductIonCharge">Maximum product ion charge.</param>
public ScanBasedTagSearchEngine(
    LcMsRun run,
    ISequenceTagFinder seqTagFinder,
    LcMsPeakMatrix featureFinder,
    FastaDatabase fastaDb,
    Tolerance tolerance,
    AminoAcidSet aaSet,
    CompositeScorerFactory ms2ScorerFactory = null,
    int minMatchedTagLength = DefaultMinMatchedTagLength,
    double maxSequenceMass = 50000.0,
    int minProductIonCharge = 1,
    int maxProductIonCharge = 20)
{
    // Collaborators.
    _run = run;
    _seqTagFinder = seqTagFinder;
    _featureFinder = featureFinder;
    _ms2ScorerFactory = ms2ScorerFactory;
    _searchableDb = new SearchableDatabase(fastaDb);
    _tolerance = tolerance;
    _aaSet = aaSet;

    // Search limits.
    _minMatchedTagLength = minMatchedTagLength;
    _maxSequenceMass = maxSequenceMass;
    _minProductIonCharge = minProductIonCharge;
    _maxProductIonCharge = maxProductIonCharge;

    // By default no scan is excluded.
    MinScan = int.MinValue;
    MaxScan = int.MaxValue;
}
/// <summary>
/// Checks the isotope-envelope fit score of a z6 ion of a 54-residue suffix against ETD scan 810.
/// The test is ignored when the ETD raw file is not available.
/// </summary>
public void TestFitScoreCalculationEtd()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    if (!File.Exists(TestLcMsRun.TestTopDownRawFilePathEtd))
    {
        // Report the ETD path: it is the file that was checked above and is loaded below
        // (the message previously named the CID file).
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestLcMsRun.TestTopDownRawFilePathEtd);
    }

    // Load only the single scan under test.
    var run = InMemoryLcMsRun.GetLcMsRunScanRange(TestLcMsRun.TestTopDownRawFilePathEtd, 810, 810);
    var spec = run.GetSpectrum(810) as ProductSpectrum;
    Assert.True(spec != null);

    const string suf54 = "ENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK";
    var suf54Comp = new AminoAcidSet().GetComposition(suf54);
    Assert.True(suf54Comp != null);

    var ionType = new IonTypeFactory(10).GetIonType("z6");
    var ion = ionType.GetIon(suf54Comp);
    Console.WriteLine("MonoMz: {0}, MonoMass: {1}", ion.GetMonoIsotopicMz(), ion.Composition.Mass);

    var fitScore = spec.GetFitScore(ion, new Tolerance(15), 0.1);
    Console.WriteLine("FitScore: {0}", fitScore);
    Assert.True(fitScore < 0.15);
}
/// <summary>
/// Reads the TSV file named by <c>_fileName</c> and returns one <c>SpectrumMatch</c> per distinct
/// peptide whose filter value does not exceed <c>QValueThreshold</c>.
/// </summary>
/// <returns>
/// The accepted matches in file order; an empty list when the peptide column is absent.
/// Only the first row of each peptide that passes the filter is kept.
/// </returns>
public IList<SpectrumMatch> Read()
{
    const double filterThreshold = QValueThreshold;

    var specMatches = new List<SpectrumMatch>();

    var tsvFile = new TsvFileParser(_fileName);
    var precursorCharges = tsvFile.GetData(PrecursorChargeHeader);
    var scans = tsvFile.GetData(ScanHeader);
    var peptides = tsvFile.GetData(TopDownPeptideHeader);
    if (peptides == null)
    {
        return specMatches;
    }

    var seenPeptides = new HashSet<string>();
    var filterValues = tsvFile.GetData(QValueHeader);
    var aset = new AminoAcidSet();

    for (var row = 0; row < peptides.Count; row++)
    {
        // Rows above the threshold are dropped without being remembered as seen.
        if (Convert.ToDouble(filterValues[row]) > filterThreshold)
        {
            continue;
        }

        var peptide = peptides[row];

        // Add returns false when the peptide was already accepted on an earlier row.
        if (!seenPeptides.Add(peptide))
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scans[row]);
        var precursorCharge = Convert.ToInt32(precursorCharges[row]);
        specMatches.Add(new SpectrumMatch(new Sequence(peptide, aset), _lcms, scanNum, precursorCharge, _decoy));
    }

    return specMatches;
}
/// <summary>
/// Creates an empty set of matched tags for the given sequence.
/// </summary>
/// <param name="sequence">Sequence the tags belong to.</param>
/// <param name="aminoAcidSet">Amino acid set.</param>
/// <param name="tolerance">Tolerance.</param>
/// <param name="relaxedTolerance">Relaxed tolerance.</param>
public MatchedTagSet(
    string sequence,
    AminoAcidSet aminoAcidSet,
    Tolerance tolerance,
    Tolerance relaxedTolerance)
{
    // The tag list starts out empty.
    _tags = new List<MatchedTag>();

    _sequence = sequence;
    _aminoAcidSet = aminoAcidSet;

    _tolerance = tolerance;
    _relaxedTolerance = relaxedTolerance;
}
/// <summary>
/// Creates a top-down scorer; every argument is stored in the property of the same name.
/// </summary>
/// <param name="run">LC-MS run.</param>
/// <param name="aaSet">Amino acid set.</param>
/// <param name="minProductCharge">Minimum product ion charge.</param>
/// <param name="maxProductCharge">Maximum product ion charge.</param>
/// <param name="tolerance">Tolerance.</param>
/// <param name="ms2CorrThreshold">MS2 correlation threshold; 0.7 when omitted.</param>
public InformedTopDownScorer(
    LcMsRun run,
    AminoAcidSet aaSet,
    int minProductCharge,
    int maxProductCharge,
    Tolerance tolerance,
    double ms2CorrThreshold = 0.7)
{
    // Data sources.
    Run = run;
    AminoAcidSet = aaSet;

    // Product ion charge range.
    MinProductCharge = minProductCharge;
    MaxProductCharge = maxProductCharge;

    // Matching parameters.
    Tolerance = tolerance;
    Ms2CorrThreshold = ms2CorrThreshold;
}
/// <summary>
/// Creates a bottom-up scorer. Besides storing the arguments, it builds a rank scorer fixed to
/// HCD / Orbitrap / Trypsin / Standard and an empty per-scan cache of scored spectra.
/// </summary>
/// <param name="run">LC-MS run.</param>
/// <param name="aaSet">Amino acid set.</param>
/// <param name="minProductCharge">Minimum product ion charge.</param>
/// <param name="maxProductCharge">Maximum product ion charge.</param>
/// <param name="tolerance">Tolerance.</param>
public InformedBottomUpScorer(
    LcMsRun run,
    AminoAcidSet aaSet,
    int minProductCharge,
    int maxProductCharge,
    Tolerance tolerance)
{
    // Data sources.
    Run = run;
    AminoAcidSet = aaSet;

    // Product ion charge range and tolerance.
    MinProductCharge = minProductCharge;
    MaxProductCharge = maxProductCharge;
    Tolerance = tolerance;

    // The rank scorer configuration is hard-wired.
    _rankScorer = new RankScore(
        ActivationMethod.HCD,
        Ms2DetectorType.Orbitrap,
        Enzyme.Trypsin,
        Protocol.Standard);

    // Scored spectra, keyed by an integer.
    _scoredSpectra = new Dictionary<int, ScoredSpectrum>();
}
/// <summary>
/// Creates an analysis over the given run and peptides with an explicit charge range.
/// </summary>
/// <param name="run">In-memory LC-MS run.</param>
/// <param name="peptideEnumerator">Peptides to analyze.</param>
/// <param name="aminoAcidSet">Amino acid set.</param>
/// <param name="minCharge">Minimum charge.</param>
/// <param name="maxCharge">Maximum charge.</param>
public InformedProteomicsAnalysis(
    InMemoryLcMsRun run,
    IEnumerable<string> peptideEnumerator,
    AminoAcidSet aminoAcidSet,
    int minCharge,
    int maxCharge)
{
    // Inputs.
    Run = run;
    PeptideEnumerator = peptideEnumerator;
    AminoAcidSet = aminoAcidSet;

    // Charge range.
    MinCharge = minCharge;
    MaxCharge = maxCharge;
}
/// <summary>
/// Convenience constructor that takes the product ion tolerance as a plain number, wraps it
/// in a <c>Tolerance</c> and delegates to the constructor that accepts a <c>Tolerance</c>.
/// </summary>
/// <param name="run">LC-MS run.</param>
/// <param name="comparer">Mass binning.</param>
/// <param name="aaSet">Amino acid set.</param>
/// <param name="minProductCharge">Minimum product ion charge.</param>
/// <param name="maxProductCharge">Maximum product ion charge.</param>
/// <param name="productTolerancePpm">Numeric value passed to the single-argument <c>Tolerance</c> constructor.</param>
/// <param name="isotopeOffsetTolerance">Isotope offset tolerance.</param>
/// <param name="filteringWindowSize">Filtering window size.</param>
public CompositeScorerFactory(
    ILcMsRun run,
    IMassBinning comparer,
    AminoAcidSet aaSet,
    int minProductCharge = 1,
    int maxProductCharge = 20,
    double productTolerancePpm = 10,
    int isotopeOffsetTolerance = 2,
    double filteringWindowSize = 1.1)
    : this(
        run,
        comparer,
        aaSet,
        minProductCharge,
        maxProductCharge,
        productTolerance: new Tolerance(productTolerancePpm),
        isotopeOffsetTolerance: isotopeOffsetTolerance,
        filteringWindowSize: filteringWindowSize)
{
}
/// <summary>
/// Loads an amino acid set from a modification file and prints, for the protein N-terminus and
/// then the protein C-terminus, the terminal residue with its mass and composition followed by
/// the mass of every modification applicable at that location.
/// </summary>
public void TestParsingManyMods()
{
    const string modFilePath = @"\\protoapps\UserData\Jungkap\Lewy\db\Mods.txt";
    var aaSet = new AminoAcidSet(modFilePath);

    var terminalLocations = new[] { SequenceLocation.ProteinNTerm, SequenceLocation.ProteinCTerm };
    foreach (var location in terminalLocations)
    {
        var isNTerm = location == SequenceLocation.ProteinNTerm;
        var residue = isNTerm ? AminoAcid.ProteinNTerm.Residue : AminoAcid.ProteinCTerm.Residue;

        var aa = aaSet.GetAminoAcid(residue, location);
        Console.Write("{0}\t{1}\t{2}", residue, aa.Mass, aa.Composition);

        foreach (var modIndex in aaSet.GetModificationIndices(residue, location))
        {
            var modification = aaSet.GetModificationParams().GetModification(modIndex);
            Console.WriteLine(modification.Mass);
        }

        // A line break separates the N-terminal section from the C-terminal one.
        if (isNTerm)
        {
            Console.WriteLine();
        }
    }
}
/// <summary>
/// Verifies the elemental composition of a known protein and then checks the fit, cosine and
/// correlation scores of its 20+ ion against CID scan 5743.
/// The test is ignored when the CID raw file is not available.
/// </summary>
public void TestFitScoreCalculationCid()
{
    const int scanNum = 5743;
    const string protein = "MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRAAVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQEDCRNGFLLDGFPRTIPQADAMKEAGIVVDYVLEFDVPDELIVDRIVGRRVHAASGRVYHVKFNPPKVEGKDDVTGEDLTTRKDDQEETVRKRLVEYHQMTAPLIGYYQKEAEAGNTKYAKVDGTQAVADVRAALEKILG";

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    if (!File.Exists(TestLcMsRun.TestTopDownRawFilePathCid))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestLcMsRun.TestTopDownRawFilePathCid);
    }

    // Load only the single scan under test.
    var run = InMemoryLcMsRun.GetLcMsRunScanRange(TestLcMsRun.TestTopDownRawFilePathCid, scanNum, scanNum);
    var spec = run.GetSpectrum(scanNum);
    Assert.True(spec != null);

    // Composition of the intact protein (residues plus water).
    var protComp = new AminoAcidSet().GetComposition(protein) + Composition.H2O;
    Assert.True(protComp != null);
    Assert.True(protComp.C == 1035);
    Assert.True(protComp.H == 1683);
    Assert.True(protComp.N == 289);
    Assert.True(protComp.O == 318);
    Assert.True(protComp.P == 0);
    Assert.True(protComp.S == 7);
    Assert.True(Math.Abs(protComp.Mass - 23473.245267145) < 0.0000001);
    Assert.True(protComp.NominalMass == 23461);

    var ion = new Ion(protComp, 20);
    var isotopomerEnvelop = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    Console.WriteLine(@"MonoMz: {0}, MonoMass: {1}", ion.GetMonoIsotopicMz(), ion.Composition.Mass);

    // Print theoretical vs. observed isotope peaks; a missing peak is reported as intensity 0.
    var matchedPeaks = spec.GetAllIsotopePeaks(ion, new Tolerance(15), 0.1);
    for (var isotopeIndex = 0; isotopeIndex < matchedPeaks.Length; isotopeIndex++)
    {
        var observedPeak = matchedPeaks[isotopeIndex];
        var observedIntensity = observedPeak == null ? 0 : observedPeak.Intensity;
        Console.WriteLine(
            @"{0} {1} {2} {3}",
            isotopeIndex,
            ion.GetIsotopeMz(isotopeIndex),
            isotopomerEnvelop[isotopeIndex],
            observedIntensity);
    }

    var fitScore = spec.GetFitScore(ion, new Tolerance(15), 0.1);
    var cosine = spec.GetConsineScore(ion, new Tolerance(15), 0.1);
    var corr = spec.GetCorrScore(ion, new Tolerance(15), 0.1);

    Console.WriteLine(@"FitScore: {0}", fitScore);
    Console.WriteLine(@"Cosine: {0}", cosine);
    Console.WriteLine(@"Corr: {0}", corr);

    Assert.True(Math.Abs(fitScore - 0.181194589537041) < 0.0001);
    Assert.True(Math.Abs(cosine - 0.917609346566222) < 0.0001);
    Assert.True(Math.Abs(corr - 0.808326778009839) < 0.0001);
}
/// <summary>
/// Creates a sequence graph from an annotation of the form "X.SEQUENCE.Y", where X and Y are
/// either an upper-case residue or the FASTA delimiter character. A delimiter at either end
/// selects the protein terminus for that end; a residue selects the peptide terminus.
/// </summary>
/// <param name="aaSet">amino acid set</param>
/// <param name="annotation">annotation (e.g. G.PEPTIDER.K or _.PEPTIDER._)</param>
/// <returns>
/// The graph returned by the four-argument overload, or null when
/// <paramref name="annotation"/> is null or does not have the expected form.
/// </returns>
public static SequenceGraph CreateGraph(AminoAcidSet aaSet, string annotation)
{
    const char delimiter = (char)FastaDatabase.Delimiter;

    if (annotation == null)
    {
        return null;
    }

    // A flanking position is an upper-case letter or the delimiter.
    var flankingClass = "[A-Z" + delimiter + "]";
    var annotationPattern = "^" + flankingClass + @"\.[A-Z]+\." + flankingClass + "$";
    if (!Regex.IsMatch(annotation, annotationPattern))
    {
        return null;
    }

    AminoAcid nTerm;
    if (annotation[0] == FastaDatabase.Delimiter)
    {
        nTerm = AminoAcid.ProteinNTerm;
    }
    else
    {
        nTerm = AminoAcid.PeptideNTerm;
    }

    AminoAcid cTerm;
    if (annotation[annotation.Length - 1] == FastaDatabase.Delimiter)
    {
        cTerm = AminoAcid.ProteinCTerm;
    }
    else
    {
        cTerm = AminoAcid.PeptideCTerm;
    }

    // Strip the two flanking characters and the two dots.
    var sequence = annotation.Substring(2, annotation.Length - 4);
    return CreateGraph(aaSet, nTerm, sequence, cTerm);
}
/// <summary>
/// Builds a sequence graph for a long protein annotation with up to six modifications per
/// protein and prints every sequence composition together with its fragment compositions.
/// Only PyroGluQ at the protein N-terminus is active; the other search modifications are
/// declared so they can be enabled by adding them to the list.
/// </summary>
public void TestBuildingSequenceGraphLongProtein()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Configure amino acid set
    const int numMaxModsPerProtein = 6;
    var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);

    // Fixed: this was built from Modification.PyroGluQ (copy-paste), so enabling it would
    // have searched PyroGluQ on C instead of dehydro.
    var dehydro = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        pyroGluQ,
        //dehydro,
        //cysteinylC,
        //glutathioneC,
        //oxM
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    const string protAnnotation =
        "_.MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAALRKLMMNHQ._";

    var seqGraph = SequenceGraph.CreateGraph(aaSet, protAnnotation);
    var seqCompositions = seqGraph.GetSequenceCompositions();
    for (var modIndex = 0; modIndex < seqCompositions.Length; modIndex++)
    {
        var seqComposition = seqCompositions[modIndex];
        Console.WriteLine("SequenceComposition: {0}", seqComposition);

        // Every fragment composition is printed (a former mass filter is disabled).
        foreach (var composition in seqGraph.GetFragmentCompositions(modIndex, 0))
        {
            Console.WriteLine("***Seq: {0}, Frag: {1}", seqComposition, composition);
        }
    }
}
/// <summary>
/// Creates a tag match finder for one spectrum and one protein sequence; every argument is
/// stored in the corresponding private field.
/// </summary>
/// <param name="spec">Product spectrum.</param>
/// <param name="ms2Scorer">MS2 scorer.</param>
/// <param name="featureFinder">Feature finder.</param>
/// <param name="proteinSequence">Protein sequence.</param>
/// <param name="tolerance">Tolerance.</param>
/// <param name="aaSet">Amino acid set.</param>
/// <param name="maxSequenceMass">Maximum sequence mass.</param>
public TagMatchFinder(
    ProductSpectrum spec,
    IScorer ms2Scorer,
    LcMsPeakMatrix featureFinder,
    string proteinSequence,
    Tolerance tolerance,
    AminoAcidSet aaSet,
    double maxSequenceMass)
{
    // Spectrum-side inputs.
    _spec = spec;
    _ms2Scorer = ms2Scorer;
    _featureFinder = featureFinder;

    // Sequence-side inputs.
    _proteinSequence = proteinSequence;
    _aaSet = aaSet;

    // Matching limits.
    _tolerance = tolerance;
    _maxSequenceMass = maxSequenceMass;
}
/// <summary>
/// Initializes a bottom-up search launcher: records the inputs, resolves the output directory
/// and assigns a default value to every search setting.
/// </summary>
/// <param name="specFilePath">Value stored in <c>SpecFilePath</c>.</param>
/// <param name="dbFilePath">Value stored in <c>DatabaseFilePath</c>.</param>
/// <param name="outputDir">
/// Output directory. When null, the directory of <paramref name="specFilePath"/> is used.
/// When it does not exist yet, it is created.
/// </param>
/// <param name="aaSet">Value stored in <c>AminoAcidSet</c>.</param>
/// <param name="enzyme">Value stored in <c>Enzyme</c>.</param>
/// <exception cref="Exception">
/// <paramref name="outputDir"/> names an existing file rather than a directory.
/// </exception>
public IcBottomUpLauncher(
    string specFilePath,
    string dbFilePath,
    string outputDir,
    AminoAcidSet aaSet,
    Enzyme enzyme)
{
    ErrorMessage = string.Empty;

    SpecFilePath = specFilePath;
    DatabaseFilePath = dbFilePath;
    AminoAcidSet = aaSet;
    Enzyme = enzyme;

    if (outputDir == null)
    {
        // Default to the directory that holds the spectrum file.
        OutputDir = Path.GetDirectoryName(SpecFilePath);
    }
    else
    {
        if (!Directory.Exists(outputDir))
        {
            if (File.Exists(outputDir) && !File.GetAttributes(outputDir).HasFlag(FileAttributes.Directory))
            {
                throw new Exception(outputDir + " is not a directory!");
            }

            Directory.CreateDirectory(outputDir);
        }

        OutputDir = outputDir;
    }

    // Fixed: an unconditional "OutputDir = outputDir;" used to follow here and overwrote the
    // spectrum-directory default with null whenever outputDir was null.

    MinSequenceLength = 6;
    MaxSequenceLength = 30;
    MinPrecursorIonCharge = 1;
    MaxPrecursorIonCharge = 4;
    MinProductIonCharge = 1;
    MaxProductIonCharge = 3;
    PrecursorIonTolerance = new Tolerance(10);
    ProductIonTolerance = new Tolerance(10);
    RunTargetDecoyAnalysis = DatabaseSearchMode.Both;
    NumTolerableTermini = 1;
    NumMatchesPerSpectrum = 10;
}
/// <summary>
/// Runs the scan-based tag search on scan 4672 of a top-down data set and prints every matched
/// sequence with the start index, end index and mass of its tag match.
/// The test is ignored when the spectrum, FASTA or modification file is missing.
/// </summary>
public void TestSearchWithTagGeneration()
{
    const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

    // Alternative database that has been used here: D:\MassSpecFiles\60k\ID_004973_9BA6912F.fasta
    const string fastaFilePath = @"D:\MSPathFinder\Fasta\ID_002216_235ACCEA.fasta";

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var tolerance = new Tolerance(10);

    var modsFilePath = @"D:\MSPathFinder\Fasta\Mods.txt";
    if (!File.Exists(modsFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, modsFilePath);
    }

    var aaSet = new AminoAcidSet(modsFilePath);

    // The tag generator uses its own tolerance of 8, separate from the search tolerance of 10.
    var tagGenerator = new SequenceTagGenerator(run, new Tolerance(8));
    var peakMatrix = new LcMsPeakMatrix(run);
    var tagSearchEngine = new ScanBasedTagSearchEngine(run, tagGenerator, peakMatrix, fastaDb, tolerance, aaSet);

    foreach (var match in tagSearchEngine.RunSearch(4672))
    {
        var tagMatch = match.TagMatch;
        Console.Write(match.Sequence);
        Console.WriteLine("\t{0}\t{1}\t{2}", tagMatch.StartIndex, tagMatch.EndIndex, tagMatch.Mass);
    }
}
/// <summary>
/// Creates a composite scorer factory: stores the scoring parameters, starts with an empty
/// scorer cache keyed by an integer and builds the protein scoring graph factory from the
/// mass binning and amino acid set.
/// </summary>
/// <param name="run">LC-MS run.</param>
/// <param name="comparer">Mass binning; also passed to the scoring graph factory.</param>
/// <param name="aaSet">Amino acid set; only used to build the scoring graph factory.</param>
/// <param name="minProductCharge">Minimum product ion charge.</param>
/// <param name="maxProductCharge">Maximum product ion charge.</param>
/// <param name="productTolerance">Product ion tolerance.</param>
/// <param name="isotopeOffsetTolerance">Stored in <c>IsotopeOffsetTolerance</c>.</param>
/// <param name="filteringWindowSize">Stored in <c>FilteringWindowSize</c>.</param>
public CompositeScorerFactory(
    ILcMsRun run,
    IMassBinning comparer,
    AminoAcidSet aaSet,
    int minProductCharge,
    int maxProductCharge,
    Tolerance productTolerance,
    int isotopeOffsetTolerance = 2,
    double filteringWindowSize = 1.1)
{
    // Run and binning.
    _run = run;
    _comparer = comparer;

    // Product ion parameters.
    _minProductCharge = minProductCharge;
    _maxProductCharge = maxProductCharge;
    _productTolerance = productTolerance;

    // Spectrum processing parameters.
    FilteringWindowSize = filteringWindowSize;
    IsotopeOffsetTolerance = isotopeOffsetTolerance;

    // Per-key scorer cache and the graph factory shared by all scorers.
    _ms2Scorer = new Dictionary<int, IScorer>();
    _scoringGraphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
}
/// <summary>
/// Scores the first identification of a result file against its spectrum via
/// <c>GetScoreTest</c> and prints the sequence length and the score.
/// When either input file is missing, a warning is printed and the test returns.
/// </summary>
public void TestScoring()
{
    // Only this many leading rows of the result file are scored.
    const int numRowsToScore = 1;

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    var rawFile = @"\\protoapps\UserData\Jungkap\Joshua\testData\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    var resultFile = @"\\protoapps\UserData\Jungkap\Joshua\IdResult\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

    if (!File.Exists(rawFile))
    {
        Console.WriteLine(@"Warning: Skipping test {0} since file not found: {1}", methodName, rawFile);
        return;
    }

    if (!File.Exists(resultFile))
    {
        Console.WriteLine(@"Warning: Skipping test {0} since file not found: {1}", methodName, resultFile);
        return;
    }

    var tsvParser = new TsvFileParser(resultFile);
    var tsvData = tsvParser.GetAllData();
    var ms2ScanNumbers = tsvData["Scan"];

    var run = PbfLcMsRun.GetLcMsRun(rawFile, 0, 0);

    for (var row = 0; row < numRowsToScore; row++)
    {
        var scanNum = Int32.Parse(ms2ScanNumbers[row]);
        var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;

        // Use the first row that carries this scan number.
        var tsvIndex = ms2ScanNumbers.FindIndex(scanText => Int32.Parse(scanText) == scanNum);
        var seqStr = tsvData["Sequence"].ElementAt(tsvIndex).Trim();
        var seqMod = tsvData["Modifications"].ElementAt(tsvIndex).Trim();

        var aaSet = new AminoAcidSet();
        var sequence = Sequence.CreateSequence(seqStr, seqMod, aaSet);
        Console.WriteLine(sequence.Count);

        var score = GetScoreTest(sequence, spectrum);
        Console.WriteLine(scanNum + ":" + score);
    }
}
/// <summary>
/// Initializes the static lookup tables: the standard amino acid set (constructed with
/// carbamidomethylation) and the table that maps a mass string to an amino acid and the
/// list of modifications associated with that string.
/// </summary>
static MgfSequenceReader()
{
    StandardAminoAcidSet = new AminoAcidSet(Modification.Carbamidomethylation);

    // Entries are added in ascending order of their key, exactly one per key.
    var modificationTable = new Dictionary<string, Tuple<AminoAcid, List<Modification>>>
    {
        {
            "99.032",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('G'),
                new List<Modification> { Modification.Acetylation })
        },
        {
            "113.048",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('A'),
                new List<Modification> { Modification.Acetylation })
        },
        {
            "129.043",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('S'),
                new List<Modification> { Modification.Acetylation })
        },
        {
            "141.079",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('V'),
                new List<Modification> { Modification.Acetylation })
        },
        {
            "143.059",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('T'),
                new List<Modification> { Modification.Acetylation })
        },
        {
            "147.035",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('M'),
                new List<Modification> { Modification.Oxidation })
        },
        {
            "157.038",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('D'),
                new List<Modification> { Modification.Acetylation })
        },
        {
            "160.03",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('C'),
                new List<Modification> { Modification.Carbamidomethylation })
        },
        {
            "171.054",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('E'),
                new List<Modification> { Modification.Acetylation })
        },
        {
            "173.051",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('M'),
                new List<Modification> { Modification.Acetylation })
        },
        {
            "189.046",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('F'),
                new List<Modification> { Modification.Acetylation })
        },
        {
            "202.041",
            new Tuple<AminoAcid, List<Modification>>(
                StandardAminoAcidSet.GetAminoAcid('C'),
                new List<Modification> { Modification.Carbamidomethylation, Modification.Acetylation })
        },
    };

    Modifications = modificationTable;
}
/// <summary>
/// Convenience constructor for searches without an MS2 scorer: delegates to the full
/// constructor and passes null as the scorer.
/// </summary>
/// <param name="run">LC-MS run.</param>
/// <param name="featureParser">Feature parser.</param>
/// <param name="tagParser">Sequence tag parser.</param>
/// <param name="fastaDb">FASTA database.</param>
/// <param name="tolerance">Tolerance.</param>
/// <param name="aaSet">Amino acid set.</param>
/// <param name="maxSequenceMass">Maximum sequence mass.</param>
/// <param name="minProductIonCharge">Minimum product ion charge.</param>
/// <param name="maxProductIonCharge">Maximum product ion charge.</param>
public FeatureBasedTagSearchEngine(
    LcMsRun run,
    Ms1FtParser featureParser,
    SequenceTagParser tagParser,
    FastaDatabase fastaDb,
    Tolerance tolerance,
    AminoAcidSet aaSet,
    double maxSequenceMass = 50000.0,
    int minProductIonCharge = 1,
    int maxProductIonCharge = 20)
    : this(
        run: run,
        featureParser: featureParser,
        ms2Scorer: null,
        tagParser: tagParser,
        fastaDb: fastaDb,
        tolerance: tolerance,
        aaSet: aaSet,
        maxSequenceMass: maxSequenceMass,
        minProductIonCharge: minProductIonCharge,
        maxProductIonCharge: maxProductIonCharge)
{
}
/// <summary>
/// For every row of a rescored result file, recomputes the composition from the sequence
/// (plus water) and the listed modifications and asserts that it equals the reported one.
/// The test is ignored when the result file is not available.
/// </summary>
public void ValidateIcResultsWithModifications()
{
    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1_Rescored.tsv";

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var parser = new TsvFileParser(resultFilePath);
    var sequences = parser.GetData("Sequence");
    var modifications = parser.GetData("Modifications");
    var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();

    // Not used by the checks below; parsing it still requires the column to hold integers.
    var scanNums = parser.GetData("ScanNum").Select(s => Convert.ToInt32(s)).ToArray();

    var aaSet = new AminoAcidSet();
    for (var row = 0; row < parser.NumData; row++)
    {
        var sequenceComp = aaSet.GetComposition(sequences[row]) + Composition.H2O;

        // Drop the first and last character of the field, then split the comma-separated entries.
        var modificationField = modifications[row];
        var modsStr = modificationField.Substring(1, modificationField.Length - 2);

        var modComposition = Composition.Zero;
        foreach (var modStr in modsStr.Split(',').Where(entry => entry.Length != 0))
        {
            // The modification name is the first whitespace-separated token of the entry.
            var modName = modStr.Split()[0];
            modComposition += Modification.Get(modName).Composition;
        }

        var compFromSeqAndMods = sequenceComp + modComposition;
        Assert.True(compFromSeqAndMods.Equals(compositions[row]));
    }
}
/// <summary>
/// Rescores one top-down identification (scan 5448, charge 11) with the informed top-down
/// scorer and prints the total score and the number of matched fragments.
/// </summary>
public void TestRescoring()
{
    // The same data set also exists as a .raw file:
    //   H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw
    const string specFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";

    // Another identification that has been used here: scan 4084,
    //   SGWYELSKSSNDQFKFVLKAGNGEVILTSELYTGKSGAMNGIESVQTNSPIEARYAKEVAKNDKPYFNLKAANHQIIGTSQMYSSTA
    const string sequence = "SKTKHPLPEQWQKNQEAAKATQVAFDLDEKFQYSIRKAALDAGVSPSDQIRTILGLSVSRRPTRPRLTVSLNADDYVQLAEKYDLNADAQLEIKRRVLEDLVRFVAED";
    const int scanNum = 5448;
    const int charge = 11;
    const int numMaxModsPerProtein = 4;

    // Configure amino acid set
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        glutathioneC,
        oxM,
        acetylN,
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // Composition of the intact sequence (residues plus water).
    var composition = aaSet.GetComposition(sequence) + Composition.H2O;

    var run = PbfLcMsRun.GetLcMsRun(specFilePath, 0, 0);
    var informedScorer = new InformedTopDownScorer(run, aaSet, 1, 15, new Tolerance(10));
    var scores = informedScorer.GetScores(
        AminoAcid.ProteinNTerm,
        sequence,
        AminoAcid.ProteinCTerm,
        composition,
        charge,
        scanNum);

    Console.WriteLine("Total Score = " + scores.Score);
    Console.WriteLine("#Fragments = " + scores.NumMatchedFrags);
}
/// <summary>
/// Returns the cached default amino acid set, creating it with the parameterless
/// constructor on first use.
/// </summary>
/// <returns>The cached <c>AminoAcidSet</c> instance.</returns>
public static AminoAcidSet GetStandardAminoAcidSet()
{
    var cached = _standardAminoAcidSet;
    if (cached != null)
    {
        return cached;
    }

    // First call: create the instance and remember it for later calls.
    cached = new AminoAcidSet();
    _standardAminoAcidSet = cached;
    return cached;
}
/// <summary>
/// Adds an amino acid residue to this graph and builds the node layer for its position.
/// The residue is stored at slot <paramref name="index"/> + 1, which also becomes the
/// current index (<c>_index</c>).
/// </summary>
/// <param name="index">index of the slot preceding the one that receives the residue.</param>
/// <param name="residue">amino acid residue to add.</param>
/// <param name="loc">location of the residue</param>
/// <returns>
/// true if the residue was added; false if the residue is not a valid amino acid for
/// <paramref name="loc"/> or if adding it would push the fragment mass above
/// <c>_maxSequenceMass</c>. <c>_index</c> is advanced in either case.
/// </returns>
private bool PutAminoAcid(int index, char residue, SequenceLocation loc)
{
    _index = index + 1;

    var aminoAcid = AminoAcidSet.GetAminoAcid(residue, loc);
    if (aminoAcid == null) // residue is not valid
    {
        return false;
    }

    var fragmentComposition = _fragmentComposition[_index - 1] + aminoAcid.Composition;
    if (fragmentComposition.Mass > _maxSequenceMass)
    {
        return false;
    }

    _aminoAcidSequence[_index] = aminoAcid;
    _fragmentComposition[_index] = fragmentComposition;

    var prevLayer = _graph[_index - 1];
    var modIndices = AminoAcidSet.GetModificationIndices(residue, loc);
    if (!modIndices.Any()) // No modification
    {
        // Every previous node gets exactly one successor with the same modification combination.
        _graph[_index] = new Node[prevLayer.Length];
        for (var i = 0; i < prevLayer.Length; i++)
        {
            _graph[_index][i] = new Node(prevLayer[i].ModificationCombinationIndex, i);
        }

        return true;
    }

    // One node per reachable modification combination; each node collects all predecessors.
    var modCombIndexToNodeMap = new Dictionary<int, Node>();
    for (var prevNodeIndex = 0; prevNodeIndex < prevLayer.Length; prevNodeIndex++)
    {
        var prevModCombIndex = prevLayer[prevNodeIndex].ModificationCombinationIndex;

        // unmodified edge
        if (modCombIndexToNodeMap.TryGetValue(prevModCombIndex, out var unmodifiedEdgeNode))
        {
            unmodifiedEdgeNode.AddPrevNodeIndex(prevNodeIndex);
        }
        else
        {
            modCombIndexToNodeMap.Add(prevModCombIndex, new Node(prevModCombIndex, prevNodeIndex));
        }

        // modified edge
        foreach (var modIndex in modIndices)
        {
            var modCombIndex = ModificationParams.GetModificationCombinationIndex(prevModCombIndex, modIndex);
            if (modCombIndex < 0) // too many modifications
            {
                continue;
            }

            if (modCombIndexToNodeMap.TryGetValue(modCombIndex, out var modifiedEdgeNode))
            {
                modifiedEdgeNode.AddPrevNodeIndex(prevNodeIndex);
            }
            else
            {
                modCombIndexToNodeMap.Add(modCombIndex, new Node(modCombIndex, prevNodeIndex));
            }
        }
    }

    // Materialize the layer once, after all predecessors were processed. This used to run
    // inside the loop, rebuilding the array on every iteration with the same final result.
    _graph[_index] = modCombIndexToNodeMap.Values.ToArray();

    return true;
}
/// <summary>
/// Builds a sequence graph for the given sequence and termini and hands it out only when the
/// constructed graph reports itself as valid.
/// </summary>
/// <param name="aaSet">amino acid set</param>
/// <param name="nTerm">N-term amino acid</param>
/// <param name="sequence">sequence</param>
/// <param name="cTerm">C-term amino acid</param>
/// <returns>the sequence graph, or null when its <c>IsValid</c> flag is false</returns>
public static SequenceGraph CreateGraph(AminoAcidSet aaSet, AminoAcid nTerm, string sequence, AminoAcid cTerm)
{
    var candidate = new SequenceGraph(aaSet, nTerm, sequence, cTerm);
    if (!candidate.IsValid)
    {
        return null;
    }

    return candidate;
}
/// <summary>
/// Add an amino acid residue to this generator. The sequence location of the residue is
/// derived from its position, then the node layer for that position is built.
/// </summary>
/// <param name="index">index to add the amino acid. 0 is C-term. 1 is the C-term amino acid.</param>
/// <param name="residue">amino acid residue to add.</param>
/// <returns>
/// true if residue is a valid amino acid; false if no location could be determined for the
/// position or the residue is not valid at that location. <c>_index</c> is advanced in
/// either case.
/// </returns>
private bool PutAminoAcid(int index, char residue)
{
    _index = index + 1;

    SequenceLocation? location = null;

    if (_index == 1) // C-term residue
    {
        if (residue == AminoAcid.PeptideCTerm.Residue)
        {
            location = SequenceLocation.PeptideCTerm;
        }
        else if (residue == AminoAcid.ProteinCTerm.Residue)
        {
            location = SequenceLocation.ProteinCTerm;
        }
    }
    else if (_index == _aminoAcidSequence.Length - 1 - NumNTermCleavages) // N-term residue
    {
        if (residue == AminoAcid.PeptideNTerm.Residue)
        {
            location = SequenceLocation.PeptideNTerm;
        }
        else if (residue == AminoAcid.ProteinNTerm.Residue)
        {
            location = SequenceLocation.ProteinNTerm;
        }
    }
    else if (_index == 2) // Amino acid at the C-term
    {
        // The location follows the kind of C-terminus stored at slot 1.
        if (_aminoAcidSequence[1] == AminoAcid.PeptideCTerm)
        {
            location = SequenceLocation.PeptideCTerm;
        }
        else if (_aminoAcidSequence[1] == AminoAcid.ProteinCTerm)
        {
            location = SequenceLocation.ProteinCTerm;
        }
    }
    else if (_index == _aminoAcidSequence.Length - 2 - NumNTermCleavages) // Amino acid at the N-term
    {
        // The location follows the kind of N-terminus stored in the last slot.
        if (_aminoAcidSequence[_aminoAcidSequence.Length - 1] == AminoAcid.PeptideNTerm)
        {
            location = SequenceLocation.PeptideNTerm;
        }
        else if (_aminoAcidSequence[_aminoAcidSequence.Length - 1] == AminoAcid.ProteinNTerm)
        {
            location = SequenceLocation.ProteinNTerm;
        }
    }
    else
    {
        location = SequenceLocation.Everywhere;
    }

    if (location == null)
    {
        return false;
    }

    var loc = (SequenceLocation)location;
    var aminoAcid = AminoAcidSet.GetAminoAcid(residue, loc);
    if (aminoAcid == null) // residue is not valid
    {
        return false;
    }

    _aminoAcidSequence[_index] = aminoAcid;
    _suffixComposition[_index] = _suffixComposition[_index - 1] + aminoAcid.Composition;

    var prevLayer = _graph[_index - 1];
    var modIndices = AminoAcidSet.GetModificationIndices(residue, loc);
    if (!modIndices.Any()) // No modification
    {
        // Every previous node gets exactly one successor with the same modification combination.
        _graph[_index] = new Node[prevLayer.Length];
        for (var i = 0; i < prevLayer.Length; i++)
        {
            _graph[_index][i] = new Node(prevLayer[i].ModificationCombinationIndex, i);
        }

        return true;
    }

    // One node per reachable modification combination; each node collects all predecessors.
    var modCombIndexToNodeMap = new Dictionary<int, Node>();
    for (var prevNodeIndex = 0; prevNodeIndex < prevLayer.Length; prevNodeIndex++)
    {
        var prevModCombIndex = prevLayer[prevNodeIndex].ModificationCombinationIndex;
        Node existingNode;

        // unmodified edge
        if (modCombIndexToNodeMap.TryGetValue(prevModCombIndex, out existingNode))
        {
            existingNode.AddPrevNodeIndex(prevNodeIndex);
        }
        else
        {
            modCombIndexToNodeMap.Add(prevModCombIndex, new Node(prevModCombIndex, prevNodeIndex));
        }

        // modified edge
        foreach (var modIndex in modIndices)
        {
            var modCombIndex = ModificationParams.GetModificationCombinationIndex(prevModCombIndex, modIndex);
            if (modCombIndex < 0) // too many modifications
            {
                continue;
            }

            if (modCombIndexToNodeMap.TryGetValue(modCombIndex, out existingNode))
            {
                existingNode.AddPrevNodeIndex(prevNodeIndex);
            }
            else
            {
                modCombIndexToNodeMap.Add(modCombIndex, new Node(modCombIndex, prevNodeIndex));
            }
        }
    }

    // Materialize the layer once, after all predecessors were processed. This used to run
    // inside the loop, rebuilding the array on every iteration with the same final result.
    _graph[_index] = modCombIndexToNodeMap.Values.ToArray();

    return true;
}
/// <summary>
/// Creates a sequence from a residue string by looking up each character in the given
/// amino acid set and passing the resulting amino acids to the enumerable-based constructor.
/// </summary>
/// <param name="sequence">Residue characters, one per amino acid.</param>
/// <param name="aminoAcidSet">Amino acid set used to resolve each residue character.</param>
public Sequence(string sequence, AminoAcidSet aminoAcidSet)
    : this(sequence.Select(residue => aminoAcidSet.GetAminoAcid(residue)))
{
}
/// <summary>
/// Returns the cached amino acid set that is constructed with
/// <c>Modification.Carbamidomethylation</c>, creating it on first use.
/// </summary>
/// <returns>The cached <c>AminoAcidSet</c> instance.</returns>
public static AminoAcidSet GetStandardAminoAcidSetWithCarboamidomethylCys()
{
    var cached = _standardAminoAcidSetWithCarboamidomethylCys;
    if (cached != null)
    {
        return cached;
    }

    // First call: create the instance and remember it for later calls.
    cached = new AminoAcidSet(Modification.Carbamidomethylation);
    _standardAminoAcidSetWithCarboamidomethylCys = cached;
    return cached;
}