/// <summary>
/// Returns the sequence tags for an MS2 scan. An entry already stored in
/// <c>_ms2ScanToTagMap</c> is returned as-is; otherwise the tags are computed from the
/// scan's spectrum and stored in the map before being returned.
/// </summary>
/// <param name="ms2ScanNum">Scan number passed to <c>_run.GetSpectrum</c>.</param>
/// <returns>
/// The stored or newly computed tags, or a new empty list (which is not stored) when the
/// scan does not yield a <c>ProductSpectrum</c>.
/// </returns>
public IList<SequenceTag> GetAllSequenceTagString(int ms2ScanNum)
{
    IList<SequenceTag> result;

    lock (_ms2ScanToTagMap)
    {
        var alreadyStored = _ms2ScanToTagMap.TryGetValue(ms2ScanNum, out result);
        if (alreadyStored)
        {
            return result;
        }
    }

    // The spectrum lookup and the tag search both happen outside the lock.
    var productSpectrum = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
    if (productSpectrum != null)
    {
        var finder = new SequenceTagFinder(productSpectrum, _tolerance, _minTagLen, _maxTagLen, _aminoAcids);
        result = finder.GetAllSequenceTagString();

        lock (_ms2ScanToTagMap)
        {
            _ms2ScanToTagMap[ms2ScanNum] = result;
        }

        return result;
    }

    return new List<SequenceTag>();
}
/// <summary>
/// Computes the sequence tags for an MS2 scan and stores them in <c>_ms2ScanToTagMap</c>
/// under the scan number, replacing any entry already present for it.
/// Nothing is stored when the scan does not yield a <c>ProductSpectrum</c>.
/// </summary>
/// <param name="ms2ScanNum">Scan number passed to <c>_run.GetSpectrum</c>.</param>
public void Generate(int ms2ScanNum)
{
    var productSpectrum = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
    if (productSpectrum != null)
    {
        var finder = new SequenceTagFinder(productSpectrum, _tolerance, _minTagLen, _maxTagLen, _aminoAcids);
        var computedTags = finder.GetAllSequenceTagString();

        // Only the map write is done under the lock.
        lock (_ms2ScanToTagMap)
        {
            _ms2ScanToTagMap[ms2ScanNum] = computedTags;
        }
    }
}
/// <summary>
/// Runs the tag search for one MS2 scan and writes the result into
/// <c>_ms2ScanToTagMap</c>, overwriting whatever was stored for that scan number.
/// Returns without storing anything if the scan is not available as a <c>ProductSpectrum</c>.
/// </summary>
/// <param name="ms2ScanNum">Scan number passed to <c>_run.GetSpectrum</c>.</param>
public void Generate(int ms2ScanNum)
{
    var ms2Spectrum = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
    if (ms2Spectrum != null)
    {
        var scanTags = new SequenceTagFinder(ms2Spectrum, _tolerance, _minTagLen, _maxTagLen, _aminoAcids)
            .GetAllSequenceTagString();

        // The tag search above runs unlocked; only the map update is guarded.
        lock (_ms2ScanToTagMap)
        {
            _ms2ScanToTagMap[ms2ScanNum] = scanTags;
        }
    }
}
/// <summary>
/// Collects the sequence tags found in the given spectra, keeping a single tag per distinct
/// <c>Sequence</c> string (the first one encountered wins).
/// </summary>
/// <param name="spectrums">Spectra to search; an empty list yields an empty result.</param>
/// <returns>A new list holding one tag for each distinct sequence string.</returns>
private List<SequenceTag> GetTags(List<ProductSpectrum> spectrums)
{
    var firstTagBySequence = new Dictionary<string, SequenceTag>();

    // With no spectra the loop body never runs and the empty dictionary becomes an empty list.
    foreach (var spectrum in spectrums)
    {
        var finder = new SequenceTagFinder(spectrum, new Tolerance(10), 4);

        foreach (var candidate in finder.GetAllSequenceTagString())
        {
            // Later tags with an already-seen sequence are dropped.
            if (!firstTagBySequence.ContainsKey(candidate.Sequence))
            {
                firstTagBySequence.Add(candidate.Sequence, candidate);
            }
        }
    }

    return firstTagBySequence.Values.ToList();
}
/// <summary>
/// Looks up the sequence tags for an MS2 scan in <c>_ms2ScanToTagMap</c>; on a miss, computes
/// them from the scan's spectrum, stores them in the map, and returns them.
/// </summary>
/// <param name="ms2ScanNum">Scan number passed to <c>_run.GetSpectrum</c>.</param>
/// <returns>
/// The tags stored for the scan, the newly computed tags, or a new empty list (not stored)
/// when the scan is not available as a <c>ProductSpectrum</c>.
/// </returns>
public IList<SequenceTag> GetAllSequenceTagString(int ms2ScanNum)
{
    IList<SequenceTag> scanTags;

    lock (_ms2ScanToTagMap)
    {
        var found = _ms2ScanToTagMap.TryGetValue(ms2ScanNum, out scanTags);
        if (found)
        {
            return scanTags;
        }
    }

    // Reading the spectrum and searching for tags is done without holding the lock.
    var ms2Spectrum = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
    if (ms2Spectrum != null)
    {
        scanTags = new SequenceTagFinder(ms2Spectrum, _tolerance, _minTagLen, _maxTagLen, _aminoAcids)
            .GetAllSequenceTagString();

        lock (_ms2ScanToTagMap)
        {
            _ms2ScanToTagMap[ms2ScanNum] = scanTags;
        }

        return scanTags;
    }

    return new List<SequenceTag>();
}
/// <summary>
/// Finds the sequence tags of a single MS2 scan, looks each tag up in a FASTA database,
/// groups the matches by protein, and prints every protein (most matched tags first)
/// together with flanking-mass details of its matched tags.
/// </summary>
/// <remarks>
/// The test is ignored when the raw file or the FASTA file does not exist. It only writes
/// to the console; nothing is asserted about the matches themselves.
/// </remarks>
public void TestGetProteinsWithTagMatchingSingleSpec()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const int minNumTagMatches = 1;
    var aminoAcidSet = AminoAcidSet.GetStandardAminoAcidSet();

    const int scanNum = 2;

    // NOTE(review): rawFilePath is an empty string, so File.Exists is false and the test is
    // always ignored until a real raw-file path is supplied here.
    const string rawFilePath = "";
    const string fastaFilePath = @"H:\Research\Lewy\ID_004858_0EE8CF61.fasta";

    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
    if (spec == null)
    {
        // The 'as' cast yields null when the scan is missing or is not a ProductSpectrum;
        // fail with a clear message instead of handing null to SequenceTagFinder.
        Assert.Fail(@"Scan {0} of {1} could not be read as a ProductSpectrum", scanNum, rawFilePath);
    }

    var tagFinder = new SequenceTagFinder(spec, new Tolerance(5));
    var tags = tagFinder.GetAllSequenceTagString();

    // Group every database hit of every tag by the name of the protein it falls in.
    var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();
    foreach (var tag in tags)
    {
        var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
        foreach (var index in matchedIndices)
        {
            var protein = fastaDb.GetProteinName(index);

            // NOTE(review): this is a one-based position, while MatchedTag.StartIndex is used
            // below as a Substring offset (zero-based) — confirm MatchedTag expects one-based input.
            var startIndex = fastaDb.GetOneBasedPositionInProtein(index);
            var matchedTag = new MatchedTag(tag, startIndex, 0.0);

            IList<MatchedTag> existingTags;
            if (proteinsToTags.TryGetValue(protein, out existingTags))
            {
                existingTags.Add(matchedTag);
            }
            else
            {
                proteinsToTags.Add(protein, new List<MatchedTag> { matchedTag });
            }
        }
    }

    foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
    {
        // Entries are sorted by descending match count, so the first one below the
        // threshold means all remaining ones are below it too.
        if (entry.Value.Count < minNumTagMatches)
        {
            break;
        }

        var proteinName = entry.Key;
        var proteinSequence = fastaDb.GetProteinSequence(proteinName);
        var protein = new Sequence(proteinSequence, aminoAcidSet);
        Console.WriteLine(proteinName + "\t" + entry.Value.Count);

        foreach (var matchedTag in entry.Value)
        {
            var seq = proteinSequence.Substring(matchedTag.StartIndex, matchedTag.EndIndex - matchedTag.StartIndex);
            var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
            var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

            // Each flanking mass is printed next to its difference from the mass of the
            // corresponding protein segment computed above.
            Console.WriteLine("\t{0} ({1})\t{2}\t{3} ({4})\t{5}\t{6}\t{7}",
                matchedTag.NTermFlankingMass, (matchedTag.NTermFlankingMass - nTermMass), seq,
                matchedTag.CTermFlankingMass, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
        }
    }
}
/// <summary>
/// Runs a tag search on each spectrum and returns the found tags de-duplicated by their
/// <c>Sequence</c> string; when several tags share a sequence, the first one found is kept.
/// </summary>
/// <param name="spectrums">Spectra to search; an empty list produces an empty result.</param>
/// <returns>A new list with one tag per distinct sequence string.</returns>
private List<SequenceTag> GetTags(List<ProductSpectrum> spectrums)
{
    var tagsBySequence = new Dictionary<string, SequenceTag>();

    // No special case is needed for an empty list: the loop is skipped and the result is empty.
    foreach (var spectrum in spectrums)
    {
        var foundTags = new SequenceTagFinder(spectrum, new Tolerance(10), 4).GetAllSequenceTagString();

        foreach (var foundTag in foundTags)
        {
            var isNewSequence = !tagsBySequence.ContainsKey(foundTag.Sequence);
            if (isNewSequence)
            {
                tagsBySequence.Add(foundTag.Sequence, foundTag);
            }
        }
    }

    return tagsBySequence.Values.ToList();
}
/// <summary>
/// For each row of a result TSV (stopping at the first row whose QValue exceeds 0.01), finds
/// sequence tags in the row's MS2 spectrum and counts how many of them occur as substrings
/// of the identified sequence. Per-scan counts and an overall summary go to the console.
/// </summary>
/// <remarks>
/// The test is ignored when the raw file or the result file does not exist. Nothing is
/// asserted about the hit counts.
/// </remarks>
public void TestSequenceTag()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string TestRawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string TestResultFile = @"D:\MassSpecFiles\training\IdResult\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

    if (!File.Exists(TestRawFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestRawFile);
    }

    if (!File.Exists(TestResultFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestResultFile);
    }

    // Configure amino acid set: every standard residue plus its acetylated and oxidized forms.
    var aminoAcidList = new List<AminoAcid>();
    foreach (var aa in AminoAcid.StandardAminoAcidArr)
    {
        aminoAcidList.Add(aa);
        aminoAcidList.Add(new ModifiedAminoAcid(aa, Modification.Acetylation));
        aminoAcidList.Add(new ModifiedAminoAcid(aa, Modification.Oxidation));
    }

    var tsvParser = new TsvFileParser(TestResultFile);
    // NOTE(review): headerList is never read below; kept in case GetHeaders has side effects — confirm and remove.
    var headerList = tsvParser.GetHeaders();
    var tsvData = tsvParser.GetAllData();
    var ms2ScanNumbers = tsvData["Scan"];

    var run = PbfLcMsRun.GetLcMsRun(TestRawFile);
    var nSpec = 0;
    var nHitSpec = 0;

    for (var i = 0; i < ms2ScanNumbers.Count; i++)
    {
        var scanNum = Int32.Parse(ms2ScanNumbers[i]);
        var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;

        // FindIndex returns the first row carrying this scan number, so rows that repeat a
        // scan number all read their values from that first row.
        int tsvIndex = ms2ScanNumbers.FindIndex(x => Int32.Parse(x) == scanNum);
        var qValue = double.Parse(tsvData["QValue"].ElementAt(tsvIndex));
        if (qValue > 0.01)
        {
            // Stops at the first row above the threshold — presumably rows are ordered by QValue; confirm.
            break;
        }

        if (spectrum == null)
        {
            // The 'as' cast yields null when the scan is missing or is not a ProductSpectrum;
            // fail with a clear message instead of handing null to SequenceTagFinder.
            Assert.Fail(@"Scan {0} of {1} could not be read as a ProductSpectrum", scanNum, TestRawFile);
        }

        var seqStr = tsvData["Sequence"].ElementAt(tsvIndex).Trim();
        var modStr = tsvData["Modifications"].ElementAt(tsvIndex).Trim();
        var tolerance = new Tolerance(5);
        var tagFinder = new SequenceTagFinder(spectrum, tolerance, 5, 8, aminoAcidList.ToArray());
        var nTags = 0;
        var nHit = 0;

        // NOTE(review): seqOjb / compWithoutH2O are not used for the counts below; kept because
        // CreateSequence may act as input validation — confirm and remove if it does not.
        var seqOjb = Sequence.CreateSequence(seqStr, modStr, new AminoAcidSet());
        var compWithoutH2O = seqOjb.Composition - Composition.H2O;

        foreach (var seqTagStr in tagFinder.GetAllSequenceTagString())
        {
            // A tag is a hit when its sequence is a substring of the identified sequence.
            if (seqStr.Contains(seqTagStr.Sequence))
            {
                nHit++;
            }

            nTags++;
        }

        nSpec++;
        if (nHit > 0)
        {
            nHitSpec++;
        }

        Console.WriteLine(@"[{0}]seqLen = {1}: {2}/{3}", scanNum, seqStr.Length, nHit, nTags);
    }

    // Summary: spectra with at least one hit / spectra processed.
    Console.Write("{0}/{1}", nHitSpec, nSpec);
}
/// <summary>
/// Runs a tag search on one MS2 scan, searches a FASTA database for each tag sequence,
/// collects the matches per protein, and prints the proteins in order of decreasing
/// match count along with flanking-mass details for each matched tag.
/// </summary>
/// <remarks>
/// Ignored when the raw file or the FASTA file is missing. The test produces console
/// output only and makes no assertions about the matches.
/// </remarks>
public void TestGetProteinsWithTagMatchingSingleSpec()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const int minNumTagMatches = 1;
    var aminoAcidSet = AminoAcidSet.GetStandardAminoAcidSet();

    const int scanNum = 2;

    // NOTE(review): rawFilePath is empty, so File.Exists returns false and this test is
    // always ignored until an actual raw-file path is filled in.
    const string rawFilePath = "";
    const string fastaFilePath = @"H:\Research\Lewy\ID_004858_0EE8CF61.fasta";

    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
    if (spec == null)
    {
        // 'as' gives null for a missing scan or a spectrum of another type; report that
        // explicitly rather than passing null on to SequenceTagFinder.
        Assert.Fail(@"Scan {0} of {1} could not be read as a ProductSpectrum", scanNum, rawFilePath);
    }

    var tagFinder = new SequenceTagFinder(spec, new Tolerance(5));
    var tags = tagFinder.GetAllSequenceTagString();

    // Protein name -> all tag matches that fall inside that protein.
    var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();
    foreach (var tag in tags)
    {
        var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
        foreach (var index in matchedIndices)
        {
            var protein = fastaDb.GetProteinName(index);

            // NOTE(review): a one-based position is stored here, but MatchedTag.StartIndex is
            // later used as a zero-based Substring offset — confirm what MatchedTag expects.
            var startIndex = fastaDb.GetOneBasedPositionInProtein(index);
            var matchedTag = new MatchedTag(tag, startIndex, 0.0);

            IList<MatchedTag> existingTags;
            if (proteinsToTags.TryGetValue(protein, out existingTags))
            {
                existingTags.Add(matchedTag);
            }
            else
            {
                proteinsToTags.Add(protein, new List<MatchedTag> { matchedTag });
            }
        }
    }

    foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
    {
        // Sorted by descending count: once one entry is below the minimum, the rest are too.
        if (entry.Value.Count < minNumTagMatches)
        {
            break;
        }

        var proteinName = entry.Key;
        var proteinSequence = fastaDb.GetProteinSequence(proteinName);
        var protein = new Sequence(proteinSequence, aminoAcidSet);
        Console.WriteLine(proteinName + "\t" + entry.Value.Count);

        foreach (var matchedTag in entry.Value)
        {
            var seq = proteinSequence.Substring(matchedTag.StartIndex, matchedTag.EndIndex - matchedTag.StartIndex);
            var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
            var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

            // Flanking masses are shown with their deviation from the protein segment masses.
            Console.WriteLine("\t{0} ({1})\t{2}\t{3} ({4})\t{5}\t{6}\t{7}",
                matchedTag.NTermFlankingMass, (matchedTag.NTermFlankingMass - nTermMass), seq,
                matchedTag.CTermFlankingMass, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
        }
    }
}
/// <summary>
/// Walks the rows of a result TSV until the first row with QValue above 0.01. For each row
/// it searches the corresponding MS2 spectrum for sequence tags and counts the tags that
/// appear as substrings of the identified sequence, printing per-scan and overall counts.
/// </summary>
/// <remarks>
/// Ignored when the raw file or the result file is missing. The hit counts are only
/// printed, not asserted.
/// </remarks>
public void TestSequenceTag()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string TestRawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string TestResultFile = @"D:\MassSpecFiles\training\IdResult\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

    if (!File.Exists(TestRawFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestRawFile);
    }

    if (!File.Exists(TestResultFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestResultFile);
    }

    // Configure amino acid set: each standard residue, plus acetylated and oxidized variants.
    var aminoAcidList = new List<AminoAcid>();
    foreach (var aa in AminoAcid.StandardAminoAcidArr)
    {
        aminoAcidList.Add(aa);
        aminoAcidList.Add(new ModifiedAminoAcid(aa, Modification.Acetylation));
        aminoAcidList.Add(new ModifiedAminoAcid(aa, Modification.Oxidation));
    }

    var tsvParser = new TsvFileParser(TestResultFile);
    // NOTE(review): headerList is unused; retained in case GetHeaders has side effects — confirm and remove.
    var headerList = tsvParser.GetHeaders();
    var tsvData = tsvParser.GetAllData();
    var ms2ScanNumbers = tsvData["Scan"];

    var run = PbfLcMsRun.GetLcMsRun(TestRawFile);
    var nSpec = 0;
    var nHitSpec = 0;

    for (var i = 0; i < ms2ScanNumbers.Count; i++)
    {
        var scanNum = Int32.Parse(ms2ScanNumbers[i]);
        var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;

        // FindIndex picks the first row with this scan number, so any later row repeating
        // the scan number is evaluated with the first row's values.
        int tsvIndex = ms2ScanNumbers.FindIndex(x => Int32.Parse(x) == scanNum);
        var qValue = double.Parse(tsvData["QValue"].ElementAt(tsvIndex));
        if (qValue > 0.01)
        {
            // Ends the whole loop at the first row above the threshold — presumably the rows
            // are sorted by QValue; confirm.
            break;
        }

        if (spectrum == null)
        {
            // 'as' gives null for a missing scan or a spectrum of another type; report that
            // explicitly rather than passing null on to SequenceTagFinder.
            Assert.Fail(@"Scan {0} of {1} could not be read as a ProductSpectrum", scanNum, TestRawFile);
        }

        var seqStr = tsvData["Sequence"].ElementAt(tsvIndex).Trim();
        var modStr = tsvData["Modifications"].ElementAt(tsvIndex).Trim();
        var tolerance = new Tolerance(5);
        var tagFinder = new SequenceTagFinder(spectrum, tolerance, 5, 8, aminoAcidList.ToArray());
        var nTags = 0;
        var nHit = 0;

        // NOTE(review): seqOjb / compWithoutH2O do not feed the counts below; retained because
        // CreateSequence may serve as input validation — confirm and remove if it does not.
        var seqOjb = Sequence.CreateSequence(seqStr, modStr, new AminoAcidSet());
        var compWithoutH2O = seqOjb.Composition - Composition.H2O;

        foreach (var seqTagStr in tagFinder.GetAllSequenceTagString())
        {
            // Count the tag as a hit when the identified sequence contains it.
            if (seqStr.Contains(seqTagStr.Sequence))
            {
                nHit++;
            }

            nTags++;
        }

        nSpec++;
        if (nHit > 0)
        {
            nHitSpec++;
        }

        Console.WriteLine(@"[{0}]seqLen = {1}: {2}/{3}", scanNum, seqStr.Length, nHit, nTags);
    }

    // Summary: number of spectra with at least one hit / number of spectra processed.
    Console.Write("{0}/{1}", nHitSpec, nSpec);
}