Example #1
0
        /// <summary>
        /// Returns the sequence tags for an MS2 scan, serving them from the scan-to-tag map
        /// when present and otherwise computing and storing them.
        /// </summary>
        /// <param name="ms2ScanNum">Scan number used both as the map key and for the spectrum lookup.</param>
        /// <returns>
        /// The stored tag list if the map already has one for this scan; a new empty list if the
        /// spectrum obtained for the scan is not a <c>ProductSpectrum</c>; otherwise the newly found tags.
        /// </returns>
        public IList <SequenceTag> GetAllSequenceTagString(int ms2ScanNum)
        {
            IList<SequenceTag> cachedTags;
            bool isCached;

            lock (_ms2ScanToTagMap)
            {
                isCached = _ms2ScanToTagMap.TryGetValue(ms2ScanNum, out cachedTags);
            }

            if (isCached)
            {
                return cachedTags;
            }

            var productSpectrum = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
            if (productSpectrum == null)
            {
                // This empty result is returned without being stored in the map.
                return new List<SequenceTag>();
            }

            // The tag search itself runs outside the lock.
            IList<SequenceTag> computedTags =
                new SequenceTagFinder(productSpectrum, _tolerance, _minTagLen, _maxTagLen, _aminoAcids)
                    .GetAllSequenceTagString();

            lock (_ms2ScanToTagMap)
            {
                _ms2ScanToTagMap[ms2ScanNum] = computedTags;
            }

            return computedTags;
        }
Example #2
0
        /// <summary>
        /// Finds the sequence tags of one MS2 scan and stores them in the scan-to-tag map,
        /// replacing any entry already present for that scan.
        /// </summary>
        /// <param name="ms2ScanNum">Scan number used for the spectrum lookup and as the map key.</param>
        public void Generate(int ms2ScanNum)
        {
            var productSpectrum = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;

            // Scans that do not yield a ProductSpectrum leave the map untouched.
            if (productSpectrum != null)
            {
                var finder    = new SequenceTagFinder(productSpectrum, _tolerance, _minTagLen, _maxTagLen, _aminoAcids);
                var foundTags = finder.GetAllSequenceTagString();

                lock (_ms2ScanToTagMap)
                {
                    _ms2ScanToTagMap[ms2ScanNum] = foundTags;
                }
            }
        }
Example #3
0
        /// <summary>
        /// Computes the sequence tags for a single MS2 scan and writes them into the
        /// scan-to-tag map under that scan number.
        /// </summary>
        /// <param name="ms2ScanNum">Scan number used for the spectrum lookup and as the map key.</param>
        public void Generate(int ms2ScanNum)
        {
            var productSpectrum = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;

            // Only a ProductSpectrum is searched; anything else results in no map update.
            if (productSpectrum != null)
            {
                var foundTags = new SequenceTagFinder(productSpectrum, _tolerance, _minTagLen, _maxTagLen, _aminoAcids)
                    .GetAllSequenceTagString();

                lock (_ms2ScanToTagMap)
                {
                    _ms2ScanToTagMap[ms2ScanNum] = foundTags;
                }
            }
        }
        /// <summary>
        /// Collects the sequence tags found in the given spectra, keeping one tag per distinct
        /// sequence string (the first one encountered).
        /// </summary>
        /// <param name="spectrums">Spectra to search; an empty list yields an empty result.</param>
        /// <returns>The retained tags as a new list.</returns>
        private List <SequenceTag> GetTags(List <ProductSpectrum> spectrums)
        {
            var uniqueBySequence = new Dictionary<string, SequenceTag>();

            if (spectrums.Count > 0)
            {
                foreach (var spectrum in spectrums)
                {
                    var finder = new SequenceTagFinder(spectrum, new Tolerance(10), 4);

                    foreach (var candidate in finder.GetAllSequenceTagString())
                    {
                        // A later tag with an already-seen sequence is dropped.
                        if (!uniqueBySequence.ContainsKey(candidate.Sequence))
                        {
                            uniqueBySequence.Add(candidate.Sequence, candidate);
                        }
                    }
                }
            }

            return uniqueBySequence.Values.ToList();
        }
Example #5
0
        /// <summary>
        /// Looks up the sequence tags of an MS2 scan in the scan-to-tag map and, when they are
        /// not there yet, finds them and stores the result under the scan number.
        /// </summary>
        /// <param name="ms2ScanNum">Scan number used both as the map key and for the spectrum lookup.</param>
        /// <returns>
        /// The stored tag list if one exists; a new empty list if the spectrum obtained for the
        /// scan is not a <c>ProductSpectrum</c>; otherwise the newly found tags.
        /// </returns>
        public IList<SequenceTag> GetAllSequenceTagString(int ms2ScanNum)
        {
            IList<SequenceTag> storedTags;
            bool alreadyStored;

            lock (_ms2ScanToTagMap)
            {
                alreadyStored = _ms2ScanToTagMap.TryGetValue(ms2ScanNum, out storedTags);
            }

            if (alreadyStored)
            {
                return storedTags;
            }

            var productSpectrum = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
            if (productSpectrum == null)
            {
                // The empty list is handed back without being added to the map.
                return new List<SequenceTag>();
            }

            // Tag finding happens outside the lock.
            var finder = new SequenceTagFinder(productSpectrum, _tolerance, _minTagLen, _maxTagLen, _aminoAcids);
            IList<SequenceTag> freshTags = finder.GetAllSequenceTagString();

            lock (_ms2ScanToTagMap)
            {
                _ms2ScanToTagMap[ms2ScanNum] = freshTags;
            }

            return freshTags;
        }
Example #6
0
        /// <summary>
        /// Finds sequence tags in one MS2 spectrum, looks every tag up in a FASTA database,
        /// groups the matches by protein and prints per-protein and per-tag details to the console.
        /// The test is ignored when either input file does not exist.
        /// </summary>
        public void TestGetProteinsWithTagMatchingSingleSpec()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const int scanNum          = 2;
            const int minNumTagMatches = 1;
            var       aminoAcidSet     = AminoAcidSet.GetStandardAminoAcidSet();

            // NOTE(review): the raw file path is empty, so File.Exists below is always false and
            // this test is always ignored. The dataset this test was written against appears to be
            // H:\Research\Lewy\raw\Lewy_intact_07 - TODO confirm and set the path.
            const string rawFilePath = "";

            const string fastaFilePath = @"H:\Research\Lewy\ID_004858_0EE8CF61.fasta";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            var run          = PbfLcMsRun.GetLcMsRun(rawFilePath);

            // NOTE(review): spec is null when the scan is not a ProductSpectrum; it is passed on unchecked.
            var spec      = run.GetSpectrum(scanNum) as ProductSpectrum;
            var tagFinder = new SequenceTagFinder(spec, new Tolerance(5));
            var tags      = tagFinder.GetAllSequenceTagString();

            // Group every database hit of every tag under the protein it falls in.
            var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();

            foreach (var tag in tags)
            {
                var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                foreach (var index in matchedIndices)
                {
                    var protein    = fastaDb.GetProteinName(index);
                    var startIndex = fastaDb.GetOneBasedPositionInProtein(index);
                    var matchedTag = new MatchedTag(tag, startIndex, 0.0);

                    IList<MatchedTag> existingTags;
                    if (!proteinsToTags.TryGetValue(protein, out existingTags))
                    {
                        existingTags = new List<MatchedTag>();
                        proteinsToTags.Add(protein, existingTags);
                    }
                    existingTags.Add(matchedTag);
                }
            }

            // Proteins are visited by descending match count, so the first one below the
            // threshold ends the report.
            foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
            {
                if (entry.Value.Count < minNumTagMatches)
                {
                    break;
                }

                var proteinName     = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein         = new Sequence(proteinSequence, aminoAcidSet);
                Console.WriteLine(proteinName + "\t" + entry.Value.Count);

                foreach (var matchedTag in entry.Value)
                {
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

                    // Each flanking mass is printed next to its difference from the mass of the
                    // corresponding protein segment.
                    Console.WriteLine("\t{0} ({1})\t{2}\t{3} ({4})\t{5}\t{6}\t{7}",
                                      matchedTag.NTermFlankingMass, (matchedTag.NTermFlankingMass - nTermMass),
                                      seq,
                                      matchedTag.CTermFlankingMass, (matchedTag.CTermFlankingMass - cTermMass),
                                      matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
                }
            }
        }
 /// <summary>
 /// Gathers the sequence tags found across the given spectra, retaining only the first tag
 /// seen for each distinct sequence string.
 /// </summary>
 /// <param name="spectrums">Spectra to search; an empty list yields an empty result.</param>
 /// <returns>The retained tags as a new list.</returns>
 private List<SequenceTag> GetTags(List<ProductSpectrum> spectrums)
 {
     var uniqueBySequence = new Dictionary<string, SequenceTag>();

     if (spectrums.Count > 0)
     {
         foreach (var spectrum in spectrums)
         {
             var finder = new SequenceTagFinder(spectrum, new Tolerance(10), 4);

             foreach (var candidate in finder.GetAllSequenceTagString())
             {
                 // Tags whose sequence was already recorded are skipped.
                 if (!uniqueBySequence.ContainsKey(candidate.Sequence))
                 {
                     uniqueBySequence.Add(candidate.Sequence, candidate);
                 }
             }
         }
     }

     return uniqueBySequence.Values.ToList();
 }
Example #8
0
        /// <summary>
        /// Walks the rows of a results TSV, generates sequence tags from the MS2 spectrum of each
        /// row's scan, and counts how many tags occur as substrings of the row's sequence.
        /// Prints a per-scan line and a final "scans with a hit / scans processed" summary.
        /// Processing stops at the first row whose QValue exceeds 0.01.
        /// The test is ignored when either input file does not exist.
        /// </summary>
        public void TestSequenceTag()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            //const string TestRawFile = @"D:\\Vlad_TopDown\\raw\\yufeng_column_test2.raw";
            //const string TestResultFile = @"D:\\Vlad_TopDown\\results\\yufeng_column_test2_IcTda.tsv";
            const string TestRawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            const string TestResultFile = @"D:\MassSpecFiles\training\IdResult\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
            //const string TestRawFile = @"D:\MassSpecFiles\Lewy\Lewy_intact_01.raw";
            //const string TestResultFile = @"D:\MassSpecFiles\Lewy\Lewy_intact_01_IcTda.tsv";

            if (!File.Exists(TestRawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestRawFile);
            }

            if (!File.Exists(TestResultFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestResultFile);
            }

            // Configure amino acid set: every standard residue plus its acetylated and oxidized forms.
            var aminoAcidList = new List<AminoAcid>();
            foreach (var aa in AminoAcid.StandardAminoAcidArr)
            {
                aminoAcidList.Add(aa);
                aminoAcidList.Add(new ModifiedAminoAcid(aa, Modification.Acetylation));
                aminoAcidList.Add(new ModifiedAminoAcid(aa, Modification.Oxidation));
            }

            // The list is complete here, so one array is built once and reused for every scan.
            var aminoAcids = aminoAcidList.ToArray();

            var tsvParser = new TsvFileParser(TestResultFile);
            // NOTE(review): headerList is not used below; kept in case GetHeaders has effects - confirm and remove.
            var headerList = tsvParser.GetHeaders();
            var tsvData = tsvParser.GetAllData();
            var ms2ScanNumbers = tsvData["Scan"];

            var run = PbfLcMsRun.GetLcMsRun(TestRawFile);
            var nSpec = 0;
            var nHitSpec = 0;

            for (var i = 0; i < ms2ScanNumbers.Count; i++)
            {
                var scanNum = Int32.Parse(ms2ScanNumbers[i]);

                var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;

                // Row data comes from the first TSV row carrying this scan number.
                int tsvIndex = ms2ScanNumbers.FindIndex(x => Int32.Parse(x) == scanNum);
                var qValue = double.Parse(tsvData["QValue"].ElementAt(tsvIndex));
                if (qValue > 0.01)
                {
                    // Ends the whole loop, not just this row - presumably rows are sorted by QValue; confirm.
                    break;
                }

                var seqStr = tsvData["Sequence"].ElementAt(tsvIndex).Trim();
                var modStr = tsvData["Modifications"].ElementAt(tsvIndex).Trim();
                var tolerance = new Tolerance(5);
                var tagFinder = new SequenceTagFinder(spectrum, tolerance, 5, 8, aminoAcids);
                var nTags = 0;
                var nHit = 0;

                // NOTE(review): the two values below are not used afterwards; kept because building
                // the sequence may validate seqStr/modStr - confirm and remove if not needed.
                var seqOjb = Sequence.CreateSequence(seqStr, modStr, new AminoAcidSet());
                var compWithoutH2O = seqOjb.Composition - Composition.H2O;

                foreach (var seqTagStr in tagFinder.GetAllSequenceTagString())
                {
                    // A tag is a hit when it appears verbatim inside the identified sequence.
                    if (seqStr.Contains(seqTagStr.Sequence))
                    {
                        nHit++;
                    }
                    nTags++;
                }

                nSpec++;
                if (nHit > 0)
                {
                    nHitSpec++;
                }

                Console.WriteLine(@"[{0}]seqLen = {1}: {2}/{3}", scanNum, seqStr.Length, nHit, nTags);
            }

            Console.Write("{0}/{1}", nHitSpec, nSpec);
        }
Example #9
0
        /// <summary>
        /// Finds sequence tags in one MS2 spectrum, looks every tag up in a FASTA database,
        /// groups the matches by protein and prints per-protein and per-tag details to the console.
        /// The test is ignored when either input file does not exist.
        /// </summary>
        public void TestGetProteinsWithTagMatchingSingleSpec()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const int scanNum = 2;
            const int minNumTagMatches = 1;
            var aminoAcidSet = AminoAcidSet.GetStandardAminoAcidSet();

            // NOTE(review): the raw file path is empty, so File.Exists below is always false and
            // this test is always ignored. The dataset this test was written against appears to be
            // H:\Research\Lewy\raw\Lewy_intact_07 - TODO confirm and set the path.
            const string rawFilePath = "";

            const string fastaFilePath = @"H:\Research\Lewy\ID_004858_0EE8CF61.fasta";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

            // NOTE(review): spec is null when the scan is not a ProductSpectrum; it is passed on unchecked.
            var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
            var tagFinder = new SequenceTagFinder(spec, new Tolerance(5));
            var tags = tagFinder.GetAllSequenceTagString();

            // Group every database hit of every tag under the protein it falls in.
            var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();

            foreach (var tag in tags)
            {
                var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                foreach (var index in matchedIndices)
                {
                    var protein = fastaDb.GetProteinName(index);
                    var startIndex = fastaDb.GetOneBasedPositionInProtein(index);
                    var matchedTag = new MatchedTag(tag, startIndex, 0.0);

                    IList<MatchedTag> existingTags;
                    if (!proteinsToTags.TryGetValue(protein, out existingTags))
                    {
                        existingTags = new List<MatchedTag>();
                        proteinsToTags.Add(protein, existingTags);
                    }
                    existingTags.Add(matchedTag);
                }
            }

            // Proteins are visited by descending match count, so the first one below the
            // threshold ends the report.
            foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
            {
                if (entry.Value.Count < minNumTagMatches)
                {
                    break;
                }

                var proteinName = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein = new Sequence(proteinSequence, aminoAcidSet);
                Console.WriteLine(proteinName + "\t" + entry.Value.Count);

                foreach (var matchedTag in entry.Value)
                {
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

                    // Each flanking mass is printed next to its difference from the mass of the
                    // corresponding protein segment.
                    Console.WriteLine("\t{0} ({1})\t{2}\t{3} ({4})\t{5}\t{6}\t{7}",
                        matchedTag.NTermFlankingMass, (matchedTag.NTermFlankingMass - nTermMass),
                        seq,
                        matchedTag.CTermFlankingMass, (matchedTag.CTermFlankingMass - cTermMass),
                        matchedTag.StartIndex,
                        matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
                }
            }
        }
Example #10
0
        /// <summary>
        /// Walks the rows of a results TSV, generates sequence tags from the MS2 spectrum of each
        /// row's scan, and counts how many tags occur as substrings of the row's sequence.
        /// Prints a per-scan line and a final "scans with a hit / scans processed" summary.
        /// Processing stops at the first row whose QValue exceeds 0.01.
        /// The test is ignored when either input file does not exist.
        /// </summary>
        public void TestSequenceTag()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            //const string TestRawFile = @"D:\\Vlad_TopDown\\raw\\yufeng_column_test2.raw";
            //const string TestResultFile = @"D:\\Vlad_TopDown\\results\\yufeng_column_test2_IcTda.tsv";
            const string TestRawFile    = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
            const string TestResultFile = @"D:\MassSpecFiles\training\IdResult\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";

            //const string TestRawFile = @"D:\MassSpecFiles\Lewy\Lewy_intact_01.raw";
            //const string TestResultFile = @"D:\MassSpecFiles\Lewy\Lewy_intact_01_IcTda.tsv";

            if (!File.Exists(TestRawFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestRawFile);
            }

            if (!File.Exists(TestResultFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestResultFile);
            }

            // Configure amino acid set: every standard residue plus its acetylated and oxidized forms.
            var aminoAcidList = new List<AminoAcid>();

            foreach (var aa in AminoAcid.StandardAminoAcidArr)
            {
                aminoAcidList.Add(aa);
                aminoAcidList.Add(new ModifiedAminoAcid(aa, Modification.Acetylation));
                aminoAcidList.Add(new ModifiedAminoAcid(aa, Modification.Oxidation));
            }

            // The list is complete here, so one array is built once and reused for every scan.
            var aminoAcids = aminoAcidList.ToArray();

            var tsvParser      = new TsvFileParser(TestResultFile);
            // NOTE(review): headerList is not used below; kept in case GetHeaders has effects - confirm and remove.
            var headerList     = tsvParser.GetHeaders();
            var tsvData        = tsvParser.GetAllData();
            var ms2ScanNumbers = tsvData["Scan"];

            var run      = PbfLcMsRun.GetLcMsRun(TestRawFile);
            var nSpec    = 0;
            var nHitSpec = 0;

            for (var i = 0; i < ms2ScanNumbers.Count; i++)
            {
                var scanNum = Int32.Parse(ms2ScanNumbers[i]);

                var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;

                // Row data comes from the first TSV row carrying this scan number.
                int tsvIndex = ms2ScanNumbers.FindIndex(x => Int32.Parse(x) == scanNum);
                var qValue   = double.Parse(tsvData["QValue"].ElementAt(tsvIndex));
                if (qValue > 0.01)
                {
                    // Ends the whole loop, not just this row - presumably rows are sorted by QValue; confirm.
                    break;
                }

                var seqStr    = tsvData["Sequence"].ElementAt(tsvIndex).Trim();
                var modStr    = tsvData["Modifications"].ElementAt(tsvIndex).Trim();
                var tolerance = new Tolerance(5);
                var tagFinder = new SequenceTagFinder(spectrum, tolerance, 5, 8, aminoAcids);
                var nTags     = 0;
                var nHit      = 0;

                // NOTE(review): the two values below are not used afterwards; kept because building
                // the sequence may validate seqStr/modStr - confirm and remove if not needed.
                var seqOjb         = Sequence.CreateSequence(seqStr, modStr, new AminoAcidSet());
                var compWithoutH2O = seqOjb.Composition - Composition.H2O;

                foreach (var seqTagStr in tagFinder.GetAllSequenceTagString())
                {
                    // A tag is a hit when it appears verbatim inside the identified sequence.
                    if (seqStr.Contains(seqTagStr.Sequence))
                    {
                        nHit++;
                    }
                    nTags++;
                }

                nSpec++;
                if (nHit > 0)
                {
                    nHitSpec++;
                }

                Console.WriteLine(@"[{0}]seqLen = {1}: {2}/{3}", scanNum, seqStr.Length, nHit, nTags);
            }

            Console.Write("{0}/{1}", nHitSpec, nSpec);
        }