Exemplo n.º 1
0
        /// <summary>
        /// Writes every match of every LC scan to a tab-delimited text file, one row per match,
        /// preceded by a single header row.
        /// </summary>
        /// <param name="matches">Match sets indexed by scan number; an entry may be null when a scan has no matches.</param>
        /// <param name="outputFilePath">Path of the text file to write.</param>
        /// <param name="database">Database used to look up the protein name, description, length and position for each match.</param>
        private void WriteResultsToFile(SortedSet <DatabaseSequenceSpectrumMatch>[] matches, string outputFilePath, FastaDatabase database)
        {
            using (var writer = new StreamWriter(outputFilePath))
            {
                writer.WriteLine("Scan\tPre\tSequence\tPost\tModifications\tComposition\tProteinName\tProteinDesc" +
                                 "\tProteinLength\tStart\tEnd\tCharge\tMostAbundantIsotopeMz\tMass\t#MatchedFragments\tIcScore"
                                 );
                for (var scanNum = _run.MinLcScan; scanNum <= _run.MaxLcScan; scanNum++)
                {
                    if (matches[scanNum] == null)
                    {
                        continue;
                    }

                    // Walk the set in the reverse of its sort order.
                    foreach (var match in matches[scanNum].Reverse())
                    {
                        // Validate the ion BEFORE it is dereferenced for scoring; a match without
                        // an ion cannot be scored or reported, so log it and skip the row.
                        var ion = match.Ion;
                        if (ion == null)
                        {
                            Console.WriteLine(@"Null ion!");
                            continue;
                        }

                        // Likewise, a row cannot be written without scores.
                        var scores = _bottomUpScorer.GetScores(match, ion.Composition, ion.Charge, scanNum);
                        if (scores == null)
                        {
                            Console.WriteLine(@"Null scores");
                            continue;
                        }

                        var sequence    = match.Sequence;
                        var offset      = match.Offset;
                        var start       = database.GetOneBasedPositionInProtein(offset) + 1 + match.NumNTermCleavages;
                        var end         = start + sequence.Length - 1;
                        var proteinName = database.GetProteinName(match.Offset);
                        var protLength  = database.GetProteinLength(proteinName);

                        // Note for DblToString(value, 9, true), by having "9" and "true",
                        // values between 100 and 999 Da will have 7 digits after the decimal place, and
                        // values between 1000 and 9999 will have 6 digits after the decimal place

                        writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}",
                                         scanNum,                                                              // Scan
                                         match.Pre,                                                            // Pre
                                         sequence,                                                             // Sequence
                                         match.Post,                                                           // Post
                                         scores.Modifications,                                                 // Modifications
                                         ion.Composition,                                                      // Composition
                                         proteinName,                                                          // ProteinName
                                         database.GetProteinDescription(match.Offset),                         // ProteinDesc
                                         protLength,                                                           // ProteinLength
                                         start,                                                                // Start
                                         end,                                                                  // End
                                         ion.Charge,                                                           // Charge
                                         StringUtilities.DblToString(ion.GetMostAbundantIsotopeMz(), 9, true), // MostAbundantIsotopeMz
                                         StringUtilities.DblToString(ion.Composition.Mass, 9, true),           // Mass
                                         match.Score,                                                          // #MatchedFragments column (value is match.Score)
                                         scores.Score                                                          // IcScore (re-scored)
                                         );
                    }
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads the Short.fasta test database, enumerates the entries returned by
        /// <c>AnnotationsAndOffsets</c> (called with <c>Enzyme.Trypsin</c>), and prints one
        /// tab-separated line per entry: annotation, offset, protein name, protein length,
        /// and position in the protein plus one.
        /// The test is ignored when the FASTA file is not reachable.
        /// </summary>
        public void TestGettingProteinLengthAndPosition()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            const string fastaPath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";

            var fastaIsAvailable = File.Exists(fastaPath);
            if (!fastaIsAvailable)
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, fastaPath);
            }

            var fastaDb = new FastaDatabase(fastaPath);
            fastaDb.Read();

            var indexed  = new IndexedDatabase(fastaDb);
            var peptides = indexed.AnnotationsAndOffsets(6, 20, 2, 0, Enzyme.Trypsin);

            foreach (var peptide in peptides)
            {
                var peptideAnnotation = peptide.Annotation;
                var dbOffset          = peptide.Offset;

                // Columns: annotation, offset, protein name, protein length, position + 1
                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}",
                                  peptideAnnotation,
                                  dbOffset,
                                  fastaDb.GetProteinName(dbOffset),
                                  fastaDb.GetProteinLength(fastaDb.GetProteinName(dbOffset)),
                                  fastaDb.GetOneBasedPositionInProtein(dbOffset) + 1);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the Short.fasta test database (located through <c>Utils.GetTestFile</c>),
        /// enumerates the entries returned by <c>AnnotationsAndOffsets</c> (called with
        /// <c>Enzyme.Trypsin</c>), and prints one tab-separated line per entry: annotation,
        /// offset, protein name, protein length, and position in the protein plus one.
        /// </summary>
        public void TestGettingProteinLengthAndPosition()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            // NOTE(review): GetTestFile presumably skips or fails the test when the file is missing - confirm in Utils.
            var relativeFastaPath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\Short.fasta");
            var fastaInfo         = Utils.GetTestFile(testName, relativeFastaPath);

            var fastaDb = new FastaDatabase(fastaInfo.FullName);
            fastaDb.Read();

            var indexed  = new IndexedDatabase(fastaDb);
            var peptides = indexed.AnnotationsAndOffsets(6, 20, 2, 0, Enzyme.Trypsin);

            foreach (var peptide in peptides)
            {
                var peptideAnnotation = peptide.Annotation;
                var dbOffset          = peptide.Offset;

                // Columns: annotation, offset, protein name, protein length, position + 1
                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}",
                                  peptideAnnotation,
                                  dbOffset,
                                  fastaDb.GetProteinName(dbOffset),
                                  fastaDb.GetProteinLength(fastaDb.GetProteinName(dbOffset)),
                                  fastaDb.GetOneBasedPositionInProtein(dbOffset) + 1);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Writes one tab-delimited row per MS2 scan that has a match, preceded by a header row.
        /// </summary>
        /// <param name="matches">Matches indexed by scan number; a null entry means the scan has no match and is skipped.</param>
        /// <param name="outputFilePath">Path of the text file to write.</param>
        /// <param name="database">Database used to look up protein name, description, length, position, and the entry count used for the EValue column.</param>
        private void WriteResultsToFile(DatabaseSequenceSpectrumMatch[] matches, string outputFilePath, FastaDatabase database)
        {
            using (var output = new StreamWriter(outputFilePath))
            {
                output.WriteLine("Scan\tPre\tSequence\tPost\tModifications\tComposition\tProteinName\tProteinDesc" +
                                 "\tProteinLength\tStart\tEnd\tCharge\tMostAbundantIsotopeMz\tMass\t#MatchedFragments\tProbability\tSpecEValue\tEValue");

                foreach (var scanNum in _ms2ScanNums)
                {
                    var hit = matches[scanNum];
                    if (hit == null)
                    {
                        continue;
                    }

                    // Locate the matched sequence within its protein.
                    var peptide       = hit.Sequence;
                    var dbOffset      = hit.Offset;
                    var firstResidue  = database.GetOneBasedPositionInProtein(dbOffset) + 1 + hit.NumNTermCleavages;
                    var lastResidue   = firstResidue + peptide.Length - 1;
                    var protein       = database.GetProteinName(hit.Offset);
                    var proteinLength = database.GetProteinLength(protein);
                    var precursor     = hit.Ion;
                    var description   = database.GetProteinDescription(hit.Offset);
                    var matchProb     = CompositeScorer.GetProbability(hit.Score);

                    // Note for DblToString(value, 9, true), by having "9" and "true",
                    // values between 100 and 999 Da will have 7 digits after the decimal place, and
                    // values between 1000 and 9999 will have 6 digits after the decimal place
                    var isotopeMzText = StringUtilities.DblToString(precursor.GetMostAbundantIsotopeMz(), 9, true);
                    var massText      = StringUtilities.DblToString(precursor.Composition.Mass, 9, true);
                    var probText      = StringUtilities.DblToString(matchProb, 4);

                    // SpecEValue and EValue will be displayed using scientific notation if the value is less than 0.001.
                    // EValue is the SpecEValue multiplied by the number of database entries.
                    var specEValueText = StringUtilities.DblToString(ExcelMinValue(hit.SpecEvalue), 6, true, 0.001);
                    var eValueText     = StringUtilities.DblToString(ExcelMinValue(hit.SpecEvalue * database.GetNumEntries()), 6, true, 0.001);

                    output.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}\t{13}\t{14}\t{15}\t{16}\t{17}",
                                     scanNum,                  // Scan
                                     hit.Pre,                  // Pre
                                     peptide,                  // Sequence
                                     hit.Post,                 // Post
                                     hit.ModificationText,     // Modifications
                                     precursor.Composition,    // Composition
                                     protein,                  // ProteinName
                                     description,              // ProteinDesc
                                     proteinLength,            // ProteinLength
                                     firstResidue,             // Start position in protein
                                     lastResidue,              // End position in protein
                                     precursor.Charge,         // Charge (precursor)
                                     isotopeMzText,            // MostAbundantIsotopeMz
                                     massText,                 // Mass
                                     hit.NumMatchedFragments,  // #MatchedFragments
                                     probText,                 // Probability
                                     specEValueText,           // SpecEValue
                                     eValueText                // EValue
                                     );
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Searches the Short.fasta test database for a fixed peptide pattern, asserts that a
        /// non-negative position is returned, and prints the position, all matched sequence
        /// indices, the corresponding positions within their proteins, and the elapsed time.
        /// The test is ignored when the FASTA file is not reachable.
        /// </summary>
        public void TestSearching()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(testName);

            // Timing covers database construction, the search, and the result output.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            const string fastaPath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";

            var fastaIsAvailable = File.Exists(fastaPath);
            if (!fastaIsAvailable)
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, fastaPath);
            }

            var fastaDb    = new FastaDatabase(fastaPath);
            var searchable = new SearchableDatabase(fastaDb);

            // Alternative pattern used previously: "NSGSHFCGGSLINSQWVVSAAH"
            const string query = "FPTDDDDK";

            var hitPosition = searchable.Search(query);
            Assert.True(hitPosition >= 0);
            Console.WriteLine("Position: {0}", hitPosition);

            var matchedIndexText = string.Join(",", searchable.FindAllMatchedSequenceIndices(query));
            Console.WriteLine("Matched indices: {0}", matchedIndexText);

            var proteinIndexText = string.Join(",", searchable.FindAllMatchedSequenceIndices(query).Select(index => fastaDb.GetOneBasedPositionInProtein(index)));
            Console.WriteLine("Protein indices: {0}", proteinIndexText);

            timer.Stop();
            Console.WriteLine(@"{0:f4} sec", timer.Elapsed.TotalSeconds);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Finds sequence tags in a single spectrum, looks each tag up in a FASTA database,
        /// groups the matched tags by protein name, and prints, for each protein (most tag
        /// matches first), the flanking masses and position of every matched tag.
        /// The test is ignored when either input file does not exist.
        /// </summary>
        public void TestGetProteinsWithTagMatchingSingleSpec()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const int minNumTagMatches = 1;
            const int scanNum          = 2;
            var       aminoAcidSet     = AminoAcidSet.GetStandardAminoAcidSet();

            // NOTE(review): rawFilePath is empty, so File.Exists returns false and this test is
            // always ignored until a real spectrum file path is supplied here.
            const string rawFilePath = "";

            const string fastaFilePath = @"H:\Research\Lewy\ID_004858_0EE8CF61.fasta";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            var run          = PbfLcMsRun.GetLcMsRun(rawFilePath);

            // The 'as' cast yields null when the scan is missing or is not a ProductSpectrum;
            // fail with a clear message instead of handing null to the tag finder.
            var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
            Assert.NotNull(spec, "Scan {0} did not yield a ProductSpectrum", scanNum);

            var tagFinder = new SequenceTagFinder(spec, new Tolerance(5));
            var tags      = tagFinder.GetAllSequenceTagString();

            // Group every database hit of every tag by the protein it falls in.
            var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();

            foreach (var tag in tags)
            {
                var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                foreach (var index in matchedIndices)
                {
                    var protein    = fastaDb.GetProteinName(index);
                    var startIndex = fastaDb.GetOneBasedPositionInProtein(index);
                    var matchedTag = new MatchedTag(tag, startIndex, 0.0);

                    IList<MatchedTag> existingTags;
                    if (!proteinsToTags.TryGetValue(protein, out existingTags))
                    {
                        existingTags = new List<MatchedTag>();
                        proteinsToTags.Add(protein, existingTags);
                    }

                    existingTags.Add(matchedTag);
                }
            }

            foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
            {
                // Entries are sorted by descending count, so once one falls below the
                // threshold all remaining entries do too.
                if (entry.Value.Count < minNumTagMatches)
                {
                    break;
                }

                var proteinName     = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein         = new Sequence(proteinSequence, aminoAcidSet);
                Console.WriteLine(proteinName + "\t" + entry.Value.Count);

                foreach (var matchedTag in entry.Value)
                {
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

                    // Each flanking mass is followed in parentheses by its difference from the
                    // mass computed from the protein sequence on that side of the tag.
                    Console.WriteLine("\t{0} ({1})\t{2}\t{3} ({4})\t{5}\t{6}\t{7}",
                                      matchedTag.NTermFlankingMass, (matchedTag.NTermFlankingMass - nTermMass),
                                      seq,
                                      matchedTag.CTermFlankingMass, (matchedTag.CTermFlankingMass - cTermMass),
                                      matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
                }
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Searches the Short.fasta test database (located through <c>Utils.GetTestFile</c>)
        /// for a fixed peptide pattern, asserts that a non-negative position is returned, and
        /// prints the position, all matched sequence indices, the corresponding positions
        /// within their proteins, and the elapsed time.
        /// </summary>
        public void TestSearching()
        {
            var testName = MethodBase.GetCurrentMethod().Name;
            Utils.ShowStarting(testName);

            // Timing covers test-file lookup, database construction, the search, and the result output.
            var timer = System.Diagnostics.Stopwatch.StartNew();

            var relativeFastaPath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\Short.fasta");
            var fastaInfo         = Utils.GetTestFile(testName, relativeFastaPath);

            var fastaDb    = new FastaDatabase(fastaInfo.FullName);
            var searchable = new SearchableDatabase(fastaDb);

            // Alternative pattern used previously: "NSGSHFCGGSLINSQWVVSAAH"
            const string query = "FPTDDDDK";

            var hitPosition = searchable.Search(query);
            Assert.True(hitPosition >= 0);
            Console.WriteLine("Position: {0}", hitPosition);

            var matchedIndexText = string.Join(",", searchable.FindAllMatchedSequenceIndices(query));
            Console.WriteLine("Matched indices: {0}", matchedIndexText);

            var proteinIndexText = string.Join(",", searchable.FindAllMatchedSequenceIndices(query).Select(index => fastaDb.GetOneBasedPositionInProtein(index)));
            Console.WriteLine("Protein indices: {0}", proteinIndexText);

            timer.Stop();
            Console.WriteLine(@"{0:f4} sec", timer.Elapsed.TotalSeconds);
        }