Ejemplo n.º 1
0
        public void TestEnumeratingProteins()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            var fastaFile = Utils.GetTestFile(methodName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\Short.fasta"));

            var db          = new FastaDatabase(fastaFile.FullName);
            var indexedDb   = new IndexedDatabase(db);
            var numPeptides = 0;

            foreach (var annotationAndOffset in indexedDb.IntactSequenceAnnotationsAndOffsetsWithCTermCleavagesLargerThan(100, 300, 3))
            {
                if (numPeptides < 20)
                {
                    Console.WriteLine(annotationAndOffset.Annotation);
                }
                numPeptides++;
            }
            sw.Stop();

            Console.WriteLine("Peptide count: {0}", numPeptides);

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 2
0
        public void TestEnumeratingProteins()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";

            if (!File.Exists(dbFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
            }

            var db        = new FastaDatabase(dbFile);
            var indexedDb = new IndexedDatabase(db);

            foreach (var annotationAndOffset in indexedDb.IntactSequenceAnnotationsAndOffsetsWithCTermCleavagesLargerThan(100, 300, 3))
            {
                Console.WriteLine(annotationAndOffset.Annotation);
            }
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 3
0
        public void TestGettingProteinLengthAndPosition()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var fastaFile = Utils.GetTestFile(methodName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\Short.fasta"));

            var db = new FastaDatabase(fastaFile.FullName);

            db.Read();
            var indexedDb = new IndexedDatabase(db);

            foreach (var peptideAnnotationAndOffset in indexedDb.AnnotationsAndOffsets(6, 20, 2, 0, Enzyme.Trypsin))
            {
                var annotation = peptideAnnotationAndOffset.Annotation;
                var offset     = peptideAnnotationAndOffset.Offset;
                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}",
                                  annotation,
                                  offset,
                                  db.GetProteinName(offset),
                                  db.GetProteinLength(db.GetProteinName(offset)),
                                  db.GetOneBasedPositionInProtein(offset) + 1);
            }
        }
Ejemplo n.º 4
0
        public void TestForManyMods(string dbFile, int expectedMaxLen300, int expectedMaxLen400, int expectedMaxLen500)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var fastaFile = Utils.GetTestFile(methodName, dbFile.Replace("TEST_FOLDER", Utils.DEFAULT_TEST_FILE_FOLDER));

            var indexedDb = new IndexedDatabase(new FastaDatabase(fastaFile.FullName));

            indexedDb.Read();

            var totalPeptidesMaxLen300 = indexedDb.EstimateTotalPeptides(1, 21, 300, 1, 0);

            Console.WriteLine("{0,12:N0} estimated peptides for max length 300", totalPeptidesMaxLen300);

            var totalPeptidesMaxLen400 = indexedDb.EstimateTotalPeptides(1, 21, 400, 1, 0);

            Console.WriteLine("{0,12:N0} estimated peptides for max length 400", totalPeptidesMaxLen400);

            var totalPeptidesMaxLen500 = indexedDb.EstimateTotalPeptides(1, 21, 500, 1, 0);

            Console.WriteLine("{0,12:N0} estimated peptides for max length 500", totalPeptidesMaxLen500);

            Assert.AreEqual(expectedMaxLen300, totalPeptidesMaxLen300);
            Assert.AreEqual(expectedMaxLen400, totalPeptidesMaxLen400);
            Assert.AreEqual(expectedMaxLen500, totalPeptidesMaxLen500);
        }
Ejemplo n.º 5
0
        public void CreateTargetList()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string databaseFilePath = @"D:\Research\Data\IPRG2014\database\SpikedInPeptides.fasta";

            if (!File.Exists(databaseFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, databaseFilePath);
            }

            var database = new FastaDatabase(databaseFilePath);

            database.Read();
            var indexedDatabase = new IndexedDatabase(database);
            var numTargets      = 0;

            var aaSet = new AminoAcidSet(Modification.Carbamidomethylation);

            Console.WriteLine("Peptide\tFormula\tProtein");
            foreach (var annotationAndOffset in indexedDatabase.AnnotationsAndOffsets(6, 30, 1, 1, Enzyme.Trypsin))
            {
                var annotation = annotationAndOffset.Annotation;
                var peptide    = annotation.Substring(2, annotation.Length - 4);
                var offset     = annotationAndOffset.Offset;

                Console.WriteLine("{0}\t{1}\t{2}", peptide, (aaSet.GetComposition(peptide) + Composition.H2O).ToPlainString(), database.GetProteinName(offset));
                numTargets++;
            }
            Console.WriteLine("NumTargets: {0}", numTargets);
        }
Ejemplo n.º 6
0
        private IEnumerable <AnnotationAndOffset> GetAnnotationsAndOffsets(FastaDatabase database, out long estimatedProteins, CancellationToken?cancellationToken = null)
        {
            var indexedDb = new IndexedDatabase(database);

            indexedDb.Read();
            estimatedProteins = indexedDb.EstimateTotalPeptides(SearchMode, MinSequenceLength, MaxSequenceLength, MaxNumNTermCleavages, MaxNumCTermCleavages);
            IEnumerable <AnnotationAndOffset> annotationsAndOffsets;

            if (SearchMode == InternalCleavageType.MultipleInternalCleavages)
            {
                //annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzyme(MinSequenceLength, MaxSequenceLength);
                annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzymeParallel(MinSequenceLength, MaxSequenceLength, MaxNumThreads, cancellationToken);
            }
            else if (SearchMode == InternalCleavageType.NoInternalCleavage)
            {
                annotationsAndOffsets = indexedDb.IntactSequenceAnnotationsAndOffsets(MinSequenceLength, MaxSequenceLength, MaxNumCTermCleavages);
            }
            else
            {
                annotationsAndOffsets = indexedDb
                                        .SequenceAnnotationsAndOffsetsWithNtermOrCtermCleavageNoLargerThan(
                    MinSequenceLength, MaxSequenceLength, MaxNumNTermCleavages, MaxNumCTermCleavages);
            }

            return(annotationsAndOffsets);
        }
Ejemplo n.º 7
0
        public void TestGettingProteinLengthAndPosition()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";

            if (!File.Exists(dbFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
            }

            var db = new FastaDatabase(dbFile);

            db.Read();
            var indexedDb = new IndexedDatabase(db);

            foreach (var peptideAnnotationAndOffset in indexedDb.AnnotationsAndOffsets(6, 20, 2, 0, Enzyme.Trypsin))
            {
                var annotation = peptideAnnotationAndOffset.Annotation;
                var offset     = peptideAnnotationAndOffset.Offset;
                Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}",
                                  annotation,
                                  offset,
                                  db.GetProteinName(offset),
                                  db.GetProteinLength(db.GetProteinName(offset)),
                                  db.GetOneBasedPositionInProtein(offset) + 1);
            }
        }
Ejemplo n.º 8
0
        [TestCase(1.5, @"TEST_FOLDER\MSPathFinderT\ID_002216_235ACCEA.fasta", 188961836)]  // 1.5MB
        //[TestCase(3, @"TEST_FOLDER\MSPathFinderT\ID_005133_8491EFA2.fasta", 323719193)]  // 3MB
        //[TestCase(6, @"TEST_FOLDER\MSPathFinderT\ID_004530_B63BD900.fasta", 595227563)]  // 6MB
        //[TestCase(15, @"TEST_FOLDER\MSPathFinderT\ID_004208_295531A4.fasta", 1882434687)]  // 15MB
        public void TestSequenceEnumerationSerial(double size, string dbFile, int expected)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName, dbFile);

            var fastaFile = Utils.GetTestFile(methodName, dbFile.Replace("TEST_FOLDER", Utils.DEFAULT_TEST_FILE_FOLDER));

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            var db        = new FastaDatabase(fastaFile.FullName);
            var indexedDb = new IndexedDatabase(db);

            indexedDb.Read();
            var numSequences = 0L;
            var timeDB       = sw.Elapsed;

            Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");
            var estimatedAnnOff = indexedDb.EstimateTotalPeptides(0, 30, 250);
            var timeEstimate    = sw.Elapsed;

            Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
            Console.WriteLine("Estimated results: " + estimatedAnnOff);
            var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzyme(30, 250);
            var timeGetAnn            = sw.Elapsed;

            Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");
            //foreach (var annotationsAndOffset in annotationsAndOffsets)
            //{
            //    //Interlocked.Increment(ref numSequences);
            //    ++numSequences;
            //}
            //using (
            //    var ofstream =
            //        new FileStream(
            //            Path.Combine(@"F:\InformedProteomicsTestFiles",
            //                Path.GetFileNameWithoutExtension(fastaFile) + "_old.txt"), FileMode.Create))
            //using (var fout = new StreamWriter(ofstream))
            //{
            //    foreach (var annOff in annotationsAndOffsets)
            //    {
            //        numSequences++;
            //        fout.WriteLine(annOff.Annotation);
            //    }
            //}
            numSequences = annotationsAndOffsets.Count();
            var timeParForEach = sw.Elapsed;

            Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");

            Console.WriteLine("NumPeptides: {0}", numSequences);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
            //Assert.AreEqual(188961836, numSequences);
            Assert.AreEqual(expected, numSequences);
        }
Ejemplo n.º 9
0
        [TestCase(@"TEST_FOLDER\MSPathFinderT\ID_004530_B63BD900.fasta", 8898)]  // 6MB
        //[TestCase(@"TEST_FOLDER\MSPathFinderT\ID_004208_295531A4.fasta", 6334)]  // 15MB
        public void TestSequenceEnumerationIntact(string dbFile, int expected)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName, dbFile);

            var fastaFile = Utils.GetTestFile(methodName, dbFile.Replace("TEST_FOLDER", Utils.DEFAULT_TEST_FILE_FOLDER));

            var sw = new Stopwatch();

            sw.Start();

            const int numCTermCleavages = 0;
            var       db        = new FastaDatabase(fastaFile.FullName);
            var       indexedDb = new IndexedDatabase(db);
            var       timeDB    = sw.Elapsed;

            Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");
            var estimatedAnnOff = indexedDb.EstimateTotalPeptides(2, 21, 300, 1, numCTermCleavages);
            var timeEstimate    = sw.Elapsed;

            Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
            Console.WriteLine("Estimated results: " + estimatedAnnOff);
            var annotationsAndOffsets = indexedDb.IntactSequenceAnnotationsAndOffsets(21, 300, numCTermCleavages);
            var timeGetAnn            = sw.Elapsed;

            Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");

            /*/Parallel.ForEach(
             *  annotationsAndOffsets,
             *  //                new ParallelOptions { MaxDegreeOfParallelism = 2},
             *  annotationAndOffset =>
             *  {
             *      Interlocked.Increment(ref numSequences);
             *      //++numSequences;
             *  }
             *  );/**/
            //using (var ofstream = new FileStream(Path.Combine(@"F:\InformedProteomicsTestFiles", Path.GetFileNameWithoutExtension(fastaFile) + "_par.txt"), FileMode.Create))
            //using (var fout = new StreamWriter(ofstream))
            //{
            //    foreach (var annOff in annotationsAndOffsets)
            //    {
            //        numSequences++;
            //        fout.WriteLine(annOff.Annotation);
            //    }
            //}
            long numSequences   = annotationsAndOffsets.Count();
            var  timeParForEach = sw.Elapsed;

            Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");

            Console.WriteLine("NumPeptides: {0}", numSequences);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
            //Assert.AreEqual(188961836, numSequences);
            Assert.AreEqual(expected, numSequences);
        }
Ejemplo n.º 10
0
        public void TestNominalMassErrors()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const int minLength = 300;
            const int maxLength = 400;

            var sw = new System.Diagnostics.Stopwatch();

            var fastaFile = Utils.GetTestFile(methodName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\ID_003962_71E1A1D4.fasta"));

            var db = new FastaDatabase(fastaFile.FullName);

            db.Read();
            var indexedDb    = new IndexedDatabase(db);
            var numSequences = 0L;

            sw.Start();

            var hist  = new long[11];
            var aaSet = new AminoAcidSet();

            foreach (var peptideAnnotationAndOffset in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
            {
                ++numSequences;
                var annotation   = peptideAnnotationAndOffset.Annotation;
                var sequenceStr  = annotation.Substring(2, annotation.Length - 4);
                var sequenceComp = aaSet.GetComposition(sequenceStr);
                var mass         = sequenceComp.Mass;
                var nominalMass  = sequenceComp.NominalMass;
                var error        = (int)Math.Round(mass * Constants.RescalingConstant) - nominalMass;
                var errorBin     = error + hist.Length / 2;
                if (errorBin < 0)
                {
                    errorBin = 0;
                }
                if (errorBin >= hist.Length)
                {
                    errorBin = hist.Length - 1;
                }
                hist[errorBin]++;
            }

            Console.WriteLine("NumSequences: {0}", numSequences);
            for (var i = 0; i < hist.Length; i++)
            {
                Console.WriteLine("{0}\t{1}\t{2}", i - hist.Length / 2, hist[i], hist[i] / (double)numSequences);
            }

            sw.Stop();

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 11
0
        public void TestNominalMassErrors()
        {
            const int minLength = 300;
            const int maxLength = 400;

            var sw = new System.Diagnostics.Stopwatch();

//            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";
            const string dbFile = @"C:\cygwin\home\kims336\Data\TopDownJia\database\ID_003962_71E1A1D4.fasta";

            //const string dbFile = @"C:\cygwin\home\kims336\Data\TopDownJia\database\TargetProteins.fasta";
            var db = new FastaDatabase(dbFile);

            db.Read();
            var indexedDb    = new IndexedDatabase(db);
            var numSequences = 0L;

            sw.Start();

            var hist  = new long[11];
            var aaSet = new AminoAcidSet();

            foreach (var peptideAnnotationAndOffset in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
            {
                ++numSequences;
                var annotation   = peptideAnnotationAndOffset.Annotation;
                var sequenceStr  = annotation.Substring(2, annotation.Length - 4);
                var sequenceComp = aaSet.GetComposition(sequenceStr);
                var mass         = sequenceComp.Mass;
                var nominalMass  = sequenceComp.NominalMass;
                var error        = (int)Math.Round(mass * Constants.RescalingConstant) - nominalMass;
                var errorBin     = error + hist.Length / 2;
                if (errorBin < 0)
                {
                    errorBin = 0;
                }
                if (errorBin >= hist.Length)
                {
                    errorBin = hist.Length - 1;
                }
                hist[errorBin]++;
            }

            Console.WriteLine("NumSequences: {0}", numSequences);
            for (var i = 0; i < hist.Length; i++)
            {
                Console.WriteLine("{0}\t{1}\t{2}", i - hist.Length / 2, hist[i], hist[i] / (double)numSequences);
            }

            sw.Stop();

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 12
0
        public void TestCountingPeptides()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";

            if (!File.Exists(dbFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
            }

//            const string dbFile = @"C:\cygwin\home\kims336\Data\QCShew\ID_003456_9B916A8B.fasta";
//            const string dbFile = @"H:\Research\DDAPlus\database\Yeast_SGD_withContam.fasta";
//            const string dbFile = @"H:\Research\CPTAC_Phospho\database\ID_004208_295531A4.fasta";
            var db        = new FastaDatabase(dbFile);
            var indexedDb = new IndexedDatabase(db);
            //var numPeptides = indexedDb.IntactSequenceAnnotationsAndOffsets(21, 300, 0).LongCount()*31;
            var peptides = indexedDb
                           .SequenceAnnotationsAndOffsetsWithNtermOrCtermCleavageNoLargerThan(
                100, 300, 1, 0);
            var numPeptides = 0;

            foreach (var peptide in peptides)
            {
                Console.WriteLine("{0}\t{1}", peptide.Annotation, peptide.Offset);
                numPeptides++;
            }

            //var numPeptides = indexedDb.AnnotationsAndOffsetsNoEnzyme(7, 150).LongCount();
            //var numPeptides =
            //    indexedDb.AnnotationsAndOffsets(7, 40, 2, 2, Enzyme.Trypsin).LongCount();


            //var numPeptides = indexedDb.AnnotationsAndOffsets(6, 40, 2, 2, Enzyme.Trypsin).LongCount();
            //var numPeptides = indexedDb.IntactSequenceAnnotationsAndOffsets(30, 250, 0).LongCount();
            //    .Select(annotationAndSequence => annotationAndSequence.Annotation.Length - 4)
            //    .Aggregate(0L, (current, length) => current + Math.Min(length - 29, 30));

            Console.WriteLine("NumPeptides: {0}", numPeptides);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 13
0
        private IEnumerable <AnnotationAndOffset> GetAnnotationsAndOffsets(FastaDatabase database)
        {
            var indexedDbTarget = new IndexedDatabase(database);
            IEnumerable <AnnotationAndOffset> annotationsAndOffsets;

            if (NumTolerableTermini == 0)
            {
                annotationsAndOffsets = indexedDbTarget.AnnotationsAndOffsetsNoEnzyme(MinSequenceLength, MaxSequenceLength);
            }
            else
            {
                annotationsAndOffsets = indexedDbTarget.AnnotationsAndOffsets(MinSequenceLength, MaxSequenceLength,
                                                                              NumTolerableTermini, 2, Enzyme);
            }

            return(annotationsAndOffsets);
        }
Ejemplo n.º 14
0
        public void TestSequenceEnumerationParallel2()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            var fastaFile = Utils.GetTestFile(methodName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\ID_002216_235ACCEA.fasta"));

            var db = new FastaDatabase(fastaFile.FullName);

            db.Read();

            var indexedDb = new IndexedDatabase(db);
            var arr       = db.Characters().ToArray();

            sw.Start();
            //var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzyme(7, 30);
            //            var num = annotationsAndOffsets.AsParallel().LongCount(annotationsAndOffset => annotationsAndOffset.Annotation.IndexOf('W') >= 0);
            //var num = annotationsAndOffsets.LongCount(annotationsAndOffset => annotationsAndOffset.Annotation.IndexOf('W') >= 0);
            //var num = arr.AsParallel().Where(c => c == 'W').LongCount();
            var num = 0;
            var sum = 0L;

            //foreach (var c in arr)
            for (var a = 0; a < arr.Length; a++)
            {
                var c = arr[a];
                for (var i = 0; i < c * 10000; i++)
                {
                    sum += i;
                }
                //                Interlocked.Increment(ref num);
                if (++num == 1000)
                {
                    break;
                }
            }

            Console.WriteLine("NumPeptides: {0}", sum);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 15
0
        public void TestCountingPeptides()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            var fastaFile = Utils.GetTestFile(methodName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\Short.fasta"));

            var db        = new FastaDatabase(fastaFile.FullName);
            var indexedDb = new IndexedDatabase(db);
            //var numPeptides = indexedDb.IntactSequenceAnnotationsAndOffsets(21, 300, 0).LongCount()*31;
            var peptides = indexedDb
                           .SequenceAnnotationsAndOffsetsWithNtermOrCtermCleavageNoLargerThan(
                100, 300, 1, 0);
            var numPeptides = 0;

            foreach (var peptide in peptides)
            {
                if (numPeptides < 20)
                {
                    Console.WriteLine("{0}\t{1}", peptide.Annotation, peptide.Offset);
                }
                numPeptides++;
            }

            //var numPeptides = indexedDb.AnnotationsAndOffsetsNoEnzyme(7, 150).LongCount();
            //var numPeptides =
            //    indexedDb.AnnotationsAndOffsets(7, 40, 2, 2, Enzyme.Trypsin).LongCount();

            //var numPeptides = indexedDb.AnnotationsAndOffsets(6, 40, 2, 2, Enzyme.Trypsin).LongCount();
            //var numPeptides = indexedDb.IntactSequenceAnnotationsAndOffsets(30, 250, 0).LongCount();
            //    .Select(annotationAndSequence => annotationAndSequence.Annotation.Length - 4)
            //    .Aggregate(0L, (current, length) => current + Math.Min(length - 29, 30));

            Console.WriteLine("NumPeptides: {0}", numPeptides);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 16
0
        public void TestForManyMods()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dbFilePath = @"\\protoapps\UserData\Jungkap\Lewy\db\ID_005140_7A170668.fasta";
            var          indexedDb  = new IndexedDatabase(new FastaDatabase(dbFilePath));

            indexedDb.Read();

            var nProt = indexedDb.EstimateTotalPeptides(1, 21, 300, 1, 0);

            Console.WriteLine(nProt);

            nProt = indexedDb.EstimateTotalPeptides(1, 21, 400, 1, 0);
            Console.WriteLine(nProt);

            nProt = indexedDb.EstimateTotalPeptides(1, 21, 500, 1, 0);
            Console.WriteLine(nProt);

            Console.WriteLine(@"Test not implemented: " + methodName);
        }
Ejemplo n.º 17
0
        private static void TestCountingPeptides()
        {
            var aaSet = new AminoAcidSet();

            var sw = new Stopwatch();

            sw.Start();

            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_002166_F86E3B2F.fasta";
            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_003456_9B916A8B.fasta";
            //            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta";
            var db        = new FastaDatabase(dbFile);
            var indexedDb = new IndexedDatabase(db);

            indexedDb.Read();
            //var numPeptides = indexedDb.AnnotationsAndOffsetsNoEnzyme(7, 150).LongCount();
            var peptides =
                indexedDb.AnnotationsAndOffsets(7, 40, 2, 2, Enzyme.Trypsin);

            Parallel.ForEach(peptides, annotationAndOffset =>
                             //foreach(var annotationAndOffset in peptides)
            {
                var annotation = annotationAndOffset.Annotation;
                var offset     = annotationAndOffset.Offset;

                var graph = SequenceGraph.CreateGraph(aaSet, annotation);
            }
                             )
            ;

//            Console.WriteLine("NumPeptides: {0}", numPeptides);
            sw.Stop();
            var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;

            Console.WriteLine(@"{0:f4} sec", sec);
        }
Ejemplo n.º 18
0
        [TestCase(15, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta", 14862126)] // 15MB
        public void TestSequenceEnumerationNCTerm(double size, string dbFile, int expected)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName, dbFile);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_002216_235ACCEA.fasta";  // 1.5MB
            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_005133_8491EFA2.fasta";  // 3MB
            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004530_B63BD900.fasta";  // 6MB
            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta";  // 15MB
            const int numNTermCleavages = 1;
            const int numCTermCleavages = 0;
            var       db           = new FastaDatabase(dbFile);
            var       indexedDb    = new IndexedDatabase(db);
            var       numSequences = 0L;
            var       timeDB       = sw.Elapsed;

            Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");
            var estimatedAnnOff = indexedDb.EstimateTotalPeptides(1, 21, 300, numNTermCleavages, numCTermCleavages);
            var timeEstimate    = sw.Elapsed;

            Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
            Console.WriteLine("Estimated results: " + estimatedAnnOff);
            var annotationsAndOffsets = indexedDb.SequenceAnnotationsAndOffsetsWithNtermOrCtermCleavageNoLargerThan(21, 300, numNTermCleavages, numCTermCleavages);
            var timeGetAnn            = sw.Elapsed;

            Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");

            /*/Parallel.ForEach(
             *  annotationsAndOffsets,
             *  //                new ParallelOptions { MaxDegreeOfParallelism = 2},
             *  annotationAndOffset =>
             *  {
             *      Interlocked.Increment(ref numSequences);
             *      //++numSequences;
             *  }
             *  );/**/
            //using (var ofstream = new FileStream(Path.Combine(@"F:\InformedProteomicsTestFiles", Path.GetFileNameWithoutExtension(dbFile) + "_par.txt"), FileMode.Create))
            //using (var fout = new StreamWriter(ofstream))
            //{
            //    foreach (var annOff in annotationsAndOffsets)
            //    {
            //        numSequences++;
            //        fout.WriteLine(annOff.Annotation);
            //    }
            //}
            //foreach (var sao in annotationsAndOffsets)
            //{
            //    numSequences++;
            //}
            numSequences = annotationsAndOffsets.Count();
            var timeParForEach = sw.Elapsed;

            Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");

            Console.WriteLine("NumPeptides: {0}", numSequences);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
            //Assert.AreEqual(188961836, numSequences);
            Assert.AreEqual(expected, numSequences);
        }
Ejemplo n.º 19
0
        public void TestCountingProteoformsCloseToNTermOrCTerm()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const int minSequenceLength    = 21;  // 21
            const int maxSequenceLength    = 300; // 300
            const int maxNumNTermCleavages = 1;
            const int maxNumCTermCleavages = 0;

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            //const string dbFile = @"C:\cygwin\home\kims336\Data\TopDownQCShew\database\ID_002216_235ACCEA.fasta";
            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";

            if (!File.Exists(dbFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
            }

            var db        = new FastaDatabase(dbFile);
            var indexedDb = new IndexedDatabase(db);

            var both      = 0L;
            var nTermOnly = 0L;
            var cTermOnly = 0L;

            foreach (
                var annotationAndOffset in
                indexedDb.IntactSequenceAnnotationsAndOffsets(minSequenceLength, int.MaxValue,
                                                              maxNumCTermCleavages))
            {
                // numCTermCleavages <= maxNumCTermCleavages
                var annotation       = annotationAndOffset.Annotation;
                var length           = (annotation.Length - 4);
                var numNTermCleavage = 0;
                int cleavedLength;
                while ((cleavedLength = length - numNTermCleavage) >= minSequenceLength)
                {
                    if (cleavedLength <= maxSequenceLength)
                    {
                        if (numNTermCleavage <= maxNumNTermCleavages)
                        {
                            ++both;
                        }
                        else
                        {
                            ++cTermOnly;
                        }
                        var anno = numNTermCleavage == 0
                            ? annotation
                            : string.Format("{0}.{1}", annotation[1 + numNTermCleavage], annotation.Substring(2 + numNTermCleavage));
                        Console.WriteLine(anno);
                    }
                    ++numNTermCleavage;
                }
            }

            foreach (
                var annotationAndOffset in
                indexedDb.IntactSequenceAnnotationsAndOffsetsWithCTermCleavagesLargerThan(minSequenceLength, int.MaxValue,
                                                                                          maxNumCTermCleavages))
            {
                // numCTermCleavages > maxNumCTermCleavages
                var annotation = annotationAndOffset.Annotation;
                var length     = (annotation.Length - 4);
                for (var numNTermCleavage = 0; numNTermCleavage <= maxNumNTermCleavages; numNTermCleavage++)
                {
                    var cleavedLength = length - numNTermCleavage;
                    if (cleavedLength >= minSequenceLength && cleavedLength <= maxSequenceLength)
                    {
                        ++nTermOnly;
                        var anno = numNTermCleavage == 0
                            ? annotation
                            : string.Format("{0}.{1}", annotation[1 + numNTermCleavage], annotation.Substring(2 + numNTermCleavage));
                        Console.WriteLine(anno);
                    }
                }
            }

            Console.WriteLine("Both: {0}", both);
            Console.WriteLine("N-term only: {0}", nTermOnly);
            Console.WriteLine("C-term only: {0}", cTermOnly);
            Console.WriteLine("All: {0}", both + nTermOnly + cTermOnly);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 20
0
        public void TestTopDownScoringForAllXics()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            // Search parameters
            const int numNTermCleavages = 1;  // 30
            const int minLength         = 7;
            const int maxLength         = 1000;
            //const int minCharge = 5; // 3
            //const int maxCharge = 15; // 67
            const int    numMaxModsPerProtein = 0; // 6
            var          precursorTolerance   = new Tolerance(10);
            const string dbFilePath           = @"..\..\..\TestFiles\sprot.Ecoli.2012_07.fasta";
            //const string dbFilePath = @"..\..\..\TestFiles\sprot.Ecoli.2012_07.icdecoy.KR.fasta";

            //const string dbFilePath = @"..\..\..\TestFiles\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";
            // const string dbFilePath =
            //    @"C:\cygwin\home\kims336\Data\TopDown\ID_003558_56D73071.fasta";

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();
            Console.Write("Reading raw file...");
            const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
            var          run          = InMemoryLcMsRun.GetLcMsRun(specFilePath);

            sw.Stop();

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);

            // Configure amino acid set
            //            var pyroGluQ = new SearchModification(Modification.PyroGluQ, 'Q', SequenceLocation.ProteinNTerm, false);
            var dehydro      = new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false);
            var cysteinylC   = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
            var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);
            //            var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);

            var searchModifications = new List <SearchModification>
            {
                //pyroGluQ,
                dehydro,
                cysteinylC,
                glutathioneC,
                //oxM
            };
            var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

            var targetDb = new FastaDatabase(dbFilePath);
            //   targetDb.CreateDecoyDatabase(Enzyme.Trypsin);
            //   System.Environment.Exit(1);
            var indexedDb = new IndexedDatabase(targetDb);

            var  numProteins           = 0;
            long totalProtCompositions = 0;

            //long numXics = 0;
            TopDownScorer.MaxCharge = 25;
            TopDownScorer.MinCharge = 8;

            sw.Reset();
            sw.Start();
            Console.WriteLine("Generating XICs...");

            foreach (var protAnnotationAndOffset in indexedDb.IntactSequenceAnnotationsAndOffsets(minLength, maxLength))
            {
                ++numProteins;
                //if (numProteins > 2000) break;

                if (numProteins % 1000 == 0)
                {
                    Console.WriteLine("Processed {0} proteins", numProteins);
                }

                //Console.WriteLine(protAnnotation);

                var seqGraph = SequenceGraph.CreateGraph(aaSet, protAnnotationAndOffset.Annotation);

                //Console.WriteLine(seqGraph.GetSequenceCompositions()[0]);

                if (seqGraph == null)
                {
                    continue;
                }

                for (var nTermCleavages = 0; nTermCleavages <= numNTermCleavages; nTermCleavages++)
                {
                    if (nTermCleavages > 0)
                    {
                        seqGraph.CleaveNTerm();
                    }
                    var protCompositions = seqGraph.GetSequenceCompositions();
                    foreach (var protComposition in protCompositions)
                    {
                        totalProtCompositions++;
                        // Console.WriteLine(protComposition);
                        var scorer = new TopDownScorer(protComposition, run, precursorTolerance);
                        var score  = scorer.GetScore();

                        Console.WriteLine(score);
                    }
                }
            }

            sw.Stop();
            Console.WriteLine("NumProteins: {0}", numProteins);
            Console.WriteLine("NumProteinCompositions: {0}", totalProtCompositions);

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 21
0
        public void TestRunningTimeChromGen()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string rafFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raf";

            if (!File.Exists(rafFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rafFilePath);
            }

            var rafRun = new PbfLcMsRun(rafFilePath);

            var tolerance = new Tolerance(10);

            const string dbFile = @"D:\Research\Data\CommonContaminants\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";

            if (!File.Exists(dbFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
            }

            var db        = new FastaDatabase(dbFile);
            var indexedDb = new IndexedDatabase(db);
            var aaSet     = new AminoAcidSet(Modification.Carbamidomethylation);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();
            var numPeptides = 0;

            foreach (var peptide in indexedDb.AnnotationsAndOffsets(6, 30, 2, 2, Enzyme.Trypsin))
            {
                ++numPeptides;
                var comp = new Sequence(peptide.Annotation.Substring(2, peptide.Annotation.Length - 4), aaSet).Composition + Composition.H2O;
                var mz   = new Ion(comp, 2).GetMonoIsotopicMz();
                //Console.WriteLine(peptide.Annotation + " " + mz);
                rafRun.GetFullPrecursorIonExtractedIonChromatogram(mz, tolerance);
                //run.GetFullPrecursorIonExtractedIonChromatogram(mz, tolerance);

                //var xic1 = run.GetFullPrecursorIonExtractedIonChromatogram(mz, tolerance);
                //var xic2 = rafRun.GetFullPrecursorIonExtractedIonChromatogram(mz, tolerance);
                //Assert.True(xic1.Count == xic2.Count);
                //for (var i = 0; i < xic1.Count; i++)
                //{
                //    if (!xic1[i].Equals(xic2[i]))
                //    {
                //        Console.WriteLine("{0} {1} {2}", i, xic1[i], xic2[i]);
                //    }
                //    Assert.True(xic1[i].Equals(xic2[i]));
                //}

                if (numPeptides == 100000)
                {
                    break;
                }
            }
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 22
0
        public void TestCountingProteoformsCloseToNTermOrCTerm()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const int minSequenceLength    = 21;  // 21
            const int maxSequenceLength    = 300; // 300
            const int maxNumNTermCleavages = 1;
            const int maxNumCTermCleavages = 0;

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            var fastaFile = Utils.GetTestFile(methodName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\Short.fasta"));

            var db        = new FastaDatabase(fastaFile.FullName);
            var indexedDb = new IndexedDatabase(db);

            var both      = 0L;
            var nTermOnly = 0L;
            var cTermOnly = 0L;

            var displayedResults = 0;

            foreach (
                var annotationAndOffset in
                indexedDb.IntactSequenceAnnotationsAndOffsets(minSequenceLength, int.MaxValue,
                                                              maxNumCTermCleavages))
            {
                // numCTermCleavages <= maxNumCTermCleavages
                var annotation       = annotationAndOffset.Annotation;
                var length           = (annotation.Length - 4);
                var numNTermCleavage = 0;
                int cleavedLength;
                while ((cleavedLength = length - numNTermCleavage) >= minSequenceLength)
                {
                    if (cleavedLength <= maxSequenceLength)
                    {
                        if (numNTermCleavage <= maxNumNTermCleavages)
                        {
                            ++both;
                        }
                        else
                        {
                            ++cTermOnly;
                        }
                        var anno = numNTermCleavage == 0
                            ? annotation
                            : string.Format("{0}.{1}", annotation[1 + numNTermCleavage], annotation.Substring(2 + numNTermCleavage));

                        if (displayedResults < 20)
                        {
                            Console.WriteLine(anno);
                        }
                        displayedResults++;
                    }
                    ++numNTermCleavage;
                }
            }

            displayedResults = 0;

            foreach (
                var annotationAndOffset in
                indexedDb.IntactSequenceAnnotationsAndOffsetsWithCTermCleavagesLargerThan(minSequenceLength, int.MaxValue,
                                                                                          maxNumCTermCleavages))
            {
                // numCTermCleavages > maxNumCTermCleavages
                var annotation = annotationAndOffset.Annotation;
                var length     = (annotation.Length - 4);
                for (var numNTermCleavage = 0; numNTermCleavage <= maxNumNTermCleavages; numNTermCleavage++)
                {
                    var cleavedLength = length - numNTermCleavage;
                    if (cleavedLength >= minSequenceLength && cleavedLength <= maxSequenceLength)
                    {
                        ++nTermOnly;
                        var anno = numNTermCleavage == 0
                            ? annotation
                            : string.Format("{0}.{1}", annotation[1 + numNTermCleavage], annotation.Substring(2 + numNTermCleavage));

                        if (displayedResults < 20)
                        {
                            Console.WriteLine(anno);
                        }
                        displayedResults++;
                    }
                }
            }

            Console.WriteLine("Both: {0}", both);
            Console.WriteLine("N-term only: {0}", nTermOnly);
            Console.WriteLine("C-term only: {0}", cTermOnly);
            Console.WriteLine("All: {0}", both + nTermOnly + cTermOnly);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Ejemplo n.º 23
0
        [TestCase(3, @"TEST_FOLDER\MSPathFinderT\ID_005133_8491EFA2.fasta", 323719193)]   // 3MB
        //[TestCase(6, @"TEST_FOLDER\MSPathFinderT\ID_004530_B63BD900.fasta", 595227563)]  // 6MB
        //[TestCase(15, @"TEST_FOLDER\MSPathFinderT\ID_004208_295531A4.fasta", 1882434687)]  // 15MB
        public void TestSequenceEnumeration(double size, string dbFile, int expected)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName, dbFile);

            var fastaFile = Utils.GetTestFile(methodName, dbFile.Replace("TEST_FOLDER", Utils.DEFAULT_TEST_FILE_FOLDER));

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            var db           = new FastaDatabase(fastaFile.FullName);
            var indexedDb    = new IndexedDatabase(db);
            var numSequences = 0L;
            var timeDB       = sw.Elapsed;

            Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");
            var estimatedAnnOff = indexedDb.EstimateTotalPeptides(0, 30, 250);
            var timeEstimate    = sw.Elapsed;

            Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
            //int coreCount = 0;
            //foreach (var item in new System.Management.ManagementObjectSearcher("Select NumberOfCores from Win32_Processor").Get())
            //{
            //    coreCount += int.Parse(item["NumberOfCores"].ToString());
            //}
            //Console.WriteLine("Number Of Cores: {0}", coreCount);
            //Console.WriteLine("Processors: " + System.Environment.ProcessorCount);
            Console.WriteLine("Estimated results: " + estimatedAnnOff);
            var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzymeParallel(30, 250);
            var timeGetAnn            = sw.Elapsed;

            Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");

            /*/Parallel.ForEach(
             *  annotationsAndOffsets,
             *  //                new ParallelOptions { MaxDegreeOfParallelism = 2},
             *  annotationAndOffset =>
             *  {
             *      Interlocked.Increment(ref numSequences);
             *      //++numSequences;
             *  }
             *  );/**/
            //annotationsAndOffsets.Select(annotationsAndOffset => annotationsAndOffset.)
            // Below, original: 110, 109(total) seconds
            // Parallelizing AnnotationsAndOffsetsNoEnzyme: 86 seconds
            // Parallelizing AnnotationsAndOffsetsNoEnzyme, yield returns: 79.6, 94, 60, 60 seconds
            //
            // 3MB
            // serial:
            // Parallel2: 107,
            //
            // 6MB
            // serial:
            // Parallel2:
            //
            // 15MB
            // serial:
            // Parallel2:
            //using (var ofstream = new FileStream(Path.Combine(@"F:\InformedProteomicsTestFiles", Path.GetFileNameWithoutExtension(fastaFile) + "_par.txt"), FileMode.Create))
            //using (var fout = new StreamWriter(ofstream))
            //{
            //    foreach (var annOff in annotationsAndOffsets)
            //    {
            //        numSequences++;
            //        fout.WriteLine(annOff.Annotation);
            //    }
            //}
            numSequences = annotationsAndOffsets.Count();
            var timeParForEach = sw.Elapsed;

            Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");

            Console.WriteLine("NumPeptides: {0}", numSequences);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
            //Assert.AreEqual(188961836, numSequences);
            Assert.AreEqual(expected, numSequences);
        }
Ejemplo n.º 24
0
        }                                                         // true: target and decoy, false: target only, null: decoy only

        public void QuickId()
        {
            const string rawFilePath   = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
            const string modFilePath   = @"H:\Research\QCShew_TopDown\Production\Mods.txt";
            const int    numBits       = 29; // max error: 4ppm
            const int    minCharge     = 1;
            const int    maxCharge     = 20;
            var          tolerance     = new Tolerance(10);
            const double corrThreshold = 0.7;

            var          comparer        = new MzComparerWithBinning(numBits);
            const double minFragmentMass = 200.0;
            const double maxFragmentMass = 50000.0;
            var          minFragMassBin  = comparer.GetBinNumber(minFragmentMass);
            var          maxFragMassBin  = comparer.GetBinNumber(maxFragmentMass);

            var aminoAcidSet = new AminoAcidSet(modFilePath);

            var run           = PbfLcMsRun.GetLcMsRun(rawFilePath);
            var ms2ScanNumArr = run.GetScanNumbers(2).ToArray();

            var sw = new Stopwatch();

            sw.Start();
            Console.Write("Building Spectrum Arrays...");
            var massVectors = new BitArray[maxFragMassBin - minFragMassBin + 1];

            for (var i = minFragMassBin; i <= maxFragMassBin; i++)
            {
                massVectors[i - minFragMassBin] = new BitArray(run.MaxLcScan + 1);
            }

            foreach (var ms2ScanNum in ms2ScanNumArr)
            {
                var productSpec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                if (productSpec == null)
                {
                    continue;
                }

                var deconvolutedPeaks = Deconvoluter.GetDeconvolutedPeaks(productSpec.Peaks, minCharge, maxCharge, 2, 1.1, tolerance, corrThreshold);

                if (deconvolutedPeaks == null)
                {
                    continue;
                }

                foreach (var p in deconvolutedPeaks)
                {
                    var mass      = p.Mass;
                    var deltaMass = tolerance.GetToleranceAsDa(mass, 1);
                    var minMass   = mass - deltaMass;
                    var maxMass   = mass + deltaMass;

                    var minBinNum = comparer.GetBinNumber(minMass);
                    var maxBinNum = comparer.GetBinNumber(maxMass);
                    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
                    {
                        if (binNum >= minFragMassBin && binNum <= maxFragMassBin)
                        {
                            massVectors[binNum - minFragMassBin][ms2ScanNum] = true;
                        }
                    }
                }
            }
            sw.Stop();
            Console.WriteLine(@"{0:f1} sec.", sw.Elapsed.TotalSeconds);

            sw.Reset();
            sw.Start();
            var fastaDb = new FastaDatabase(fastaFilePath);

            fastaDb.Read();
            var indexedDb   = new IndexedDatabase(fastaDb);
            var numProteins = 0;
            var intactProteinAnnotationAndOffsets =
                indexedDb.IntactSequenceAnnotationsAndOffsets(0, int.MaxValue);

            var bestProtein = new string[run.MaxLcScan + 1];
            var bestScore   = new int[run.MaxLcScan + 1];

            foreach (var annotationAndOffset in intactProteinAnnotationAndOffsets)
            {
                if (++numProteins % 10 == 0)
                {
                    Console.WriteLine(@"Processing, {0} proteins done, {1:f1} sec elapsed",
                                      numProteins,
                                      sw.Elapsed.TotalSeconds);
                }
                var annotation = annotationAndOffset.Annotation;
                var offset     = annotationAndOffset.Offset;

                var protSequence = annotation.Substring(2, annotation.Length - 4);

                // suffix
                var seqGraph = SequenceGraph.CreateGraph(aminoAcidSet, AminoAcid.ProteinNTerm, protSequence,
                                                         AminoAcid.ProteinCTerm);
                if (seqGraph == null)
                {
                    continue;
                }

                for (var numNTermCleavage = 0; numNTermCleavage <= 1; numNTermCleavage++)
                {
                    if (numNTermCleavage > 0)
                    {
                        seqGraph.CleaveNTerm();
                    }
                    var allCompositions = seqGraph.GetAllFragmentNodeCompositions();

                    var scoreArr = new int[run.MaxLcScan + 1];
                    foreach (var fragComp in allCompositions)
                    {
                        var suffixMass = fragComp.Mass + BaseIonType.Y.OffsetComposition.Mass;
                        var binNum     = comparer.GetBinNumber(suffixMass);
                        if (binNum < minFragMassBin || binNum > maxFragMassBin)
                        {
                            continue;
                        }

                        var vector = massVectors[binNum - minFragMassBin];
                        foreach (var ms2ScanNum in ms2ScanNumArr)
                        {
                            if (vector[ms2ScanNum])
                            {
                                ++scoreArr[ms2ScanNum];
                            }
                        }
                    }
                    foreach (var ms2ScanNum in ms2ScanNumArr)
                    {
                        if (scoreArr[ms2ScanNum] > bestScore[ms2ScanNum])
                        {
                            bestScore[ms2ScanNum] = scoreArr[ms2ScanNum];
                            var proteinName = fastaDb.GetProteinName(offset);
                            bestProtein[ms2ScanNum] = proteinName + (numNTermCleavage == 1 ? "'" : "");
                        }
                    }
                }
                // prefix
            }

            Console.WriteLine("ScanNum\tBestProtein\tScore");
            foreach (var ms2ScanNum in ms2ScanNumArr)
            {
                Console.WriteLine("{0}\t{1}\t{2}", ms2ScanNum, bestScore[ms2ScanNum], bestProtein[ms2ScanNum] ?? "");
            }
        }
Ejemplo n.º 25
0
        [TestCase(3, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_005133_8491EFA2.fasta", 323719193)]   // 3MB
        //[TestCase(6, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004530_B63BD900.fasta", 595227563)]  // 6MB
        //[TestCase(15, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta", 1882434687)]  // 15MB
        public void TestSequenceEnumerationSerial(double size, string dbFile, int expected)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName, dbFile);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_002216_235ACCEA.fasta";  // 1.5MB
            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_005133_8491EFA2.fasta";  // 3MB
            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004530_B63BD900.fasta";  // 6MB
            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta";  // 15MB
            var db        = new FastaDatabase(dbFile);
            var indexedDb = new IndexedDatabase(db);

            indexedDb.Read();
            var numSequences = 0L;
            var timeDB       = sw.Elapsed;

            Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");
            var estimatedAnnOff = indexedDb.EstimateTotalPeptides(0, 30, 250);
            var timeEstimate    = sw.Elapsed;

            Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
            Console.WriteLine("Estimated results: " + estimatedAnnOff);
            var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzyme(30, 250);
            var timeGetAnn            = sw.Elapsed;

            Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");
            //foreach (var annotationsAndOffset in annotationsAndOffsets)
            //{
            //    //Interlocked.Increment(ref numSequences);
            //    ++numSequences;
            //}
            //using (
            //    var ofstream =
            //        new FileStream(
            //            Path.Combine(@"F:\InformedProteomicsTestFiles",
            //                Path.GetFileNameWithoutExtension(dbFile) + "_old.txt"), FileMode.Create))
            //using (var fout = new StreamWriter(ofstream))
            //{
            //    foreach (var annOff in annotationsAndOffsets)
            //    {
            //        numSequences++;
            //        fout.WriteLine(annOff.Annotation);
            //    }
            //}
            numSequences = annotationsAndOffsets.Count();
            var timeParForEach = sw.Elapsed;

            Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");

            Console.WriteLine("NumPeptides: {0}", numSequences);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
            //Assert.AreEqual(188961836, numSequences);
            Assert.AreEqual(expected, numSequences);
        }