Esempio n. 1
0
        public void TestEnumeratingPeptides()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            var fastaFile = Utils.GetTestFile(methodName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\Short.fasta"));

            var db          = new FastaDatabase(fastaFile.FullName);
            var indexedDb   = new IndexedDatabase(db);
            var numPeptides = 0;

            foreach (var annotationAndOffset in indexedDb.AnnotationsAndOffsetsNoEnzyme(10, 13))
            {
                if (numPeptides < 20)
                {
                    Console.WriteLine(annotationAndOffset.Annotation);
                }
                numPeptides++;
            }
            sw.Stop();

            Console.WriteLine("Peptide count: {0}", numPeptides);

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Esempio n. 2
0
        public void TestEnumeratingPeptides()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";

            if (!File.Exists(dbFile))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
            }

            var db        = new FastaDatabase(dbFile);
            var indexedDb = new IndexedDatabase(db);

            foreach (var annotationAndOffset in indexedDb.AnnotationsAndOffsetsNoEnzyme(10, 13))
            {
                Console.WriteLine(annotationAndOffset.Annotation);
            }
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Esempio n. 3
0
        [TestCase(1.5, @"TEST_FOLDER\MSPathFinderT\ID_002216_235ACCEA.fasta", 188961836)]  // 1.5MB
        //[TestCase(3, @"TEST_FOLDER\MSPathFinderT\ID_005133_8491EFA2.fasta", 323719193)]  // 3MB
        //[TestCase(6, @"TEST_FOLDER\MSPathFinderT\ID_004530_B63BD900.fasta", 595227563)]  // 6MB
        //[TestCase(15, @"TEST_FOLDER\MSPathFinderT\ID_004208_295531A4.fasta", 1882434687)]  // 15MB
        public void TestSequenceEnumerationSerial(double size, string dbFile, int expected)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName, dbFile);

            var fastaFile = Utils.GetTestFile(methodName, dbFile.Replace("TEST_FOLDER", Utils.DEFAULT_TEST_FILE_FOLDER));

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            var db        = new FastaDatabase(fastaFile.FullName);
            var indexedDb = new IndexedDatabase(db);

            indexedDb.Read();
            var numSequences = 0L;
            var timeDB       = sw.Elapsed;

            Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");
            var estimatedAnnOff = indexedDb.EstimateTotalPeptides(0, 30, 250);
            var timeEstimate    = sw.Elapsed;

            Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
            Console.WriteLine("Estimated results: " + estimatedAnnOff);
            var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzyme(30, 250);
            var timeGetAnn            = sw.Elapsed;

            Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");
            //foreach (var annotationsAndOffset in annotationsAndOffsets)
            //{
            //    //Interlocked.Increment(ref numSequences);
            //    ++numSequences;
            //}
            //using (
            //    var ofstream =
            //        new FileStream(
            //            Path.Combine(@"F:\InformedProteomicsTestFiles",
            //                Path.GetFileNameWithoutExtension(fastaFile) + "_old.txt"), FileMode.Create))
            //using (var fout = new StreamWriter(ofstream))
            //{
            //    foreach (var annOff in annotationsAndOffsets)
            //    {
            //        numSequences++;
            //        fout.WriteLine(annOff.Annotation);
            //    }
            //}
            numSequences = annotationsAndOffsets.Count();
            var timeParForEach = sw.Elapsed;

            Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");

            Console.WriteLine("NumPeptides: {0}", numSequences);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
            //Assert.AreEqual(188961836, numSequences);
            Assert.AreEqual(expected, numSequences);
        }
Esempio n. 4
0
        public void TestNominalMassErrors()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            const int minLength = 300;
            const int maxLength = 400;

            var sw = new System.Diagnostics.Stopwatch();

            var fastaFile = Utils.GetTestFile(methodName, Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\ID_003962_71E1A1D4.fasta"));

            var db = new FastaDatabase(fastaFile.FullName);

            db.Read();
            var indexedDb    = new IndexedDatabase(db);
            var numSequences = 0L;

            sw.Start();

            var hist  = new long[11];
            var aaSet = new AminoAcidSet();

            foreach (var peptideAnnotationAndOffset in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
            {
                ++numSequences;
                var annotation   = peptideAnnotationAndOffset.Annotation;
                var sequenceStr  = annotation.Substring(2, annotation.Length - 4);
                var sequenceComp = aaSet.GetComposition(sequenceStr);
                var mass         = sequenceComp.Mass;
                var nominalMass  = sequenceComp.NominalMass;
                var error        = (int)Math.Round(mass * Constants.RescalingConstant) - nominalMass;
                var errorBin     = error + hist.Length / 2;
                if (errorBin < 0)
                {
                    errorBin = 0;
                }
                if (errorBin >= hist.Length)
                {
                    errorBin = hist.Length - 1;
                }
                hist[errorBin]++;
            }

            Console.WriteLine("NumSequences: {0}", numSequences);
            for (var i = 0; i < hist.Length; i++)
            {
                Console.WriteLine("{0}\t{1}\t{2}", i - hist.Length / 2, hist[i], hist[i] / (double)numSequences);
            }

            sw.Stop();

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Esempio n. 5
0
        public void TestNominalMassErrors()
        {
            const int minLength = 300;
            const int maxLength = 400;

            var sw = new System.Diagnostics.Stopwatch();

//            const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta";
            const string dbFile = @"C:\cygwin\home\kims336\Data\TopDownJia\database\ID_003962_71E1A1D4.fasta";

            //const string dbFile = @"C:\cygwin\home\kims336\Data\TopDownJia\database\TargetProteins.fasta";
            var db = new FastaDatabase(dbFile);

            db.Read();
            var indexedDb    = new IndexedDatabase(db);
            var numSequences = 0L;

            sw.Start();

            var hist  = new long[11];
            var aaSet = new AminoAcidSet();

            foreach (var peptideAnnotationAndOffset in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
            {
                ++numSequences;
                var annotation   = peptideAnnotationAndOffset.Annotation;
                var sequenceStr  = annotation.Substring(2, annotation.Length - 4);
                var sequenceComp = aaSet.GetComposition(sequenceStr);
                var mass         = sequenceComp.Mass;
                var nominalMass  = sequenceComp.NominalMass;
                var error        = (int)Math.Round(mass * Constants.RescalingConstant) - nominalMass;
                var errorBin     = error + hist.Length / 2;
                if (errorBin < 0)
                {
                    errorBin = 0;
                }
                if (errorBin >= hist.Length)
                {
                    errorBin = hist.Length - 1;
                }
                hist[errorBin]++;
            }

            Console.WriteLine("NumSequences: {0}", numSequences);
            for (var i = 0; i < hist.Length; i++)
            {
                Console.WriteLine("{0}\t{1}\t{2}", i - hist.Length / 2, hist[i], hist[i] / (double)numSequences);
            }

            sw.Stop();

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
        }
Esempio n. 6
0
        private IEnumerable <AnnotationAndOffset> GetAnnotationsAndOffsets(FastaDatabase database)
        {
            var indexedDbTarget = new IndexedDatabase(database);
            IEnumerable <AnnotationAndOffset> annotationsAndOffsets;

            if (NumTolerableTermini == 0)
            {
                annotationsAndOffsets = indexedDbTarget.AnnotationsAndOffsetsNoEnzyme(MinSequenceLength, MaxSequenceLength);
            }
            else
            {
                annotationsAndOffsets = indexedDbTarget.AnnotationsAndOffsets(MinSequenceLength, MaxSequenceLength,
                                                                              NumTolerableTermini, 2, Enzyme);
            }

            return(annotationsAndOffsets);
        }
Esempio n. 7
0
        [TestCase(3, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_005133_8491EFA2.fasta", 323719193)]   // 3MB
        //[TestCase(6, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004530_B63BD900.fasta", 595227563)]  // 6MB
        //[TestCase(15, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta", 1882434687)]  // 15MB
        public void TestSequenceEnumerationSerial(double size, string dbFile, int expected)
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName, dbFile);

            var sw = new System.Diagnostics.Stopwatch();

            sw.Start();

            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_002216_235ACCEA.fasta";  // 1.5MB
            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_005133_8491EFA2.fasta";  // 3MB
            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004530_B63BD900.fasta";  // 6MB
            //const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta";  // 15MB
            var db        = new FastaDatabase(dbFile);
            var indexedDb = new IndexedDatabase(db);

            indexedDb.Read();
            var numSequences = 0L;
            var timeDB       = sw.Elapsed;

            Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");
            var estimatedAnnOff = indexedDb.EstimateTotalPeptides(0, 30, 250);
            var timeEstimate    = sw.Elapsed;

            Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
            Console.WriteLine("Estimated results: " + estimatedAnnOff);
            var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzyme(30, 250);
            var timeGetAnn            = sw.Elapsed;

            Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");
            //foreach (var annotationsAndOffset in annotationsAndOffsets)
            //{
            //    //Interlocked.Increment(ref numSequences);
            //    ++numSequences;
            //}
            //using (
            //    var ofstream =
            //        new FileStream(
            //            Path.Combine(@"F:\InformedProteomicsTestFiles",
            //                Path.GetFileNameWithoutExtension(dbFile) + "_old.txt"), FileMode.Create))
            //using (var fout = new StreamWriter(ofstream))
            //{
            //    foreach (var annOff in annotationsAndOffsets)
            //    {
            //        numSequences++;
            //        fout.WriteLine(annOff.Annotation);
            //    }
            //}
            numSequences = annotationsAndOffsets.Count();
            var timeParForEach = sw.Elapsed;

            Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");

            Console.WriteLine("NumPeptides: {0}", numSequences);
            sw.Stop();

            Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
            //Assert.AreEqual(188961836, numSequences);
            Assert.AreEqual(expected, numSequences);
        }