/// <summary>
/// Estimates the total number of peptides in a FASTA database for maximum sequence
/// lengths of 300, 400 and 500, prints each estimate, and then checks all three
/// against the expected values.
/// </summary>
/// <param name="dbFile">
/// Path to the FASTA file. Any "TEST_FOLDER" token is replaced with
/// <c>Utils.DEFAULT_TEST_FILE_FOLDER</c> before the file is resolved.
/// </param>
/// <param name="expectedMaxLen300">Expected estimate when the maximum length is 300.</param>
/// <param name="expectedMaxLen400">Expected estimate when the maximum length is 400.</param>
/// <param name="expectedMaxLen500">Expected estimate when the maximum length is 500.</param>
public void TestForManyMods(string dbFile, int expectedMaxLen300, int expectedMaxLen400, int expectedMaxLen500)
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    var resolvedDbPath = dbFile.Replace("TEST_FOLDER", Utils.DEFAULT_TEST_FILE_FOLDER);
    var fastaFile = Utils.GetTestFile(testName, resolvedDbPath);

    var fastaDb = new FastaDatabase(fastaFile.FullName);
    var indexedDb = new IndexedDatabase(fastaDb);
    indexedDb.Read();

    // All three estimates are computed and printed before any assertion runs,
    // so the console output is complete even when an early comparison fails.
    var estimateFor300 = indexedDb.EstimateTotalPeptides(1, 21, 300, 1, 0);
    Console.WriteLine("{0,12:N0} estimated peptides for max length 300", estimateFor300);

    var estimateFor400 = indexedDb.EstimateTotalPeptides(1, 21, 400, 1, 0);
    Console.WriteLine("{0,12:N0} estimated peptides for max length 400", estimateFor400);

    var estimateFor500 = indexedDb.EstimateTotalPeptides(1, 21, 500, 1, 0);
    Console.WriteLine("{0,12:N0} estimated peptides for max length 500", estimateFor500);

    Assert.AreEqual(expectedMaxLen300, estimateFor300);
    Assert.AreEqual(expectedMaxLen400, estimateFor400);
    Assert.AreEqual(expectedMaxLen500, estimateFor500);
}
/// <summary>
/// Indexes <paramref name="database"/>, estimates how many sequences will be searched,
/// and returns the annotation/offset enumeration that corresponds to <c>SearchMode</c>.
/// </summary>
/// <param name="database">FASTA database to index and read.</param>
/// <param name="estimatedProteins">
/// Receives the value returned by <c>EstimateTotalPeptides</c> for the current search settings.
/// </param>
/// <param name="cancellationToken">
/// Optional token; it is forwarded only to the no-enzyme parallel enumeration.
/// </param>
/// <returns>The annotations and offsets to search for the current <c>SearchMode</c>.</returns>
private IEnumerable<AnnotationAndOffset> GetAnnotationsAndOffsets(FastaDatabase database, out long estimatedProteins, CancellationToken? cancellationToken = null)
{
    var indexedDb = new IndexedDatabase(database);
    indexedDb.Read();

    estimatedProteins = indexedDb.EstimateTotalPeptides(
        SearchMode,
        MinSequenceLength,
        MaxSequenceLength,
        MaxNumNTermCleavages,
        MaxNumCTermCleavages);

    // Multiple internal cleavages: enumerate without an enzyme, in parallel.
    if (SearchMode == InternalCleavageType.MultipleInternalCleavages)
    {
        return indexedDb.AnnotationsAndOffsetsNoEnzymeParallel(
            MinSequenceLength,
            MaxSequenceLength,
            MaxNumThreads,
            cancellationToken);
    }

    // No internal cleavage: enumerate intact sequences.
    if (SearchMode == InternalCleavageType.NoInternalCleavage)
    {
        return indexedDb.IntactSequenceAnnotationsAndOffsets(
            MinSequenceLength,
            MaxSequenceLength,
            MaxNumCTermCleavages);
    }

    // Any other mode: enumerate sequences bounded by the N-/C-terminal cleavage limits.
    return indexedDb.SequenceAnnotationsAndOffsetsWithNtermOrCtermCleavageNoLargerThan(
        MinSequenceLength,
        MaxSequenceLength,
        MaxNumNTermCleavages,
        MaxNumCTermCleavages);
}
/// <summary>
/// Times the serial no-enzyme enumeration of sequence annotations (lengths 30 to 250)
/// for a FASTA database and verifies the number of enumerated sequences.
/// </summary>
/// <param name="size">
/// Label for the test case (the test-case comments give it as the file size in MB);
/// it is not used by the test body.
/// </param>
/// <param name="dbFile">
/// Path to the FASTA file. Any "TEST_FOLDER" token is replaced with
/// <c>Utils.DEFAULT_TEST_FILE_FOLDER</c> before the file is resolved.
/// </param>
/// <param name="expected">Expected number of enumerated sequences.</param>
[TestCase(1.5, @"TEST_FOLDER\MSPathFinderT\ID_002216_235ACCEA.fasta", 188961836)]  // 1.5MB
//[TestCase(3, @"TEST_FOLDER\MSPathFinderT\ID_005133_8491EFA2.fasta", 323719193)]  // 3MB
//[TestCase(6, @"TEST_FOLDER\MSPathFinderT\ID_004530_B63BD900.fasta", 595227563)]  // 6MB
//[TestCase(15, @"TEST_FOLDER\MSPathFinderT\ID_004208_295531A4.fasta", 1882434687)]  // 15MB
public void TestSequenceEnumerationSerial(double size, string dbFile, int expected)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName, dbFile);

    var fastaFile = Utils.GetTestFile(methodName, dbFile.Replace("TEST_FOLDER", Utils.DEFAULT_TEST_FILE_FOLDER));

    var sw = System.Diagnostics.Stopwatch.StartNew();

    var db = new FastaDatabase(fastaFile.FullName);
    var indexedDb = new IndexedDatabase(db);
    indexedDb.Read();

    var timeDB = sw.Elapsed;
    Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");

    var estimatedAnnOff = indexedDb.EstimateTotalPeptides(0, 30, 250);
    var timeEstimate = sw.Elapsed;
    Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
    Console.WriteLine("Estimated results: " + estimatedAnnOff);

    var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzyme(30, 250);
    var timeGetAnn = sw.Elapsed;
    Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");

    // LongCount() instead of Count(): Count() throws OverflowException once the
    // sequence holds more than int.MaxValue elements, and the larger databases
    // listed above already approach that limit (1,882,434,687 for the 15MB file).
    // With LongCount() an oversized result fails the assertion below instead.
    var numSequences = annotationsAndOffsets.LongCount();

    // NOTE(review): the label says "Parallel ForEach", but the time reported here
    // covers the LongCount() enumeration above; text kept unchanged.
    var timeParForEach = sw.Elapsed;
    Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");
    Console.WriteLine("NumPeptides: {0}", numSequences);

    sw.Stop();
    Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);

    Assert.AreEqual(expected, numSequences);
}
/// <summary>
/// Times the enumeration of intact sequence annotations (lengths 21 to 300, no
/// C-terminal cleavages) for a FASTA database and verifies the number of sequences.
/// </summary>
/// <param name="dbFile">
/// Path to the FASTA file. Any "TEST_FOLDER" token is replaced with
/// <c>Utils.DEFAULT_TEST_FILE_FOLDER</c> before the file is resolved.
/// </param>
/// <param name="expected">Expected number of enumerated sequences.</param>
[TestCase(@"TEST_FOLDER\MSPathFinderT\ID_004530_B63BD900.fasta", 8898)]  // 6MB
//[TestCase(@"TEST_FOLDER\MSPathFinderT\ID_004208_295531A4.fasta", 6334)]  // 15MB
public void TestSequenceEnumerationIntact(string dbFile, int expected)
{
    const int minLength = 21;
    const int maxLength = 300;
    const int numCTermCleavages = 0;

    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName, dbFile);

    var resolvedDbPath = dbFile.Replace("TEST_FOLDER", Utils.DEFAULT_TEST_FILE_FOLDER);
    var fastaFile = Utils.GetTestFile(testName, resolvedDbPath);

    var stopwatch = Stopwatch.StartNew();

    // NOTE(review): indexedDb.Read() is not called before the estimate below;
    // confirm that is intentional.
    var indexedDb = new IndexedDatabase(new FastaDatabase(fastaFile.FullName));

    var afterLoad = stopwatch.Elapsed;
    Console.WriteLine("Read DB in " + afterLoad.TotalSeconds + " Seconds");

    var estimate = indexedDb.EstimateTotalPeptides(2, minLength, maxLength, 1, numCTermCleavages);
    var afterEstimate = stopwatch.Elapsed;
    var estimateDuration = afterEstimate - afterLoad;
    Console.WriteLine("Read Estimate in " + estimateDuration.TotalSeconds + " Seconds");
    Console.WriteLine("Estimated results: " + estimate);

    var intactSequences = indexedDb.IntactSequenceAnnotationsAndOffsets(minLength, maxLength, numCTermCleavages);
    var afterGetAnnotations = stopwatch.Elapsed;
    var getAnnotationsDuration = afterGetAnnotations - afterEstimate;
    Console.WriteLine("Read Annotations in " + getAnnotationsDuration.TotalSeconds + " Seconds");

    long numSequences = intactSequences.Count();

    // The time reported under the "Parallel ForEach" label covers the Count() call above.
    var afterCount = stopwatch.Elapsed;
    var countDuration = afterCount - afterGetAnnotations;
    Console.WriteLine("Parallel ForEach in " + countDuration.TotalSeconds + " Seconds");
    Console.WriteLine("NumPeptides: {0}", numSequences);

    stopwatch.Stop();
    Console.WriteLine(@"{0:f4} sec", stopwatch.Elapsed.TotalSeconds);

    Assert.AreEqual(expected, numSequences);
}
/// <summary>
/// Prints the estimated number of peptides in a fixed FASTA database for maximum
/// sequence lengths of 300, 400 and 500. The method makes no assertions and ends
/// by printing a "Test not implemented" message.
/// </summary>
public void TestForManyMods()
{
    const string dbFilePath = @"\\protoapps\UserData\Jungkap\Lewy\db\ID_005140_7A170668.fasta";

    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    var fastaDb = new FastaDatabase(dbFilePath);
    var indexedDb = new IndexedDatabase(fastaDb);
    indexedDb.Read();

    // One estimate per maximum sequence length, printed in ascending order.
    foreach (var maxLength in new[] { 300, 400, 500 })
    {
        var nProt = indexedDb.EstimateTotalPeptides(1, 21, maxLength, 1, 0);
        Console.WriteLine(nProt);
    }

    Console.WriteLine(@"Test not implemented: " + testName);
}
/// <summary>
/// Times the parallel no-enzyme enumeration of sequence annotations (lengths 30 to 250)
/// for a FASTA database and verifies the number of enumerated sequences.
/// </summary>
/// <param name="size">
/// Label for the test case (the test-case comments give it as the file size in MB);
/// it is not used by the test body.
/// </param>
/// <param name="dbFile">
/// Path to the FASTA file. Any "TEST_FOLDER" token is replaced with
/// <c>Utils.DEFAULT_TEST_FILE_FOLDER</c> before the file is resolved.
/// </param>
/// <param name="expected">Expected number of enumerated sequences.</param>
[TestCase(3, @"TEST_FOLDER\MSPathFinderT\ID_005133_8491EFA2.fasta", 323719193)]  // 3MB
//[TestCase(6, @"TEST_FOLDER\MSPathFinderT\ID_004530_B63BD900.fasta", 595227563)]  // 6MB
//[TestCase(15, @"TEST_FOLDER\MSPathFinderT\ID_004208_295531A4.fasta", 1882434687)]  // 15MB
public void TestSequenceEnumeration(double size, string dbFile, int expected)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName, dbFile);

    var fastaFile = Utils.GetTestFile(methodName, dbFile.Replace("TEST_FOLDER", Utils.DEFAULT_TEST_FILE_FOLDER));

    var sw = System.Diagnostics.Stopwatch.StartNew();

    // NOTE(review): indexedDb.Read() is not called before the estimate below;
    // confirm that is intentional.
    var db = new FastaDatabase(fastaFile.FullName);
    var indexedDb = new IndexedDatabase(db);

    var timeDB = sw.Elapsed;
    Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");

    var estimatedAnnOff = indexedDb.EstimateTotalPeptides(0, 30, 250);
    var timeEstimate = sw.Elapsed;
    Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
    Console.WriteLine("Estimated results: " + estimatedAnnOff);

    var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzymeParallel(30, 250);
    var timeGetAnn = sw.Elapsed;
    Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");

    // Historical timing notes kept from earlier runs:
    //   Original: 110, 109 (total) seconds
    //   Parallelizing AnnotationsAndOffsetsNoEnzyme: 86 seconds
    //   Parallelizing AnnotationsAndOffsetsNoEnzyme, yield returns: 79.6, 94, 60, 60 seconds
    //   3MB  - serial: (not recorded), Parallel2: 107
    //   6MB  - serial: (not recorded), Parallel2: (not recorded)
    //   15MB - serial: (not recorded), Parallel2: (not recorded)

    // LongCount() instead of Count(): Count() throws OverflowException once the
    // sequence holds more than int.MaxValue elements, and the larger databases
    // listed above already approach that limit (1,882,434,687 for the 15MB file).
    // With LongCount() an oversized result fails the assertion below instead.
    var numSequences = annotationsAndOffsets.LongCount();

    // The time reported under the "Parallel ForEach" label covers the LongCount() call above.
    var timeParForEach = sw.Elapsed;
    Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");
    Console.WriteLine("NumPeptides: {0}", numSequences);

    sw.Stop();
    Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);

    Assert.AreEqual(expected, numSequences);
}
/// <summary>
/// Times the serial no-enzyme enumeration of sequence annotations (lengths 30 to 250)
/// for a FASTA database and verifies the number of enumerated sequences.
/// </summary>
/// <param name="size">
/// Label for the test case (the test-case comments give it as the file size in MB);
/// it is not used by the test body.
/// </param>
/// <param name="dbFile">Full path to the FASTA file; it is passed to <c>FastaDatabase</c> unchanged.</param>
/// <param name="expected">Expected number of enumerated sequences.</param>
[TestCase(3, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_005133_8491EFA2.fasta", 323719193)]  // 3MB
//[TestCase(6, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004530_B63BD900.fasta", 595227563)]  // 6MB
//[TestCase(15, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta", 1882434687)]  // 15MB
public void TestSequenceEnumerationSerial(double size, string dbFile, int expected)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName, dbFile);

    var sw = System.Diagnostics.Stopwatch.StartNew();

    // Another database in the same folder that was previously tried here:
    //   \\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_002216_235ACCEA.fasta (1.5MB)
    var db = new FastaDatabase(dbFile);
    var indexedDb = new IndexedDatabase(db);
    indexedDb.Read();

    var timeDB = sw.Elapsed;
    Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");

    var estimatedAnnOff = indexedDb.EstimateTotalPeptides(0, 30, 250);
    var timeEstimate = sw.Elapsed;
    Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
    Console.WriteLine("Estimated results: " + estimatedAnnOff);

    var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzyme(30, 250);
    var timeGetAnn = sw.Elapsed;
    Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");

    // LongCount() instead of Count(): Count() throws OverflowException once the
    // sequence holds more than int.MaxValue elements, and the larger databases
    // listed above already approach that limit (1,882,434,687 for the 15MB file).
    // With LongCount() an oversized result fails the assertion below instead.
    var numSequences = annotationsAndOffsets.LongCount();

    // NOTE(review): the label says "Parallel ForEach", but the time reported here
    // covers the LongCount() enumeration above; text kept unchanged.
    var timeParForEach = sw.Elapsed;
    Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");
    Console.WriteLine("NumPeptides: {0}", numSequences);

    sw.Stop();
    Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);

    Assert.AreEqual(expected, numSequences);
}
/// <summary>
/// Times the enumeration of sequence annotations limited to one N-terminal and zero
/// C-terminal cleavages (lengths 21 to 300) for a FASTA database and verifies the
/// number of enumerated sequences.
/// </summary>
/// <param name="size">
/// Label for the test case (the test-case comment gives it as the file size in MB);
/// it is not used by the test body.
/// </param>
/// <param name="dbFile">Full path to the FASTA file; it is passed to <c>FastaDatabase</c> unchanged.</param>
/// <param name="expected">Expected number of enumerated sequences.</param>
[TestCase(15, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta", 14862126)]  // 15MB
public void TestSequenceEnumerationNCTerm(double size, string dbFile, int expected)
{
    const int minLength = 21;
    const int maxLength = 300;
    const int numNTermCleavages = 1;
    const int numCTermCleavages = 0;

    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName, dbFile);

    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    // Other databases in the same folder that were previously tried here:
    //   ID_002216_235ACCEA.fasta (1.5MB), ID_005133_8491EFA2.fasta (3MB), ID_004530_B63BD900.fasta (6MB)
    // NOTE(review): indexedDb.Read() is not called before the estimate below;
    // confirm that is intentional.
    var indexedDb = new IndexedDatabase(new FastaDatabase(dbFile));

    var afterLoad = stopwatch.Elapsed;
    Console.WriteLine("Read DB in " + afterLoad.TotalSeconds + " Seconds");

    var estimate = indexedDb.EstimateTotalPeptides(1, minLength, maxLength, numNTermCleavages, numCTermCleavages);
    var afterEstimate = stopwatch.Elapsed;
    var estimateDuration = afterEstimate - afterLoad;
    Console.WriteLine("Read Estimate in " + estimateDuration.TotalSeconds + " Seconds");
    Console.WriteLine("Estimated results: " + estimate);

    var cleavedSequences = indexedDb.SequenceAnnotationsAndOffsetsWithNtermOrCtermCleavageNoLargerThan(
        minLength,
        maxLength,
        numNTermCleavages,
        numCTermCleavages);
    var afterGetAnnotations = stopwatch.Elapsed;
    var getAnnotationsDuration = afterGetAnnotations - afterEstimate;
    Console.WriteLine("Read Annotations in " + getAnnotationsDuration.TotalSeconds + " Seconds");

    long numSequences = cleavedSequences.Count();

    // The time reported under the "Parallel ForEach" label covers the Count() call above.
    var afterCount = stopwatch.Elapsed;
    var countDuration = afterCount - afterGetAnnotations;
    Console.WriteLine("Parallel ForEach in " + countDuration.TotalSeconds + " Seconds");
    Console.WriteLine("NumPeptides: {0}", numSequences);

    stopwatch.Stop();
    Console.WriteLine(@"{0:f4} sec", stopwatch.Elapsed.TotalSeconds);

    Assert.AreEqual(expected, numSequences);
}