/// <summary>
/// Wraps the given FASTA database in an <c>IndexedDatabase</c>, reads it, and returns the
/// annotation/offset enumeration that matches the current <c>SearchMode</c>.
/// </summary>
/// <param name="database">FASTA database to index.</param>
/// <param name="estimatedProteins">
/// Receives the result of <c>EstimateTotalPeptides</c> computed with the current search mode,
/// sequence length limits and terminal cleavage limits.
/// </param>
/// <param name="cancellationToken">
/// Optional token; it is forwarded only to the multiple-internal-cleavage enumeration.
/// </param>
/// <returns>The annotations and offsets produced by the enumeration selected for <c>SearchMode</c>.</returns>
private IEnumerable<AnnotationAndOffset> GetAnnotationsAndOffsets(FastaDatabase database, out long estimatedProteins, CancellationToken? cancellationToken = null)
{
    var indexed = new IndexedDatabase(database);
    indexed.Read();

    // The out parameter is populated before any of the returns below.
    estimatedProteins = indexed.EstimateTotalPeptides(
        SearchMode,
        MinSequenceLength,
        MaxSequenceLength,
        MaxNumNTermCleavages,
        MaxNumCTermCleavages);

    if (SearchMode == InternalCleavageType.MultipleInternalCleavages)
    {
        // Parallel variant; a sequential AnnotationsAndOffsetsNoEnzyme(min, max) call was used here previously.
        return indexed.AnnotationsAndOffsetsNoEnzymeParallel(
            MinSequenceLength,
            MaxSequenceLength,
            MaxNumThreads,
            cancellationToken);
    }

    if (SearchMode == InternalCleavageType.NoInternalCleavage)
    {
        return indexed.IntactSequenceAnnotationsAndOffsets(
            MinSequenceLength,
            MaxSequenceLength,
            MaxNumCTermCleavages);
    }

    // Every remaining search mode: bounded N-terminal / C-terminal cleavages.
    return indexed.SequenceAnnotationsAndOffsetsWithNtermOrCtermCleavageNoLargerThan(
        MinSequenceLength,
        MaxSequenceLength,
        MaxNumNTermCleavages,
        MaxNumCTermCleavages);
}
/// <summary>
/// Indexes a FASTA database, enumerates the sequence annotations allowing at most one
/// N-terminal and zero C-terminal cleavages (lengths 21 to 300), prints timing for each
/// stage, and asserts that the number of enumerated sequences equals <paramref name="expected"/>.
/// The test is ignored when the database file cannot be found.
/// </summary>
/// <param name="size">Size label for the test case (annotated as MB in the test-case comment); not used by the body.</param>
/// <param name="dbFile">Path of the FASTA file to load.</param>
/// <param name="expected">Expected number of enumerated sequences.</param>
[TestCase(15, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta", 14862126)]  // 15MB
public void TestSequenceEnumerationNCTerm(double size, string dbFile, int expected)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName, dbFile);

    // The database sits on a network share; skip rather than fail when it is not reachable.
    if (!System.IO.File.Exists(dbFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
    }

    var sw = new System.Diagnostics.Stopwatch();
    sw.Start();

    // Other databases that have been used with this test (same MSPathFinderT share folder):
    //   ID_002216_235ACCEA.fasta (1.5MB), ID_005133_8491EFA2.fasta (3MB),
    //   ID_004530_B63BD900.fasta (6MB),   ID_004208_295531A4.fasta (15MB)
    const int numNTermCleavages = 1;
    const int numCTermCleavages = 0;

    var db = new FastaDatabase(dbFile);
    var indexedDb = new IndexedDatabase(db);

    var timeDB = sw.Elapsed;
    Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");

    var estimatedAnnOff = indexedDb.EstimateTotalPeptides(1, 21, 300, numNTermCleavages, numCTermCleavages);
    var timeEstimate = sw.Elapsed;
    Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
    Console.WriteLine("Estimated results: " + estimatedAnnOff);

    var annotationsAndOffsets = indexedDb.SequenceAnnotationsAndOffsetsWithNtermOrCtermCleavageNoLargerThan(21, 300, numNTermCleavages, numCTermCleavages);
    var timeGetAnn = sw.Elapsed;
    Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");

    // Count with LongCount so the 64-bit counter cannot hit the Int32 overflow that Count() throws on.
    // An earlier revision counted inside Parallel.ForEach with Interlocked.Increment, which is
    // where the "Parallel ForEach" label below comes from; the enumeration here is sequential.
    long numSequences = annotationsAndOffsets.LongCount();
    var timeParForEach = sw.Elapsed;
    Console.WriteLine("Parallel ForEach in " + (timeParForEach - timeGetAnn).TotalSeconds + " Seconds");

    Console.WriteLine("NumPeptides: {0}", numSequences);
    sw.Stop();

    Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);
    Assert.AreEqual(expected, numSequences);
}
/// <summary>
/// Loads the Short.fasta test database, enumerates its sequence annotations of length
/// 100 to 300 with at most one N-terminal and no C-terminal cleavage, writes each
/// annotation and offset to the console, and reports the total count and elapsed time.
/// The test is ignored when the database file is not found. It makes no assertion on the count.
/// </summary>
public void TestCountingPeptides()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";
    if (!File.Exists(dbFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
    }

    // Enumeration limits used for this run.
    const int minLength = 100;
    const int maxLength = 300;
    const int numNTermCleavages = 1;
    const int numCTermCleavages = 0;

    var indexedDb = new IndexedDatabase(new FastaDatabase(dbFile));
    var annotations = indexedDb.SequenceAnnotationsAndOffsetsWithNtermOrCtermCleavageNoLargerThan(
        minLength, maxLength, numNTermCleavages, numCTermCleavages);

    // Print every entry while tallying how many were produced.
    var numPeptides = 0;
    foreach (var entry in annotations)
    {
        Console.WriteLine("{0}\t{1}", entry.Annotation, entry.Offset);
        numPeptides += 1;
    }

    Console.WriteLine("NumPeptides: {0}", numPeptides);

    stopwatch.Stop();
    Console.WriteLine(@"{0:f4} sec", stopwatch.Elapsed.TotalSeconds);
}