/// <summary>
/// Walks every entry returned by <c>AnnotationsAndOffsetsNoEnzyme(10, 13)</c> for the
/// <c>MSPathFinderT\Short.fasta</c> test database, echoing the first 20 annotations to the
/// console and then reporting the total number of entries and the elapsed time.
/// </summary>
public void TestEnumeratingPeptides()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    // The timer covers test-file lookup, database construction and the enumeration itself.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    var relativeFastaPath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\Short.fasta");
    var fastaFile = Utils.GetTestFile(testName, relativeFastaPath);

    var fastaDb = new FastaDatabase(fastaFile.FullName);
    var indexedDb = new IndexedDatabase(fastaDb);

    // Only the first few annotations are printed; all of them are counted.
    const int maxAnnotationsToPrint = 20;
    var peptideCount = 0;

    foreach (var entry in indexedDb.AnnotationsAndOffsetsNoEnzyme(10, 13))
    {
        if (peptideCount < maxAnnotationsToPrint)
        {
            Console.WriteLine(entry.Annotation);
        }

        peptideCount++;
    }

    timer.Stop();

    Console.WriteLine("Peptide count: {0}", peptideCount);
    Console.WriteLine(@"{0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Prints every annotation returned by <c>AnnotationsAndOffsetsNoEnzyme(10, 13)</c> for the
/// Short.fasta database on the shared test-file server, followed by the elapsed time.
/// The test is ignored (not failed) when the database file is not reachable.
/// </summary>
public void TestEnumeratingPeptides()
{
    const string dbFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\Short.fasta";

    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    var timer = System.Diagnostics.Stopwatch.StartNew();

    // Skip rather than fail when the network share holding the test data is unavailable.
    var dbFileAvailable = File.Exists(dbFile);
    if (!dbFileAvailable)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, dbFile);
    }

    var fastaDb = new FastaDatabase(dbFile);
    var indexedDb = new IndexedDatabase(fastaDb);

    foreach (var entry in indexedDb.AnnotationsAndOffsetsNoEnzyme(10, 13))
    {
        Console.WriteLine(entry.Annotation);
    }

    timer.Stop();
    Console.WriteLine(@"{0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Reads an indexed FASTA database, requests an estimate via <c>EstimateTotalPeptides(0, 30, 250)</c>,
/// then serially counts the entries returned by <c>AnnotationsAndOffsetsNoEnzyme(30, 250)</c> and
/// asserts that the count equals <paramref name="expected"/>. Per-stage timings are written to the console.
/// </summary>
/// <param name="size">
/// Label for the test case (the case comments give it as the database size in MB); not used by the test body.
/// </param>
/// <param name="dbFile">
/// Path to the FASTA file; the literal "TEST_FOLDER" is replaced with <c>Utils.DEFAULT_TEST_FILE_FOLDER</c>.
/// </param>
/// <param name="expected">Number of entries the enumeration is expected to yield.</param>
[TestCase(1.5, @"TEST_FOLDER\MSPathFinderT\ID_002216_235ACCEA.fasta", 188961836)]  // 1.5MB
//[TestCase(3, @"TEST_FOLDER\MSPathFinderT\ID_005133_8491EFA2.fasta", 323719193)]  // 3MB
//[TestCase(6, @"TEST_FOLDER\MSPathFinderT\ID_004530_B63BD900.fasta", 595227563)]  // 6MB
//[TestCase(15, @"TEST_FOLDER\MSPathFinderT\ID_004208_295531A4.fasta", 1882434687)]  // 15MB
public void TestSequenceEnumerationSerial(double size, string dbFile, int expected)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName, dbFile);

    var fastaFile = Utils.GetTestFile(methodName, dbFile.Replace("TEST_FOLDER", Utils.DEFAULT_TEST_FILE_FOLDER));

    var sw = new System.Diagnostics.Stopwatch();
    sw.Start();

    var db = new FastaDatabase(fastaFile.FullName);
    var indexedDb = new IndexedDatabase(db);
    indexedDb.Read();

    var timeDB = sw.Elapsed;
    Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");

    var estimatedAnnOff = indexedDb.EstimateTotalPeptides(0, 30, 250);
    var timeEstimate = sw.Elapsed;
    Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
    Console.WriteLine("Estimated results: " + estimatedAnnOff);

    var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzyme(30, 250);
    var timeGetAnn = sw.Elapsed;
    Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");

    // LongCount() keeps the tally in a 64-bit integer; Count() returns int and throws
    // OverflowException once the sequence exceeds int.MaxValue, which larger databases
    // can approach (the 15MB case above already expects ~1.88 billion entries).
    var numSequences = annotationsAndOffsets.LongCount();
    var timeEnumerate = sw.Elapsed;

    // This test enumerates serially, so the stage is labelled accordingly.
    Console.WriteLine("Serial enumeration in " + (timeEnumerate - timeGetAnn).TotalSeconds + " Seconds");
    Console.WriteLine("NumPeptides: {0}", numSequences);

    sw.Stop();
    Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);

    Assert.AreEqual(expected, numSequences);
}
/// <summary>
/// Builds an 11-bin histogram of the difference between the rescaled, rounded mass
/// (<c>Math.Round(mass * Constants.RescalingConstant)</c>) and the nominal mass for every entry
/// returned by <c>AnnotationsAndOffsetsNoEnzyme(300, 400)</c>, then prints each bin's offset,
/// count and fraction of the total, followed by the elapsed time.
/// </summary>
public void TestNominalMassErrors()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    const int minLength = 300;
    const int maxLength = 400;

    var relativeFastaPath = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\ID_003962_71E1A1D4.fasta");
    var fastaFile = Utils.GetTestFile(testName, relativeFastaPath);

    var fastaDb = new FastaDatabase(fastaFile.FullName);
    fastaDb.Read();
    var indexedDb = new IndexedDatabase(fastaDb);

    // Timing starts only after the database has been read and wrapped.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    var sequenceCount = 0L;
    var histogram = new long[11];
    var aminoAcids = new AminoAcidSet();

    // Offset that maps an error of zero onto the middle bin.
    var centerBin = histogram.Length / 2;
    var lastBin = histogram.Length - 1;

    foreach (var entry in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
    {
        ++sequenceCount;

        // Drop the first two and last two characters of the annotation
        // (presumably the flanking residue and separator on each side - confirm against the annotation format).
        var annotation = entry.Annotation;
        var sequenceText = annotation.Substring(2, annotation.Length - 4);

        var composition = aminoAcids.GetComposition(sequenceText);
        var rescaledMass = (int)Math.Round(composition.Mass * Constants.RescalingConstant);
        var massError = rescaledMass - composition.NominalMass;

        // Errors outside the histogram range are folded into the first or last bin.
        var bin = Math.Min(Math.Max(massError + centerBin, 0), lastBin);
        histogram[bin]++;
    }

    Console.WriteLine("NumSequences: {0}", sequenceCount);

    for (var bin = 0; bin < histogram.Length; bin++)
    {
        var fraction = histogram[bin] / (double)sequenceCount;
        Console.WriteLine("{0}\t{1}\t{2}", bin - centerBin, histogram[bin], fraction);
    }

    timer.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Tallies, in an 11-bin histogram, how far the rescaled and rounded mass of each sequence
/// deviates from its nominal mass, for every entry returned by
/// <c>AnnotationsAndOffsetsNoEnzyme(300, 400)</c> on a locally stored FASTA database.
/// Prints the sequence count, one line per bin (offset, count, fraction) and the elapsed time.
/// </summary>
public void TestNominalMassErrors()
{
    const int minLength = 300;
    const int maxLength = 400;

    // Alternative inputs that have been used with this test:
    //   \\proto-2\UnitTest_Files\InformedProteomics_TestFiles\H_sapiens_Uniprot_SPROT_2013-05-01_withContam.fasta
    //   C:\cygwin\home\kims336\Data\TopDownJia\database\TargetProteins.fasta
    const string dbFile = @"C:\cygwin\home\kims336\Data\TopDownJia\database\ID_003962_71E1A1D4.fasta";

    var fastaDb = new FastaDatabase(dbFile);
    fastaDb.Read();
    var indexedDb = new IndexedDatabase(fastaDb);

    // The stopwatch is started after the database has been loaded.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var total = 0L;
    var bins = new long[11];
    var residues = new AminoAcidSet();

    // Adding this offset puts a zero error in the middle bin.
    var zeroErrorBin = bins.Length / 2;

    foreach (var item in indexedDb.AnnotationsAndOffsetsNoEnzyme(minLength, maxLength))
    {
        ++total;

        // Keep only the interior of the annotation: two characters are removed from each end.
        var fullAnnotation = item.Annotation;
        var interior = fullAnnotation.Substring(2, fullAnnotation.Length - 4);

        var comp = residues.GetComposition(interior);
        var roundedRescaledMass = (int)Math.Round(comp.Mass * Constants.RescalingConstant);
        var deviation = roundedRescaledMass - comp.NominalMass;

        // Clamp out-of-range deviations to the outermost bins.
        var index = deviation + zeroErrorBin;
        index = Math.Max(index, 0);
        index = Math.Min(index, bins.Length - 1);

        bins[index]++;
    }

    Console.WriteLine("NumSequences: {0}", total);

    for (var k = 0; k < bins.Length; k++)
    {
        Console.WriteLine("{0}\t{1}\t{2}", k - zeroErrorBin, bins[k], bins[k] / (double)total);
    }

    stopwatch.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", stopwatch.Elapsed.TotalSeconds);
}
/// <summary>
/// Wraps <paramref name="database"/> in an <c>IndexedDatabase</c> and returns its annotation/offset
/// sequence for lengths between <c>MinSequenceLength</c> and <c>MaxSequenceLength</c>.
/// </summary>
/// <param name="database">FASTA database to index.</param>
/// <returns>
/// The result of <c>AnnotationsAndOffsetsNoEnzyme</c> when <c>NumTolerableTermini</c> is 0;
/// otherwise the result of <c>AnnotationsAndOffsets</c> using <c>NumTolerableTermini</c> and <c>Enzyme</c>.
/// </returns>
private IEnumerable<AnnotationAndOffset> GetAnnotationsAndOffsets(FastaDatabase database)
{
    var indexedDb = new IndexedDatabase(database);

    // With zero tolerable termini the enzyme-agnostic enumeration is used.
    if (NumTolerableTermini == 0)
    {
        return indexedDb.AnnotationsAndOffsetsNoEnzyme(MinSequenceLength, MaxSequenceLength);
    }

    // NOTE(review): the literal 2 is passed straight through to AnnotationsAndOffsets;
    // presumably a missed-cleavage limit - confirm against IndexedDatabase.
    return indexedDb.AnnotationsAndOffsets(
        MinSequenceLength,
        MaxSequenceLength,
        NumTolerableTermini,
        2,
        Enzyme);
}
/// <summary>
/// Reads an indexed FASTA database, requests an estimate via <c>EstimateTotalPeptides(0, 30, 250)</c>,
/// then serially counts the entries returned by <c>AnnotationsAndOffsetsNoEnzyme(30, 250)</c> and
/// asserts that the count equals <paramref name="expected"/>. Per-stage timings are written to the console.
/// The test is ignored when <paramref name="dbFile"/> does not exist.
/// </summary>
/// <param name="size">
/// Label for the test case (the case comments give it as the database size in MB); not used by the test body.
/// </param>
/// <param name="dbFile">Full path to the FASTA file to enumerate.</param>
/// <param name="expected">Number of entries the enumeration is expected to yield.</param>
[TestCase(3, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_005133_8491EFA2.fasta", 323719193)]  // 3MB
//[TestCase(6, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004530_B63BD900.fasta", 595227563)]  // 6MB
//[TestCase(15, @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\MSPathFinderT\ID_004208_295531A4.fasta", 1882434687)]  // 15MB
public void TestSequenceEnumerationSerial(double size, string dbFile, int expected)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName, dbFile);

    // The database lives on a network share; skip instead of failing when it is unreachable.
    if (!System.IO.File.Exists(dbFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFile);
    }

    var sw = new System.Diagnostics.Stopwatch();
    sw.Start();

    var db = new FastaDatabase(dbFile);
    var indexedDb = new IndexedDatabase(db);
    indexedDb.Read();

    var timeDB = sw.Elapsed;
    Console.WriteLine("Read DB in " + timeDB.TotalSeconds + " Seconds");

    var estimatedAnnOff = indexedDb.EstimateTotalPeptides(0, 30, 250);
    var timeEstimate = sw.Elapsed;
    Console.WriteLine("Read Estimate in " + (timeEstimate - timeDB).TotalSeconds + " Seconds");
    Console.WriteLine("Estimated results: " + estimatedAnnOff);

    var annotationsAndOffsets = indexedDb.AnnotationsAndOffsetsNoEnzyme(30, 250);
    var timeGetAnn = sw.Elapsed;
    Console.WriteLine("Read Annotations in " + (timeGetAnn - timeEstimate).TotalSeconds + " Seconds");

    // LongCount() keeps the tally in a 64-bit integer; Count() returns int and throws
    // OverflowException once the sequence exceeds int.MaxValue, which larger databases
    // can approach (the 15MB case above already expects ~1.88 billion entries).
    var numSequences = annotationsAndOffsets.LongCount();
    var timeEnumerate = sw.Elapsed;

    // This test enumerates serially, so the stage is labelled accordingly.
    Console.WriteLine("Serial enumeration in " + (timeEnumerate - timeGetAnn).TotalSeconds + " Seconds");
    Console.WriteLine("NumPeptides: {0}", numSequences);

    sw.Stop();
    Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds);

    Assert.AreEqual(expected, numSequences);
}