/// <summary>
/// Runs a top-down search on Jia's Synocho_D1_1 dataset with oxidation on M and four
/// cysteine modifications enabled. Calls Assert.Ignore when the spectrum file or the
/// FASTA file is missing.
/// </summary>
public void TestForJiaData()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    // Inputs used previously (QC_Shew), kept for reference:
    //   spec: C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw
    //   db:   Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\ID_002216_235ACCEA.fasta")
    //   db:   C:\cygwin\home\kims336\Data\TopDownQCShew\database\Test.fasta

    // Jia's data
    const string rawFile = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\Synocho_D1_1.raw";
    const string fastaFile = @"C:\cygwin\home\kims336\Data\TopDownJia\database\ID_003962_71E1A1D4.fasta";
    const string resultsDir = @"C:\cygwin\home\kims336\Data\TopDownJia\raw\D1_1_Mode1";

    // Spectrum file is checked first, then the database; the first missing one ends the test.
    foreach (var requiredFile in new[] { rawFile, fastaFile })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, requiredFile);
        }
    }

    // Modifications tried in the past but currently disabled:
    //   Acetylation at the protein N-terminus, PyroGluQ on Q, Cysteinyl on C,
    //   Deamidation on N, Deamidation on Q.
    const int maxModsPerProtein = 4;
    var aminoAcids = new AminoAcidSet(
        new List<SearchModification>
        {
            new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false),
            new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false),
            new SearchModification(Modification.Nitrosyl, 'C', SequenceLocation.Everywhere, false),
            new SearchModification(Modification.Nethylmaleimide, 'C', SequenceLocation.Everywhere, false),
            new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
        },
        maxModsPerProtein);

    // SingleInternalCleavage is "mode 1" in the older numeric notation (close to N- or C-term);
    // DatabaseSearchMode.Both requests the target and decoy search.
    TestTopDownSearch(
        rawFile,
        fastaFile,
        resultsDir,
        aminoAcids,
        DatabaseSearchMode.Both,
        InternalCleavageType.SingleInternalCleavage);
}
/// <summary>
/// Builds an <c>MsPfParameters</c> object from the supplied limits plus fixed cleavage and
/// tolerance settings, then runs the search through <c>IcTopDownLauncher</c>.
/// </summary>
/// <param name="specFilePath">Spectrum file passed to MsPfParameters</param>
/// <param name="dbFilePath">FASTA database file passed to MsPfParameters</param>
/// <param name="outputDir">Output directory passed to MsPfParameters</param>
/// <param name="aaSet">Amino acid set (including search modifications)</param>
/// <param name="minSequenceLength">Assigned to MinSequenceLength</param>
/// <param name="maxSequenceLength">Assigned to MaxSequenceLength</param>
/// <param name="minPrecursorIonCharge">Assigned to MinPrecursorIonCharge</param>
/// <param name="maxPrecursorIonCharge">Assigned to MaxPrecursorIonCharge</param>
/// <param name="minProductIonCharge">Assigned to MinProductIonCharge</param>
/// <param name="maxProductIonCharge">Assigned to MaxProductIonCharge</param>
/// <param name="minSequenceMass">Assigned to MinSequenceMass</param>
/// <param name="maxSequenceMass">Assigned to MaxSequenceMass</param>
/// <param name="tda">Assigned to TargetDecoySearchMode</param>
/// <param name="searchMode">Assigned to InternalCleavageMode</param>
private void TestTopDownSearch(string specFilePath, string dbFilePath, string outputDir, AminoAcidSet aaSet, int minSequenceLength, int maxSequenceLength, int minPrecursorIonCharge, int maxPrecursorIonCharge, int minProductIonCharge, int maxProductIonCharge, double minSequenceMass, double maxSequenceMass, DatabaseSearchMode tda, InternalCleavageType searchMode)
{
    Utils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    // Fixed search settings shared by every caller
    const int nTermCleavageLimit = 1;   // 30 was used previously
    const int cTermCleavageLimit = 0;
    const int precursorPpm = 10;
    const int productPpm = 10;

    var parameters = new MsPfParameters(specFilePath, dbFilePath, outputDir, aaSet, "");

    // Properties are assigned in the same order as before
    parameters.MinSequenceLength = minSequenceLength;
    parameters.MaxSequenceLength = maxSequenceLength;
    parameters.MaxNumNTermCleavages = nTermCleavageLimit;
    parameters.MaxNumCTermCleavages = cTermCleavageLimit;
    parameters.MinPrecursorIonCharge = minPrecursorIonCharge;
    parameters.MaxPrecursorIonCharge = maxPrecursorIonCharge;
    parameters.MinProductIonCharge = minProductIonCharge;
    parameters.MaxProductIonCharge = maxProductIonCharge;
    parameters.MinSequenceMass = minSequenceMass;
    parameters.MaxSequenceMass = maxSequenceMass;
    parameters.PrecursorIonTolerancePpm = precursorPpm;
    parameters.ProductIonTolerancePpm = productPpm;
    parameters.TargetDecoySearchMode = tda;
    parameters.InternalCleavageMode = searchMode;

    var launcher = new IcTopDownLauncher(parameters);

    // Optional launcher settings, currently disabled:
    //   launcher.ForceParallel = true;
    //   launcher.MaxNumThreads = -1;

    // NOTE(review): the meaning of the 0.7 argument is not visible here - confirm against IcTopDownLauncher.RunSearch
    launcher.RunSearch(0.7);

    // Alternative entry point, currently disabled:
    //   launcher.RunIntactProteinSearch();
}
/// <summary>
/// Runs a top-down search on the SBEP_STM (Salmonella) dataset stored under
/// Utils.DEFAULT_TEST_FILE_FOLDER. Calls Assert.Ignore when the spectrum file, the
/// FASTA file or the results folder is missing.
/// </summary>
public void TestForSbepData()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    //// Salmonella
    var rawFile = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"TopDown\SBEP_STM_001_02272012_Aragon.raw");
    var fastaFile = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"MSPathFinderT\ID_002166_F86E3B2F.fasta");
    var resultsDir = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"Results\Mod_M2");

    // Spectrum file is checked first, then the database; the first missing one ends the test.
    foreach (var requiredFile in new[] { rawFile, fastaFile })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, requiredFile);
        }
    }

    // The results folder must already exist; this test does not create it.
    if (!Directory.Exists(resultsDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", testName, resultsDir);
    }

    // Configure amino acid set
    const int maxModsPerProtein = 4;
    var aminoAcids = new AminoAcidSet(
        new List<SearchModification>
        {
            new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false),
            new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false),
            new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
            new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false),
        },
        maxModsPerProtein);

    // NoInternalCleavage is "mode 2" in the older numeric notation (close to N- and C-term);
    // DatabaseSearchMode.Both requests the target and decoy search.
    TestTopDownSearch(
        rawFile,
        fastaFile,
        resultsDir,
        aminoAcids,
        DatabaseSearchMode.Both,
        InternalCleavageType.NoInternalCleavage);
}
/// <summary>
/// Runs a top-down search on Aaron's MTB_intact_1 dataset with Dehydro (C), Oxidation (M),
/// N-terminal acetylation and TevFp2 (S) enabled. Calls Assert.Ignore when the spectrum
/// file or the FASTA file is missing.
/// </summary>
public void TestForAaronData()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    const string rawFile = @"C:\cygwin\home\kims336\Data\TopDownAaron\raw\MTB_intact_1.raw";
    const string fastaFile = @"C:\cygwin\home\kims336\Data\TopDownAaron\database\ID_003121_998584F8.fasta";
    const string resultsDir = @"C:\cygwin\home\kims336\Data\TopDownAaron\Ic\Mode1_07";

    // Spectrum file is checked first, then the database; the first missing one ends the test.
    foreach (var requiredFile in new[] { rawFile, fastaFile })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, requiredFile);
        }
    }

    // Configure amino acid set
    var acetylNTerm = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxidizedM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroCys = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);

    // The TevFp2 modification is applied to serine ('S')
    var tevFp2S = new SearchModification(Modification.TevFp2, 'S', SequenceLocation.Everywhere, false);

    // Glutathione, Nitrosyl and Nethylmaleimide on C were tried previously and are disabled.
    var enabledMods = new List<SearchModification>
    {
        dehydroCys,
        oxidizedM,
        acetylNTerm,
        tevFp2S
    };

    const int maxModsPerProtein = 4;
    var aminoAcids = new AminoAcidSet(enabledMods, maxModsPerProtein);

    // SingleInternalCleavage is "mode 1" in the older numeric notation (close to N- or C-term);
    // DatabaseSearchMode.Both requests the target and decoy search.
    TestTopDownSearch(
        rawFile,
        fastaFile,
        resultsDir,
        aminoAcids,
        DatabaseSearchMode.Both,
        InternalCleavageType.SingleInternalCleavage);
}
/// <summary>
/// Runs a top-down search on the QC_Shew intact-protein dataset with Dehydro (C),
/// Oxidation (M) and N-terminal acetylation enabled. Calls Assert.Ignore when the
/// spectrum file or the FASTA file is missing.
/// </summary>
public void TestForQcShew()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    // QC_Shew
    const string pbfFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string fastaFile = @"D:\MSPathFinder\Fasta\ID_002216_235ACCEA.fasta";
    const string resultsDir = @"D:\MassSpecFiles\training\test";

    // Spectrum file is checked first, then the database; the first missing one ends the test.
    foreach (var requiredFile in new[] { pbfFile, fastaFile })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, requiredFile);
        }
    }

    // Configure amino acid set
    const int maxModsPerProtein = 4;
    var aminoAcids = new AminoAcidSet(
        new List<SearchModification>
        {
            new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false),
            new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false),
            new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false)
        },
        maxModsPerProtein);

    // NoInternalCleavage is "mode 2" in the older numeric notation (close to N- and C-term);
    // DatabaseSearchMode.Both requests the target and decoy search.
    TestTopDownSearch(
        pbfFile,
        fastaFile,
        resultsDir,
        aminoAcids,
        DatabaseSearchMode.Both,
        InternalCleavageType.NoInternalCleavage);
}
/// <summary>
/// Convenience overload: forwards to the full TestTopDownSearch overload using fixed
/// sequence-length, charge and mass limits.
/// </summary>
/// <param name="specFilePath">Spectrum file, forwarded unchanged</param>
/// <param name="dbFilePath">FASTA database file, forwarded unchanged</param>
/// <param name="outputDir">Output directory, forwarded unchanged</param>
/// <param name="aaSet">Amino acid set, forwarded unchanged</param>
/// <param name="tda">Target/decoy search mode, forwarded unchanged</param>
/// <param name="searchMode">Internal cleavage mode, forwarded unchanged</param>
private void TestTopDownSearch(string specFilePath, string dbFilePath, string outputDir, AminoAcidSet aaSet, DatabaseSearchMode tda, InternalCleavageType searchMode)
{
    Utils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    // Values in trailing comments are alternatives that were used previously.
    TestTopDownSearch(
        specFilePath,
        dbFilePath,
        outputDir,
        aaSet,
        minSequenceLength: 21,       // 7
        maxSequenceLength: 500,      // 1000
        minPrecursorIonCharge: 2,    // 3
        maxPrecursorIonCharge: 60,   // 67
        minProductIonCharge: 1,      // 1
        maxProductIonCharge: 20,     // 15
        minSequenceMass: 3000.0,
        maxSequenceMass: 50000.0,
        tda: tda,
        searchMode: searchMode);
}
/// <summary>
/// Estimate the total number of peptides that will be used in processing - essential for reasonably accurate progress reporting
/// </summary>
/// <param name="mode">Internal cleavage mode; selects which of the counting strategies below is used</param>
/// <param name="minLength">Minimum sequence length to count</param>
/// <param name="maxLength">Maximum sequence length to count</param>
/// <param name="numNTermCleavages">Upper bound of the inner loop in the second SingleInternalCleavage pass; not read in the other modes</param>
/// <param name="numCTermCleavages">Upper bound of the outer loop for NoInternalCleavage and the first SingleInternalCleavage pass; not read for MultipleInternalCleavages</param>
/// <returns>The estimated number of candidate sequences</returns>
public long EstimateTotalPeptides(InternalCleavageType mode, int minLength = 21, int maxLength = 300, int numNTermCleavages = 1, int numCTermCleavages = 0)
{
    long count = 0;

    if (mode == InternalCleavageType.MultipleInternalCleavages)
    {
        // Sliding window over the database characters, with the matching PLcps() value for each position
        var curSequence = new LinkedList<byte>();
        var lcpList = new LinkedList<byte>();

        // Use "IntWrapper" to allow modifying the value inside of the foreach
        // Each entry is the offset of a delimiter character relative to the start of the window
        var seps = new Queue<IntWrapper>();

        // Dispose both enumerators when done (also on exceptions) so that any resources
        // held by the underlying iterators are released promptly
        using (var lcpEnum = PLcps().GetEnumerator())
        using (var fEnum = FastaDatabase.Characters().GetEnumerator())
        {
            bool read;

            // Keep going after the input is exhausted until the window is shorter than minLength
            while ((read = fEnum.MoveNext()) || curSequence.Count >= minLength)
            {
                if (read)
                {
                    lcpEnum.MoveNext();
                    curSequence.AddLast(fEnum.Current);
                    lcpList.AddLast(lcpEnum.Current);

                    if (fEnum.Current == FastaDatabaseConstants.Delimiter)
                    {
                        seps.Enqueue(new IntWrapper(curSequence.Count - 1));
                    }

                    // Fill the window to maxLength + 2 characters before counting anything
                    if (curSequence.Count < maxLength + 2)
                    {
                        continue;
                    }
                }

                // A delimiter at the very start of the window is no longer relevant
                if (seps.Count > 0 && seps.Peek().Value == 0)
                {
                    seps.Dequeue();
                }

                // Shortest length counted from this position: the larger of minLength and the value at the window start
                var min = minLength > lcpList.First.Value ? minLength : lcpList.First.Value;

                if (seps.Count == 0 || seps.Peek().Value >= maxLength + 2)
                {
                    // No delimiter inside the window: count lengths up to the window size
                    count += maxLength + 2 - min;
                }
                else if (seps.Peek().Value >= min)
                {
                    // Delimiter inside the window: count lengths up to the delimiter
                    count += seps.Peek().Value - min;
                }

                // Advance the window by one character and shift the delimiter offsets to match
                curSequence.RemoveFirst();
                lcpList.RemoveFirst();

                foreach (var sep in seps)
                {
                    --sep.Value;
                }
            }
        }
    }
    else
    {
        // mode 2
        foreach (var sonc in SequencesWithOffsetNoCleavage())
        {
            var seqLength = sonc.Sequence.Length;

            // mode 2
            for (int i = 0; i <= numCTermCleavages; i++)
            {
                // mode 2
                if (mode == InternalCleavageType.NoInternalCleavage && minLength <= seqLength - i && seqLength - i <= maxLength)
                {
                    count++;
                }

                // mode 1 #1
                if (mode == InternalCleavageType.SingleInternalCleavage)
                {
                    for (int j = 0; minLength <= seqLength - i - j; j++)
                    {
                        if (seqLength - i - j <= maxLength)
                        {
                            count++;
                        }
                    }
                }
            }

            if (mode == InternalCleavageType.SingleInternalCleavage)
            {
                // mode 1 #2
                for (int i = numCTermCleavages + 1; i <= seqLength - minLength; i++)
                {
                    for (int j = 0; j <= numNTermCleavages; j++)
                    {
                        if (minLength <= seqLength - i - j && seqLength - i - j <= maxLength)
                        {
                            count++;
                        }
                    }
                }
            }
        }
    }

    return count;
}
/// <summary>
/// Runs a target-only top-down search on Vlad's Alz_RA_C1 dataset with Dehydro (C),
/// Oxidation (M), N-terminal acetylation and phosphorylation on S/T/Y enabled.
/// Calls Assert.Ignore when the spectrum file or the FASTA file is missing.
/// </summary>
public void TestForVlad()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string specFilePath = @"D:\Research\Data\Vlad\raw\Alz_RA_C1_HCD_11012013_SW_03Nov2013.raw";
    const string dbFilePath = @"D:\Research\Data\Vlad\database\ID_004221_1C042A1F.fasta";
    //const string dbFilePath = @"D:\Research\Data\Vlad\database\HBA_MOUSE.fasta";
    const string outputDir = @"D:\Research\Data\Vlad\Ic\POPSICLETest_M1";

    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    if (!File.Exists(dbFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dbFilePath);
    }

    // Configure amino acid set
    // Only the modifications that are actually searched are constructed here.
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var phosphoS = new SearchModification(Modification.Phosphorylation, 'S', SequenceLocation.Everywhere, false);
    var phosphoT = new SearchModification(Modification.Phosphorylation, 'T', SequenceLocation.Everywhere, false);
    var phosphoY = new SearchModification(Modification.Phosphorylation, 'Y', SequenceLocation.Everywhere, false);

    // Modifications tried previously and currently disabled (all SequenceLocation.Everywhere
    // unless noted): Glutathione on C, Dethiomethyl on M, ThrToAla on T, SerToAsn on S,
    // Deamidation on N, Deamidation on Q, PyroCarbamidomethyl on C (ProteinNTerm).
    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification>
    {
        dehydroC,
        oxM,
        acetylN,
        phosphoS,
        phosphoT,
        phosphoY
    };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);

    // SingleInternalCleavage is "mode 1" in the older numeric notation (close to N- or C-term)
    const InternalCleavageType searchMode = InternalCleavageType.SingleInternalCleavage;

    // Target database only (no decoy search)
    DatabaseSearchMode tda = DatabaseSearchMode.Target;

    TestTopDownSearch(specFilePath, dbFilePath, outputDir, aaSet, tda, searchMode);
}
/// <summary>
/// Runs a target-only top-down search on Yufeng's column test dataset against a truncated
/// FASTA file, using the default AminoAcidSet (no search modifications). Calls
/// Assert.Ignore when the spectrum file or the FASTA file is missing.
/// </summary>
public void TestForYufeng()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    const string rawFile = @"H:\Research\Yufeng\TopDownYufeng\raw\yufeng_column_test2.raw";

    // Full database used previously: H:\Research\Yufeng\TopDownYufeng\database\ID_002216_235ACCEA.fasta
    const string fastaFile = @"H:\Research\Yufeng\TopDownYufeng\database\SO_3942_Truncated.fasta";
    const string resultsDir = @"H:\Research\Yufeng\TopDownYufeng\Debug";

    // Spectrum file is checked first, then the database; the first missing one ends the test.
    foreach (var requiredFile in new[] { rawFile, fastaFile })
    {
        if (!File.Exists(requiredFile))
        {
            Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, requiredFile);
        }
    }

    // Configure amino acid set
    // A modification-aware set (Dehydro on C, Oxidation on M, N-terminal Acetylation, with
    // numMaxModsPerProtein = 0) was used previously; Glutathione, PyroGluQ, Cysteinyl and
    // Deamidation (N, Q) were also considered. The search now uses the default set.
    var aminoAcids = new AminoAcidSet();

    // NoInternalCleavage is "mode 2" in the older numeric notation (close to N- and C-term);
    // DatabaseSearchMode.Target searches the target database only.
    // Values in trailing comments are alternatives that were used previously.
    TestTopDownSearch(
        rawFile,
        fastaFile,
        resultsDir,
        aminoAcids,
        minSequenceLength: 21,       // 7
        maxSequenceLength: 500,      // 1000
        minPrecursorIonCharge: 2,    // 3
        maxPrecursorIonCharge: 50,   // 67
        minProductIonCharge: 1,      // 1
        maxProductIonCharge: 20,     // 15
        minSequenceMass: 3000.0,
        maxSequenceMass: 50000.0,
        tda: DatabaseSearchMode.Target,
        searchMode: InternalCleavageType.NoInternalCleavage);
}
/// <summary>
/// Estimates the number of candidate sequences that a search in the given cleavage mode will visit.
/// </summary>
/// <param name="mode">Internal cleavage mode; selects which of the counting strategies below is used</param>
/// <param name="minLength">Minimum sequence length to count</param>
/// <param name="maxLength">Maximum sequence length to count</param>
/// <param name="numNTermCleavages">Upper bound of the inner loop in the second SingleInternalCleavage pass; not read in the other modes</param>
/// <param name="numCTermCleavages">Upper bound of the outer loop for NoInternalCleavage and the first SingleInternalCleavage pass; not read for MultipleInternalCleavages</param>
/// <returns>The estimated number of candidate sequences</returns>
public long EstimateTotalPeptides(InternalCleavageType mode, int minLength = 21, int maxLength = 300, int numNTermCleavages = 1, int numCTermCleavages = 0)
{
    long count = 0;

    if (mode == InternalCleavageType.MultipleInternalCleavages)
    {
        // Sliding window over the database characters, with the matching PLcps() value for each position
        var curSequence = new LinkedList<byte>();
        var lcpList = new LinkedList<byte>();

        // IntWrapper is a reference wrapper, so the offsets can be decremented inside the foreach below.
        // Each entry is the offset of a delimiter character relative to the start of the window.
        var seps = new Queue<IntWrapper>();

        // Dispose both enumerators when done (also on exceptions) so that any resources
        // held by the underlying iterators are released promptly
        using (var lcpEnum = PLcps().GetEnumerator())
        using (var fEnum = FastaDatabase.Characters().GetEnumerator())
        {
            bool read;

            // Keep going after the input is exhausted until the window is shorter than minLength
            while ((read = fEnum.MoveNext()) || curSequence.Count >= minLength)
            {
                if (read)
                {
                    lcpEnum.MoveNext();
                    curSequence.AddLast(fEnum.Current);
                    lcpList.AddLast(lcpEnum.Current);

                    if (fEnum.Current == FastaDatabase.Delimiter)
                    {
                        seps.Enqueue(new IntWrapper(curSequence.Count - 1));
                    }

                    // Fill the window to maxLength + 2 characters before counting anything
                    if (curSequence.Count < maxLength + 2)
                    {
                        continue;
                    }
                }

                // A delimiter at the very start of the window is no longer relevant
                if (seps.Count > 0 && seps.Peek().Value == 0)
                {
                    seps.Dequeue();
                }

                // Shortest length counted from this position: the larger of minLength and the value at the window start
                var min = minLength > lcpList.First.Value ? minLength : lcpList.First.Value;

                if (seps.Count == 0 || seps.Peek().Value >= maxLength + 2)
                {
                    // No delimiter inside the window: count lengths up to the window size
                    count += maxLength + 2 - min;
                }
                else if (seps.Peek().Value >= min)
                {
                    // Delimiter inside the window: count lengths up to the delimiter
                    count += seps.Peek().Value - min;
                }

                // Advance the window by one character and shift the delimiter offsets to match
                curSequence.RemoveFirst();
                lcpList.RemoveFirst();

                foreach (var sep in seps)
                {
                    --sep.Value;
                }
            }
        }
    }
    else
    {
        // mode 2
        foreach (var sonc in SequencesWithOffsetNoCleavage())
        {
            var seqLength = sonc.Sequence.Length;

            // mode 2
            for (int i = 0; i <= numCTermCleavages; i++)
            {
                // mode 2
                if (mode == InternalCleavageType.NoInternalCleavage && minLength <= seqLength - i && seqLength - i <= maxLength)
                {
                    count++;
                }

                // mode 1 #1
                if (mode == InternalCleavageType.SingleInternalCleavage)
                {
                    for (int j = 0; minLength <= seqLength - i - j; j++)
                    {
                        if (seqLength - i - j <= maxLength)
                        {
                            count++;
                        }
                    }
                }
            }

            if (mode == InternalCleavageType.SingleInternalCleavage)
            {
                // mode 1 #2
                for (int i = numCTermCleavages + 1; i <= seqLength - minLength; i++)
                {
                    for (int j = 0; j <= numNTermCleavages; j++)
                    {
                        if (minLength <= seqLength - i - j && seqLength - i - j <= maxLength)
                        {
                            count++;
                        }
                    }
                }
            }
        }
    }

    return count;
}