/// <summary>
/// Computes a Pearson-style correlation between two spectra after Gaussian filtering
/// and binning, clamped so that negative correlations are reported as 0.
/// </summary>
/// <param name="spectrum1">Peaks of the first spectrum.</param>
/// <param name="spectrum2">Peaks of the second spectrum.</param>
/// <param name="comparer">Mass binning used to map peak m/z values onto vector indices.</param>
/// <returns>
/// 0 when either vector has no variance or the covariance is negative;
/// otherwise covariance / sqrt(variance1 * variance2).
/// </returns>
public double PearsonCorrelation(Peak[] spectrum1, Peak[] spectrum2, FilteredProteinMassBinning comparer)
{
    spectrum1 = GuassianFilter(spectrum1, .5);
    spectrum2 = GuassianFilter(spectrum2, .5);

    // The vector length is the bin number of mass 10000, the same cut-off that
    // ConvertToFullIntensityVector passes to RemovePeaks.
    var vectorLength = comparer.GetBinNumber(10000.0);

    // Mean intensity per bin: total filtered intensity divided by the number of bins.
    // The second mean must come from spectrum2 (it was previously computed from spectrum1).
    var spec1Bar = spectrum1.Sum(x => x.Intensity) / vectorLength;
    var spec2Bar = spectrum2.Sum(y => y.Intensity) / vectorLength;

    var intensityVector1 = ConvertToFullIntensityVector(spectrum1, vectorLength, comparer);
    var intensityVector2 = ConvertToFullIntensityVector(spectrum2, vectorLength, comparer);

    var cov = 0.0;
    var s1 = 0.0;
    var s2 = 0.0;
    for (var i = 0; i < vectorLength; i++)
    {
        var d1 = intensityVector1[i] - spec1Bar;
        var d2 = intensityVector2[i] - spec2Bar;
        cov += d1 * d2;
        s1 += d1 * d1;
        s2 += d2 * d2;
    }

    // A vector without variance has no defined correlation; report 0.
    if (s1 <= 0 || s2 <= 0)
    {
        return 0.0;
    }

    // Negative correlations are clamped to 0.
    return cov < 0 ? 0.0 : cov / Math.Sqrt(s1 * s2);
}
// Prints, for each mass from 9999 up to (but not including) 10010 in steps of 1,
// the mass and the bin number reported by a FilteredProteinMassBinning built from
// a default AminoAcidSet and the value 50001.
public void TestProteinMassComparerWithBinning()
{
    var binning = new FilteredProteinMassBinning(new AminoAcidSet(), 50001);

    var mass = 9999d;
    while (mass < 10010)
    {
        Console.WriteLine("{0}, {1}", mass, binning.GetBinNumber(mass));
        mass++;
    }

    // Disabled comparison against ProteinMassBinning and Constants.GetBinNumHighPrecision,
    // kept for reference:
    //
    // var comparer = new ProteinMassBinning(50, 50001, true);
    // Console.WriteLine(Constants.GetBinNumHighPrecision(50000));
    // Console.WriteLine(comparer.NumberOfBins);
    // Console.WriteLine(comparer2.NumberOfBins);
    //
    // var rnd = new Random();
    //
    // var mass = 0d;
    // for (var i = 0; i < 450; i ++)
    // {
    //     if (i > 0)
    //     {
    //         var j = rnd.Next(aaSet.Length);
    //         mass += aaSet[j].Mass;
    //     }
    //     if (mass > comparer.MaxMass) break;
    //
    //     var binNum = Constants.GetBinNumHighPrecision(mass);
    //     var binNum1 = comparer.GetBinNumber(mass);
    //     var binNum2 = comparer2.GetBinNumber(mass);
    //
    //     Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", i, mass, binNum, binNum1, binNum2);
    // }
}
/// <summary>
/// Expands a peak list into a fixed-length intensity vector indexed by mass bin.
/// </summary>
/// <param name="spectrum">Peaks to place into the vector.</param>
/// <param name="length">Number of entries in the returned vector.</param>
/// <param name="comparer">Mass binning used to map each peak's m/z to a bin number.</param>
/// <returns>
/// A vector of <paramref name="length"/> entries where entry (bin number - 1) holds the intensity
/// of the last peak that mapped to that bin; bins with no peak stay 0.
/// </returns>
public double[] ConvertToFullIntensityVector(Peak[] spectrum, int length, FilteredProteinMassBinning comparer)
{
    // A newly allocated array is already zero-filled, so no explicit clearing is needed.
    var intensityVector = new double[length];

    spectrum = RemovePeaks(10000.0, spectrum);

    foreach (var peak in spectrum)
    {
        var binNumber = comparer.GetBinNumber(peak.Mz);

        // Entries are stored at (binNumber - 1), so only bin numbers 1..length are addressable.
        // Bin 0 used to pass the ">= 0" check and then index -1, throwing IndexOutOfRangeException.
        // NOTE(review): presumably bin numbers are 1-based here - confirm against GetBinNumber.
        if (binNumber < 1 || binNumber > length)
        {
            continue;
        }

        intensityVector[binNumber - 1] = peak.Intensity;
    }

    return intensityVector;
}
/// <summary>
/// Precompute edges for the scoring graph.
/// </summary>
/// <remarks>
/// For every mass bin i and every amino acid in the extended amino acid array, an edge i -> j is
/// added where j is the bin of (mass of bin i + amino acid mass). From bin 0 only, each unmodified
/// amino acid is additionally combined with every terminal modification.
/// </remarks>
/// <param name="aminoAcidSet">Amino acid set to build the graph edges from.</param>
/// <param name="aminoAcidProbabilities">The amino acid probabilities.</param>
/// <returns>A list of all scoring graph edges.</returns>
private List<FlipScoringGraphEdge> InitEdges(AminoAcidSet aminoAcidSet, Dictionary<char, double> aminoAcidProbabilities)
{
    // The adjacency array is sized once from this value, so every bounds check below uses it too.
    var binCount = this.massBins.NumberOfBins;

    var adjList = new LinkedList<FlipScoringGraphEdge>[binCount];
    for (var i = 0; i < binCount; i++)
    {
        adjList[i] = new LinkedList<FlipScoringGraphEdge>();
    }

    var terminalModifications = FilteredProteinMassBinning.GetTerminalModifications(aminoAcidSet);
    var aminoAcidArray = FilteredProteinMassBinning.GetExtendedAminoAcidArray(aminoAcidSet);

    for (var i = 0; i < binCount; i++)
    {
        var fineNodeMass = this.massBins.GetMass(i);

        foreach (var aa in aminoAcidArray)
        {
            var j = this.massBins.GetBinNumber(fineNodeMass + aa.Mass);
            if (j < 0 || j >= binCount)
            {
                continue;
            }

            // Edge weight is log10 of the residue probability; residues without a probability get 0.
            // Single lookup via TryGetValue instead of ContainsKey followed by the indexer.
            double probability;
            var aaWeight = aminoAcidProbabilities.TryGetValue(aa.Residue, out probability)
                ? Math.Log10(probability)
                : 0.0;

            adjList[j].AddLast(new FlipScoringGraphEdge(i, j, aaWeight, aa, null));

            // Terminal modifications are only attached to unmodified residues leaving bin 0.
            if (i != 0 || aa is ModifiedAminoAcid)
            {
                continue;
            }

            foreach (var terminalMod in terminalModifications)
            {
                var modifiedAa = new ModifiedAminoAcid(aa, terminalMod);
                j = this.massBins.GetBinNumber(fineNodeMass + modifiedAa.Mass);
                if (j < 0 || j >= binCount)
                {
                    continue;
                }

                // The modified residue reuses the weight of its unmodified residue.
                adjList[j].AddLast(new FlipScoringGraphEdge(i, j, aaWeight, modifiedAa, null));
            }
        }
    }

    return adjList.SelectMany(edge => edge).ToList();
}
/// <summary>
/// Builds the per-bin adjacency lists of the scoring graph for the given mass binning.
/// </summary>
/// <remarks>
/// The list stored at bin j receives one <c>ScoringGraphEdge(i)</c> for every source bin i and
/// amino acid whose mass, added to the mass of bin i, falls into bin j. From bin 0 only, each
/// unmodified amino acid is also combined with every terminal modification.
/// </remarks>
/// <param name="comparer">Mass binning that supplies bin masses and bin numbers.</param>
/// <param name="aminoAcidSet">Amino acid set the residues and terminal modifications are taken from.</param>
public ProteinScoringGraphFactory(IMassBinning comparer, AminoAcidSet aminoAcidSet)
{
    _comparer = comparer;

    _adjList = new LinkedList<ScoringGraphEdge>[_comparer.NumberOfBins];
    for (var bin = 0; bin < _comparer.NumberOfBins; bin++)
    {
        _adjList[bin] = new LinkedList<ScoringGraphEdge>();
    }

    var terminalMods = FilteredProteinMassBinning.GetTerminalModifications(aminoAcidSet);
    var residues = FilteredProteinMassBinning.GetExtendedAminoAcidArray(aminoAcidSet);

    for (var source = 0; source < _comparer.NumberOfBins; source++)
    {
        var sourceMass = _comparer.GetMass(source);

        foreach (var residue in residues)
        {
            var target = _comparer.GetBinNumber(sourceMass + residue.Mass);
            var targetInRange = target >= 0 && target < _comparer.NumberOfBins;
            if (!targetInRange)
            {
                continue;
            }

            _adjList[target].AddLast(new ScoringGraphEdge(source));

            // Terminal modifications apply only to unmodified residues leaving the first bin.
            var isUnmodifiedAtStart = source == 0 && !(residue is ModifiedAminoAcid);
            if (!isUnmodifiedAtStart)
            {
                continue;
            }

            foreach (var terminalMod in terminalMods)
            {
                var modifiedResidue = new ModifiedAminoAcid(residue, terminalMod);
                var modifiedTarget = _comparer.GetBinNumber(sourceMass + modifiedResidue.Mass);
                if (modifiedTarget >= 0 && modifiedTarget < _comparer.NumberOfBins)
                {
                    _adjList[modifiedTarget].AddLast(new ScoringGraphEdge(source));
                }
            }
        }
    }
}
/// <summary>
/// Computes the root-mean-square deviation between the binned intensity vectors of two spectra.
/// </summary>
/// <param name="spectrum1">Peaks of the first spectrum.</param>
/// <param name="spectrum2">Peaks of the second spectrum.</param>
/// <param name="comparer">Mass binning used to map peak m/z values onto vector indices.</param>
/// <returns>sqrt(sum of squared per-bin intensity differences / number of bins).</returns>
public double RootMeanSquareDeviation(Peak[] spectrum1, Peak[] spectrum2, FilteredProteinMassBinning comparer)
{
    spectrum1 = GuassianFilter(spectrum1, .5);
    spectrum2 = GuassianFilter(spectrum2, .5);

    // The vector length is the bin number of mass 10000.
    var vectorLength = comparer.GetBinNumber(10000.0);
    var intensityVector1 = ConvertToFullIntensityVector(spectrum1, vectorLength, comparer);
    var intensityVector2 = ConvertToFullIntensityVector(spectrum2, vectorLength, comparer);

    // The per-spectrum means that used to be computed here were never read
    // (and the second one mixed spectrum1 with spectrum2), so they are gone.
    var sum = 0d;
    for (var i = 0; i < vectorLength; i++)
    {
        var diff = intensityVector1[i] - intensityVector2[i];
        sum += diff * diff;
    }

    return Math.Sqrt(sum / vectorLength);
}
/// <summary>
/// Computes the normalized dot product (cosine similarity) of the binned intensity
/// vectors of two spectra.
/// </summary>
/// <param name="spectrum1">Peaks of the first spectrum.</param>
/// <param name="spectrum2">Peaks of the second spectrum.</param>
/// <param name="comparer">Mass binning used to map peak m/z values onto vector indices.</param>
/// <returns>
/// dot(v1, v2) / sqrt(|v1|^2 * |v2|^2), or 0 when either vector is all zero.
/// </returns>
public double DotProduct(Peak[] spectrum1, Peak[] spectrum2, FilteredProteinMassBinning comparer)
{
    spectrum1 = GuassianFilter(spectrum1, .5);
    spectrum2 = GuassianFilter(spectrum2, .5);

    // The vector length is the bin number of mass 10000.
    var vectorLength = comparer.GetBinNumber(10000.0);
    var featureVector1 = ConvertToFullIntensityVector(spectrum1, vectorLength, comparer);
    var featureVector2 = ConvertToFullIntensityVector(spectrum2, vectorLength, comparer);

    // Accumulate the dot product and both squared norms in one pass, in index order.
    var sum = 0d;
    var norm1 = 0d;
    var norm2 = 0d;
    for (var i = 0; i < vectorLength; i++)
    {
        var a = featureVector1[i];
        var b = featureVector2[i];
        sum += a * b;
        norm1 += a * a;
        norm2 += b * b;
    }

    // An all-zero vector would make the quotient 0/0 (NaN); report 0 instead, matching
    // how PearsonCorrelation handles vectors without variance.
    if (norm1 <= 0 || norm2 <= 0)
    {
        return 0d;
    }

    return sum / Math.Sqrt(norm1 * norm2);
}
// Rescores the "IcTarget" and "IcDecoy" result files of the QC_Shew intact dataset.
// For every row, the sequence is scored with InformedTopDownScorer, a scoring graph is built
// from the deconvoluted MS2 spectrum, and the spectral E-value of the score is computed.
// The first 15 columns of each row plus score and E-value are written to a "_Rescored.tsv" file.
// The test is ignored when the .pbf file is not present.
public void TestCompositeScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // An earlier revision pointed at a .raw copy under
    // \\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SpecFiles instead.
    const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    // Scoring parameters
    const int maxModsPerProtein = 4;
    const double windowSize = 1.1;
    const int offsetTolerance = 2;
    const int lowestCharge = 1;
    const int highestCharge = 20;

    // Amino acid set with the search modifications, used for mass binning and graph edges
    var oxidizedM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroCys = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var nTermAcetyl = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var mods = new List<SearchModification> { dehydroCys, oxidizedM, nTermAcetyl };
    var modifiedAaSet = new AminoAcidSet(mods, maxModsPerProtein);
    var massBinning = new FilteredProteinMassBinning(modifiedAaSet, 50000, 28);

    var lcmsRun = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var massTolerance = new Tolerance(10);
    var edgeFactory = new ProteinScoringGraphFactory(massBinning, modifiedAaSet);

    // Sequences are parsed and scored against the default amino acid set.
    // (MatchedPeakPostScorer was used here in an earlier revision.)
    var plainAaSet = new AminoAcidSet();
    var topDownScorer = new InformedTopDownScorer(lcmsRun, plainAaSet, lowestCharge, highestCharge, massTolerance);

    var resultKinds = new[] { "IcTarget", "IcDecoy" };
    foreach (var kind in resultKinds)
    {
        var inputPath = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv", kind);
        var tsv = new TsvFileParser(inputPath);

        var scanNumbers = tsv.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
        var chargeStates = tsv.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
        var sequences = tsv.GetData("Sequence").ToArray();
        var modificationStrings = tsv.GetData("Modifications").ToArray();
        var expectedCompositions = tsv.GetData("Composition").Select(Composition.Parse).ToArray();
        var proteinMasses = tsv.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

        var outputPath = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv", kind);
        using (var output = new StreamWriter(outputPath))
        {
            output.WriteLine(string.Join("\t", tsv.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

            // Rows are scored in parallel; each result is stored at its row index so the
            // output keeps the input order.
            var rescored = new string[tsv.NumData];
            Parallel.For(0, tsv.NumData, row =>
            {
                var scanNumber = scanNumbers[row];
                var chargeState = chargeStates[row];
                var sequenceText = sequences[row];
                var modificationText = modificationStrings[row];

                var parsedSequence = Sequence.CreateSequence(sequenceText, modificationText, plainAaSet);
                Assert.True(parsedSequence.Composition.Equals(expectedCompositions[row] - Composition.H2O));

                var productSpectrum = lcmsRun.GetSpectrum(scanNumber) as ProductSpectrum;
                Assert.True(productSpectrum != null);

                var scoreResult = topDownScorer.GetScores(parsedSequence, chargeState, scanNumber);

                var deconvoluted = Deconvoluter.GetDeconvolutedSpectrum(productSpectrum, lowestCharge, highestCharge, offsetTolerance, windowSize, massTolerance, 0.7);
                var compositeScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvoluted, productSpectrum, massTolerance, massBinning);
                var scoringGraph = edgeFactory.CreateScoringGraph(compositeScorer, proteinMasses[row]);

                var generatingFunction = new GeneratingFunction(scoringGraph);
                generatingFunction.ComputeGeneratingFunction();
                var eValue = generatingFunction.GetSpectralEValue(scoreResult.Score);

                var columns = tsv.GetRows()[row].Split('\t').ToArray();
                var leadingColumns = string.Join("\t", columns, 0, 15);

                lock (rescored)
                {
                    rescored[row] = string.Format("{0}\t{1}\t{2}", leadingColumns, scoreResult.Score, eValue);
                }
            });

            for (var row = 0; row < rescored.Length; row++)
            {
                output.WriteLine(rescored[row]);
            }
        }

        Console.WriteLine("Done");
    }
}
public void TestCompositeScoring() { var methodName = MethodBase.GetCurrentMethod().Name; Utils.ShowStarting(methodName); var pbfFilePath = Utils.GetPbfTestFilePath(false); var pbfFile = Utils.GetTestFile(methodName, pbfFilePath); // Configure amino acid set var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false); var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false); var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false); const int numMaxModsPerProtein = 4; var searchModifications = new List <SearchModification> { dehydroC, oxM, acetylN }; var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein); var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28); var run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName); const double filteringWindowSize = 1.1; const int isotopeOffsetTolerance = 2; var tolerance = new Tolerance(10); const int minCharge = 1; const int maxCharge = 20; var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet); var aminoAcidSet = new AminoAcidSet(); //var scorer = new MatchedPeakPostScorer(tolerance, minCharge, maxCharge); var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance); if (pbfFile.DirectoryName == null) { Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName); } var fileExt = new[] { "IcTarget", "IcDecoy" }; foreach (var ext in fileExt) { var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext); var parser = new TsvFileParser(resultFileName); var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray(); var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray(); var protSequences = parser.GetData("Sequence").ToArray(); var modStrs = parser.GetData("Modifications").ToArray(); var 
compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray(); var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray(); var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext); using (var writer = new StreamWriter(outputFileName)) { writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue"); var lines = new string[parser.NumData]; //for (var i = 0; i < parser.NumData; i++) Parallel.For(0, 30, i => { var scan = scans[i]; var charge = charges[i]; var protSequence = protSequences[i]; var modStr = modStrs[i]; var sequence = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet); // Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O)); if (!(run.GetSpectrum(scan) is ProductSpectrum ms2Spec)) { Console.WriteLine("Could not get the spectrum datafor scan {0}", scan); }
// Builds the scoring graph for one scan / protein sequence pair, computes its generating
// function, and prints the score range followed by the spectral E-value of every score from 15
// up to the generating function's maximum score. Elapsed times of the individual stages are
// printed as well. The test is ignored when the .pbf test file does not exist.
public void TestGetScoreDistribution(int scanNum, string protSequence)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var pbfFilePath = Utils.GetPbfTestFilePath(false);
    var pbfFile = Utils.GetTestFile(methodName, pbfFilePath);
    if (!pbfFile.Exists)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pbfFile);
    }

    const string modStr = "";
    const int maxCharge = 20;
    const int minCharge = 1;
    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    var tolerance = new Tolerance(10);
    var run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification> { dehydroC, oxM, acetylN };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

    // Stage 1: edge generation (done by the factory constructor)
    var stopwatch = Stopwatch.StartNew();
    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
    stopwatch.Stop();
    Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    // stopwatch2 measures everything from sequence parsing to the end of the E-value listing.
    var stopwatch2 = Stopwatch.StartNew();

    var sequence = Sequence.CreateSequence(protSequence, modStr, aaSet);
    var proteinMass = sequence.Mass + Composition.H2O.Mass;
    Console.WriteLine("Mass = {0}", proteinMass);

    // The "as" cast yields null when the scan is missing or is not a product spectrum;
    // fail with a clear message instead of passing null on to the deconvoluter.
    var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
    if (spectrum == null)
    {
        Assert.Fail(string.Format("Scan {0} is not available as a ProductSpectrum", scanNum));
    }

    var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(spectrum, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

    // Stage 2: node generation
    stopwatch.Restart();
    var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
    var graph = graphFactory.CreateScoringGraph(scorer, proteinMass);
    stopwatch.Stop();
    Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    // Stage 3: generating function
    stopwatch.Restart();
    var gf = new GeneratingFunction(graph);
    gf.ComputeGeneratingFunction();
    stopwatch.Stop();
    Console.WriteLine(@"computing generation function = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    var scoreDist = gf.GetScoreDistribution();
    Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);

    Console.WriteLine("{0} : {1}", "score", "specEValue");
    for (var score = 15; score <= gf.MaximumScore; score++)
    {
        var specEvalue = gf.GetSpectralEValue(score);
        Console.WriteLine("{0} : {1}", score, specEvalue);
    }

    stopwatch2.Stop();
    Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", stopwatch2.ElapsedMilliseconds / 1000.0d);
}
// Rescores up to the first 30 rows of the "IcTarget" and "IcDecoy" result files that sit next
// to the .pbf test file. For every row with an available product spectrum, the sequence is
// scored with InformedTopDownScorer and the spectral E-value of that score is computed from a
// scoring graph of the deconvoluted spectrum. Up to 20 non-empty result lines are printed to
// the console.
public void TestCompositeScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var pbfFilePath = Utils.GetPbfTestFilePath(false);
    var pbfFile = Utils.GetTestFile(methodName, pbfFilePath);

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification> { dehydroC, oxM, acetylN };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

    var run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);

    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    var tolerance = new Tolerance(10);
    const int minCharge = 1;
    const int maxCharge = 20;

    // Upper limit on the number of rows scored per result file.
    const int maxRowsToScore = 30;

    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
    var aminoAcidSet = new AminoAcidSet();
    var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

    if (pbfFile.DirectoryName == null)
    {
        Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName);
    }

    var fileExt = new[] { "IcTarget", "IcDecoy" };
    foreach (var ext in fileExt)
    {
        var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext);
        var parser = new TsvFileParser(resultFileName);

        var scans = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
        var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
        var protSequences = parser.GetData("Sequence").ToArray();
        var modStrs = parser.GetData("Modifications").ToArray();

        // Only used by the disabled composition check below; still parsed so that a malformed
        // "Composition" column keeps surfacing here.
        var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
        var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

        var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext);
        using (var writer = new StreamWriter(outputFileName))
        {
            // NOTE(review): only this header line is written to the output file; the rescored
            // rows are printed to the console below - confirm that this is intended.
            writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

            var lines = new string[parser.NumData];

            // Never iterate past the rows that actually exist: a result file with fewer than
            // maxRowsToScore rows used to raise IndexOutOfRangeException inside the loop.
            var rowsToScore = Math.Min(maxRowsToScore, parser.NumData);

            Parallel.For(0, rowsToScore, i =>
            {
                var scan = scans[i];
                var charge = charges[i];
                var protSequence = protSequences[i];
                var modStr = modStrs[i];
                var sequence = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);
                // Disabled check: Assert.True(sequence.Composition.Equals(compositions[i] - Composition.H2O));

                var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                if (ms2Spec == null)
                {
                    // Rows without a product spectrum are reported and left empty.
                    Console.WriteLine("Could not get the spectrum datafor scan {0}", scan);
                    return;
                }

                var scores = scorer.GetScores(sequence, charge, scan);

                var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);
                var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance, comparer);
                var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                var gf = new GeneratingFunction(graph);
                gf.ComputeGeneratingFunction();

                var specEvalue = gf.GetSpectralEValue(scores.Score);

                var rowStr = parser.GetRows()[i];
                var items = rowStr.Split('\t').ToArray();
                var newRowStr = string.Join("\t", items, 0, 15);

                // Each iteration writes only its own slot, so no locking is needed.
                lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
            });

            // Show at most 20 of the rows that produced a result.
            foreach (var line in lines.Where(item => !string.IsNullOrWhiteSpace(item)).Take(20))
            {
                Console.WriteLine(line);
            }
        }

        Console.WriteLine("Done");
    }
}
// Builds the scoring graph for scan 5927 of the QC_Shew intact dataset against a fixed protein
// sequence, computes its generating function, and prints the score range followed by the
// spectral E-value of every score from 45 up to the generating function's maximum score.
// Returns silently when the identification file is missing and is ignored when the .pbf file
// is missing.
public void TestGetScoreDistribution()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string idFileFolder = @"D:\MassSpecFiles\training\IdScoring\MSPF_trainset";

    const int scanNum = 5927;
    const string protSequence = "MNKSELIEKIASGADISKAAAGRALDSFIAAVTEGLKEGDKISLVGFGTFEVRERAERTGRNPQTGEEIKIAAAKIPAFKAGKALKDAVN";
    const string modStr = "";

    // The identification file is only checked for existence; its content is not read here.
    var idFile = string.Format(@"{0}\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv", idFileFolder);
    if (!File.Exists(idFile))
    {
        return;
    }

    if (!File.Exists(rawFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
    }

    const int maxCharge = 20;
    const int minCharge = 1;
    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    var tolerance = new Tolerance(10);
    var run = PbfLcMsRun.GetLcMsRun(rawFile);

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification> { dehydroC, oxM, acetylN };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

    // Stage 1: edge generation (done by the factory constructor)
    var stopwatch = Stopwatch.StartNew();
    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
    stopwatch.Stop();
    Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    // stopwatch2 measures everything from sequence parsing to the end of the E-value listing.
    var stopwatch2 = Stopwatch.StartNew();

    var sequence = Sequence.CreateSequence(protSequence, modStr, aaSet);
    var proteinMass = sequence.Mass + Composition.H2O.Mass;
    Console.WriteLine("Mass = {0}", proteinMass);

    // The "as" cast yields null when the scan is missing or is not a product spectrum;
    // fail with a clear message instead of passing null on to the deconvoluter.
    var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
    if (spectrum == null)
    {
        Assert.Fail(string.Format("Scan {0} is not available as a ProductSpectrum", scanNum));
    }

    var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(spectrum, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

    // Stage 2: node generation
    stopwatch.Restart();
    var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
    var graph = graphFactory.CreateScoringGraph(scorer, proteinMass);
    stopwatch.Stop();
    Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    // Stage 3: generating function
    stopwatch.Restart();
    var gf = new GeneratingFunction(graph);
    gf.ComputeGeneratingFunction();
    stopwatch.Stop();
    Console.WriteLine(@"computing generation function = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    var scoreDist = gf.GetScoreDistribution();
    Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);

    for (var score = 45; score <= gf.MaximumScore; score++)
    {
        var specEvalue = gf.GetSpectralEValue(score);
        Console.WriteLine("{0} : {1}", score, specEvalue);
    }

    stopwatch2.Stop();
    Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", (stopwatch2.ElapsedMilliseconds) / 1000.0d);
}