/// <summary>
/// Deconvolutes <paramref name="spec"/> and builds a new spectrum that keeps at most one peak per mass bin.
/// </summary>
/// <param name="spec">Spectrum to deconvolute</param>
/// <param name="minCharge">Minimum charge, forwarded to Deconvoluter.GetDeconvolutedPeaks</param>
/// <param name="maxCharge">Maximum charge, forwarded to Deconvoluter.GetDeconvolutedPeaks</param>
/// <param name="tolerance">Tolerance, forwarded to Deconvoluter.GetDeconvolutedPeaks</param>
/// <param name="corrThreshold">Correlation threshold, forwarded to Deconvoluter.GetDeconvolutedPeaks</param>
/// <param name="isotopeOffsetTolerance">Isotope offset tolerance, forwarded to Deconvoluter.GetDeconvolutedPeaks</param>
/// <param name="filteringWindowSize">Filtering window size, forwarded to Deconvoluter.GetDeconvolutedPeaks</param>
/// <returns>
/// A ProductSpectrum (MS level, activation method and isolation window copied from the input)
/// when <paramref name="spec"/> is a ProductSpectrum; otherwise a plain Spectrum.
/// </returns>
public static Spectrum GetDeconvolutedSpectrum(Spectrum spec, int minCharge, int maxCharge, Tolerance tolerance, double corrThreshold, int isotopeOffsetTolerance, double filteringWindowSize = 1.1)
{
    var candidates = Deconvoluter.GetDeconvolutedPeaks(spec, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, corrThreshold);

    var occupiedBins = new HashSet<int>();
    var keptPeaks = new List<Peak>();

    foreach (var candidate in candidates)
    {
        var candidateMass = candidate.Mass;

        // HashSet.Add returns false when the bin is already taken, so only the
        // first peak encountered for each bin is kept.
        if (occupiedBins.Add(GetBinNumber(candidateMass)))
        {
            keptPeaks.Add(new Peak(candidateMass, candidate.Intensity));
        }
    }

    var sourceAsProduct = spec as ProductSpectrum;
    if (sourceAsProduct == null)
    {
        return new Spectrum(keptPeaks, spec.ScanNum);
    }

    return new ProductSpectrum(keptPeaks, spec.ScanNum)
    {
        MsLevel = spec.MsLevel,
        ActivationMethod = sourceAsProduct.ActivationMethod,
        IsolationWindow = sourceAsProduct.IsolationWindow
    };
}
/// <summary>
/// Reads one scan from the run and returns its combined deconvoluted spectrum.
/// </summary>
/// <param name="scan">Scan number to read</param>
/// <param name="pbfLcMsRun">Run the spectrum is read from</param>
/// <returns>The deconvoluted spectrum, or null when the scan is not a ProductSpectrum</returns>
private DeconvolutedSpectrum GetDeconvolutedSpectrum(int scan, PbfLcMsRun pbfLcMsRun)
{
    // NOTE(review): the names below are inferred from the argument positions of
    // Deconvoluter.GetCombinedDeconvolutedSpectrum - confirm against its signature.
    const int minCharge = 1;
    const int maxCharge = 20;
    const int isotopeOffsetTolerance = 2;
    const double corrScoreThreshold = 0.7;

    var productSpectrum = pbfLcMsRun.GetSpectrum(scan) as ProductSpectrum;

    return productSpectrum == null
        ? null
        : Deconvoluter.GetCombinedDeconvolutedSpectrum(
            productSpectrum,
            minCharge,
            maxCharge,
            isotopeOffsetTolerance,
            new Tolerance(10, ToleranceUnit.Ppm),
            corrScoreThreshold);
}
/// <summary>
/// Builds a scorer for the given scan from its deconvoluted product spectrum.
/// </summary>
/// <param name="scanNum">Scan number to score against</param>
/// <returns>
/// A CompositeScorerBasedOnDeconvolutedSpectrum, or null when the scan is not a
/// ProductSpectrum or deconvolution returns null.
/// </returns>
public IScorer GetScorer(int scanNum)
{
    var productSpectrum = _run.GetSpectrum(scanNum) as ProductSpectrum;
    if (productSpectrum == null)
    {
        return null;
    }

    var deconvoluted = Deconvoluter.GetDeconvolutedSpectrum(
        productSpectrum,
        _minProductCharge,
        _maxProductCharge,
        IsotopeOffsetTolerance,
        FilteringWindowSize,
        _productTolerance);

    if (deconvoluted == null)
    {
        return null;
    }

    return new CompositeScorerBasedOnDeconvolutedSpectrum(deconvoluted, productSpectrum, _productTolerance, _comparer);
}
/// <summary>
/// Builds a scorer for the given scan from its deconvoluted product spectrum.
/// </summary>
/// <param name="scanNum">Scan number to score against</param>
/// <returns>
/// A CompositeScorerBasedOnDeconvolutedSpectrum, or null when the scan is not a
/// ProductSpectrum or deconvolution returns null.
/// </returns>
/// <exception cref="Exception">
/// Wraps any exception raised while reading or deconvoluting the spectrum; the
/// original exception is available as InnerException.
/// </exception>
public IScorer GetScorer(int scanNum)
{
    try
    {
        var productSpectrum = _run.GetSpectrum(scanNum) as ProductSpectrum;
        if (productSpectrum == null)
        {
            return null;
        }

        var deconvoluted = Deconvoluter.GetDeconvolutedSpectrum(
            productSpectrum,
            _minProductCharge,
            _maxProductCharge,
            IsotopeOffsetTolerance,
            FilteringWindowSize,
            _productTolerance);

        if (deconvoluted == null)
        {
            return null;
        }

        return new CompositeScorerBasedOnDeconvolutedSpectrum(deconvoluted, productSpectrum, _productTolerance, _comparer);
    }
    catch (Exception ex)
    {
        // Add the scan number so the failing spectrum can be identified from the message alone.
        var message = string.Format("Error getting the scorer for scan {0} in GetScorer: {1}", scanNum, ex.Message);
        throw new Exception(message, ex);
    }
}
/// <summary>
/// Creates a tag finder for one product spectrum: deconvolutes its peaks, sizes the graph
/// to the number of deconvoluted peaks and collects the sequence tag graph edges.
/// </summary>
/// <param name="spec">Product spectrum to search for sequence tags</param>
/// <param name="tolerance">Tolerance stored for later use and passed to the deconvoluter</param>
/// <param name="minTagLength">Minimum tag length</param>
/// <param name="maxTagLength">Maximum tag length; passed to the base constructor</param>
/// <param name="aminoAcidsArray">Amino acids to use; AminoAcid.StandardAminoAcidArr when null</param>
/// <exception cref="Exception">Thrown when the amino acid array has more than Byte.MaxValue + 1 entries</exception>
public SequenceTagFinder(ProductSpectrum spec, Tolerance tolerance, int minTagLength = 5, int maxTagLength = 8, AminoAcid[] aminoAcidsArray = null)
    : base(maxTagLength)
{
    var baseIonTypes = spec.ActivationMethod == ActivationMethod.ETD ? BaseIonTypesETD : BaseIonTypesCID;
    var neutralLosses = new List<NeutralLoss> { NeutralLoss.NoLoss };
    var ionTypeFactory = new IonTypeFactory(baseIonTypes, neutralLosses, MaxCharge);

    // This call is used to validate the ion types returned by ionTypeFactory; its result is not needed.
    // ReSharper disable once UnusedVariable
    var knownIonTypes = ionTypeFactory.GetAllKnownIonTypes().ToArray();

    _aminoAcidsArray = aminoAcidsArray ?? AminoAcid.StandardAminoAcidArr;
    _tolerance = tolerance;

    if (_aminoAcidsArray.Length - 1 > byte.MaxValue)
    {
        throw new Exception("Too many amino acid types");
    }

    // Track the heaviest and lightest amino acid, starting from the same seeds as before
    // (0 and 10E4) so an empty array leaves the seeds in place.
    var heaviest = 0d;
    var lightest = 10E4;
    foreach (var aminoAcid in _aminoAcidsArray)
    {
        var residueMass = aminoAcid.Composition.Mass;
        if (residueMass > heaviest)
        {
            heaviest = residueMass;
        }

        if (residueMass < lightest)
        {
            lightest = residueMass;
        }
    }

    _maxAminoAcidMass = heaviest;
    _minAminoAcidMass = lightest;

    _minTagLength = minTagLength;
    _spectrum = spec;

    _deconvolutedPeaks = Deconvoluter.GetDeconvolutedPeaks(
        _spectrum.ScanNum,
        _spectrum.Peaks,
        MinCharge,
        MaxCharge,
        IsotopeOffsetTolerance,
        1.1,
        _tolerance,
        0.7);

    // The node count must be set before the edges are collected.
    SetNodeCount(_deconvolutedPeaks.Count);
    CollectSequenceTagGraphEdges();

    _seqTagSet = new HashSet<SequenceTag>();
    NumberOfProcessedPaths = 0;
    MaxNumberOfProcessedPaths = 1024;
}
/// <summary>
/// Counts the scans returned by GetScanNumbers(2) that are product spectra with at least
/// 50 peaks and a known isolation-window charge, and prints the count.
/// </summary>
public void CountScansTest()
{
    var file = @"C:\Users\wilk011\Documents\DataFiles\MSPF\Ecoli_Ribosome\Ecoli_intact_UVPD-3pulse0p5mJ_05-20-2017.pbf";

    var pbfLcmsRun = PbfLcMsRun.GetLcMsRun(file);
    var deconvoluter = new Deconvoluter(1, 20, 2, 0.1, new Tolerance(10, ToleranceUnit.Ppm));
    var lcmsRunDecon = new LcmsRunDeconvoluter(pbfLcmsRun, deconvoluter, 2, 6);

    // NOTE(review): dlcms is never read afterwards. It is kept because constructing it
    // may have side effects this test relies on - confirm, and dispose it if it is disposable.
    // ReSharper disable once UnusedVariable
    var dlcms = new DPbfLcMsRun(file, lcmsRunDecon, keepDataReaderOpen: true);

    var count = 0;
    foreach (var scan in pbfLcmsRun.GetScanNumbers(2))
    {
        var spectrum = pbfLcmsRun.GetSpectrum(scan) as ProductSpectrum;

        // The 'as' cast yields null when the scan is not a ProductSpectrum; skip those
        // instead of failing with a NullReferenceException on spectrum.Peaks.
        if (spectrum == null)
        {
            continue;
        }

        if (spectrum.Peaks.Length < 50 || spectrum.IsolationWindow.Charge == null)
        {
            continue;
        }

        count++;
    }

    Console.WriteLine(count);
}
/// <summary>
/// Re-scores the identifications in the IcTarget and IcDecoy result files: for every row it
/// computes the score and the spectral E-value from a generating function over the
/// deconvoluted spectrum, then writes the first 15 columns plus Score and EValue to a
/// "_Rescored.tsv" file. The test is ignored when the spectrum file is not present.
/// </summary>
public void TestCompositeScoring()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFilePath = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    // Configure amino acid set
    const int numMaxModsPerProtein = 4;
    var oxidationM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylNTerm = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var modifiedAaSet = new AminoAcidSet(
        new List<SearchModification> { dehydroC, oxidationM, acetylNTerm },
        numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(modifiedAaSet, 50000, 28);

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    const int minCharge = 1;
    const int maxCharge = 20;
    var tolerance = new Tolerance(10);

    var graphFactory = new ProteinScoringGraphFactory(comparer, modifiedAaSet);
    var plainAaSet = new AminoAcidSet();
    var scorer = new InformedTopDownScorer(run, plainAaSet, minCharge, maxCharge, tolerance);

    foreach (var ext in new[] { "IcTarget", "IcDecoy" })
    {
        var resultFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}.tsv", ext);
        var parser = new TsvFileParser(resultFileName);

        var scans = parser.GetData("Scan").Select(text => Convert.ToInt32(text)).ToArray();
        var charges = parser.GetData("Charge").Select(text => Convert.ToInt32(text)).ToArray();
        var protSequences = parser.GetData("Sequence").ToArray();
        var modStrs = parser.GetData("Modifications").ToArray();
        var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
        var protMass = parser.GetData("Mass").Select(text => Convert.ToDouble(text)).ToArray();

        var outputFileName = string.Format(@"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_{0}_Rescored.tsv", ext);
        using (var writer = new StreamWriter(outputFileName))
        {
            var originalHeader = string.Join("\t", parser.GetHeaders().ToArray(), 0, 15);
            writer.WriteLine(originalHeader + "\tScore\tEValue");

            // Rows are scored in parallel into a slot per row and written afterwards,
            // so the output keeps the input row order.
            var rescoredRows = new string[parser.NumData];
            Parallel.For(0, parser.NumData, rowIndex =>
            {
                var scan = scans[rowIndex];
                var charge = charges[rowIndex];
                var sequence = Sequence.CreateSequence(protSequences[rowIndex], modStrs[rowIndex], plainAaSet);
                Assert.True(sequence.Composition.Equals(compositions[rowIndex] - Composition.H2O));

                var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                Assert.True(ms2Spec != null);

                var scores = scorer.GetScores(sequence, charge, scan);

                var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);
                var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance, comparer);
                var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[rowIndex]);

                var gf = new GeneratingFunction(graph);
                gf.ComputeGeneratingFunction();
                var specEvalue = gf.GetSpectralEValue(scores.Score);

                var columns = parser.GetRows()[rowIndex].Split('\t');
                var firstColumns = string.Join("\t", columns, 0, 15);

                lock (rescoredRows)
                {
                    rescoredRows[rowIndex] = string.Format("{0}\t{1}\t{2}", firstColumns, scores.Score, specEvalue);
                }
            });

            foreach (var row in rescoredRows)
            {
                writer.WriteLine(row);
            }
        }

        Console.WriteLine("Done");
    }
}
/// <summary>
/// Get correctly filtered and/or de-convoluted spectrum
/// </summary>
/// <remarks>
/// The filtered, deconvoluted and filtered+deconvoluted variants are each computed on first
/// request and cached in their own field. When ShowOnlyTop20Peaks is set, the result is
/// reduced to the 20 most intense peaks, ordered by m/z.
/// </remarks>
/// <returns>Filtered and/or de-convoluted spectrum</returns>
private Spectrum GetSpectrum()
{
    var currentSpectrum = this.Spectrum;

    // Product spectra use the product ion tolerance; everything else uses the precursor tolerance.
    var tolerance = (currentSpectrum is ProductSpectrum)
                        ? IcParameters.Instance.ProductIonTolerancePpm
                        : IcParameters.Instance.PrecursorTolerancePpm;

    if (this.ShowFilteredSpectrum && this.ShowDeconvolutedSpectrum)
    {
        if (this.filteredDeconvolutedSpectrum == null)
        {
            // Filter a copy, then deconvolute that filtered copy. The deconvolution result
            // used to be stored in deconvolutedSpectrum, which left this cache holding a
            // spectrum that was filtered but never deconvoluted.
            var filtered = new Spectrum(currentSpectrum.Peaks, currentSpectrum.ScanNum);
            filtered.FilterNosieByIntensityHistogram();

            this.filteredDeconvolutedSpectrum = Deconvoluter.GetCombinedDeconvolutedSpectrum(
                filtered,
                Constants.MinCharge,
                Constants.MaxCharge,
                Constants.IsotopeOffsetTolerance,
                tolerance,
                IcParameters.Instance.IonCorrelationThreshold);
        }

        currentSpectrum = this.filteredDeconvolutedSpectrum;
    }
    else if (this.ShowFilteredSpectrum)
    {
        if (this.filteredSpectrum == null)
        {
            this.filteredSpectrum = new Spectrum(currentSpectrum.Peaks, currentSpectrum.ScanNum);
            this.filteredSpectrum.FilterNosieByIntensityHistogram();
        }

        currentSpectrum = this.filteredSpectrum;
    }
    else if (this.ShowDeconvolutedSpectrum)
    {
        if (this.deconvolutedSpectrum == null)
        {
            this.deconvolutedSpectrum = Deconvoluter.GetCombinedDeconvolutedSpectrum(
                currentSpectrum,
                Constants.MinCharge,
                Constants.MaxCharge,
                Constants.IsotopeOffsetTolerance,
                tolerance,
                IcParameters.Instance.IonCorrelationThreshold);
        }

        currentSpectrum = this.deconvolutedSpectrum;
    }

    if (this.ShowOnlyTop20Peaks)
    {
        // Pick the 20 most intense peaks, then restore m/z order for the new spectrum.
        var top20Peaks = currentSpectrum.Peaks
            .OrderByDescending(p => p.Intensity)
            .Take(20)
            .OrderBy(p => p.Mz)
            .ToList();
        currentSpectrum = new Spectrum(top20Peaks, currentSpectrum.ScanNum);
    }

    return currentSpectrum;
}
} // true: target and decoy, false: target only, null: decoy only

/// <summary>
/// Quick identification pass: marks, per mass bin, which MS2 scans contain a deconvoluted
/// peak within tolerance of that bin, then counts for every intact protein (with and without
/// one N-terminal cleavage) how many of its fragment masses hit each scan. Prints the best
/// scoring protein per scan.
/// </summary>
public void QuickId()
{
    const string rawFilePath = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3.raw";
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
    const string modFilePath = @"H:\Research\QCShew_TopDown\Production\Mods.txt";
    const int numBits = 29; // max error: 4ppm
    const int minCharge = 1;
    const int maxCharge = 20;
    var tolerance = new Tolerance(10);
    const double corrThreshold = 0.7;

    var comparer = new MzComparerWithBinning(numBits);

    const double minFragmentMass = 200.0;
    const double maxFragmentMass = 50000.0;
    var minFragMassBin = comparer.GetBinNumber(minFragmentMass);
    var maxFragMassBin = comparer.GetBinNumber(maxFragmentMass);

    var aminoAcidSet = new AminoAcidSet(modFilePath);

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var ms2ScanNumArr = run.GetScanNumbers(2).ToArray();

    var sw = new Stopwatch();
    sw.Start();
    Console.Write("Building Spectrum Arrays...");

    // One bit vector per mass bin, indexed by scan number: bit set = the scan has a peak in that bin.
    var massVectors = new BitArray[maxFragMassBin - minFragMassBin + 1];
    for (var i = minFragMassBin; i <= maxFragMassBin; i++)
    {
        massVectors[i - minFragMassBin] = new BitArray(run.MaxLcScan + 1);
    }

    foreach (var ms2ScanNum in ms2ScanNumArr)
    {
        var productSpec = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
        if (productSpec == null)
        {
            continue;
        }

        var deconvolutedPeaks = Deconvoluter.GetDeconvolutedPeaks(productSpec.Peaks, minCharge, maxCharge, 2, 1.1, tolerance, corrThreshold);
        if (deconvolutedPeaks == null)
        {
            continue;
        }

        foreach (var p in deconvolutedPeaks)
        {
            // Mark every bin overlapping [mass - tolerance, mass + tolerance].
            var mass = p.Mass;
            var deltaMass = tolerance.GetToleranceAsDa(mass, 1);
            var minMass = mass - deltaMass;
            var maxMass = mass + deltaMass;

            var minBinNum = comparer.GetBinNumber(minMass);
            var maxBinNum = comparer.GetBinNumber(maxMass);
            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                if (binNum >= minFragMassBin && binNum <= maxFragMassBin)
                {
                    massVectors[binNum - minFragMassBin][ms2ScanNum] = true;
                }
            }
        }
    }

    sw.Stop();
    Console.WriteLine(@"{0:f1} sec.", sw.Elapsed.TotalSeconds);

    sw.Reset();
    sw.Start();

    var fastaDb = new FastaDatabase(fastaFilePath);
    fastaDb.Read();
    var indexedDb = new IndexedDatabase(fastaDb);
    var numProteins = 0;
    var intactProteinAnnotationAndOffsets = indexedDb.IntactSequenceAnnotationsAndOffsets(0, int.MaxValue);

    var bestProtein = new string[run.MaxLcScan + 1];
    var bestScore = new int[run.MaxLcScan + 1];

    foreach (var annotationAndOffset in intactProteinAnnotationAndOffsets)
    {
        if (++numProteins % 10 == 0)
        {
            Console.WriteLine(@"Processing, {0} proteins done, {1:f1} sec elapsed", numProteins, sw.Elapsed.TotalSeconds);
        }

        var annotation = annotationAndOffset.Annotation;
        var offset = annotationAndOffset.Offset;

        // Drop the first two and last two characters of the annotation to get the bare sequence.
        var protSequence = annotation.Substring(2, annotation.Length - 4);

        // suffix
        var seqGraph = SequenceGraph.CreateGraph(aminoAcidSet, AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
        if (seqGraph == null)
        {
            continue;
        }

        for (var numNTermCleavage = 0; numNTermCleavage <= 1; numNTermCleavage++)
        {
            if (numNTermCleavage > 0)
            {
                seqGraph.CleaveNTerm();
            }

            var allCompositions = seqGraph.GetAllFragmentNodeCompositions();

            // Per-scan count of fragment masses of this protein form that hit a marked bin.
            var scoreArr = new int[run.MaxLcScan + 1];
            foreach (var fragComp in allCompositions)
            {
                var suffixMass = fragComp.Mass + BaseIonType.Y.OffsetComposition.Mass;
                var binNum = comparer.GetBinNumber(suffixMass);
                if (binNum < minFragMassBin || binNum > maxFragMassBin)
                {
                    continue;
                }

                var vector = massVectors[binNum - minFragMassBin];
                foreach (var ms2ScanNum in ms2ScanNumArr)
                {
                    if (vector[ms2ScanNum])
                    {
                        ++scoreArr[ms2ScanNum];
                    }
                }
            }

            foreach (var ms2ScanNum in ms2ScanNumArr)
            {
                if (scoreArr[ms2ScanNum] > bestScore[ms2ScanNum])
                {
                    bestScore[ms2ScanNum] = scoreArr[ms2ScanNum];
                    var proteinName = fastaDb.GetProteinName(offset);

                    // A trailing apostrophe marks the N-terminally cleaved form.
                    bestProtein[ms2ScanNum] = proteinName + (numNTermCleavage == 1 ? "'" : "");
                }
            }
        }

        // prefix
    }

    Console.WriteLine("ScanNum\tBestProtein\tScore");
    foreach (var ms2ScanNum in ms2ScanNumArr)
    {
        // Column order follows the header: the protein name comes before the score.
        Console.WriteLine("{0}\t{1}\t{2}", ms2ScanNum, bestProtein[ms2ScanNum] ?? "", bestScore[ms2ScanNum]);
    }
}
/// <summary>
/// Builds the scoring graph for one scan / protein sequence pair, computes its generating
/// function and prints the score range plus the spectral E-value of every score from 15
/// up to the maximum score, with timings for each stage.
/// </summary>
/// <param name="scanNum">Scan number of the spectrum to score against</param>
/// <param name="protSequence">Unmodified protein sequence</param>
public void TestGetScoreDistribution(int scanNum, string protSequence)
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var pbfFile = Utils.GetTestFile(methodName, Utils.GetPbfTestFilePath(false));
    if (!pbfFile.Exists)
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, pbfFile);
    }

    const string modStr = "";
    const int minCharge = 1;
    const int maxCharge = 20;
    const int isotopeOffsetTolerance = 2;
    const double filteringWindowSize = 1.1;
    var tolerance = new Tolerance(10);

    var run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);

    // Configure amino acid set
    const int numMaxModsPerProtein = 4;
    var oxidationM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylNTerm = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);
    var aaSet = new AminoAcidSet(
        new List<SearchModification> { dehydroC, oxidationM, acetylNTerm },
        numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

    // Stage 1: graph factory construction
    var stageTimer = Stopwatch.StartNew();
    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
    stageTimer.Stop();
    Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", stageTimer.ElapsedMilliseconds / 1000.0d);

    // totalTimer covers everything from sequence creation through the E-value printout.
    var totalTimer = Stopwatch.StartNew();

    var sequence = Sequence.CreateSequence(protSequence, modStr, aaSet);
    var proteinMass = sequence.Mass + Composition.H2O.Mass;
    Console.WriteLine("Mass = {0}", proteinMass);

    var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
    var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(spectrum, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

    // Stage 2: scorer and scoring graph (deconvolution above is deliberately not timed here)
    stageTimer.Reset();
    stageTimer.Start();
    var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
    var graph = graphFactory.CreateScoringGraph(scorer, proteinMass);
    stageTimer.Stop();
    Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", stageTimer.ElapsedMilliseconds / 1000.0d);

    // Stage 3: generating function
    stageTimer.Restart();
    var gf = new GeneratingFunction(graph);
    gf.ComputeGeneratingFunction();
    stageTimer.Stop();
    Console.WriteLine(@"computing generation function = {0:0.000} sec", stageTimer.ElapsedMilliseconds / 1000.0d);

    var scoreDist = gf.GetScoreDistribution();
    Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);

    Console.WriteLine("{0} : {1}", "score", "specEValue");
    var score = 15;
    while (score <= gf.MaximumScore)
    {
        Console.WriteLine("{0} : {1}", score, gf.GetSpectralEValue(score));
        score++;
    }

    totalTimer.Stop();
    Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", totalTimer.ElapsedMilliseconds / 1000.0d);
}
/// <summary>
/// Re-scores up to the first 30 identifications of the IcTarget and IcDecoy result files that
/// sit next to the pbf test file: computes the score and the spectral E-value from a generating
/// function over the deconvoluted spectrum, and prints up to 20 of the re-scored rows.
/// The test is ignored when the pbf file is missing or its directory cannot be determined.
/// </summary>
public void TestCompositeScoring()
{
    // Upper bound on the number of result rows that are re-scored per file.
    const int maxRowsToScore = 30;

    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var pbfFilePath = Utils.GetPbfTestFilePath(false);
    var pbfFile = Utils.GetTestFile(methodName, pbfFilePath);

    // Skip, rather than fail while opening the run, when the test data is not available.
    if (!pbfFile.Exists)
    {
        Assert.Ignore("Skipping test " + methodName + " since file not found: " + pbfFile.FullName);
    }

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification> { dehydroC, oxM, acetylN };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

    var run = PbfLcMsRun.GetLcMsRun(pbfFile.FullName);

    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    var tolerance = new Tolerance(10);
    const int minCharge = 1;
    const int maxCharge = 20;

    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
    var aminoAcidSet = new AminoAcidSet();
    var scorer = new InformedTopDownScorer(run, aminoAcidSet, minCharge, maxCharge, tolerance);

    if (pbfFile.DirectoryName == null)
    {
        Assert.Ignore("Ignoring test since cannot determine the parent directory of " + pbfFile.FullName);
    }

    var fileExt = new string[] { "IcTarget", "IcDecoy" };
    foreach (var ext in fileExt)
    {
        var resultFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}.tsv", ext);
        var parser = new TsvFileParser(resultFileName);

        var scans = parser.GetData("Scan").Select(s => Convert.ToInt32((string)s)).ToArray();
        var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();
        var protSequences = parser.GetData("Sequence").ToArray();
        var modStrs = parser.GetData("Modifications").ToArray();

        // Not read below (the composition assert is disabled); kept so the Composition
        // column is still parsed as before.
        // ReSharper disable once UnusedVariable
        var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
        var protMass = parser.GetData("Mass").Select(s => Convert.ToDouble(s)).ToArray();

        var outputFileName = Path.Combine(pbfFile.DirectoryName, Path.GetFileNameWithoutExtension(pbfFile.Name)) + string.Format("_{0}_Rescored.tsv", ext);
        using (var writer = new StreamWriter(outputFileName))
        {
            writer.WriteLine(string.Join("\t", parser.GetHeaders().ToArray(), 0, 15) + "\tScore\tEValue");

            var lines = new string[parser.NumData];

            // Fetch the rows once instead of once per parallel iteration.
            var rows = parser.GetRows();

            // Never index past the parsed rows: result files may hold fewer than maxRowsToScore entries.
            var rowsToScore = Math.Min(maxRowsToScore, parser.NumData);

            Parallel.For(0, rowsToScore, i =>
            {
                var scan = scans[i];
                var charge = charges[i];
                var protSequence = protSequences[i];
                var modStr = modStrs[i];
                var sequence = Sequence.CreateSequence(protSequence, modStr, aminoAcidSet);

                var ms2Spec = run.GetSpectrum(scan) as ProductSpectrum;
                if (ms2Spec == null)
                {
                    Console.WriteLine("Could not get the spectrum datafor scan {0}", scan);
                    return;
                }

                var scores = scorer.GetScores(sequence, charge, scan);

                var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(ms2Spec, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);
                var deconvScorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, ms2Spec, tolerance, comparer);
                var graph = graphFactory.CreateScoringGraph(deconvScorer, protMass[i]);

                var gf = new GeneratingFunction(graph);
                gf.ComputeGeneratingFunction();

                var specEvalue = gf.GetSpectralEValue(scores.Score);

                var items = rows[i].Split('\t');
                var newRowStr = string.Join("\t", items, 0, 15);

                // Each iteration writes only its own slot, so no locking is needed.
                lines[i] = string.Format("{0}\t{1}\t{2}", newRowStr, scores.Score, specEvalue);
            });

            // Slots of skipped or unscored rows stay null and are filtered out here.
            foreach (var line in lines.Where(item => !string.IsNullOrWhiteSpace(item)).Take(20))
            {
                Console.WriteLine(line);
            }
        }

        Console.WriteLine("Done");
    }
}
/// <summary>
/// Builds the scoring graph for scan 5927 and a fixed protein sequence, computes its generating
/// function and prints the score range plus the spectral E-value of every score from 45 up to
/// the maximum score, with timings for each stage. The test is ignored when either the
/// identification file or the spectrum file is not present.
/// </summary>
public void TestGetScoreDistribution()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFile = @"D:\MassSpecFiles\training\raw\QC_Shew_Intact_26Sep14_Bane_C2Column3.pbf";
    const string idFileFolder = @"D:\MassSpecFiles\training\IdScoring\MSPF_trainset";

    const int scanNum = 5927;
    const string protSequence = "MNKSELIEKIASGADISKAAAGRALDSFIAAVTEGLKEGDKISLVGFGTFEVRERAERTGRNPQTGEEIKIAAAKIPAFKAGKALKDAVN";
    const string modStr = "";

    var idFile = string.Format(@"{0}\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv", idFileFolder);

    // Report the test as ignored instead of returning silently (which would count as a pass),
    // matching the handling of the missing spectrum file below.
    if (!File.Exists(idFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, idFile);
    }

    if (!File.Exists(rawFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFile);
    }

    const int maxCharge = 20;
    const int minCharge = 1;
    const double filteringWindowSize = 1.1;
    const int isotopeOffsetTolerance = 2;
    var tolerance = new Tolerance(10);
    var run = PbfLcMsRun.GetLcMsRun(rawFile);

    // Configure amino acid set
    var oxM = new SearchModification(Modification.Oxidation, 'M', SequenceLocation.Everywhere, false);
    var dehydroC = new SearchModification(Modification.Dehydro, 'C', SequenceLocation.Everywhere, false);
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, false);

    const int numMaxModsPerProtein = 4;
    var searchModifications = new List<SearchModification> { dehydroC, oxM, acetylN };
    var aaSet = new AminoAcidSet(searchModifications, numMaxModsPerProtein);
    var comparer = new FilteredProteinMassBinning(aaSet, 50000, 28);

    var stopwatch = Stopwatch.StartNew();
    var graphFactory = new ProteinScoringGraphFactory(comparer, aaSet);
    stopwatch.Stop();
    Console.WriteLine(@"edge generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    // stopwatch2 covers everything from sequence creation through the E-value printout.
    var stopwatch2 = Stopwatch.StartNew();

    var sequence = Sequence.CreateSequence(protSequence, modStr, aaSet);
    var proteinMass = sequence.Mass + Composition.H2O.Mass;
    Console.WriteLine("Mass = {0}", proteinMass);

    var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
    var deconvSpec = Deconvoluter.GetDeconvolutedSpectrum(spectrum, minCharge, maxCharge, isotopeOffsetTolerance, filteringWindowSize, tolerance, 0.7);

    stopwatch.Restart();
    var scorer = new CompositeScorerBasedOnDeconvolutedSpectrum(deconvSpec, spectrum, tolerance, comparer);
    var graph = graphFactory.CreateScoringGraph(scorer, proteinMass);
    stopwatch.Stop();
    Console.WriteLine(@"node generation elapsed time = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    stopwatch.Reset();
    stopwatch.Start();
    var gf = new GeneratingFunction(graph);
    gf.ComputeGeneratingFunction();
    stopwatch.Stop();
    Console.WriteLine(@"computing generation function = {0:0.000} sec", (stopwatch.ElapsedMilliseconds) / 1000.0d);

    var scoreDist = gf.GetScoreDistribution();
    Console.WriteLine("{0}-{1}", scoreDist.MinScore, scoreDist.MaxScore);

    for (var score = 45; score <= gf.MaximumScore; score++)
    {
        var specEvalue = gf.GetSpectralEValue(score);
        Console.WriteLine("{0} : {1}", score, specEvalue);
    }

    stopwatch2.Stop();
    Console.WriteLine(@"TOTAL computing generation function = {0:0.000} sec", (stopwatch2.ElapsedMilliseconds) / 1000.0d);
}