/// <summary>
/// Scans every precursor feature found at the precursor ion's m/z and keeps the one whose
/// combined score (precursor score + product ion score) is highest.
/// Features whose precursor score is below PrecursorScoreThreshold are not considered.
/// </summary>
/// <param name="precursorCharge">Charge state used to build the precursor ion.</param>
/// <returns>
/// (Feature, precursorScore, totalScore) of the winning feature. When no feature qualifies,
/// the feature is null and both scores are double.NegativeInfinity.
/// </returns>
public Tuple<Feature, double, double> GetBestFeatureAndScore(int precursorCharge)
{
    var ion = new Ion(_sequenceComposition + Composition.H2O, precursorCharge);
    var scorer = _imsScorerFactory.GetImsScorer(_imsData, ion);

    Feature topFeature = null;
    var topPrecursorScore = double.NegativeInfinity;
    var topTotalScore = double.NegativeInfinity;

    foreach (var candidate in _imsData.GetPrecursorFeatures(ion.GetMz()))
    {
        var candidatePrecursorScore = scorer.GetPrecursorScore(candidate);
        if (candidatePrecursorScore < PrecursorScoreThreshold)
        {
            continue;
        }

        var candidateTotalScore = candidatePrecursorScore + GetProductIonScore(scorer, candidate);
        if (candidateTotalScore > topTotalScore)
        {
            topFeature = candidate;
            topPrecursorScore = candidatePrecursorScore;
            topTotalScore = candidateTotalScore;
        }
    }

    return new Tuple<Feature, double, double>(topFeature, topPrecursorScore, topTotalScore);
}
/// <summary>
/// Looks up the isotope peaks of the b127 (9+) fragment ion in scan 17338 of a local raw file
/// and prints, for every matched isotope, the observed m/z, the theoretical m/z and the ppm error.
/// The test is ignored when the raw file is not present on the machine.
/// </summary>
public void TestGetAllIsotopePeaks()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string specFilePath = @"H:\Research\GlycoTopDown\raw\User_sample_test_02252015.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    const int scanNum = 17338;
    const double relativeIntensity = 0.1;

    var run = PbfLcMsRun.GetLcMsRun(specFilePath);
    var spec = run.GetSpectrum(scanNum);

    var comp = Composition.Parse("C(610) H(945) N(172) O(189) S(3)");
    var ion = new Ion(comp + BaseIonType.B.OffsetComposition, 9); // b127(9+)

    Console.WriteLine("Composition: " + comp + " " + comp.Mass);
    Console.WriteLine("b127(9+): " + ion.GetMonoIsotopicMz());
    Console.WriteLine("b127(9+) 0th isotope: " + ion.GetIsotopeMz(0));
    Console.WriteLine("b127(9+) 6th isotope: " + ion.GetIsotopeMz(6));

    var peaks = spec.GetAllIsotopePeaks(ion, new Tolerance(10), relativeIntensity);

    // Other callers of GetAllIsotopePeaks treat a null result as "ion not found";
    // fail with a clear assertion instead of a NullReferenceException below.
    Assert.True(peaks != null);

    var isotopes = ion.GetIsotopes(relativeIntensity).ToArray();
    foreach (var isotope in isotopes)
    {
        // The peak array is addressed by isotope index, so the null check must use
        // the same index that is dereferenced afterwards.
        var isotopeIndex = isotope.Index;
        var peak = peaks[isotopeIndex];
        if (peak == null)
        {
            continue;
        }

        var theoreticalMz = ion.GetIsotopeMz(isotopeIndex);
        Console.WriteLine("{0}\t{1}\t{2}\t{3}", isotopeIndex, peak.Mz, theoreticalMz, GetPeakPpmError(peak, theoreticalMz));
    }
}
/// <summary>
/// Checks whether the given scan supports the presence of the precursor ion.
/// </summary>
/// <param name="precursorIon">Ion to look for.</param>
/// <param name="scanNum">Scan number to inspect.</param>
/// <returns>
/// false when the scan number is outside [Run.MinLcScan, Run.MaxLcScan];
/// true when the scan is not an MS1 scan (no check is performed in that case);
/// otherwise whether the spectrum exists and contains the ion.
/// </returns>
private bool IsValidForMs1Scan(Ion precursorIon, int scanNum)
{
    var isInRun = scanNum >= Run.MinLcScan && scanNum <= Run.MaxLcScan;
    if (!isInRun)
    {
        return false;
    }

    // Scans that are not MS1 are accepted without looking at their peaks.
    if (Run.GetMsLevel(scanNum) != 1)
    {
        return true;
    }

    var ms1Spectrum = Run.GetSpectrum(scanNum);
    if (ms1Spectrum == null)
    {
        return false;
    }

    return ms1Spectrum.ContainsIon(precursorIon, MzTolerance, RelativeIsotopeIntensityThreshold);
}
/// <summary>
/// Counts how many base ion types have at least one charge state whose ion is found in the
/// MS2 spectrum for the given cleavage.
/// </summary>
/// <param name="prefixFragmentComposition">Composition of the prefix side of the cleavage.</param>
/// <param name="suffixFragmentComposition">Composition of the suffix side of the cleavage.</param>
/// <returns>One point per base ion type that was observed at any charge.</returns>
public override double GetFragmentScore(Composition prefixFragmentComposition, Composition suffixFragmentComposition)
{
    var matchedIonTypes = 0.0;

    foreach (var ionType in BaseIonTypes)
    {
        var terminalComposition = ionType.IsPrefix ? prefixFragmentComposition : suffixFragmentComposition;
        var fragment = terminalComposition + ionType.OffsetComposition;

        // Fragments lighter than the first peak's m/z are skipped.
        if (fragment.Mass < Ms2Spectrum.Peaks[0].Mz)
        {
            continue;
        }

        var charges = GetMinMaxChargeRange(fragment);
        var observed = false;
        var z = charges.Min;
        while (!observed && z <= charges.Max)
        {
            observed = Ms2Spectrum.ContainsIon(new Ion(fragment, z), Tolerance, RelativeIsotopeIntensityThreshold);
            z++;
        }

        if (observed)
        {
            matchedIonTypes += 1.0;
        }
    }

    return matchedIonTypes;
}
/// <summary>
/// Builds the group parameter (mass index, charge, location index, flanking residue index)
/// for a cleavage of the given precursor ion.
/// </summary>
/// <param name="cutComposition">Composition at the cleavage; passed to GetLocationIndex.</param>
/// <param name="nTermAA">Residue passed to GetFlankingResidueIndex as the N-terminal side.</param>
/// <param name="cTermAA">Residue passed to GetFlankingResidueIndex as the C-terminal side.</param>
/// <param name="precursorIon">Precursor ion supplying composition and charge.</param>
public GroupParameter(Composition cutComposition, char nTermAA, char cTermAA, Ion precursorIon)
{
    var precursorComposition = precursorIon.Composition;

    MassIndex = GetMassIndex(precursorComposition);

    // Cap the charge at MaxCharge first, then raise it to at least MinCharge.
    var cappedCharge = Math.Min(precursorIon.Charge, MaxCharge);
    Charge = Math.Max(MinCharge, cappedCharge);

    LocationIndex = GetLocationIndex(precursorComposition, cutComposition);
    FlankingResidueIndex = GetFlankingResidueIndex(nTermAA, cTermAA);
}
/// <summary>
/// Sums, over all base ion types, the score of the best cosine match found across the
/// charge range [_minCharge, _maxCharge] for the fragment produced by this cleavage.
/// </summary>
/// <param name="prefixFragmentComposition">Composition of the prefix side of the cleavage.</param>
/// <param name="suffixFragmentComposition">Composition of the suffix side of the cleavage.</param>
/// <returns>Sum of GetScore over all base ion types.</returns>
public double GetFragmentScore(Composition prefixFragmentComposition, Composition suffixFragmentComposition)
{
    var total = 0.0;

    foreach (var ionType in _baseIonTypes)
    {
        var terminalComposition = ionType.IsPrefix ? prefixFragmentComposition : suffixFragmentComposition;
        var fragment = terminalComposition + ionType.OffsetComposition;

        fragment.ComputeApproximateIsotopomerEnvelop();
        // The envelope itself is not used here; the call is retained from the original code.
        fragment.GetIsotopomerEnvelop();

        // A fit-score based variant that also tracked the observed intensity is disabled,
        // so the intensity handed to GetScore always stays at -1.0.
        var topObservedIntensity = -1.0;
        var topCosine = 0.0;

        for (var z = _minCharge; z <= _maxCharge; z++)
        {
            var cosine = _ms2Spec.GetConsineScore(new Ion(fragment, z), _tolerance, RelativeIntensityThreshold);
            if (cosine > topCosine)
            {
                topCosine = cosine;
            }
        }

        total += GetScore(ionType, topCosine, topObservedIntensity);
    }

    return total;
}
/// <summary>
/// Reads an IcTda result table, looks up the precursor spectrum of every row, records the
/// highest intensity among the matched precursor isotope peaks, and writes a copy of the table
/// with an additional "PrecursorIntensity" column.
/// Rows whose precursor spectrum or isotope peaks cannot be found get an intensity of 0.
/// The test is ignored when the raw file is not reachable.
/// </summary>
public void AddMostAbundantIsotopePeakIntensity()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + rawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);

    const string resultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTda.tsv";
    var parser = new TsvFileParser(resultFilePath);
    var compositions = parser.GetData("Composition").Select(Composition.Parse).ToArray();
    var scanNums = parser.GetData("Scan").Select(s => Convert.ToInt32(s)).ToArray();
    var charges = parser.GetData("Charge").Select(s => Convert.ToInt32(s)).ToArray();

    var precursorIntensities = new double[parser.NumData];
    var tolerance = new Tolerance(10);

    for (var i = 0; i < parser.NumData; i++)
    {
        var precursorIon = new Ion(compositions[i], charges[i]);
        var precursorScanNum = run.GetPrecursorScanNum(scanNums[i]);

        // GetSpectrum results are null-checked elsewhere in this code base; skip the row
        // (leaving intensity 0) rather than crash when no precursor spectrum is available.
        var precursorSpec = run.GetSpectrum(precursorScanNum);
        if (precursorSpec == null)
        {
            continue;
        }

        var isotopePeaks = precursorSpec.GetAllIsotopePeaks(precursorIon, tolerance, 0.1);
        if (isotopePeaks == null)
        {
            continue;
        }

        var maxIntensity = 0.0;
        foreach (var peak in isotopePeaks)
        {
            if (peak != null && peak.Intensity > maxIntensity)
            {
                maxIntensity = peak.Intensity;
            }
        }
        precursorIntensities[i] = maxIntensity;
    }

    // Writing
    const string newResultFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TestYufengData\QC_ShewIntact_40K_LongSeparation_1_141016155143_IcTdaWithIntensities.tsv";

    // Fetch the rows once instead of once per written line.
    var rows = parser.GetRows();
    using (var writer = new StreamWriter(newResultFilePath))
    {
        writer.WriteLine(string.Join("\t", parser.GetHeaders()) + "\t" + "PrecursorIntensity");
        for (var i = 0; i < parser.NumData; i++)
        {
            writer.WriteLine(rows[i] + "\t" + precursorIntensities[i]);
        }
    }

    Console.WriteLine("Done");
}
// Relative isotope intensity threshold handed to the spectrum's ContainsIon check.
// NOTE(review): the trailing 0.5 looks like a previously tried value - confirm before relying on it.
public const double RelativeIsotopeIntensityThreshold = 0.8; // 0.5

/// <summary>
/// Decides whether an MS2 scan is a plausible fragmentation scan for the precursor ion by
/// checking the scan's precursor scan and the scan returned by Run.GetNextScanNum.
/// </summary>
/// <param name="precursorIon">Precursor ion to validate.</param>
/// <param name="scanNum">Scan number; must be an MS2 scan, otherwise false is returned.</param>
/// <returns>true when either neighbouring scan passes IsValidForMs1Scan.</returns>
public bool IsValid(Ion precursorIon, int scanNum)
{
    if (Run.GetMsLevel(scanNum) != 2)
    {
        return false;
    }

    var precedingScan = Run.GetPrecursorScanNum(scanNum);
    // NOTE(review): the original local was named "nextMs1ScanNum" but the call does not
    // pass an MS level - confirm that the returned scan is really the next MS1 scan.
    var followingScan = Run.GetNextScanNum(scanNum);

    if (IsValidForMs1Scan(precursorIon, precedingScan))
    {
        return true;
    }

    return IsValidForMs1Scan(precursorIon, followingScan);
}
// Threshold on relative isotope intensity used when testing whether a spectrum contains an ion.
// NOTE(review): the trailing 0.5 appears to be an alternative value left by the author - confirm.
public const double RelativeIsotopeIntensityThreshold = 0.8; // 0.5

/// <summary>
/// Validates a precursor ion against the scans surrounding an MS2 scan.
/// </summary>
/// <param name="precursorIon">Precursor ion to validate.</param>
/// <param name="scanNum">Scan number of the fragmentation spectrum.</param>
/// <returns>
/// false when the scan is not MS2; otherwise true if the precursor scan or the scan
/// returned by Run.GetNextScanNum passes IsValidForMs1Scan.
/// </returns>
public bool IsValid(Ion precursorIon, int scanNum)
{
    var isMs2 = Run.GetMsLevel(scanNum) == 2;
    if (!isMs2)
    {
        return false;
    }

    var scanBefore = Run.GetPrecursorScanNum(scanNum);
    // NOTE(review): no MS level is passed here although the original local was called
    // "nextMs1ScanNum" - verify which scan this overload returns.
    var scanAfter = Run.GetNextScanNum(scanNum);

    var supported = IsValidForMs1Scan(precursorIon, scanBefore);
    if (!supported)
    {
        supported = IsValidForMs1Scan(precursorIon, scanAfter);
    }

    return supported;
}
/// <summary>
/// Walks every internal cleavage, base ion type and charge in [MinCharge, MaxCharge] and, for
/// each fragment ion whose isotope peaks are found, computes the most abundant isotope peak
/// intensity, the Pearson correlation against the theoretical envelope, and the ppm error.
/// NOTE(review): none of the computed values is written or returned in this version - confirm
/// whether the output step is still to be added.
/// </summary>
/// <param name="spectrum">Product spectrum to inspect; its activation method selects the ion types.</param>
/// <param name="sequence">Sequence whose internal cleavages are evaluated.</param>
public void OutputStatistics(ProductSpectrum spectrum, Sequence sequence)
{
    var ionTypes = spectrum.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCid : BaseIonTypesEtd;
    var cleavageList = sequence.GetInternalCleavages().ToArray();
    var ppmTolerance = new Tolerance(10);
    var maxIntensity = spectrum.Peaks.Max(p => p.Intensity);

    foreach (var cleavage in cleavageList)
    {
        foreach (var ionType in ionTypes)
        {
            var terminalComposition = ionType.IsPrefix ? cleavage.PrefixComposition : cleavage.SuffixComposition;
            var fragment = terminalComposition + ionType.OffsetComposition;

            for (int z = MinCharge; z <= MaxCharge; z++)
            {
                var ion = new Ion(fragment, z);
                var isotopePeaks = spectrum.GetAllIsotopePeaks(ion, ppmTolerance, RelativeIsotopeIntensityThreshold);
                if (isotopePeaks == null)
                {
                    continue;
                }

                var apexIndex = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();

                // representative peak intensity
                var ionPeakIntensity = isotopePeaks[apexIndex].Intensity;

                // correlation between theoretical envelope and observed intensities
                var theoreticalEnvelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
                var observedIntensities = new double[isotopePeaks.Length];
                var slot = 0;
                foreach (var isotopePeak in isotopePeaks)
                {
                    // Intensities pass through float before being stored, as in the original.
                    observedIntensities[slot] = isotopePeak != null ? (float)isotopePeak.Intensity : 0.0;
                    slot++;
                }
                var corrCoeff = FitScoreCalculator.GetPearsonCorrelation(theoreticalEnvelope, observedIntensities);

                // m/z error of the most abundant isotope peak, in ppm
                var apexTheoreticalMz = ion.GetIsotopeMz(apexIndex);
                var errorPpm = ((isotopePeaks[apexIndex].Mz - apexTheoreticalMz) / apexTheoreticalMz) * 1e6;
            }
        }
    }
}
/// <summary>
/// Loads scan 5743 of the CID top-down test file, checks the elemental composition and mass of
/// a known protein, prints its matched isotope peaks at charge 20, and verifies the fit, cosine
/// and correlation scores against reference values.
/// The test is ignored when the raw file is not available.
/// </summary>
public void TestFitScoreCalculationCid()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    var rawPath = TestLcMsRun.TestTopDownRawFilePathCid;
    if (!File.Exists(rawPath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + rawPath);
    }

    var run = InMemoryLcMsRun.GetLcMsRunScanRange(rawPath, 5743, 5743);
    var spec = run.GetSpectrum(5743);
    Assert.True(spec != null);

    const string protein =
        "MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRAAVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQEDCRNGFLLDGFPRTIPQADAMKEAGIVVDYVLEFDVPDELIVDRIVGRRVHAASGRVYHVKFNPPKVEGKDDVTGEDLTTRKDDQEETVRKRLVEYHQMTAPLIGYYQKEAEAGNTKYAKVDGTQAVADVRAALEKILG";
    var protComp = new AminoAcidSet().GetComposition(protein) + Composition.H2O;

    // Elemental composition and masses of the intact protein.
    Assert.True(protComp != null);
    Assert.True(protComp.C == 1035);
    Assert.True(protComp.H == 1683);
    Assert.True(protComp.N == 289);
    Assert.True(protComp.O == 318);
    Assert.True(protComp.P == 0);
    Assert.True(protComp.S == 7);
    Assert.True(Math.Abs(protComp.Mass - 23473.245267145) < 0.0000001);
    Assert.True(protComp.NominalMass == 23461);

    var ion = new Ion(protComp, 20);
    var theoreticalEnvelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    Console.WriteLine(@"MonoMz: {0}, MonoMass: {1}", ion.GetMonoIsotopicMz(), ion.Composition.Mass);

    var matchedPeaks = spec.GetAllIsotopePeaks(ion, new Tolerance(15), 0.1);
    for (var isotope = 0; isotope < matchedPeaks.Length; isotope++)
    {
        var matched = matchedPeaks[isotope];
        Console.WriteLine(@"{0} {1} {2} {3}",
            isotope,
            ion.GetIsotopeMz(isotope),
            theoreticalEnvelope[isotope],
            matched == null ? 0 : matched.Intensity);
    }

    var fitScore = spec.GetFitScore(ion, new Tolerance(15), 0.1);
    var cosine = spec.GetConsineScore(ion, new Tolerance(15), 0.1);
    var corr = spec.GetCorrScore(ion, new Tolerance(15), 0.1);

    Console.WriteLine(@"FitScore: {0}", fitScore);
    Console.WriteLine(@"Cosine: {0}", cosine);
    Console.WriteLine(@"Corr: {0}", corr);

    // Reference values for this scan and ion.
    Assert.True(Math.Abs(fitScore - 0.181194589537041) < 0.0001);
    Assert.True(Math.Abs(cosine - 0.917609346566222) < 0.0001);
    Assert.True(Math.Abs(corr - 0.808326778009839) < 0.0001);
}
/// <summary>
/// Find intensity rank of ion in spectrum.
/// </summary>
/// <param name="ion">Ion to search for.</param>
/// <param name="tolerance">Tolerance used when matching the ion's monoisotopic m/z to a peak.</param>
/// <returns>
/// Intensity rank of ion. 1-based. -1 when no peak matches the ion, or when the matched
/// peak cannot be located in the intensity-ordered peak list.
/// </returns>
public int RankIon(Ion ion, Tolerance tolerance)
{
    var peak = _spectrum.FindPeak(ion.GetMonoIsotopicMz(), tolerance);
    if (peak == null)
    {
        return -1;
    }

    // NOTE(review): Array.BinarySearch requires Peaks to be sorted in IntensityComparer
    // order - confirm that this is how Peaks is built.
    var searchPeak = new Peak(peak.Mz, peak.Intensity);
    var index = Array.BinarySearch(Peaks, searchPeak, new IntensityComparer());

    // BinarySearch reports "not found" as a negative number (the bitwise complement of the
    // insertion point). Incrementing that would produce a meaningless rank, so map it to
    // the same -1 sentinel used when no peak matches.
    if (index < 0)
    {
        return -1;
    }

    return index + 1;
}
/// <summary>
/// Lazily enumerates, charge by charge, the deconvoluted peaks of a fragment whose isotope
/// envelope is found in the MS2 spectrum and passes the correlation/distance filter.
/// Enumeration stops at the first unmatched charge that lies more than one charge above
/// the last matched charge.
/// </summary>
/// <param name="fragmentComposition">Composition of the fragment ion.</param>
/// <param name="corrThreshold">A match is rejected only when its correlation is below this value AND its distance exceeds <paramref name="distThreshold"/>.</param>
/// <param name="distThreshold">Distance limit used together with <paramref name="corrThreshold"/>.</param>
/// <returns>One DeconvolutedPeak per accepted charge state; nothing when the fragment mass is below the first peak's m/z.</returns>
protected IEnumerable<DeconvolutedPeak> FindMatchedPeaks(Composition.Composition fragmentComposition, double corrThreshold, double distThreshold)
{
    var apexIsotopeIndex = fragmentComposition.GetMostAbundantIsotopeZeroBasedIndex();
    var monoMass = fragmentComposition.Mass;

    if (monoMass < Ms2Spectrum.Peaks.First().Mz)
    {
        yield break;
    }

    var lastMatchedCharge = 0;
    var apexMass = monoMass + Constants.C13MinusC12 * apexIsotopeIndex;
    var charges = GetMinMaxChargeRange(apexMass);

    for (var z = charges.Min; z <= charges.Max; z++)
    {
        var ion = new Ion(fragmentComposition, z);
        var isotopePeaks = Ms2Spectrum.GetAllIsotopePeaks(ion, Tolerance, RelativeIsotopeIntensityThreshold);

        if (isotopePeaks != null)
        {
            var distCorr = GetDistCorr(ion, isotopePeaks);
            var rejected = distCorr.Item2 < corrThreshold && distCorr.Item1 > distThreshold;
            if (!rejected)
            {
                var matchedPeak = new DeconvolutedPeak(
                    monoMass,
                    isotopePeaks[apexIsotopeIndex].Intensity,
                    z,
                    distCorr.Item2,
                    distCorr.Item1,
                    isotopePeaks);
                lastMatchedCharge = z;
                yield return matchedPeak;
                continue;
            }
        }

        // No acceptable match at this charge: give up once a gap has opened after the
        // last matched charge, otherwise try the next charge.
        if (lastMatchedCharge > 0 && z - lastMatchedCharge > 1)
        {
            yield break;
        }
    }
}
/// <summary>
/// Creates a scoring graph, pre-builds one precursor ion per charge in
/// [minPrecursorCharge, maxPrecursorCharge], and collects every node reachable from the root.
/// </summary>
/// <param name="aminoAcidSequence">Amino acid sequence represented by the graph.</param>
/// <param name="sequenceComposition">Composition used to build the precursor ions.</param>
/// <param name="rootNode">Node from which the graph is traversed.</param>
/// <param name="minPrecursorCharge">Lowest precursor charge (inclusive).</param>
/// <param name="maxPrecursorCharge">Highest precursor charge (inclusive).</param>
internal ScoringGraph(AminoAcid[] aminoAcidSequence, Composition sequenceComposition,
    ScoringGraphNode rootNode, int minPrecursorCharge, int maxPrecursorCharge)
{
    _aminoAcidSequence = aminoAcidSequence;
    _sequenceComposition = sequenceComposition;
    _rootNode = rootNode;
    _minPrecursorCharge = minPrecursorCharge;
    _maxPrecursorCharge = maxPrecursorCharge;

    _precursorIon = new Dictionary<int, Ion>();
    for (var z = _minPrecursorCharge; z <= _maxPrecursorCharge; z++)
    {
        _precursorIon[z] = new Ion(_sequenceComposition, z);
    }

    // Level-by-level traversal from the root; each node is recorded once.
    var visited = new HashSet<ScoringGraphNode>();
    var frontier = new HashSet<ScoringGraphNode> { _rootNode };
    while (frontier.Count > 0)
    {
        var nextFrontier = new HashSet<ScoringGraphNode>();
        foreach (var node in frontier)
        {
            if (!visited.Add(node))
            {
                continue; // already seen on an earlier level
            }

            foreach (var successor in node.GetNextNodes())
            {
                nextFrontier.Add(successor);
            }
        }
        frontier = nextFrontier;
    }

    _nodes = visited.ToArray();
}
/// <summary>
/// Writes a tab-separated table (Annotation, Charge, ScanNum) listing, for every enumerated
/// peptide, every sequence composition and every precursor charge in [MinCharge, MaxCharge],
/// the fragmentation scan numbers reported by the run for that precursor ion.
/// </summary>
/// <param name="outputFilePath">Path of the file to create or overwrite.</param>
public void ScorePeptides(string outputFilePath)
{
    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("Annotation\tCharge\tScanNum");

        foreach (var annotation in PeptideEnumerator)
        {
            // annotation: pre + "." + peptide + "." + post (e.g. R.PEPTIDER.G)
            var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, annotation);

            // CreateGraph can yield null for sequences it cannot represent (other callers
            // guard against this); skip such annotations instead of failing mid-file.
            if (seqGraph == null)
            {
                continue;
            }

            foreach (var sequenceComposition in seqGraph.GetSequenceCompositions())
            {
                var peptideComposition = sequenceComposition + Composition.H2O;

                for (var precursorCharge = MinCharge; precursorCharge <= MaxCharge; precursorCharge++)
                {
                    var precursorIon = new Ion(peptideComposition, precursorCharge);
                    foreach (var scanNum in Run.GetFragmentationSpectraScanNums(precursorIon))
                    {
                        writer.WriteLine("{0}\t{1}\t{2}", annotation, precursorCharge, scanNum);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Compares the theoretical isotopomer envelope of an ion with the observed isotope peaks.
/// </summary>
/// <param name="ion">Ion whose composition supplies the theoretical envelope.</param>
/// <param name="observedPeaks">
/// Observed peaks indexed by isotope; null entries count as zero intensity.
/// Must have at least as many entries as the theoretical envelope.
/// </param>
/// <returns>
/// The result of FitScoreCalculator.GetDistanceAndCorrelation; callers read Item1 as the
/// distance and Item2 as the correlation.
/// </returns>
public static Tuple<double, double> GetDistCorr(Ion ion, Peak[] observedPeaks)
{
    var theoretical = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    var observed = new double[theoretical.Length];

    for (var isotope = 0; isotope < theoretical.Length; isotope++)
    {
        var peak = observedPeaks[isotope];
        if (peak == null)
        {
            continue; // slot keeps its default of 0
        }
        observed[isotope] = peak.Intensity;
    }

    return FitScoreCalculator.GetDistanceAndCorrelation(theoretical, observed);
}
// An earlier variant, FindMostAbundantPeak, was left commented out here: it returned the
// isotope peaks of the first charge state that passed the filter instead of the most intense one.

/// <summary>
/// Searches all candidate charge states of a fragment and returns the isotope peaks of the
/// charge state whose most abundant isotope peak is the most intense, among those passing
/// the correlation/distance filter.
/// </summary>
/// <param name="fragmentComposition">Composition of the fragment ion.</param>
/// <param name="corrThreshold">A charge state is rejected only when its correlation is below this value AND its distance exceeds <paramref name="distThreshold"/>.</param>
/// <param name="distThreshold">Distance limit used together with <paramref name="corrThreshold"/>.</param>
/// <param name="observedCharge">Charge of the returned peaks; 0 when nothing is found.</param>
/// <param name="envelopeCorr">Correlation of the returned peaks; 0 when nothing is found.</param>
/// <param name="envelopeDist">Distance of the returned peaks; 1.0 when nothing is found.</param>
/// <returns>The selected isotope peaks, or null when no charge state qualifies.</returns>
protected Peak[] FindMostIntensePeak(Composition.Composition fragmentComposition, double corrThreshold, double distThreshold, out int observedCharge, out double envelopeCorr, out double envelopeDist)
{
    var apexIndex = fragmentComposition.GetMostAbundantIsotopeZeroBasedIndex();
    var monoMass = fragmentComposition.Mass;

    // Defaults reported when no charge state is accepted.
    observedCharge = 0;
    envelopeCorr = 0d;
    envelopeDist = 1.0d;

    if (monoMass < Ms2Spectrum.Peaks.First().Mz)
    {
        return null;
    }

    var apexMass = monoMass + Constants.C13MinusC12 * apexIndex;
    var charges = GetMinMaxChargeRange(apexMass);

    Peak[] best = null;
    for (var z = charges.Min; z <= charges.Max; z++)
    {
        var ion = new Ion(fragmentComposition, z);
        var candidate = Ms2Spectrum.GetAllIsotopePeaks(ion, Tolerance, RelativeIsotopeIntensityThreshold);
        if (candidate == null)
        {
            continue;
        }

        var distCorr = GetDistCorr(ion, candidate);
        var dist = distCorr.Item1;
        var corr = distCorr.Item2;
        if (corr < corrThreshold && dist > distThreshold)
        {
            continue;
        }

        var candidateApex = candidate[apexIndex];
        var improves = best == null || candidateApex.Intensity > best[apexIndex].Intensity;
        if (!improves)
        {
            continue;
        }

        best = candidate;
        observedCharge = z;
        envelopeDist = dist;
        envelopeCorr = corr;
    }

    return best;
}
/// <summary>
/// Scores every proteoform of every annotated sequence against the MS2 scans proposed by the
/// MS1 filter and keeps, per scan, up to NumMatchesPerSpectrum best-scoring matches.
/// Progress and elapsed time are printed every 100000 sequences.
/// </summary>
/// <param name="annotationsAndOffsets">Sequences to search, as annotation (pre.sequence.post) plus offset.</param>
/// <param name="ms1Filter">Filter returning the MS2 scan numbers compatible with a sequence mass.</param>
/// <param name="isDecoy">Flag stored on every created match.</param>
/// <returns>Array indexed by scan number; entries are null for scans without any match.</returns>
private SortedSet<DatabaseSequenceSpectrumMatch>[] RunSearch(IEnumerable<AnnotationAndOffset> annotationsAndOffsets, ISequenceFilter ms1Filter, bool isDecoy)
{
    var sw = new Stopwatch();
    var numPeptides = 0;
    sw.Reset();
    sw.Start();

    var matches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];

    // TODO: N-term Met cleavage
    foreach (var annotationAndOffset in annotationsAndOffsets)
    {
        ++numPeptides;
        var annotation = annotationAndOffset.Annotation;
        var offset = annotationAndOffset.Offset;

        if (numPeptides % 100000 == 0)
        {
            // The count is a positive multiple of 100000 here, so the ordinal suffix is always "th".
            Console.Write(@"Processing {0}th peptides...", numPeptides);
            sw.Stop();
            var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);
            sw.Reset();
            sw.Start();
        }

        var seqGraph = SequenceGraph.CreateGraph(AminoAcidSet, annotation);
        if (seqGraph == null)
        {
            // Illegal protein: ignored.
            continue;
        }

        var numProteoforms = seqGraph.GetNumProteoformCompositions();
        var modCombs = seqGraph.GetModificationCombinations();

        for (var modIndex = 0; modIndex < numProteoforms; modIndex++)
        {
            seqGraph.SetSink(modIndex);
            var protCompositionWithH2O = seqGraph.GetSinkSequenceCompositionWithH2O();
            var sequenceMass = protCompositionWithH2O.Mass;
            var modCombinations = modCombs[modIndex];

            foreach (var ms2ScanNum in ms1Filter.GetMatchingMs2ScanNums(sequenceMass))
            {
                var spec = _run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
                if (spec == null)
                {
                    continue;
                }

                // Charge estimated from the sequence mass and the isolation window target m/z.
                var charge = (int)Math.Round(sequenceMass / (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));

                var scorer = _ms2ScorerFactory.GetMs2Scorer(ms2ScanNum);
                var score = seqGraph.GetFragmentScore(scorer);
                if (score <= 2)
                {
                    continue;
                }

                var precursorIon = new Ion(protCompositionWithH2O, charge);
                var sequence = annotation.Substring(2, annotation.Length - 4);
                var pre = annotation[0];
                var post = annotation[annotation.Length - 1];
                var prsm = new DatabaseSequenceSpectrumMatch(sequence, pre, post, ms2ScanNum, offset, 0, modCombinations, precursorIon, score, isDecoy);

                var existingMatches = matches[ms2ScanNum];
                if (existingMatches == null)
                {
                    matches[ms2ScanNum] = new SortedSet<DatabaseSequenceSpectrumMatch> { prsm };
                }
                else if (existingMatches.Count < NumMatchesPerSpectrum)
                {
                    existingMatches.Add(prsm);
                }
                else if (score > existingMatches.Min.Score)
                {
                    // SortedSet.Add returns false when an equal element is already present.
                    // Evict the current minimum only if the new match was really inserted;
                    // otherwise the set would lose an entry without gaining one.
                    if (existingMatches.Add(prsm))
                    {
                        existingMatches.Remove(existingMatches.Min);
                    }
                }
            }
        }
    }

    return matches;
}
/// <summary>
/// Finds the longest run of consecutive cleavages supported by prefix ions and, separately,
/// by suffix ions in the spectrum. A cleavage counts as supported when any charge in
/// [_minCharge, _maxCharge] yields a correlation score of at least 0.7 for a matching ion type.
/// </summary>
/// <param name="spectrum">Product spectrum; stored in _spectrum and used to select the ion types.</param>
/// <param name="sequence">Sequence; stored in _sequence.</param>
/// <returns>
/// (prefixStartIndex, prefixEndIndex, suffixStartIndex, suffixEndIndex, prefixSequence, suffixSequence).
/// Indices are -1 and the sequence string is empty for a side on which no run was found.
/// </returns>
public Tuple<int, int, int, int, string, string> GetLongestSequence(ProductSpectrum spectrum, Sequence sequence)
{
    _spectrum = spectrum;
    _sequence = sequence;
    _baseIonTypes = _spectrum.ActivationMethod != ActivationMethod.ETD ? BaseIonTypesCid : BaseIonTypesEtd;

    var cleavages = _sequence.GetInternalCleavages().ToArray();
    var prefixHits = new int[cleavages.Length];
    var suffixHits = new int[cleavages.Length];

    // Mark every cleavage for which a prefix / suffix ion correlates well enough.
    for (var cleavageIndex = 0; cleavageIndex < cleavages.Length; cleavageIndex++)
    {
        var cleavage = cleavages[cleavageIndex];
        foreach (var ionType in _baseIonTypes)
        {
            var terminalComposition = ionType.IsPrefix ? cleavage.PrefixComposition : cleavage.SuffixComposition;
            var fragment = terminalComposition + ionType.OffsetComposition;

            for (var z = _minCharge; z <= _maxCharge; z++)
            {
                var ion = new Ion(fragment, z);
                if (_spectrum.GetCorrScore(ion, _tolerance, RelativeIsotopeIntensityThreshold) < .7)
                {
                    continue;
                }

                if (ionType.IsPrefix)
                {
                    prefixHits[cleavageIndex] = 1;
                }
                else
                {
                    suffixHits[cleavageIndex] = 1;
                }
            }
        }
    }

    // Flag residues bracketed by two consecutive supported cleavages; modified residues are not flagged.
    var prefixFlags = new int[_sequence.Count];
    var suffixFlags = new int[_sequence.Count];
    prefixFlags[0] = prefixHits[0];
    suffixFlags[suffixFlags.Length - 1] = suffixHits[suffixHits.Length - 1];

    for (int i = 1; i < prefixHits.Length; i++)
    {
        var consecutive = prefixHits[i] == 1 && prefixHits[i - 1] == 1;
        if (!consecutive)
        {
            continue;
        }
        if (_sequence[i] is ModifiedAminoAcid)
        {
            continue;
        }
        prefixFlags[i] = 1;
    }

    for (int i = suffixHits.Length - 2; i >= 0; i--)
    {
        var consecutive = suffixHits[i] == 1 && suffixHits[i + 1] == 1;
        if (!consecutive)
        {
            continue;
        }
        if (_sequence[i + 1] is ModifiedAminoAcid)
        {
            continue;
        }
        suffixFlags[i + 1] = 1;
    }

    // Prefix side: locate the longest run inside the flag string.
    var prefixStartIndex = -1;
    var prefixEndIndex = -1;
    var prefixSequence = "";
    var prefixRun = FindLongestSubstring(prefixFlags);
    if (prefixRun != "")
    {
        var prefixFlagString = string.Concat(prefixFlags);
        prefixStartIndex = prefixFlagString.IndexOf(prefixRun) + 1;
        prefixEndIndex = (prefixStartIndex == 1)
            ? 1
            : prefixStartIndex + prefixRun.Length - 1;
        prefixSequence = GetStringSubSequence(_sequence, prefixStartIndex, prefixEndIndex);
    }

    // Suffix side: same procedure on the suffix flags.
    var suffixStartIndex = -1;
    var suffixEndIndex = -1;
    var suffixSequence = "";
    var suffixRun = FindLongestSubstring(suffixFlags);
    if (suffixRun != "")
    {
        var suffixFlagString = string.Concat(suffixFlags);
        suffixStartIndex = suffixFlagString.IndexOf(suffixRun) + 1;
        suffixEndIndex = (suffixStartIndex == 1)
            ? 1
            : suffixStartIndex + suffixRun.Length - 1;
        suffixSequence = GetStringSubSequence(_sequence, suffixStartIndex, suffixEndIndex);
    }

    return new Tuple<int, int, int, int, string, string>(
        prefixStartIndex, prefixEndIndex,
        suffixStartIndex, suffixEndIndex,
        prefixSequence, suffixSequence);
}
/// <summary>
/// Matches the isotope envelope of an ion against a spectrum. The most abundant isotope is
/// located first; the search then walks to lower and to higher isotopes, keeping for each
/// isotope the most intense peak inside the tolerance window, and stops in either direction
/// at the first isotope whose theoretical relative intensity is below the threshold.
/// </summary>
/// <param name="spec">Spectrum to search.</param>
/// <param name="ion">Ion whose isotopes are looked up.</param>
/// <param name="tolerance">Mass tolerance for peak matching.</param>
/// <param name="relativeIntensityThreshold">Minimum theoretical relative intensity for an isotope to be searched.</param>
/// <param name="peakIndexList">
/// Per isotope, the index into spec.Peaks of the matched peak. Entries of unmatched isotopes
/// keep the array default of 0. Allocated even when null is returned.
/// </param>
/// <returns>Matched peaks indexed by isotope (null entries for unmatched isotopes), or null when the most abundant isotope has no match.</returns>
public Peak[] GetAllIsotopePeaks(Spectrum spec, Ion ion, Tolerance tolerance, double relativeIntensityThreshold, out int[] peakIndexList)
{
    var apexIsotope = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
    var envelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    peakIndexList = new int[envelope.Length];

    var apexMz = ion.GetIsotopeMz(apexIsotope);
    var apexPeakIndex = spec.FindPeakIndex(apexMz, tolerance);
    if (apexPeakIndex < 0)
    {
        return null;
    }

    var matched = new Peak[envelope.Length];
    matched[apexIsotope] = spec.Peaks[apexPeakIndex];
    peakIndexList[apexIsotope] = apexPeakIndex;

    // Lower isotopes: scan the peak list downwards from just below the apex peak.
    var cursor = apexPeakIndex - 1;
    for (var isotope = apexIsotope - 1; isotope >= 0; isotope--)
    {
        if (envelope[isotope] < relativeIntensityThreshold)
        {
            break;
        }

        var targetMz = ion.GetIsotopeMz(isotope);
        var halfWidth = tolerance.GetToleranceAsTh(targetMz);
        var lowerBound = targetMz - halfWidth;
        var upperBound = targetMz + halfWidth;

        for (var i = cursor; i >= 0; i--)
        {
            var candidateMz = spec.Peaks[i].Mz;
            if (candidateMz < lowerBound)
            {
                // Passed the window; the next (lower) isotope resumes from here.
                cursor = i;
                break;
            }

            if (candidateMz <= upperBound)
            {
                // Inside the window: keep the most intense candidate.
                var candidate = spec.Peaks[i];
                if (matched[isotope] == null || candidate.Intensity > matched[isotope].Intensity)
                {
                    matched[isotope] = candidate;
                    peakIndexList[isotope] = i;
                }
            }
        }
    }

    // Higher isotopes: scan the peak list upwards from just above the apex peak.
    cursor = apexPeakIndex + 1;
    for (var isotope = apexIsotope + 1; isotope < envelope.Length; isotope++)
    {
        if (envelope[isotope] < relativeIntensityThreshold)
        {
            break;
        }

        var targetMz = ion.GetIsotopeMz(isotope);
        var halfWidth = tolerance.GetToleranceAsTh(targetMz);
        var lowerBound = targetMz - halfWidth;
        var upperBound = targetMz + halfWidth;

        for (var i = cursor; i < spec.Peaks.Length; i++)
        {
            var candidateMz = spec.Peaks[i].Mz;
            if (candidateMz > upperBound)
            {
                // Passed the window; the next (higher) isotope resumes from here.
                cursor = i;
                break;
            }

            if (candidateMz >= lowerBound)
            {
                // Inside the window: keep the most intense candidate.
                var candidate = spec.Peaks[i];
                if (matched[isotope] == null || candidate.Intensity > matched[isotope].Intensity)
                {
                    matched[isotope] = candidate;
                    peakIndexList[isotope] = i;
                }
            }
        }
    }

    return matched;
}
/// <summary>
/// For every identification in a result file with QValue at most 0.01 (when a QValue column
/// exists) whose most abundant precursor isotope lies inside the isolation window, computes
/// isotope correlation scores on the precursor and next MS1 spectra and XIC correlations
/// against the next isotope and the neighbouring charge states.
/// Only the column header is printed; the per-row output is currently disabled.
/// The test is ignored when the result file or the raw file is missing.
/// </summary>
public void TestMs1Filtering()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // An alternative input (SBEP_STM_001_02272012_Aragon.tsv in the same folder) was used previously.
    const string resultFilePath =
        @"C:\cygwin\home\kims336\Data\TopDown\raw\CorrMatches_N30\SBEP_STM_001_02272012_Aragon.decoy.icresult";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\DataFiles\SBEP_STM_001_02272012_Aragon.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
    var tolerance = new Tolerance(15);

    var tsvReader = new TsvFileParser(resultFilePath);
    var compositions = tsvReader.GetData("Composition");
    var scanNums = tsvReader.GetData("ScanNum");
    var charges = tsvReader.GetData("Charge");
    var qValues = tsvReader.GetData("QValue");
    var scores = tsvReader.GetData("Score");

    Console.WriteLine("ScanNum\tScore\tPrecursor\tNext\tSum\tNextIsotope\tLessCharge\tMoreCharge\tMax\tNumXicPeaks");

    for (var row = 0; row < compositions.Count; row++)
    {
        // Keep only confident identifications when q-values are available.
        if (qValues != null)
        {
            var qValue = Convert.ToDouble(qValues[row]);
            if (qValue > 0.01)
            {
                continue;
            }
        }

        var scanNum = Convert.ToInt32(scanNums[row]);
        var composition = Composition.Parse(compositions[row]);
        var charge = Convert.ToInt32(charges[row]);
        var precursorIon = new Ion(composition, charge);

        // The precursor's most abundant isotope must fall inside the isolation window.
        var productSpec = run.GetSpectrum(scanNum) as ProductSpectrum;
        var inWindow = productSpec != null && productSpec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
        if (!inWindow)
        {
            continue;
        }

        var score = Convert.ToDouble(scores[row]);

        // Isotope correlation on the precursor scan and on the next MS1 scan.
        var precursorScanNum = run.GetPrecursorScanNum(scanNum);
        var precursorSpec = run.GetSpectrum(precursorScanNum);
        var preIsotopeCorr = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        var nextScanNum = run.GetNextScanNum(scanNum, 1);
        var nextSpec = run.GetSpectrum(nextScanNum);
        var nextIsotopeCorr = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // XIC of the most abundant isotope and its apex.
        var xicMostAbundant = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum);
        var apexScanNum = xicMostAbundant.GetApexScanNum();
        if (apexScanNum < run.MinLcScan)
        {
            apexScanNum = scanNum;
        }

        // Correlation with the XIC of the next isotope.
        var xicNextIsotope = run.GetPrecursorExtractedIonChromatogram(
            precursorIon.GetMostAbundantIsotopeMz() + Constants.C13MinusC12 / charge, tolerance, scanNum);
        var plusOneIsotopeCorr = xicMostAbundant.GetCorrelation(xicNextIsotope);

        // Correlation with the XICs of the neighbouring charge states.
        var lowerChargeIon = new Ion(composition, charge - 1);
        var xicChargeMinusOne = run.GetPrecursorExtractedIonChromatogram(lowerChargeIon.GetMostAbundantIsotopeMz(), tolerance, scanNum);
        var chargeMinusOneCorr = xicMostAbundant.GetCorrelation(xicChargeMinusOne);

        var higherChargeIon = new Ion(composition, charge + 1);
        var xicChargePlusOne = run.GetPrecursorExtractedIonChromatogram(higherChargeIon.GetMostAbundantIsotopeMz(), tolerance, scanNum);
        var chargePlusOneCorr = xicMostAbundant.GetCorrelation(xicChargePlusOne);
    }
}
/// <summary>
/// Matches the theoretical fragment ions of <paramref name="sequence"/> against
/// <paramref name="ms2Spec"/> and writes one tab-separated summary row to <paramref name="writer"/>.
/// </summary>
/// <remarks>
/// The row holds, in order: the activation flag (2 for ETD, otherwise 1), the sequence composition
/// mass, the sequence count, the number of cleavages that produced both a prefix and a suffix hit,
/// and then - for prefix ions followed by suffix ions - the hit count, the number of adjacent
/// cleavage pairs that both had a hit, and the accumulated intensity, correlation, distance and
/// ppm mass error. Nothing is written when the spectrum or the sequence is null.
/// </remarks>
/// <param name="ms2Spec">Product spectrum to search; the m/z of its first and last peaks bound the charges tried.</param>
/// <param name="sequence">Sequence whose internal cleavages generate the fragments.</param>
/// <param name="parentIonCharge">Precursor charge; product charges are capped at min(parentIonCharge + 2, 20).</param>
/// <param name="writer">Destination of the summary row.</param>
private void GetMatchStatistics(ProductSpectrum ms2Spec, Sequence sequence, int parentIonCharge, StreamWriter writer)
{
    if (ms2Spec == null || sequence == null) return;

    var ppmTolerance = new Tolerance(12);
    const int lowestProductCharge = 1;
    var highestProductCharge = Math.Min(parentIonCharge + 2, 20);

    // ETD spectra are matched with c/z ions, everything else with b/y ions.
    var isEtd = ms2Spec.ActivationMethod == ActivationMethod.ETD;
    var ionTypes = isEtd
        ? new[] { BaseIonType.C, BaseIonType.Z }
        : new[] { BaseIonType.B, BaseIonType.Y };
    var referenceIntensity = CompositeScorer.GetRefIntensity(ms2Spec.Peaks);
    var activationFlag = isEtd ? 2 : 1;

    var cleavages = sequence.GetInternalCleavages();
    var complementaryCount = 0;
    var prefixStat = new FragmentStat();
    var suffixStat = new FragmentStat();

    var firstPeakMz = ms2Spec.Peaks.First().Mz;
    var lastPeakMz = ms2Spec.Peaks.Last().Mz;

    var cleavageIdx = 0;
    var prefixObserved = new bool[sequence.Count + 1];
    var suffixObserved = new bool[sequence.Count + 1];

    foreach (var cleavage in cleavages)
    {
        var prefixHit = false;
        var suffixHit = false;

        foreach (var ionType in ionTypes)
        {
            var isPrefixIon = ionType.IsPrefix;
            var stat = isPrefixIon ? prefixStat : suffixStat;
            var fragmentComposition = isPrefixIon
                ? cleavage.PrefixComposition + ionType.OffsetComposition
                : cleavage.SuffixComposition + ionType.OffsetComposition;

            // Fragments lighter than the first peak's m/z are skipped.
            var fragmentMass = fragmentComposition.Mass;
            if (fragmentMass < firstPeakMz) continue;

            // Restrict the charge range to values that place the most abundant isotope
            // between the first and last peak of the spectrum.
            var apexIsotope = fragmentComposition.GetMostAbundantIsotopeZeroBasedIndex();
            var apexIsotopeMass = fragmentMass + Constants.C13MinusC12 * apexIsotope;
            var upperCharge = (int)Math.Floor(apexIsotopeMass / (firstPeakMz - Constants.Proton));
            var lowerCharge = (int)Math.Ceiling(apexIsotopeMass / (lastPeakMz - Constants.Proton));
            if (upperCharge < 1 || upperCharge > highestProductCharge) upperCharge = highestProductCharge;
            if (lowerCharge < lowestProductCharge) lowerCharge = lowestProductCharge;

            for (var z = lowerCharge; z <= upperCharge; z++)
            {
                var ion = new Ion(fragmentComposition, z);
                var envelopePeaks = ms2Spec.GetAllIsotopePeaks(ion, ppmTolerance, 0.1);
                if (envelopePeaks == null) continue;

                var distCorr = CompositeScorer.GetDistCorr(ion, envelopePeaks);
                var distance = distCorr.Item1;
                var correlation = distCorr.Item2;
                // Rejected only when BOTH the correlation is low and the distance is high.
                if (correlation < 0.7 && distance > 0.03) continue;

                var apexPeak = envelopePeaks[apexIsotope];
                var relativeIntensity = apexPeak.Intensity / referenceIntensity;
                var theoreticalMz = Ion.GetIsotopeMz(fragmentMass, z, apexIsotope);
                var ppmError = (Math.Abs(apexPeak.Mz - theoreticalMz) / theoreticalMz) * 1e6;

                stat.Count++;
                stat.Intensity += Math.Min(relativeIntensity, 1.0);
                stat.Corr += correlation;
                stat.Dist += distance;
                stat.MassError += ppmError;

                if (isPrefixIon) prefixHit = true;
                else suffixHit = true;
            }
        }

        if (prefixHit) prefixObserved[cleavageIdx] = true;
        if (suffixHit) suffixObserved[cleavageIdx] = true;
        if (prefixHit && suffixHit) complementaryCount++;

        cleavageIdx++;
    }

    // Count neighbouring cleavage positions that both produced a hit.
    var prefixAdjacentPairs = 0;
    var suffixAdjacentPairs = 0;
    for (var pos = 1; pos < prefixObserved.Length; pos++)
    {
        if (prefixObserved[pos - 1] && prefixObserved[pos]) prefixAdjacentPairs++;
        if (suffixObserved[pos - 1] && suffixObserved[pos]) suffixAdjacentPairs++;
    }

    writer.Write(activationFlag);
    writer.Write("\t");
    writer.Write(sequence.Composition.Mass);
    writer.Write("\t");
    writer.Write(sequence.Count);
    writer.Write("\t");
    writer.Write(complementaryCount);
    writer.Write("\t");
    writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}", prefixStat.Count, prefixAdjacentPairs, prefixStat.Intensity, prefixStat.Corr, prefixStat.Dist, prefixStat.MassError);
    writer.Write("\t");
    writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}", suffixStat.Count, suffixAdjacentPairs, suffixStat.Intensity, suffixStat.Corr, suffixStat.Dist, suffixStat.MassError);
    writer.Write("\n");
}
/// <summary>
/// Creates an <see cref="ImsScorer"/> for the given data and precursor ion, sharing this
/// factory's sub-score factory.
/// </summary>
/// <param name="imsData">Cached IMS data handed to the scorer.</param>
/// <param name="precursorIon">Precursor ion handed to the scorer.</param>
/// <returns>A newly constructed scorer.</returns>
public ImsScorer GetImsScorer(ImsDataCached imsData, Ion precursorIon)
{
    var scorer = new ImsScorer(imsData, precursorIon, _subScoreFactory);
    return scorer;
}
/// <summary>
/// Builds a jagged intensity table indexed as [charge - MinCharge][isotope][scan column].
/// </summary>
/// <remarks>
/// Scan columns follow the order of the scan numbers returned by <c>_run.GetScanNumbers(1)</c>
/// (presumably the MS1 scans - confirm against the run implementation). For every charge between
/// <c>MinCharge</c> and <c>MaxCharge</c> and every entry of <c>_isotopeEnvelope</c>, the precursor
/// extracted ion chromatogram at the isotope's m/z is copied into the matching row; scans without
/// a chromatogram peak keep the default value 0.
/// </remarks>
/// <returns>The filled table; its first dimension has <c>MaxCharge - MinCharge + 1</c> entries.</returns>
private double[][][] GetXicArray()
{
    // Assign each scan number a consecutive column.
    var columnOfScan = new Dictionary<int, int>();
    var nextColumn = 0;
    foreach (var scanNumber in _run.GetScanNumbers(1))
    {
        columnOfScan[scanNumber] = nextColumn;
        nextColumn++;
    }

    var xicArray = new double[MaxCharge - MinCharge + 1][][];

    for (var charge = MinCharge; charge <= MaxCharge; charge++)
    {
        var precursorIon = new Ion(_proteinCompositionPlusWater, charge);
        var isotopeRows = new double[_isotopeEnvelope.Length][];

        for (var isotope = 0; isotope < isotopeRows.Length; isotope++)
        {
            var intensities = new double[columnOfScan.Count];

            // Row 0 corresponds to isotope index _minIsotopeIndex.
            var isotopeMz = precursorIon.GetIsotopeMz(isotope + _minIsotopeIndex);
            foreach (var xicPeak in _run.GetPrecursorExtractedIonChromatogram(isotopeMz, _tolerance))
            {
                intensities[columnOfScan[xicPeak.ScanNum]] = xicPeak.Intensity;
            }

            isotopeRows[isotope] = intensities;
        }

        xicArray[charge - MinCharge] = isotopeRows;
    }

    return xicArray;
}
/// <summary>
/// Tests every theoretical fragment ion of <c>_sequence</c> with <c>FindIon</c> and records the matches.
/// </summary>
/// <remarks>
/// One ion is tested for each internal cleavage, each base ion type in <c>_baseIonTypes</c> and each
/// charge from <c>_minCharge</c> to <c>_maxCharge</c>. <c>_nTheoreticalIonPeaks</c> counts every ion
/// tested and <c>_nObservedIonPeaks</c> counts those that <c>FindIon</c> reported as found. For each
/// found ion, the base isotope peak index returned by <c>FindIon</c> is appended to
/// <c>_prefixIonPeakIndex</c> or <c>_suffixIonPeakIndex</c> depending on the ion type.
/// </remarks>
private void FindMatchedPeaks()
{
    var cleavages = _sequence.GetInternalCleavages();

    // NOTE(review): an earlier comment described these lists as cleavage indices, but the code
    // stores the base isotope peak index reported by FindIon - confirm what consumers expect.
    _prefixIonPeakIndex = new List<int>();
    _suffixIonPeakIndex = new List<int>();
    _nTheoreticalIonPeaks = 0;
    _nObservedIonPeaks = 0;

    foreach (var c in cleavages)
    {
        foreach (var baseIonType in _baseIonTypes)
        {
            var fragmentComposition = baseIonType.IsPrefix
                ? c.PrefixComposition + baseIonType.OffsetComposition
                : c.SuffixComposition + baseIonType.OffsetComposition;

            for (var charge = _minCharge; charge <= _maxCharge; charge++)
            {
                var ion = new Ion(fragmentComposition, charge);

                int baseIsotopePeakIndex;
                int nIsotopes;          // required by FindIon's signature; not used here
                int nMatchedIsotopes;   // required by FindIon's signature; not used here
                if (FindIon(ion, _tolerance, RelativeIsotopeIntensityThreshold, out baseIsotopePeakIndex, out nIsotopes, out nMatchedIsotopes))
                {
                    if (baseIonType.IsPrefix) _prefixIonPeakIndex.Add(baseIsotopePeakIndex);
                    else _suffixIonPeakIndex.Add(baseIsotopePeakIndex);
                    _nObservedIonPeaks++;
                }

                // Counted per ion (not per isotope peak), whether or not it was found.
                _nTheoreticalIonPeaks++;
            }
        }
    }
}
/// <summary>
/// Reports how many of the identifications listed in a result TSV pass the MS1-based sequence
/// filter built for the corresponding raw file.
/// </summary>
/// <remarks>
/// The test is ignored when either input file is missing. Rows with an E-value above 1E-4, rows whose
/// peptide cannot be extracted or contains "(", and rows whose precursor's most abundant isotope lies
/// outside the spectrum's isolation window are skipped. For each remaining row the correlation scores
/// in the precursor scan and the next scan, plus the filter outcome, are printed, followed by summary
/// statistics. The final "Elapsed Time" line reports the last timed section only, because the
/// stopwatch is not restarted after the per-bin counting.
/// </remarks>
public void FilteringEfficiencyQcShew()
{
    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";
    const int minPrecursorCharge = 3;
    const int maxPrecursorCharge = 30;
    const int tolerancePpm = 10;
    const double minProteinMass = 3000.0;
    const double maxProteinMass = 30000.0;

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Section 1: load the run.
    var sw = System.Diagnostics.Stopwatch.StartNew();
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
    sw.Stop();
    Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

    var tolerance = new Tolerance(tolerancePpm);

    // Section 2: build the MS1 filter.
    sw.Restart();
    ISequenceFilter ms1Filter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);
    sw.Stop();
    Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

    // Section 3: count matching MS2 scans for every mass bin.
    sw.Restart();
    var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
    var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
    var numComparisons = 0L;
    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        var binMass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
        numComparisons += ms1Filter.GetMatchingMs2ScanNums(binMass).Count();
    }
    sw.Stop();
    Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var tsvReader = new TsvFileParser(resultFilePath);
    var scanNums = tsvReader.GetData("Scan(s)");
    var charges = tsvReader.GetData("Charge");
    var scores = tsvReader.GetData("E-value");
    var sequences = tsvReader.GetData("Peptide");

    var aaSet = new AminoAcidSet();
    var passedSequences = new HashSet<string>();
    var consideredSequences = new HashSet<string>();
    var numUnfilteredSpecs = 0;
    var totalSpecs = 0;

    for (var row = 0; row < scores.Count; row++)
    {
        var eValue = Convert.ToDouble(scores[row]);
        if (eValue > 1E-4) continue;

        var scanNum = Convert.ToInt32(scanNums[row]);
        var charge = Convert.ToInt32(charges[row]);

        // Rows whose extracted peptide contains "(" are not scored.
        var peptide = SimpleStringProcessing.GetStringBetweenDots(sequences[row]);
        if (peptide == null || peptide.Contains("(")) continue;

        var composition = aaSet.GetComposition(peptide) + Composition.H2O;
        var precursorIon = new Ion(composition, charge);

        var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
        if (spec == null || !spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz())) continue;

        ++totalSpecs;

        var precursorScanNum = run.GetPrecursorScanNum(scanNum);
        var corrInPrecursorScan = run.GetSpectrum(precursorScanNum).GetCorrScore(precursorIon, tolerance, 0.1);

        var nextScanNum = run.GetNextScanNum(scanNum, 1);
        var corrInNextScan = run.GetSpectrum(nextScanNum).GetCorrScore(precursorIon, tolerance, 0.1);

        // 1 when the filter keeps this scan for the sequence mass, otherwise 0.
        var filterHit = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
        if (filterHit == 1)
        {
            numUnfilteredSpecs++;
            passedSequences.Add(sequences[row]);
        }
        consideredSequences.Add(sequences[row]);

        var corrMax = new[] { corrInPrecursorScan, corrInNextScan, filterHit }.Max();
        Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corrInPrecursorScan, nextScanNum, corrInNextScan, filterHit, corrMax);
    }

    Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
    Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
    Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
    Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", passedSequences.Count / (double)consideredSequences.Count, passedSequences.Count, consideredSequences.Count);

    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
}
/// <summary>
/// Intended to return the MS/MS spectra whose isolation windows contain the most abundant
/// isotope peak of <paramref name="precursorIon"/>. Not implemented yet.
/// </summary>
/// <param name="precursorIon">Precursor ion to look up.</param>
/// <returns>Never returns; the method always throws.</returns>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public IEnumerable<ProductSpectrum> GetMatchingMs2Spectra(Ion precursorIon)
{
    throw new System.NotImplementedException();
}
/// <summary>
/// Writes one tab-separated row per matched fragment ion of <paramref name="sequence"/> found in
/// <paramref name="ms2Spec"/>, and prints the number of cleavages where every ion type matched.
/// </summary>
/// <remarks>
/// For every internal cleavage and base ion type, charges 1 through 20 are tried and the charge state
/// with the highest summed-isotope intensity (relative to the most intense peak of the spectrum) is
/// kept. Each kept match is written as: activation flag, peak type (1 = prefix, 2 = suffix),
/// theoretical fragment mass, observed charge, distance, correlation, relative intensity and the ppm
/// error between theoretical and observed monoisotopic mass. Finally
/// "complementary-cleavage count \t sequence count" is printed to the console.
/// Nothing happens when the spectrum or the sequence is null.
/// </remarks>
/// <param name="isDecoy">Currently unused by this method.</param>
/// <param name="ms2Spec">Product spectrum to search.</param>
/// <param name="sequence">Sequence whose internal cleavages generate the fragments.</param>
/// <param name="writer">Destination of the per-ion rows.</param>
private void GetNodeStatistics(bool isDecoy, ProductSpectrum ms2Spec, Sequence sequence, StreamWriter writer)
{
    if (ms2Spec == null) return;
    if (sequence == null) return;

    var baseIonTypesCid = new[] { BaseIonType.B, BaseIonType.Y };
    var baseIonTypesEtd = new[] { BaseIonType.C, BaseIonType.Z };
    var tolerance = new Tolerance(15);
    const int minCharge = 1;
    const int maxCharge = 20;

    var baseIonTypes = ms2Spec.ActivationMethod != ActivationMethod.ETD ? baseIonTypesCid : baseIonTypesEtd;
    var refIntensity = ms2Spec.Peaks.Max(p => p.Intensity);

    // NOTE(review): GetMatchStatistics encodes ETD as 2 and everything else as 1, the reverse of
    // this method - confirm which convention the consumers of each output expect.
    var activationMethodFlag = ms2Spec.ActivationMethod == ActivationMethod.ETD ? 1 : 2;

    var cleavages = sequence.GetInternalCleavages();
    var nComplementaryFrags = 0;

    foreach (var c in cleavages)
    {
        // Stays true only if every base ion type matched at this cleavage.
        var bothObs = true;

        foreach (var baseIonType in baseIonTypes)
        {
            var peakType = baseIonType.IsPrefix ? 1 : 2;
            var fragmentComposition = baseIonType.IsPrefix
                ? c.PrefixComposition + baseIonType.OffsetComposition
                : c.SuffixComposition + baseIonType.OffsetComposition;
            var curFragMass = fragmentComposition.Mass;

            var curObsIonMass = 0d;
            var curObsIonCharge = 0;
            var curObsIonDist = 1.0d;
            var curObsIonCorr = 0d;
            var curObsIonIntensity = 0d;
            var ionMatch = false;

            for (var charge = minCharge; charge <= maxCharge; charge++)
            {
                var ion = new Ion(fragmentComposition, charge);
                var isotopePeaks = ms2Spec.GetAllIsotopePeaks(ion, tolerance, 0.1);
                if (isotopePeaks == null) continue;

                // Rejected only when BOTH the correlation is low and the distance is high.
                var distCorr = AbstractFragmentScorer.GetDistCorr(ion, isotopePeaks);
                if (distCorr.Item2 < 0.7 && distCorr.Item1 > 0.07) continue;

                var mostAbundantIsotopeIndex = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
                var mostAbuPeak = isotopePeaks[mostAbundantIsotopeIndex];
                var summedIntensity = isotopePeaks.Where(p => p != null).Sum(p => p.Intensity);
                var intScore = summedIntensity / refIntensity;

                // Keep the first accepted charge state, then replace it only by a more intense one.
                if (!ionMatch || curObsIonIntensity < intScore)
                {
                    curObsIonMass = Ion.GetMonoIsotopicMass(mostAbuPeak.Mz, charge, mostAbundantIsotopeIndex);
                    curObsIonCharge = charge;
                    curObsIonCorr = distCorr.Item2;
                    curObsIonDist = distCorr.Item1;
                    curObsIonIntensity = intScore;
                }
                ionMatch = true;
            }

            if (!ionMatch)
            {
                bothObs = false;
                continue;
            }

            var massErrorPpm = (Math.Abs(curFragMass - curObsIonMass) / curFragMass) * 1e6;
            writer.Write(
                "{0}\t{1}\t{2:0.000}\t{3}\t{4:0.000}\t{5:0.000}\t{6:0.000}\t{7:0.000}\n",
                activationMethodFlag,
                peakType,
                curFragMass,
                curObsIonCharge,
                curObsIonDist,
                curObsIonCorr,
                curObsIonIntensity,
                massErrorPpm);
        }

        if (bothObs)
        {
            nComplementaryFrags++;
        }
    }

    Console.WriteLine("{0}\t{1}", nComplementaryFrags, sequence.Count);
}
/// <summary>
/// Scores every sequence composition of a hard-coded protein annotation against a raw file with
/// <c>TopDownScorer</c> and prints diagnostic m/z values.
/// </summary>
/// <remarks>
/// Sets the static <c>TopDownScorer.MinCharge</c> / <c>MaxCharge</c> to 8 / 25 before scoring. For
/// each composition produced by the sequence graph the test prints the composition, its most
/// abundant isotope index, the m/z of that isotope at charge 11, the scorer's score, and a table of
/// the isotope m/z for charges 9 through 18. The raw file path is hard-coded and not checked for
/// existence.
/// </remarks>
public void TestTopDownScoring()
{
    const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw";
    const string protAnnotation = "A.AHAHLTHQYPAANAQVTAAPQAITLNFSEGVETGFSGAKITGPKNENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK.";

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    TopDownScorer.MaxCharge = 25;
    TopDownScorer.MinCharge = 8;

    // NOTE(review): "dehydro" is built from Modification.PyroGluQ on residue 'C'; this looks like a
    // copy-paste slip - confirm which modification was intended.
    var dehydro = new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false);
    var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false);
    var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false);

    var searchModifications = new List<SearchModification>();
    searchModifications.Add(dehydro);
    searchModifications.Add(cysteinylC);
    searchModifications.Add(glutathioneC);

    var aaSet = new AminoAcidSet(searchModifications, 0);
    var precursorTolerance = new Tolerance(10);

    // Build the sequence graph for the annotated protein, then load the run.
    var seqGraph = SequenceGraph.CreateGraph(aaSet, protAnnotation);
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    foreach (var protComposition in seqGraph.GetSequenceCompositions())
    {
        var apexIsotopeIndex = protComposition.GetMostAbundantIsotopeZeroBasedIndex();
        Console.WriteLine("Composition\t{0}", protComposition);
        Console.WriteLine("MostAbundantIsotopeIndex\t{0}", apexIsotopeIndex);

        var chargeElevenIon = new Ion(protComposition + Composition.H2O, 11);
        Console.WriteLine(chargeElevenIon.GetIsotopeMz(apexIsotopeIndex));
        Console.WriteLine();

        var scorer = new TopDownScorer(protComposition, run, precursorTolerance, null);
        Console.WriteLine(scorer.GetScore());

        Console.WriteLine("\nCharge\tm/z");
        for (var charge = 9; charge <= 18; charge++)
        {
            var precursorIon = new Ion(protComposition + Composition.H2O, charge);
            Console.WriteLine("{0}\t{1}", charge, precursorIon.GetIsotopeMz(apexIsotopeIndex));
        }
    }
}
/// <summary>
/// Reports how many of the identifications listed in an .icresult file pass the MS1-based sequence
/// filter built for the corresponding raw file.
/// </summary>
/// <remarks>
/// The test is ignored when either input file is missing. A row is considered when its QValue is at
/// most 0.01 (or, if the file has no QValue column, its Score is at least 13) and the precursor's
/// most abundant isotope lies inside the spectrum's isolation window. A first pass counts how often
/// each sequence occurs among the considered rows; a second pass prints per-row correlation scores,
/// the filter outcome and that count, followed by summary statistics. The final "Elapsed Time" line
/// reports the last timed section only, because the stopwatch is not restarted after the per-bin
/// counting.
/// </remarks>
public void FilteringEfficiency()
{
    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw";
    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon_4PTMs.icresult";
    const int minPrecursorCharge = 3;
    const int maxPrecursorCharge = 30;
    const int tolerancePpm = 10;
    const double minProteinMass = 3000.0;
    const double maxProteinMass = 30000.0;

    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // Section 1: load the run.
    var sw = System.Diagnostics.Stopwatch.StartNew();
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
    sw.Stop();
    Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

    var tolerance = new Tolerance(tolerancePpm);

    // Section 2: build the MS1 filter.
    sw.Restart();
    ISequenceFilter ms1Filter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.5, 0.5, 0.5, 40);
    sw.Stop();
    Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

    // Section 3: count matching MS2 scans for every mass bin.
    sw.Restart();
    var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
    var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
    var numComparisons = 0L;
    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        var binMass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
        numComparisons += ms1Filter.GetMatchingMs2ScanNums(binMass).Count();
    }
    sw.Stop();
    Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var tsvReader = new TsvFileParser(resultFilePath);
    var compositions = tsvReader.GetData("Composition");
    var scanNums = tsvReader.GetData("ScanNum");
    var charges = tsvReader.GetData("Charge");
    var scores = tsvReader.GetData("Score");
    var qvalues = tsvReader.GetData("QValue");
    var sequences = tsvReader.GetData("Sequence");

    // Pass 1: count occurrences of each sequence among the considered rows.
    var sequenceCount = new Dictionary<string, int>();
    for (var row = 0; row < compositions.Count; row++)
    {
        if (qvalues != null)
        {
            if (Convert.ToDouble(qvalues[row]) > 0.01) continue;
        }
        else if (Convert.ToDouble(scores[row]) < 13)
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scanNums[row]);
        var charge = Convert.ToInt32(charges[row]);
        var precursorIon = new Ion(Composition.Parse(compositions[row]), charge);

        var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
        if (spec == null || !spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz())) continue;

        // TryGetValue leaves 'seen' at 0 for a sequence not encountered before.
        var sequence = sequences[row];
        int seen;
        sequenceCount.TryGetValue(sequence, out seen);
        sequenceCount[sequence] = seen + 1;
    }

    // Pass 2: evaluate the filter on the same rows and print per-row details.
    var passedSequences = new HashSet<string>();
    var consideredSequences = new HashSet<string>();
    var numUnfilteredSpecs = 0;
    var totalSpecs = 0;
    for (var row = 0; row < compositions.Count; row++)
    {
        if (qvalues != null)
        {
            if (Convert.ToDouble(qvalues[row]) > 0.01) continue;
        }
        else if (Convert.ToDouble(scores[row]) < 13)
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scanNums[row]);
        var charge = Convert.ToInt32(charges[row]);
        var composition = Composition.Parse(compositions[row]);
        var precursorIon = new Ion(composition, charge);

        var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
        if (spec == null || !spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz())) continue;

        ++totalSpecs;

        var precursorScanNum = run.GetPrecursorScanNum(scanNum);
        var corrInPrecursorScan = run.GetSpectrum(precursorScanNum).GetCorrScore(precursorIon, tolerance, 0.1);

        var nextScanNum = run.GetNextScanNum(scanNum, 1);
        var corrInNextScan = run.GetSpectrum(nextScanNum).GetCorrScore(precursorIon, tolerance, 0.1);

        // 1 when the filter keeps this scan for the composition mass, otherwise 0.
        var filterHit = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
        if (filterHit == 1)
        {
            numUnfilteredSpecs++;
            passedSequences.Add(sequences[row]);
        }
        consideredSequences.Add(sequences[row]);

        var corrMax = new[] { corrInPrecursorScan, corrInNextScan, filterHit }.Max();
        Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", scanNum, precursorScanNum, corrInPrecursorScan, nextScanNum, corrInNextScan, filterHit, corrMax, sequenceCount[sequences[row]]);
    }

    Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
    Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
    Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
    Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", passedSequences.Count / (double)consideredSequences.Count, passedSequences.Count, consideredSequences.Count);

    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
}