//De novo sequencing of a single spectrum, returning one scored candidate per distinct sequence.
//The input spectrum is cloned first, so the caller's object is not modified.
//Do not use in parallel: per the original author, the gap item sorting will misbehave under concurrent use.
public List<ScoredSequence> DetailedDenovoSequencing(PatternTools.MSParser.MSFull theMS)
{
    MSFull workingCopy = PatternTools.ObjectCopier.Clone(theMS);
    PrepareMSForDeNovo(workingCopy);

    //Gather the down paths of every graph node into one flat list.
    //NOTE(review): Distinct() compares the path lists with the default comparer (reference identity for List<T>),
    //so two different list objects holding the same nodes are both kept - confirm that is intended.
    List<List<GraphNode>> candidatePaths = GetNodes(workingCopy)
        .SelectMany(node => node.DownPaths)
        .Distinct()
        .ToList();

    //Most intense paths first
    candidatePaths.Sort((x, y) => EvaluatePathForIntensity(y).CompareTo(EvaluatePathForIntensity(x)));

    //Score each path
    List<ScoredSequence> scored = new List<ScoredSequence>();
    foreach (List<GraphNode> path in candidatePaths)
    {
        double ppmError = EvaluatePathForPPMError(path);
        double pathIntensity = EvaluatePathForIntensity(path);
        string residues = GetSequence(path);

        scored.Add(new ScoredSequence
        {
            AvgPPMError = Math.Round(ppmError, 1),
            Sequence = residues,
            TotalIntensity = Math.Round(pathIntensity, 0),
            SequentialScore = path.Count
        });
    }

    //Paths that produced an empty sequence are discarded
    int removed = scored.RemoveAll(candidate => candidate.Sequence.Length == 0);
    Console.WriteLine("Removing bad results: " + removed);

    //Group the candidates by sequence and keep, for each sequence, the one with the highest total intensity
    Dictionary<string, List<ScoredSequence>> bySequence = scored
        .GroupBy(candidate => candidate.Sequence)
        .ToDictionary(grp => grp.Key, grp => grp.ToList());

    List<ScoredSequence> winners = new List<ScoredSequence>();
    foreach (List<ScoredSequence> sameSequence in bySequence.Values)
    {
        sameSequence.Sort((x, y) => y.TotalIntensity.CompareTo(x.TotalIntensity));
        winners.Add(sameSequence[0]);
    }

    return winners;
}
//Convenience overload that accepts a bare peak list instead of an MSFull.
//The peaks are wrapped in a new MSFull whose TotalIonIntensity is the sum of the peak intensities,
//and the call is forwarded to the MSFull overload with all other arguments unchanged.
//The wrapper holds experimentalPeaks itself (the list is not copied here).
public static SpectrumComparisonResult Do(List<TheTests> theTests, List<PredictedIon> theoretical, List<Ion> experimentalPeaks, double ppm, int cleanedPeptideSequenceLength, double relativeIntensityThreshold)
{
    MSFull wrappedSpectrum = new MSFull
    {
        MSData = experimentalPeaks,
        TotalIonIntensity = experimentalPeaks.Sum(peak => peak.Intensity)
    };

    return Do(theTests, theoretical, wrappedSpectrum, ppm, cleanedPeptideSequenceLength, relativeIntensityThreshold);
}
//Plots a light-weight spectrum by first converting it to an MSFull.
//One Ion is created per entry of mslight.MZ, paired with the intensity at the same index;
//the third Ion constructor argument is always 0 and the fourth is the scan number of the light spectrum.
//The remaining arguments are passed straight through to the MSFull overload.
public void PlotSpectrum(MSParserLight.MSLight mslight, double ppm, string peptideSequence, List<ModificationItem> myMods, bool etdMode)
{
    MSFull converted = new MSFull();

    int peakIndex = 0;
    while (peakIndex < mslight.MZ.Count)
    {
        Ion peak = new Ion(mslight.MZ[peakIndex], mslight.Intensity[peakIndex], 0, mslight.ScanNumber);
        converted.MSData.Add(peak);
        peakIndex++;
    }

    PlotSpectrum(converted, ppm, peptideSequence, myMods, etdMode);
}
//Prepares a spectrum, in place, for de novo graph construction:
//  1. removes every peak whose intensity is below intensityPercentageCuttoff times the most intense peak,
//  2. sorts the remaining peaks by ascending m/z,
//  3. puts an artificial peak at m/z 1.007825 (with the maximum intensity) at the front of the list,
//  4. appends one intensity-1 peak for every value in DechargedPrecursorsFromZLine.
//Max() throws if the spectrum has no peaks.
private void PrepareMSForDeNovo(MSFull myMS)
{
    var peaks = myMS.MSData;

    double basePeakIntensity = peaks.Max(peak => peak.Intensity);
    double intensityFloor = intensityPercentageCuttoff * basePeakIntensity;

    peaks.RemoveAll(peak => peak.Intensity < intensityFloor);
    peaks.Sort((left, right) => left.MZ.CompareTo(right.MZ));

    //Artificial starting peak; 1.007825 is presumably the mass of a hydrogen atom - TODO confirm
    peaks.Insert(0, new Ion(1.007825, basePeakIntensity, 0, 0));

    //These are appended after the sort, so they end up at the tail of the list regardless of their m/z
    foreach (double precursorMass in myMS.DechargedPrecursorsFromZLine)
    {
        peaks.Add(new Ion(precursorMass, 1, 0, 0));
    }
}
//Loads a spectrum and its annotation settings into the viewer control and plots it.
//  ms              - spectrum to display
//  ppm             - value assigned to the viewer's SetMS2PPM
//  peptideSequence - peptide sequence handed to the viewer
//  myMods          - modifications handed to the viewer
//  etdMode         - true puts the viewer in ETD mode, false in CID mode
public void PlotSpectrum(MSFull ms, double ppm, string peptideSequence, List<ModificationItem> myMods, bool etdMode)
{
    msViewer1.MyMS = ms;
    msViewer1.SetMS2PPM = ppm;
    msViewer1.PeptideSequence = peptideSequence;
    msViewer1.Modifications = myMods;

    //The previous version had the two branches swapped and then unconditionally forced CID mode,
    //so the etdMode flag never had any effect. The mode is now chosen once, from the flag.
    if (etdMode)
    {
        msViewer1.SetToETDMode();
    }
    else
    {
        msViewer1.SetToCIDMode();
    }

    msViewer1.Plot();
}
//Optimized for finding the most intense sequence.
//Returns the m/z span (largest node m/z minus smallest node m/z) of the path that ranks first
//when all candidate paths are sorted by descending intensity score.
//The input spectrum is cloned first, so the caller's object is not modified.
public double FastMaxSequentialScoreCoverage(PatternTools.MSParser.MSFull theMS)
{
    MSFull workingCopy = PatternTools.ObjectCopier.Clone(theMS);
    PrepareMSForDeNovo(workingCopy);

    //Flatten the down paths of every node into a single list
    List<List<GraphNode>> rankedPaths = GetNodes(workingCopy)
        .SelectMany(node => node.DownPaths)
        .Distinct()
        .ToList();

    //Most intense path first
    rankedPaths.Sort((x, y) => EvaluatePathForIntensity(y).CompareTo(EvaluatePathForIntensity(x)));

    List<GraphNode> topPath = rankedPaths[0];
    return topPath.Max(node => node.MZ) - topPath.Min(node => node.MZ);
}
//Builds the de novo graph for a prepared spectrum and returns its nodes.
//  - One GraphNode is created per peak (m/z rounded to 3 decimals, same intensity, index = position in MSData).
//  - Node i is linked to a later node j once for every entry of gapItems whose GapSize explains the
//    m/z difference between them within ppmTolerance.
//  - Walking the nodes from last to first, each node collects "down paths": chains of nodes reachable
//    through its child links. A chain is kept only when its intensity score beats the smallest value in a
//    running pool of best scores, which is shared by all nodes and capped at resultBuffer entries.
//On return every path in a node's DownPaths starts with that node itself; a node always has at least
//the path that contains only itself.
private List<GraphNode> GetNodes(MSFull myMS)
{
    List<GraphNode> allNodes = new List<GraphNode>(myMS.MSData.Count);

    //Create the node array
    for (int i = 0; i < myMS.MSData.Count; i++)
    {
        allNodes.Add(new GraphNode(Math.Round(myMS.MSData[i].MZ, 3), myMS.MSData[i].Intensity, i));
    }

    //Include the child links
    for (int i = 0; i < allNodes.Count; i++)
    {
        for (int j = i + 1; j < allNodes.Count; j++)
        {
            double delta = allNodes[j].MZ - allNodes[i].MZ;

            //Stops scanning as soon as one later node is further away than the largest gap;
            //this relies on the nodes being ordered by ascending m/z
            if (delta > largestGap)
            {
                break;
            }

            //Does not depend on the gap item, so it is computed once per node pair
            double thisStep = allNodes[i].MZ + delta;

            foreach (GapItem gp in gapItems)
            {
                double ppm2 = PatternTools.pTools.PPM(thisStep, allNodes[i].MZ + gp.GapSize);
                if (ppm2 < ppmTolerance)
                {
                    allNodes[i].ChildLinks.Add(new GraphNode.GraphNodeLink(j, ppm2, gp, allNodes[j].Intensity));
                }
            }
        }
    }

    //Sort all child links by error cost and seed every node with a single empty path
    foreach (GraphNode g in allNodes)
    {
        g.ChildLinks.Sort((a, b) => a.ErrorCostPPM.CompareTo(b.ErrorCostPPM));
        g.DownPaths = new List<List<GraphNode>>();
        g.DownPaths.Add(new List<GraphNode>());
    }

    //Running pool of the best path scores seen so far
    List<double> bestIntensities = new List<double>(resultBuffer);
    bestIntensities.Add(0);

    //Children always have a higher index than their parent, so walking backwards guarantees
    //that a child's down paths are complete before its parent extends them
    for (int i = allNodes.Count - 1; i >= 0; i--)
    {
        GraphNode rNode = allNodes[i];

        foreach (GraphNode.GraphNodeLink gnl in rNode.ChildLinks)
        {
            GraphNode child = allNodes[gnl.TheNode];

            foreach (List<GraphNode> path in child.DownPaths)
            {
                //The child's paths do not contain the child yet (owners are prepended at the very end),
                //so it is added explicitly here
                List<GraphNode> newPath = new List<GraphNode>() { child };
                newPath.AddRange(path);

                double downIntensity = EvaluatePathForIntensity(newPath);
                double bestIntensitiesMin = bestIntensities.Min();

                if (downIntensity > bestIntensitiesMin)
                {
                    bestIntensities.Add(downIntensity);
                    rNode.DownPaths.Add(newPath);

                    if (bestIntensities.Count > resultBuffer)
                    {
                        bestIntensities.Remove(bestIntensitiesMin);
                    }
                }
            }
        }
    }

    //Finally put every node at the head of each of its own paths
    foreach (GraphNode g in allNodes)
    {
        foreach (List<GraphNode> l in g.DownPaths)
        {
            l.Insert(0, g);
        }
    }

    return allNodes;
}
//Compares a list of predicted ions against an experimental spectrum and fills a SpectrumComparisonResult
//with the requested tests (TheTests.AllTests requests every test).
//  theTests                     - tests to run
//  theoretical                  - predicted ions; the Matched flag of every entry is reset and then set here
//  experimentalTmp              - experimental spectrum; it is cloned, the original is left untouched
//  ppm                          - a peak matches a predicted ion when their PPM difference is below this value
//  cleanedPeptideSequenceLength - passed on to CountComplementaryPairs
//  relativeIntensityThreshold   - peaks whose intensity divided by the maximum intensity is below this are ignored
//Max() throws if the spectrum has no peaks.
public static SpectrumComparisonResult Do(List<TheTests> theTests, List<PredictedIon> theoretical, MSFull experimentalTmp, double ppm, int cleanedPeptideSequenceLength, double relativeIntensityThreshold)
{
    SpectrumComparisonResult scr = new SpectrumComparisonResult();

    //Make sure we dont screw up the original spectrum
    MSFull experimental = PatternTools.ObjectCopier.Clone(experimentalTmp);

    double maxIntensity = experimental.MSData.Max(a => a.Intensity);
    experimental.MSData.RemoveAll(a => a.Intensity / maxIntensity < relativeIntensityThreshold);

    //Resolve the requested tests once instead of searching the list before every test
    bool allTests = theTests.Contains(TheTests.AllTests);
    bool wantSignal = allTests || theTests.Contains(TheTests.SignalPercentage);
    bool wantA = allTests || theTests.Contains(TheTests.ACount);
    bool wantB = allTests || theTests.Contains(TheTests.BCount);
    bool wantC = allTests || theTests.Contains(TheTests.CCount);
    bool wantX = allTests || theTests.Contains(TheTests.XCount);
    bool wantY = allTests || theTests.Contains(TheTests.YCount);
    bool wantZ = allTests || theTests.Contains(TheTests.ZCount);
    bool wantAX = allTests || theTests.Contains(TheTests.ComplementaryPairsAX);
    bool wantBY = allTests || theTests.Contains(TheTests.ComplementaryPairsBY);
    bool wantCZ = allTests || theTests.Contains(TheTests.ComplementaryPairsCZ);

    //Just to make sure no flag is left over from a previous comparison
    foreach (PredictedIon p in theoretical)
    {
        p.Matched = false;
    }

    //Before all, find all matched theoretical and experimental ions
    List<Ion> matchedIons = new List<Ion>(100);
    foreach (PredictedIon pi in theoretical)
    {
        List<Ion> theIons = experimental.MSData.FindAll(a => PatternTools.pTools.PPM(a.MZ, pi.MZ) < ppm);
        if (theIons.Count > 0)
        {
            matchedIons.AddRange(theIons);
            pi.Matched = true;
        }
    }

    //A peak matched by several predicted ions must only be counted once
    matchedIons = matchedIons.Distinct().ToList();

    if (wantSignal)
    {
        scr.SignalPercentage = Math.Round(matchedIons.Sum(a => a.Intensity) / experimental.TotalIonIntensity, 6);
    }

    //The complementary pair tests are derived from the per-series scores. Previously they read the scores
    //back from scr, which only worked when the matching count tests had been requested as well; asking for
    //a pair test alone silently scored default values. A series is now scored whenever either its count
    //test or its pair test is requested, and it is stored on the result only when the count was asked for.
    var aCount = scr.ACount;
    if (wantA || wantAX)
    {
        aCount = SequentialScore(theoretical, IonSeries.A);
    }
    if (wantA)
    {
        scr.ACount = aCount;
    }

    var bCount = scr.BCount;
    if (wantB || wantBY)
    {
        bCount = SequentialScore(theoretical, IonSeries.B);
    }
    if (wantB)
    {
        scr.BCount = bCount;
    }

    var cCount = scr.CCount;
    if (wantC || wantCZ)
    {
        cCount = SequentialScore(theoretical, IonSeries.C);
    }
    if (wantC)
    {
        scr.CCount = cCount;
    }

    var xCount = scr.XCount;
    if (wantX || wantAX)
    {
        xCount = SequentialScore(theoretical, IonSeries.X);
    }
    if (wantX)
    {
        scr.XCount = xCount;
    }

    var yCount = scr.YCount;
    if (wantY || wantBY)
    {
        yCount = SequentialScore(theoretical, IonSeries.Y);
    }
    if (wantY)
    {
        scr.YCount = yCount;
    }

    var zCount = scr.ZCount;
    if (wantZ || wantCZ)
    {
        zCount = SequentialScore(theoretical, IonSeries.Z);
    }
    if (wantZ)
    {
        scr.ZCount = zCount;
    }

    if (wantAX)
    {
        scr.ComplementaryPairsAX = CountComplementaryPairs(aCount, xCount, cleanedPeptideSequenceLength);
    }

    if (wantBY)
    {
        scr.ComplementaryPairsBY = CountComplementaryPairs(bCount, yCount, cleanedPeptideSequenceLength);
    }

    if (wantCZ)
    {
        scr.ComplementaryPairsCZ = CountComplementaryPairs(cCount, zCount, cleanedPeptideSequenceLength);
    }

    return scr;
}