/// <summary>
/// Collects the charge states (1 through 9) for which a peak exists at the m/z offset
/// expected for that charge (1/charge above the current peak) and for which the
/// correlation between the two m/z values is greater than 0.2.
/// </summary>
/// <param name="indexOfCurrentPeak">Position of the peak of interest inside <paramref name="msPeakList"/></param>
/// <param name="msPeakList">Mass spec peak data</param>
/// <param name="ppmTolerance">Tolerance, in ppm, used both for locating the neighbouring peak and for the correlation call</param>
/// <returns>The set of charge states that passed the correlation cutoff (possibly empty)</returns>
internal HashSet<int> GetPotentialChargeState(int indexOfCurrentPeak, List<Peak> msPeakList, double ppmTolerance)
{
    // Note: does not actually create peaks. Only loads them. An exception is thrown if it's not there.
    CreatePeaksIfNeeded();

    var acceptedCharges = new HashSet<int>();
    var anchorMz = msPeakList.ElementAt(indexOfCurrentPeak).XValue;

    for (var candidateCharge = 1; candidateCharge < 10; candidateCharge++)
    {
        // The next isotope peak of an ion with this charge sits 1/charge above the anchor
        var expectedMz = anchorMz + 1.0 / candidateCharge;
        var mzWindow = ppmTolerance * expectedMz / 1e6;
        var lowerMZ = expectedMz - mzWindow;
        var upperMZ = expectedMz + mzWindow;

        var neighbour = msPeakList.Find(peak => peak.XValue <= upperMZ && peak.XValue >= lowerMZ);
        if (neighbour != null)
        {
            var correlation = getCorrelation(anchorMz, neighbour.XValue, ppmTolerance, ppmTolerance);
            if (correlation > 0.2)
            {
                acceptedCharges.Add(candidateCharge);
            }
        }
    }

    // Every charge is followed by a tab (including the last one), then a newline
    var chargeColumns = string.Concat(acceptedCharges.Select(acceptedCharge => acceptedCharge + "\t"));
    IqLogger.LogTrace("Potential Charges using correlation: " + chargeColumns + "\n");

    return acceptedCharges;
}
/// <summary>
/// Prepares the workflow for processing: sets the result type on the run, initializes
/// parameters, output file names, target mass spectra and processing tasks, and, when the
/// deconvolutor needs it, makes sure the _peaks.txt file exists and is loaded.
/// Returns without doing anything further when Run is null.
/// </summary>
public virtual void InitializeWorkflow()
{
    Check.Assert(Run != null, "Cannot initialize workflow. Run is null");
    if (Run == null)
    {
        return;
    }

    Check.Assert(NewDeconToolsParameters != null, "Cannot initialize workflow. Parameters are null");

    Run.ResultCollection.ResultType = GetResultType();

    InitializeParameters();
    CreateOutputFileNames();
    WriteProcessingInfoToLog();
    CreateTargetMassSpectra();
    ExecutePreprocessHook();
    InitializeProcessingTasks();

    if (!_deconvolutorRequiresPeaksFile)
    {
        return;
    }

    // The new iThrash deconvolutor uses the _peaks.txt file, so check for it and create it if necessary
    if (!CheckForPeaksFile(OutputDirectoryPath))
    {
        IqLogger.LogMessage("Creating _peaks.txt file. Takes 1 to 5 minutes.");
        CreatePeaksFile(NewDeconToolsParameters.PeakDetectorParameters, OutputDirectoryPath);
    }

    IqLogger.LogMessage("Loading _peaks.txt file into memory. Takes 0 - 30 seconds" + Environment.NewLine);
    LoadPeaks(OutputDirectoryPath);
}
/// <summary>
/// Chooses the folder for IQ log files (an "IqLogs" subfolder of the configured output
/// folder base, or the default output folder when no base is configured), creates it if
/// it does not exist, and initializes the IQ log for the current dataset.
/// </summary>
private void SetupLogging()
{
    var loggingFolder = string.IsNullOrEmpty(Parameters.OutputFolderBase)
        ? GetDefaultOutputFolder()
        : Path.Combine(Parameters.OutputFolderBase, "IqLogs");

    var folderIsMissing = !Directory.Exists(loggingFolder);
    if (folderIsMissing)
    {
        Directory.CreateDirectory(loggingFolder);
    }

    IqLogger.LogDirectory = loggingFolder;
    IqLogger.InitializeIqLog(_run.DatasetName);
}
/// <summary>
/// The main Thrash algorithm. Walks the peaks from most to least intense, determines the
/// candidate charge states for each unprocessed peak, builds an isotopic profile per
/// candidate charge state, picks one profile per peak and returns the de-duplicated set.
/// </summary>
/// <param name="originalXYData">Mass spec XY data</param>
/// <param name="msPeakList">Mass spec peak data</param>
/// <param name="backgroundIntensity">Background intensity; multiplied by <paramref name="minMSFeatureToBackgroundRatio"/> to obtain the minimum peak height that is still processed</param>
/// <param name="minPeptideIntensity">Not referenced by this method</param>
/// <param name="minMSFeatureToBackgroundRatio">Factor applied to <paramref name="backgroundIntensity"/> to obtain the intensity cutoff</param>
/// <returns>List of isotopic profiles, ordered by descending IntensityMostAbundantTheor</returns>
public List<IsotopicProfile> PerformThrash(XYData originalXYData, List<Peak> msPeakList, double backgroundIntensity = 0, double minPeptideIntensity = 0, double minMSFeatureToBackgroundRatio = 3)
{
    var isotopicProfiles = new List<IsotopicProfile>();

    if (Parameters.AreAllTheoreticalProfilesCachedBeforeStarting)
    {
        CreateAllTheoreticalProfilesForMassRange();
    }

    var minMSFeatureIntensity = backgroundIntensity * minMSFeatureToBackgroundRatio;

    var xyData = new XYData
    {
        Xvalues = originalXYData.Xvalues,
        Yvalues = originalXYData.Yvalues
    };

    var sortedPeakList = new List<Peak>(msPeakList).OrderByDescending(p => p.Height).ToList();
    var peaksAlreadyProcessed = new HashSet<Peak>();

    // Debug summary of the decisions made per peak; see the commented-out WriteLine after the loop
    var sb = new StringBuilder();

    var listOfMonoMZs = new SortedDictionary<int, double>();
    var currentUniqueMSFeatureIDNum = 0;

    foreach (var msPeak in sortedPeakList)
    {
        if (peaksAlreadyProcessed.Contains(msPeak))
        {
            continue;
        }

        // Peaks are visited in descending height order, so every remaining peak is below the cutoff too
        var peakIsBelowIntensityThreshold = msPeak.Height < minMSFeatureIntensity;
        if (peakIsBelowIntensityThreshold)
        {
            break;
        }

        // Get potential charge states
        var ppmTolerance = (msPeak.Width / 2.35) / msPeak.XValue * 1e6; // peak's sigma value / mz * 1e6

        HashSet<int> potentialChargeStates;
        if (UseAutoCorrelationChargeDetermination)
        {
            var chargeState = PattersonChargeStateCalculator.GetChargeState(xyData, msPeakList, msPeak as MSPeak);
            potentialChargeStates = new HashSet<int> { chargeState };
        }
        else
        {
            IqLogger.LogTrace("MZ value: " + msPeak.XValue + "\n");

            // The index is only needed on this path, so the linear IndexOf scan is done here
            // rather than for every peak (including skipped ones)
            var indexOfCurrentPeak = msPeakList.IndexOf(msPeak);
            potentialChargeStates = GetPotentialChargeStates(indexOfCurrentPeak, msPeakList, ppmTolerance);

            // NOTE(review): the result of this call is discarded. It is kept because the call may
            // have side effects (it logs and may load peaks) - confirm whether it can be removed.
            var chargeDecider = new ChromCorrelatingChargeDecider(_run);
            chargeDecider.GetPotentialChargeState(indexOfCurrentPeak, msPeakList, ppmTolerance);
        }

        var reportString201 = "potentialChargeStates: ";
        foreach (var charge in potentialChargeStates)
        {
            reportString201 += charge + "\t";
        }
        IqLogger.LogTrace(reportString201 + "\n");

        var potentialMSFeaturesForGivenChargeState = new List<IsotopicProfile>();
        foreach (var potentialChargeState in potentialChargeStates)
        {
            // 1.0 is the worst fit value. Start with 1.0 and see if we can find a better fit value
            var bestFitVal = 1.0;

            //TODO: there could be a problem here
            var msFeature = GetMSFeature(msPeakList, xyData, potentialChargeState, msPeak, ref bestFitVal, out var theorIso);
            if (msFeature == null)
            {
                continue;
            }

            msFeature.Score = bestFitVal;
            msFeature.IntensityMostAbundant = msFeature.getMostIntensePeak().Height;

            // Use the observed peak at the theoretical apex position when that position exists
            // in the observed peak list; otherwise fall back to the most intense observed peak
            var indexMostAbundantPeakTheor = theorIso.GetIndexOfMostIntensePeak();
            if (msFeature.Peaklist.Count > indexMostAbundantPeakTheor && indexMostAbundantPeakTheor >= 0)
            {
                msFeature.IntensityMostAbundantTheor = msFeature.Peaklist[indexMostAbundantPeakTheor].Height;
            }
            else
            {
                msFeature.IntensityMostAbundantTheor = msFeature.IntensityMostAbundant;
            }

            // Skip features whose monoisotopic m/z was already reported for an earlier peak
            var msFeatureAlreadyPresentInAnotherChargeState = listOfMonoMZs.ContainsValue(msFeature.MonoPeakMZ);
            if (!msFeatureAlreadyPresentInAnotherChargeState)
            {
                potentialMSFeaturesForGivenChargeState.Add(msFeature);
            }
        }

        IsotopicProfile isoProfile;
        if (potentialMSFeaturesForGivenChargeState.Count == 0)
        {
            sb.Append(msPeak.XValue.ToString("0.00000") + "\tNo profile found.\n");
            isoProfile = null;
        }
        else if (potentialMSFeaturesForGivenChargeState.Count == 1)
        {
            isoProfile = potentialMSFeaturesForGivenChargeState[0];
            sb.Append(msPeak.XValue.ToString("0.00000") + "\t" + isoProfile.MonoPeakMZ.ToString("0.0000") + "\t" +
                      isoProfile.ChargeState + "\t" + isoProfile.Score + "\t" + ppmTolerance + "\n");
        }
        else
        {
            sb.Append("Multiple candidates found...." + "\n");
            foreach (var isotopicProfile in potentialMSFeaturesForGivenChargeState)
            {
                sb.Append(msPeak.XValue.ToString("0.00000") + "\t" + isotopicProfile.MonoPeakMZ.ToString("0.0000") + "\t" +
                          isotopicProfile.ChargeState + "\t" + isotopicProfile.Score + "\t" + ppmTolerance + "\n");
            }
            sb.Append(Environment.NewLine);

            if (Parameters.CheckAllPatternsAgainstChargeState1)
            {
                isoProfile = potentialMSFeaturesForGivenChargeState.FirstOrDefault(n => n.ChargeState == 1);
            }
            else
            {
                //TODO: [Paul] This is the major means of deciding between charge states and where we need to do better.
                //We need some test cases to capture this problem.
                if (doPaulMethod)
                {
                    var peaksNotLoaded = _run.ResultCollection.MSPeakResultList == null ||
                                         _run.ResultCollection.MSPeakResultList.Count == 0;
                    if (peaksNotLoaded)
                    {
                        var stopwatch = Stopwatch.StartNew();
                        LoadPeaks(_run);
                        stopwatch.Stop();
                        IqLogger.LogDebug("stopwatch: " + stopwatch.Elapsed);
                    }

                    // First try only the well-fitting candidates; fall back to all candidates
                    var brain = new ChromCorrelatingChargeDecider(_run);
                    isoProfile = brain.DetermineCorrectIsotopicProfile(potentialMSFeaturesForGivenChargeState.Where(n => n.Score < .50).ToList()) ??
                                 brain.DetermineCorrectIsotopicProfile(potentialMSFeaturesForGivenChargeState);
                }
                else
                {
                    // Do it the regular way: highest charge among the good fits, else the best fit overall
                    isoProfile = (from n in potentialMSFeaturesForGivenChargeState
                                  where n.Score < 0.15
                                  orderby n.ChargeState descending
                                  select n).FirstOrDefault() ??
                                 (from n in potentialMSFeaturesForGivenChargeState
                                  orderby n.Score
                                  select n).First();
                }

                // The charge decider can come back empty-handed; only report when a profile was
                // chosen instead of dereferencing null
                if (isoProfile != null)
                {
                    var reportString309 = "\nM/Z = " + isoProfile.MonoPeakMZ +
                                          "\nCHOSEN CHARGE: " + isoProfile.ChargeState + "\n\n";
                    IqLogger.LogTrace(reportString309);
                }
            }
        }

        if (isoProfile != null)
        {
            listOfMonoMZs.Add(currentUniqueMSFeatureIDNum, isoProfile.MonoPeakMZ);
            currentUniqueMSFeatureIDNum++;

            isotopicProfiles.Add(isoProfile);

            // Peaks that belong to an accepted profile are not used as a starting point again
            foreach (var peak in isoProfile.Peaklist)
            {
                peaksAlreadyProcessed.Add(peak);
            }
        }
    } // end of foreach peak loop

    //Console.WriteLine(sb.ToString());

    var uniqueIsotopicProfiles = removeDuplicatesFromFoundMSFeatures(isotopicProfiles);

    //NOTE: we don't need to do the reordering, but I do this so I can compare to the old THRASH
    uniqueIsotopicProfiles = uniqueIsotopicProfiles.OrderByDescending(p => p.IntensityMostAbundantTheor).ToList();
    return uniqueIsotopicProfiles;
}
/// <summary>
/// Chooses one isotopic profile from a list of candidates that differ in charge state.
/// For every candidate (highest charge first) a correlation is computed, plus correlations
/// against alternative charge states; the final decision is delegated to
/// GetIsotopicProfileMethod1.
/// </summary>
/// <param name="potentialIsotopicProfiles">Candidate profiles; may be null or empty</param>
/// <returns>The selected profile, or null when no candidates were supplied or none was selected</returns>
public override IsotopicProfile DetermineCorrectIsotopicProfile(List<IsotopicProfile> potentialIsotopicProfiles)
{
    var nothingToDecide = potentialIsotopicProfiles == null || potentialIsotopicProfiles.Count == 0;
    if (nothingToDecide)
    {
        return null;
    }

    // Note: does not actually create peaks. Only loads them. An exception is thrown if it's not there.
    CreatePeaksIfNeeded();

    potentialIsotopicProfiles = potentialIsotopicProfiles.OrderByDescending(n => n.ChargeState).ToList();

    var chargeStates = potentialIsotopicProfiles.Select(profile => profile.ChargeState).ToArray();
    var correlations = new double[chargeStates.Length];
    var correlationsWithAltChargeState = new double[chargeStates.Length, 2]; // [Index, How many other states to try]

    // Peak index taken from the first (highest charge) candidate; -1 is replaced by 0
    var index = potentialIsotopicProfiles.First().MonoIsotopicPeakIndex;
    if (index == -1)
    {
        index = 0;
    }

    IqLogger.LogTrace("\nM/Z : " + potentialIsotopicProfiles.First().Peaklist[index].XValue + "\n");

    for (var candidateIndex = 0; candidateIndex < potentialIsotopicProfiles.Count; candidateIndex++)
    {
        var candidate = potentialIsotopicProfiles[candidateIndex];

        var correlation = GetCorrelation(candidate);
        var chargesToTry = GetChargesToTry(candidate);

        for (var alt = 0; alt < chargesToTry.Length; alt++)
        {
            correlationsWithAltChargeState[candidateIndex, alt] =
                GetCorrelationWithAnotherChargeState(candidate, chargesToTry[alt]);
        }

        // Line-oriented trace output for this candidate
        var report = "\nCHARGE: " + candidate.ChargeState + "\n";
        report += "CORRELATION: " + correlation + "\n";
        for (var alt = 0; alt < chargesToTry.Length; alt++)
        {
            report += "charge " + chargesToTry[alt] +
                      " (M/Z =" + GetMZOfAnotherChargeState(candidate, index, chargesToTry[alt]) +
                      ") correlation: " + correlationsWithAltChargeState[candidateIndex, alt] + "\n";
        }
        report += "Score: " + candidate.Score;
        IqLogger.LogTrace(report);

        correlations[candidateIndex] = correlation;
    }

    return GetIsotopicProfileMethod1(chargeStates, correlations, correlationsWithAltChargeState, potentialIsotopicProfiles);
}
/// <summary>
/// Decides between candidate isotopic profiles using three metrics applied in order:
/// (1) candidates for which an alternative charge state exists, ranked by their best
/// alternative-charge correlation; (2) elimination of charges that are factors of a
/// well-correlating candidate, based on the standard deviation of the set's correlations;
/// (3) the Patterson charge state calculator, falling back to the remaining contender
/// with the highest correlation.
/// </summary>
/// <param name="chargeStates">Charge state of each candidate, index-aligned with the other arguments</param>
/// <param name="correlations">Correlation of each candidate</param>
/// <param name="correlationsWithAltChargeState">Per candidate (row), correlations with alternative charge states (columns)</param>
/// <param name="potentialIsotopicProfiles">The candidate profiles</param>
/// <returns>The chosen profile, or null when the best remaining correlation is not positive</returns>
private IsotopicProfile GetIsotopicProfileMethod1(int[] chargeStates, IReadOnlyList<double> correlations, double[,] correlationsWithAltChargeState, IReadOnlyCollection<IsotopicProfile> potentialIsotopicProfiles)
{
    const double correlationThreshold = 0.49;
    const double standardDeviationThreshold = .3;

    var candidateCount = correlations.Count;
    var standardDeviationOfEachSet = new double[candidateCount];
    var averageCorrOfEachSet = new double[candidateCount];
    var chargeStateSets = new List<int>[candidateCount];
    var contendingCharges = chargeStates.ToList();

    // Metric 1: an alternative charge state is present
    var altColumnCount = correlationsWithAltChargeState.GetLength(1);
    var favorites = new Dictionary<int, double>();
    foreach (var contender in contendingCharges)
    {
        var contenderIndex = Array.IndexOf(chargeStates, contender);
        if (!AnotherChargeStateExists(contenderIndex, correlationsWithAltChargeState))
        {
            continue;
        }

        //TODO: and no correlations of other non-factor charge states.
        var altCorrelations = Enumerable.Range(0, altColumnCount)
            .Select(column => correlationsWithAltChargeState[contenderIndex, column])
            .ToArray();
        favorites.Add(contenderIndex, altCorrelations.Max());
    }

    if (favorites.Count > 0)
    {
        var topFavorite = favorites.OrderByDescending(x => x.Value).First();
        return potentialIsotopicProfiles.ElementAt(topFavorite.Key);
    }

    // Metric 2: standard deviation of each candidate's set of factor charges
    for (var i = 0; i < candidateCount; i++)
    {
        var indexesWhoAreFactorsOfMe = GetIndexesWhoAreAFactorOfMe(i, chargeStates);
        if (indexesWhoAreFactorsOfMe == null)
        {
            // null means that we are at the end of the set. The st. dev. is already defaulted
            // to 0, which is what it would be for a single item.
            break;
        }

        // This candidate's correlation first, followed by those of its factor charges
        var arrayOfCorrelationsInSet = new double[indexesWhoAreFactorsOfMe.Count + 1];
        arrayOfCorrelationsInSet[0] = correlations[i];
        var slot = 1;
        foreach (var factorIndex in indexesWhoAreFactorsOfMe)
        {
            arrayOfCorrelationsInSet[slot] = correlations[factorIndex];
            slot++;
        }

        chargeStateSets[i] = GetSet(i, indexesWhoAreFactorsOfMe, chargeStates);
        standardDeviationOfEachSet[i] = MathNet.Numerics.Statistics.Statistics.StandardDeviation(arrayOfCorrelationsInSet);
        averageCorrOfEachSet[i] = MathNet.Numerics.Statistics.Statistics.Mean(arrayOfCorrelationsInSet);

        var setIsConsistent = standardDeviationOfEachSet[i] < standardDeviationThreshold;
        var candidateCorrelatesWell = correlations[i] > correlationThreshold;
        if (setIsConsistent && candidateCorrelatesWell) // Dangerous 0.05 and .7
        {
            foreach (var factorIndex in indexesWhoAreFactorsOfMe)
            {
                contendingCharges.Remove(chargeStates[factorIndex]);
            }
        }

        // If there is only one left after its own factors are removed, it's that one
        if (contendingCharges.Count == 1)
        {
            var survivorIndex = Array.IndexOf(chargeStates, contendingCharges.First());
            return potentialIsotopicProfiles.ElementAt(survivorIndex);
        }
    }

    // Metric 3: ask Patterson
    var chargeStateCalculator = new PattersonChargeStateCalculatorWithChanges();
    var chargeState = chargeStateCalculator.GetChargeState(_run.XYData, _run.PeakList, potentialIsotopicProfiles.First().getMonoPeak());

    IqLogger.LogDebug("had to use the patterson calculator and this is what it gave me: " + chargeState);
    foreach (var knownCharge in chargeStates)
    {
        IqLogger.LogDebug(knownCharge + "\t");
    }
    IqLogger.LogDebug("Charge state length: " + chargeStates.Length);

    if (chargeStates.Contains(chargeState))
    {
        var pattersonIndex = Array.IndexOf(chargeStates, chargeState);
        IqLogger.LogDebug(pattersonIndex.ToString());
        return potentialIsotopicProfiles.ElementAt(pattersonIndex);
    }

    // Patterson's answer is not among the candidates: take the remaining contender with the
    // highest correlation
    var bestChargeIndex = -1;
    var bestCorrelation = -6.0; // arbitrary negative
    foreach (var remainingCharge in contendingCharges)
    {
        var remainingIndex = Array.IndexOf(chargeStates, remainingCharge);
        if (correlations[remainingIndex] > bestCorrelation)
        {
            bestCorrelation = correlations[remainingIndex];
            bestChargeIndex = remainingIndex;
        }
    }

    if (bestCorrelation <= 0)
    {
        return null;
    }

    return potentialIsotopicProfiles.ElementAt(bestChargeIndex);
}