/// <summary>
/// Retrieves the ratio of isomers within a bunch of queries (precursor elution curve).
/// For every fragment count in [nbMinFragments, nbMaxFragments] each query spectrum of the mixed
/// precursor is solved against the matching spiked isomers; the resulting per-isomer curves are
/// collected together with their cumulated underflow and handed to
/// <c>mixedPrecursor.ComputePeptideRatios</c>.
/// </summary>
/// <param name="spikes">Characterized (spiked) precursors, keyed by precursor m/z and then by sample.</param>
/// <param name="mixedPrecursor">Mixed precursor whose queries are solved.</param>
/// <param name="dbOptions">Supplies the precursor and product mass tolerances, the console and the output folder.</param>
/// <param name="nbMinFragments">First fragment count that is tried.</param>
/// <param name="nbMaxFragments">Last fragment count that is tried (inclusive).</param>
/// <returns>Whatever <c>mixedPrecursor.ComputePeptideRatios</c> returns for the collected curve sets.</returns>
public static Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> GetRatios(Dictionary<double, Dictionary<Sample, CharacterizedPrecursor>> spikes, MixedPrecursor mixedPrecursor, DBOptions dbOptions, int nbMinFragments, int nbMaxFragments)
{
    var errorPerCurveSet = new Dictionary<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>, double>();

    for (int fragmentCount = nbMinFragments; fragmentCount <= nbMaxFragments; fragmentCount++)
    {
        // Gather the spiked isomers whose m/z lies within the precursor tolerance of the mixed precursor.
        // A single isomer that is not valid for this fragment count disqualifies the whole fragment count.
        bool everySpikeUsable = true;
        var candidates = new List<CharacterizedPrecursor>();
        foreach (KeyValuePair<double, Dictionary<Sample, CharacterizedPrecursor>> spikesAtMz in spikes)
        {
            if (Math.Abs(Utilities.Numerics.CalculateMassError(spikesAtMz.Key, mixedPrecursor.MZ, dbOptions.precursorMassTolerance.Units)) <= dbOptions.precursorMassTolerance.Value)
            {
                foreach (CharacterizedPrecursor spike in spikesAtMz.Value.Values)
                {
                    if (spike.IsValid(fragmentCount))
                    {
                        candidates.Add(spike);
                    }
                    else
                    {
                        everySpikeUsable = false;
                    }
                }
            }
        }

        if (!everySpikeUsable)
        {
            continue;
        }

        int ignoredSpectra = 0;
        double totalUnderFlow = 0;
        var curves = new Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>();

        foreach (Query query in mixedPrecursor.Queries)
        {
            double scanTimeMs = query.spectrum.RetentionTimeInMin * 60.0 * 1000.0;
            double underFlow = 0;
            double percentError = 0;
            Dictionary<CharacterizedPrecursor, SolvedResult> solved = SolveFromSpectrum(
                candidates,
                fragmentCount,
                query.spectrum.Peaks,
                dbOptions.productMassTolerance,
                mixedPrecursor.eCurveIntensityPerMS.GetLocalArea(scanTimeMs, scanTimeMs + query.spectrum.InjectionTime),
                query.spectrum.PrecursorIntensity,
                out underFlow,
                out percentError,
                dbOptions.ConSole,
                dbOptions.OutputFolder + "Fragments" + System.IO.Path.DirectorySeparatorChar + "Fragments_" + vsCSV.GetFileName_NoExtension(mixedPrecursor.Sample.sSDF) + "_" + mixedPrecursor.MZ + "_" + query.spectrum.ScanNumber + "_" + fragmentCount + ".csv");

            // The underflow is cumulated even for spectra that end up being ignored below.
            totalUnderFlow += underFlow;

            if (percentError < 0.5)
            {
                foreach (KeyValuePair<CharacterizedPrecursor, SolvedResult> solution in solved)
                {
                    MaxFlowElutionCurve curve;
                    if (!curves.TryGetValue(solution.Key, out curve))
                    {
                        curve = new MaxFlowElutionCurve(fragmentCount);
                        curves.Add(solution.Key, curve);
                    }

                    curve.eCurvePerMs.AddPoint(scanTimeMs, solution.Value.NbFitTimes / query.spectrum.InjectionTime);
                    curve.eCurveCount.AddPoint(scanTimeMs, solution.Value.Ratio * mixedPrecursor.eCurveIntensityCount.InterpolateIntensity(scanTimeMs));
                }
            }
            else
            {
                ignoredSpectra++;
            }

            // Stop early once more than half of the spectra have been ignored.
            if (ignoredSpectra * 2 > mixedPrecursor.Queries.Count)
            {
                break;
            }
        }

        // Curves are only kept when fewer than half of the spectra were ignored.
        if (ignoredSpectra * 2 < mixedPrecursor.Queries.Count)
        {
            if (ignoredSpectra > 0)
            {
                Console.WriteLine("Ignored Spectrum : " + ignoredSpectra + " / " + mixedPrecursor.Queries.Count);
            }

            foreach (MaxFlowElutionCurve curve in curves.Values)
            {
                curve.Compute();
            }

            var curvesToKeep = new Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>();
            foreach (KeyValuePair<CharacterizedPrecursor, MaxFlowElutionCurve> entry in curves)
            {
                if (entry.Value.eCurvePerMs.Area > 0)
                {
                    curvesToKeep.Add(entry.Key, entry.Value);
                }
            }

            if (curvesToKeep.Count > 0)
            {
                errorPerCurveSet.Add(curvesToKeep, totalUnderFlow);
            }
        }
    }

    return mixedPrecursor.ComputePeptideRatios(errorPerCurveSet);
}
/// <summary>
/// Provides deconvoluted elution curves of mixed spectra from the provided raw files using the provided synthetic raw files.
/// Exports the results in CSV files (identifications, Results.csv, Results_SiteOccupancy.csv) and stores everything in class members.
/// </summary>
/// <param name="spikedRaws">Raw files of the spiked (synthetic) samples; one <c>Sample</c> is created per entry.</param>
/// <param name="mixedRaws">Raw files of the mixed samples; one <c>Sample</c> is created per entry.</param>
/// <param name="fastaFile">Fasta file handed to <c>CreateOptions</c> and read again for the site occupancy report.</param>
/// <param name="folderToOutputTo">Output folder handed to <c>CreateOptions</c>.</param>
/// <param name="conSol">Console receiving progress and error messages.</param>
public void Solve(string[] spikedRaws, string[] mixedRaws, string fastaFile, string folderToOutputTo, IConSol conSol)
{
    dbOptions = CreateOptions(fastaFile, folderToOutputTo, precTolPpm, prodTolDa, conSol);

    SpikedSamples = new Samples(dbOptions);
    for (int i = 0; i < spikedRaws.Length; i++)
    {
        SpikedSamples.Add(new Sample(i + 1, 1, 1, spikedRaws[i], spikedRaws[i], 0, ""));
    }

    //Precompute Spiked peptide identifications
    SpikedResult = Ace.Start(dbOptions, SpikedSamples, false, false);
    if (SpikedResult == null)
    {
        // Same guard as for the mixed samples below: report the problem instead of
        // failing with a NullReferenceException on the export call.
        conSol.WriteLine("OOPS! No queries could be extracted from the list of spiked spectrum files...");
        return;
    }
    SpikedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "SpikedSamplesPSMs.csv");

    MixedSamples = new Samples(dbOptions);
    for (int i = 0; i < mixedRaws.Length; i++)
    {
        MixedSamples.Add(new Sample(i + 1, 1, 1, mixedRaws[i], mixedRaws[i], 0, ""));
    }

    //Precompute Mixed peptide identifications
    mixedResult = Ace.Start(dbOptions, MixedSamples, false, false);
    if (mixedResult == null)
    {
        conSol.WriteLine("OOPS! No queries could be extracted from the list of mixed spectrum files...");
        return;
    }
    mixedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "MixedSamplesPSMs.csv");

    conSol.WriteLine("Computing gradient descents...");

    //Compute all usable spiked peptides
    characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(SpikedSamples, SpikedResult, dbOptions, nbMinFragments, nbMaxFragments);
    ExportSpikedSampleResult(characterizedPeptides, dbOptions);

    // Results.csv starts with three header rows describing the spiked peptides. The first two
    // columns of the data rows are "Mixed Sample" and "Precursor", hence the empty second
    // column in the curve and area rows.
    vsCSVWriter writerCumul = new vsCSVWriter(OutputFolder + "Results.csv");
    string titleCombined = "Mixed Sample,Precursor";
    string curveStr = "Polynomial Curve,";
    string spikedIntensityStr = "Area under the curve,";
    foreach (double precursor in characterizedPeptides.Keys)
    {
        foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
        {
            titleCombined += "," + charPrec.Peptide.Sequence + " Charge " + charPrec.Charge;

            var coefficients = charPrec.eCurveIntensityCount.Coefficients;
            if (coefficients != null && coefficients.Length == 3)
            {
                // Fixed: the constant term used to be glued to the "x" of the linear term ("2x3").
                curveStr += "," + coefficients[0] + "x^2 + " + coefficients[1] + "x + " + coefficients[2];
            }
            else
            {
                curveStr += ",NA";
            }

            spikedIntensityStr += "," + charPrec.eCurveIntensityCount.Area;
        }
    }
    writerCumul.AddLine(titleCombined);
    writerCumul.AddLine(curveStr);
    writerCumul.AddLine(spikedIntensityStr);

    mixedPrecursors = new Dictionary<Sample, List<MixedPrecursor>>();
    foreach (Sample mixedSample in MixedSamples)
    {
        mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, dbOptions, characterizedPeptides));
    }

    //Get the list of precursors to characterize
    foreach (Sample mixedSample in MixedSamples)
    {
        foreach (double keyMz in characterizedPeptides.Keys)
        {
            List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>> listOfRatios = new List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>>();
            foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
            {
                if (mPrec.MZ == keyMz)
                {
                    // Compute Max Flow for this precursor
                    Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios = GetRatios(characterizedPeptides, mPrec, dbOptions, nbMinFragments, nbMaxFragments);
                    listOfRatios.Add(ratios);
                    ExportMixedSampleResult(ratios, mixedSample, mPrec, keyMz, dbOptions);
                }
            }

            // One row per (sample, precursor m/z); it is only written when at least one area is positive.
            bool isEmpty = true;
            string resultStr = vsCSV.GetFileName(mixedSample.sSDF) + "," + keyMz;
            foreach (double precursor in characterizedPeptides.Keys)
            {
                foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
                {
                    double cumulArea = 0.0;
                    foreach (Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios in listOfRatios)
                    {
                        MaxFlowElutionCurve curve;
                        if (ratios.TryGetValue(charPrec, out curve))
                        {
                            cumulArea += curve.eCurvePerMs.Area;
                        }
                    }

                    resultStr += "," + cumulArea;
                    if (cumulArea > 0)
                    {
                        isEmpty = false;
                    }
                }
            }

            if (!isEmpty)
            {
                writerCumul.AddLine(resultStr);
            }
        }
    }
    writerCumul.WriteToFile();

    //List Modifications (only the keys are used afterwards)
    Dictionary<Modification, double> dicOfIntensityPerMod = new Dictionary<Modification, double>();
    foreach (Sample sample in mixedPrecursors.Keys)
    {
        foreach (MixedPrecursor mP in mixedPrecursors[sample])
        {
            foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
            {
                if (cP.Peptide.VariableModifications != null)
                {
                    foreach (Modification mod in cP.Peptide.VariableModifications.Values)
                    {
                        if (!dicOfIntensityPerMod.ContainsKey(mod))
                        {
                            dicOfIntensityPerMod.Add(mod, 0.0);
                        }
                    }
                }
            }
        }
    }

    //Compute site occupancy for identical sequences (real positionnal isomers)
    vsCSVWriter writerSitesOccupancy = new vsCSVWriter(OutputFolder + "Results_SiteOccupancy.csv");
    List<Protein> AllProteins = Ace.ReadProteomeFromFasta(fastaFile, false, dbOptions);
    foreach (Protein protein in AllProteins)
    {
        // Title row: description, sequence, then one column per residue.
        string newTitleProtein = protein.Description.Replace(',', ' ') + "," + protein.Sequence;
        for (int i = 0; i < protein.Sequence.Length; i++)
        {
            newTitleProtein += "," + protein[i].ToString();
        }
        writerSitesOccupancy.AddLine(newTitleProtein);

        // Coverage rows: per sample, the summed area of every peptide covering residue i + 1.
        // (The title string is no longer extended here; it had already been written above.)
        foreach (Sample mixedSample in mixedPrecursors.Keys)
        {
            string coverage = "Coverage," + mixedSample.Name;
            for (int i = 0; i < protein.Sequence.Length; i++)
            {
                double cumulSite = 0.0;
                foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                {
                    foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                    {
                        if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber)
                        {
                            cumulSite += mP.PeptideRatios[cP].eCurvePerMs.Area;
                        }
                    }
                }
                coverage += "," + cumulSite;
            }
            writerSitesOccupancy.AddLine(coverage);
        }

        // Modification rows: per modification and sample, the summed area of the peptides
        // carrying that modification at the residue.
        foreach (Modification mod in dicOfIntensityPerMod.Keys)
        {
            Dictionary<Sample, string> dicOfLines = new Dictionary<Sample, string>();
            for (int i = 0; i < protein.Sequence.Length; i++)
            {
                foreach (Sample mixedSample in mixedPrecursors.Keys)
                {
                    double cumulModArea = 0.0;
                    foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                    {
                        foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                        {
                            if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber && cP.Peptide.VariableModifications != null)
                            {
                                foreach (int pos in cP.Peptide.VariableModifications.Keys)
                                {
                                    // NOTE(review): the "- 2" offset presumably maps the modification
                                    // position onto the protein residue number - confirm against Peptide.
                                    if (cP.Peptide.StartResidueNumber + pos - 2 == i + 1 && cP.Peptide.VariableModifications[pos] == mod)
                                    {
                                        cumulModArea += mP.PeptideRatios[cP].eCurvePerMs.Area;
                                    }
                                }
                            }
                        }
                    }

                    string lineSoFar;
                    if (dicOfLines.TryGetValue(mixedSample, out lineSoFar))
                    {
                        dicOfLines[mixedSample] = lineSoFar + "," + cumulModArea;
                    }
                    else
                    {
                        dicOfLines.Add(mixedSample, mod.Description + "," + mixedSample.Name + "," + cumulModArea);
                    }
                }
            }

            foreach (string line in dicOfLines.Values)
            {
                writerSitesOccupancy.AddLine(line);
            }
        }
    }
    writerSitesOccupancy.WriteToFile();
}
/// <summary>
/// Exports a CSV file with the area under the curve of individual isomers found in the mixed spectra.
/// The file holds a title row, a "Total" row with the curve areas, and one row per time point of the
/// mixed precursor's intensity-count curve with the interpolated intensities.
/// </summary>
/// <param name="ratios">Deconvoluted curves per characterized precursor.</param>
/// <param name="mixedSample">Mixed sample; its file name is part of the output file name.</param>
/// <param name="mixedPrecursor">Mixed precursor providing the total curve; the retention time of its first query is part of the output file name.</param>
/// <param name="keyMz">Precursor m/z, part of the output file name.</param>
/// <param name="dbOptions">Supplies the output folder.</param>
private static void ExportMixedSampleResult(Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios, Sample mixedSample, MixedPrecursor mixedPrecursor, double keyMz, DBOptions dbOptions)
{
    // Use the platform separator (as the other exports of this file do) instead of a hard-coded backslash.
    string fileName = dbOptions.OutputFolder + "Individual" + System.IO.Path.DirectorySeparatorChar
                    + vsCSV.GetFileName_NoExtension(mixedSample.sSDF) + "_" + keyMz + "MZ_"
                    + mixedPrecursor.Queries[0].spectrum.RetentionTimeInMin + "min.csv";
    vsCSVWriter writerRatio = new vsCSVWriter(fileName);

    var totalCurve = mixedPrecursor.eCurveIntensityCount;
    System.Text.StringBuilder line = new System.Text.StringBuilder();

    // Title row: the peptide sequences appear twice, once for the "per millisecond" block
    // and once for the "counts" block.
    line.Append("Scan time,Total Area,Intensity per milliseconds");
    foreach (CharacterizedPrecursor charPep in ratios.Keys)
    {
        line.Append(',').Append(charPep.Peptide.Sequence);
    }
    line.Append(",Intensity Counts");
    foreach (CharacterizedPrecursor charPep in ratios.Keys)
    {
        line.Append(',').Append(charPep.Peptide.Sequence);
    }
    writerRatio.AddLine(line.ToString());

    // "Total" row: area of the mixed curve followed by the area of every deconvoluted curve.
    line.Length = 0;
    line.Append("Total,").Append(totalCurve.Area).Append(',');
    foreach (MaxFlowElutionCurve curve in ratios.Values)
    {
        line.Append(',').Append(curve.eCurvePerMs.Area);
    }
    line.Append(',');
    foreach (MaxFlowElutionCurve curve in ratios.Values)
    {
        line.Append(',').Append(curve.eCurveCount.Area);
    }
    writerRatio.AddLine(line.ToString());

    // One row per time point; the time is divided by (1000 * 60) before being written.
    for (int i = 0; i < totalCurve.intensityCount.Count; i++)
    {
        var time = totalCurve.time[i];

        line.Length = 0;
        line.Append(time / (1000.0 * 60.0)).Append(',').Append(totalCurve.InterpolateIntensity(time)).Append(',');
        foreach (MaxFlowElutionCurve curve in ratios.Values)
        {
            line.Append(',').Append(curve.eCurvePerMs.InterpolateIntensity(time));
        }
        line.Append(',');
        foreach (MaxFlowElutionCurve curve in ratios.Values)
        {
            line.Append(',').Append(curve.eCurveCount.InterpolateIntensity(time));
        }
        writerRatio.AddLine(line.ToString());
    }

    writerRatio.WriteToFile();
}
/// <summary>
/// Runs the solver on hard-coded project files and writes, for every condition of the mixed
/// samples, replicate-averaged areas and their standard deviations to a hard-coded
/// stats_Area.csv file.
/// </summary>
/// <param name="console">Console handed to <c>CreateOptions</c>.</param>
public static void ProjectMerge(PeptidAce.Utilities.Interfaces.IConSol console)
{
    string fastaFile = @"C:\_IRIC\Data\NB\peptide.fasta";
    DBOptions options = PositionnalIsomerSolver.CreateOptions(fastaFile, @"C:\_IRIC\Data\NB\Units\", 8, 0.05, console);
    Samples samplesMixed = new Samples(@"C:\_IRIC\Data\NB\ProjectFile_EverythingReplicates_Oct.csv", 0, options);
    Samples samplesSynth = new Samples(@"C:\_IRIC\Data\NB\ProjectTest_AllAce_Spiked_19Oct.csv", 0, options);

    PositionnalIsomerSolver newSolver = new PositionnalIsomerSolver
    {
        precTolPpm = 15,
        prodTolDa = 0.05,
        nbMinFragments = 5,
        nbMaxFragments = 5
    };

    string[] synths = new string[samplesSynth.Count];
    for (int s = 0; s < synths.Length; s++)
    {
        synths[s] = samplesSynth[s].sSDF;
    }

    string[] mixed = new string[samplesMixed.Count];
    for (int m = 0; m < mixed.Length; m++)
    {
        mixed[m] = samplesMixed[m].sSDF;
    }

    newSolver.Solve(synths, mixed, fastaFile, Utilities.vsCSV.GetFolder(mixed[0]), options.ConSole);

    // Identifications are computed again here, this time with the options created above.
    Result SpikedResult = Ace.Start(options, samplesSynth, false, false);
    Result mixedResult = Ace.Start(options, samplesMixed, false, false);

    // All usable spiked peptides
    Dictionary<double, Dictionary<Sample, CharacterizedPrecursor>> characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(samplesSynth, SpikedResult, options, newSolver.nbMinFragments, newSolver.nbMaxFragments);

    var mixedPrecursors = new Dictionary<Sample, List<MixedPrecursor>>();
    foreach (Sample mixedSample in samplesMixed)
    {
        mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, options, characterizedPeptides));
    }

    // Deconvolute every mixed precursor whose m/z equals a characterized m/z, grouped per sample.
    var results = new Dictionary<Sample, List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>>>();
    foreach (Sample mixedSample in samplesMixed)
    {
        foreach (double keyMz in characterizedPeptides.Keys)
        {
            foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
            {
                if (mPrec.MZ == keyMz)
                {
                    Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios = PositionnalIsomerSolver.GetRatios(characterizedPeptides, mPrec, options, newSolver.nbMinFragments, newSolver.nbMaxFragments);

                    List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>> ratiosOfSample;
                    if (!results.TryGetValue(mixedSample, out ratiosOfSample))
                    {
                        ratiosOfSample = new List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>>();
                        results.Add(mixedSample, ratiosOfSample);
                    }
                    ratiosOfSample.Add(ratios);
                }
            }
        }
    }

    // Flat list of every characterized precursor; it fixes the column order of the report.
    var precursors = new List<CharacterizedPrecursor>();
    foreach (Dictionary<Sample, CharacterizedPrecursor> perSample in characterizedPeptides.Values)
    {
        precursors.AddRange(perSample.Values);
    }

    // Header: four column groups, each one a label followed by the peptide sequences.
    vsCSVWriter writerArea = new vsCSVWriter(@"C:\_IRIC\Data\NB\Merge\stats_Area.csv");
    string sequenceColumns = "";
    foreach (CharacterizedPrecursor cP in precursors)
    {
        sequenceColumns += cP.Peptide.Sequence + ",";
    }
    writerArea.AddLine("Count," + sequenceColumns + "Intensity per ms," + sequenceColumns + "Standard Deviation Count," + sequenceColumns + "Standard Deviation per ms," + sequenceColumns);

    foreach (int cond in samplesMixed.GetConditions())
    {
        // Per precursor and replicate, keep the curve with the largest positive count area.
        var deconvoluted = new Dictionary<CharacterizedPrecursor, Dictionary<int, MaxFlowElutionCurve>>();
        string sampleName = "";
        foreach (KeyValuePair<Sample, List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>>> sampleResults in results)
        {
            Sample mixedSample = sampleResults.Key;
            if (mixedSample.PROJECT.CONDITION != cond)
            {
                continue;
            }

            // The name of the last sample of the condition ends up labelling the row.
            sampleName = vsCSV.GetFileName_NoExtension(mixedSample.sSDF);
            int replicate = mixedSample.PROJECT.REPLICATE;

            foreach (Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratio in sampleResults.Value)
            {
                foreach (KeyValuePair<CharacterizedPrecursor, MaxFlowElutionCurve> candidate in ratio)
                {
                    if (candidate.Value.eCurveCount.Area > 0)
                    {
                        Dictionary<int, MaxFlowElutionCurve> perReplicate;
                        if (!deconvoluted.TryGetValue(candidate.Key, out perReplicate))
                        {
                            perReplicate = new Dictionary<int, MaxFlowElutionCurve>();
                            deconvoluted.Add(candidate.Key, perReplicate);
                        }

                        MaxFlowElutionCurve currentBest;
                        if (perReplicate.TryGetValue(replicate, out currentBest))
                        {
                            if (currentBest.eCurveCount.Area < candidate.Value.eCurveCount.Area)
                            {
                                perReplicate[replicate] = candidate.Value;
                            }
                        }
                        else
                        {
                            perReplicate.Add(replicate, candidate.Value);
                        }
                    }
                }
            }
        }

        // Total area of all precursors per replicate, used below as the normalisation denominator.
        var totalIntensityCount = new Dictionary<int, double>();
        var totalIntensityPerMs = new Dictionary<int, double>();
        foreach (CharacterizedPrecursor cP in precursors)
        {
            Dictionary<int, MaxFlowElutionCurve> perReplicate;
            if (deconvoluted.TryGetValue(cP, out perReplicate))
            {
                foreach (KeyValuePair<int, MaxFlowElutionCurve> replicateCurve in perReplicate)
                {
                    if (!totalIntensityCount.ContainsKey(replicateCurve.Key))
                    {
                        totalIntensityCount.Add(replicateCurve.Key, 0.0);
                        totalIntensityPerMs.Add(replicateCurve.Key, 0.0);
                    }

                    totalIntensityCount[replicateCurve.Key] += replicateCurve.Value.eCurveCount.Area;
                    totalIntensityPerMs[replicateCurve.Key] += replicateCurve.Value.eCurvePerMs.Area;
                }
            }
        }

        string lineArea = sampleName + ",";
        string lineMS = ",";
        string stdDevCount = "";
        string stdDevMS = "";

        //1) Compute an average out of the replicates
        foreach (CharacterizedPrecursor cP in precursors)
        {
            double averageAreaCount = 0;
            double averageAreaMS = 0;
            Dictionary<int, MaxFlowElutionCurve> perReplicate;
            if (deconvoluted.TryGetValue(cP, out perReplicate))
            {
                foreach (MaxFlowElutionCurve curve in perReplicate.Values)
                {
                    averageAreaCount += curve.eCurveCount.Area;
                    averageAreaMS += curve.eCurvePerMs.Area;
                }
            }

            // Precursor absent from this condition, or without any positive area: leave all four cells empty.
            if (!(averageAreaCount > 0))
            {
                lineArea += ",";
                lineMS += ",";
                stdDevCount += ",";
                stdDevMS += ",";
                continue;
            }

            averageAreaCount = (averageAreaCount / ((double)perReplicate.Count));
            averageAreaMS = (averageAreaMS / ((double)perReplicate.Count));

            // Each replicate contributes its share of the replicate total, scaled by the average area.
            double deNormAverageCount = 0.0;
            double deNormAveragePerMs = 0.0;
            var repAreaCount = new List<double>();
            var repAreaMS = new List<double>();
            foreach (KeyValuePair<int, MaxFlowElutionCurve> replicateCurve in perReplicate)
            {
                double scaledCount = (replicateCurve.Value.eCurveCount.Area / totalIntensityCount[replicateCurve.Key]) * averageAreaCount;
                deNormAverageCount += scaledCount;
                repAreaCount.Add(scaledCount);

                double scaledPerMs = (replicateCurve.Value.eCurvePerMs.Area / totalIntensityPerMs[replicateCurve.Key]) * averageAreaMS;
                deNormAveragePerMs += scaledPerMs;
                repAreaMS.Add(scaledPerMs);
            }

            lineArea += (deNormAverageCount / ((double)repAreaCount.Count)) + ",";
            lineMS += (deNormAveragePerMs / ((double)repAreaMS.Count)) + ",";

            // A standard deviation is only written when there is more than one replicate.
            if (repAreaCount.Count > 1)
            {
                stdDevCount += MathNet.Numerics.Statistics.ArrayStatistics.StandardDeviation(repAreaCount.ToArray()) + ",";
                stdDevMS += MathNet.Numerics.Statistics.ArrayStatistics.StandardDeviation(repAreaMS.ToArray()) + ",";
            }
            else
            {
                stdDevCount += ",";
                stdDevMS += ",";
            }
        }

        writerArea.AddLine(lineArea + lineMS + "," + stdDevCount + "," + stdDevMS);
        //2) Add replicates results (to use for standard deviation)
    }

    writerArea.WriteToFile();
}
/// <summary>
/// Variant of the solver that works from the mixed raw files only: the characterized precursors
/// are extracted from the mixed samples themselves and every matching mixed precursor is
/// deconvoluted with <c>GetRatiosNoSpikes</c>. Writes Results.csv (header rows only) and one
/// individual CSV file per deconvoluted precursor.
/// </summary>
/// <param name="mixedRaws">Raw files of the mixed samples; one <c>Sample</c> is created per entry.</param>
/// <param name="fastaFile">Fasta file handed to <c>CreateOptions</c>.</param>
/// <param name="folderToOutputTo">Output folder handed to <c>CreateOptions</c>.</param>
/// <param name="conSol">Console receiving progress and error messages.</param>
public void Solve(string[] mixedRaws, string fastaFile, string folderToOutputTo, IConSol conSol)
{
    dbOptions = CreateOptions(fastaFile, folderToOutputTo, conSol);

    MixedSamples = new Samples(dbOptions);
    for (int i = 0; i < mixedRaws.Length; i++)
    {
        MixedSamples.Add(new Sample(i + 1, 1, 1, mixedRaws[i], mixedRaws[i], 0, ""));
    }

    //Precompute Mixed peptide identifications
    mixedResult = Ace.Start(dbOptions, MixedSamples, false, false);
    if (mixedResult == null)
    {
        // Report the problem instead of handing a null result to the steps below.
        conSol.WriteLine("OOPS! No queries could be extracted from the list of mixed spectrum files...");
        return;
    }

    conSol.WriteLine("Computing gradient descents...");

    //Compute all usable spiked peptides
    characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(MixedSamples, mixedResult, dbOptions, nbMinFragments, nbMaxFragments);
    ExportSpikedSampleResult(characterizedPeptides, dbOptions);

    // Three header rows describing the characterized peptides.
    vsCSVWriter writerCumul = new vsCSVWriter(OutputFolder + "Results.csv");
    string titleCombined = "Mixed Sample,Precursor";
    string curveStr = "Polynomial Curve,";
    string spikedIntensityStr = "Area under the curve,";
    foreach (double precursor in characterizedPeptides.Keys)
    {
        foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
        {
            titleCombined += "," + charPrec.Peptide.Sequence + " Charge " + charPrec.Charge;

            var coefficients = charPrec.eCurveIntensityCount.Coefficients;
            if (coefficients != null && coefficients.Length == 3)
            {
                // Fixed: the constant term used to be glued to the "x" of the linear term ("2x3").
                curveStr += "," + coefficients[0] + "x^2 + " + coefficients[1] + "x + " + coefficients[2];
            }
            else
            {
                curveStr += ",NA";
            }

            spikedIntensityStr += "," + charPrec.eCurveIntensityCount.Area;
        }
    }
    writerCumul.AddLine(titleCombined);
    writerCumul.AddLine(curveStr);
    writerCumul.AddLine(spikedIntensityStr);

    mixedPrecursors = new Dictionary<Sample, List<MixedPrecursor>>();
    foreach (Sample mixedSample in MixedSamples)
    {
        mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, dbOptions, characterizedPeptides));
    }

    //Get the list of precursors to characterize
    foreach (Sample mixedSample in MixedSamples)
    {
        foreach (double keyMz in characterizedPeptides.Keys)
        {
            foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
            {
                if (mPrec.MZ == keyMz)
                {
                    // Compute Max Flow for this precursor
                    Dictionary<Peptide, MaxFlowElutionCurve> ratios = GetRatiosNoSpikes(mPrec, precision);
                    ExportMixedSampleResult(ratios, mixedSample, mPrec, keyMz, dbOptions);
                }
            }
            // No cumulative per-precursor row is added to Results.csv in this mode;
            // only the individual files written above carry the deconvoluted areas.
        }
    }

    writerCumul.WriteToFile();
}
/// <summary>
/// Combines several candidate sets of per-peptide elution curves into a single curve per peptide.
/// Candidate sets are first filtered on their error (at or below the median), then weighted over
/// two passes by the absolute Pearson correlation between their areas and the weighted average
/// areas, and finally merged peptide by peptide with those weights.
/// </summary>
/// <param name="dicOfCurveErrorsP">Candidate curve sets, each mapped to its error.</param>
/// <returns>One merged curve per peptide found in the retained candidate sets.</returns>
public Dictionary<Peptide, MaxFlowElutionCurve> ComputePeptideRatios(Dictionary<Dictionary<Peptide, MaxFlowElutionCurve>, double> dicOfCurveErrorsP)
{
    // Every candidate set starts with the same weight.
    var weights = new Dictionary<Dictionary<Peptide, MaxFlowElutionCurve>, double>();
    foreach (Dictionary<Peptide, MaxFlowElutionCurve> curveSet in dicOfCurveErrorsP.Keys)
    {
        weights.Add(curveSet, 1.0 / (double)dicOfCurveErrorsP.Count);
    }

    //Purge worst curves: keep the sets whose error is at or below the median error
    Dictionary<Dictionary<Peptide, MaxFlowElutionCurve>, double> retained;
    if (dicOfCurveErrorsP.Count > 1)
    {
        retained = new Dictionary<Dictionary<Peptide, MaxFlowElutionCurve>, double>();
        double medianError = MathNet.Numerics.Statistics.Statistics.Median(dicOfCurveErrorsP.Values);
        foreach (KeyValuePair<Dictionary<Peptide, MaxFlowElutionCurve>, double> candidate in dicOfCurveErrorsP)
        {
            if (candidate.Value <= medianError)
            {
                retained.Add(candidate.Key, candidate.Value);
            }
        }
    }
    else
    {
        retained = dicOfCurveErrorsP;
    }

    for (int pass = 0; pass < 2; pass++)
    {
        Dictionary<Dictionary<Peptide, MaxFlowElutionCurve>, double> current = retained;

        //Normalize the weights of the remaining sets (sum must equal 1)
        double weightSum = 0.0;
        foreach (Dictionary<Peptide, MaxFlowElutionCurve> curveSet in current.Keys)
        {
            weightSum += weights[curveSet];
        }
        foreach (Dictionary<Peptide, MaxFlowElutionCurve> curveSet in current.Keys)
        {
            weights[curveSet] = weights[curveSet] / weightSum;
        }

        //Weighted average area per peptide
        var average = new Dictionary<Peptide, double>();
        foreach (Dictionary<Peptide, MaxFlowElutionCurve> curveSet in current.Keys)
        {
            double weight = weights[curveSet];
            foreach (KeyValuePair<Peptide, double> area in MixedPrecursor.GetAreas(curveSet))
            {
                double runningSum;
                average.TryGetValue(area.Key, out runningSum); // stays 0 for a peptide not seen before
                average[area.Key] = runningSum + area.Value * weight;
            }
        }

        //Correlation between the average and every set; it becomes the new weight of the set
        var correlations = new List<double>();
        foreach (Dictionary<Peptide, MaxFlowElutionCurve> curveSet in current.Keys)
        {
            // Areas of this set in the same peptide order as the average; 0 for a missing peptide.
            var areasOfSet = new Dictionary<Peptide, double>();
            foreach (Peptide peptide in average.Keys)
            {
                MaxFlowElutionCurve curve;
                if (curveSet.TryGetValue(peptide, out curve))
                {
                    areasOfSet.Add(peptide, curve.eCurvePerMs.Area);
                }
                else
                {
                    areasOfSet.Add(peptide, 0);
                }
            }

            double correlation = areasOfSet.Count > 1
                ? Math.Abs(MathNet.Numerics.Statistics.Correlation.Pearson(average.Values, areasOfSet.Values))
                : 1.0;
            weights[curveSet] = correlation;
            correlations.Add(correlation);
        }

        //Remove worst curves
        // NOTE(review): this keeps the sets whose correlation is at or BELOW median + 0.5 * variance,
        // i.e. it drops the best-correlated sets. Given the "remove worst" intent the comparison may
        // be inverted - confirm with the author before changing, since it alters the results.
        if (correlations.Count > 1)
        {
            var filtered = new Dictionary<Dictionary<Peptide, MaxFlowElutionCurve>, double>();
            double medianCorrelation = MathNet.Numerics.Statistics.Statistics.Median(correlations);
            double threshold = medianCorrelation + 0.5 * MathNet.Numerics.Statistics.Statistics.Variance(correlations);
            foreach (KeyValuePair<Dictionary<Peptide, MaxFlowElutionCurve>, double> candidate in current)
            {
                if (weights[candidate.Key] <= threshold)
                {
                    filtered.Add(candidate.Key, candidate.Value);
                }
            }
            retained = filtered;
        }
    }

    // Merge the retained sets peptide by peptide, each curve weighted by the correlation of its set.
    var mergers = new Dictionary<Peptide, ElutionCurveMerger>();
    foreach (Dictionary<Peptide, MaxFlowElutionCurve> curveSet in retained.Keys)
    {
        double weight = weights[curveSet];
        foreach (KeyValuePair<Peptide, MaxFlowElutionCurve> peptideCurve in curveSet)
        {
            ElutionCurveMerger merger;
            if (!mergers.TryGetValue(peptideCurve.Key, out merger))
            {
                merger = new ElutionCurveMerger();
                mergers.Add(peptideCurve.Key, merger);
            }
            merger.AddCurve(peptideCurve.Value, weight);
        }
    }

    var peptideRatiosNoSpike = new Dictionary<Peptide, MaxFlowElutionCurve>();
    foreach (KeyValuePair<Peptide, ElutionCurveMerger> merged in mergers)
    {
        peptideRatiosNoSpike.Add(merged.Key, merged.Value.Merge());
    }
    return peptideRatiosNoSpike;
}
/// <summary>
/// Deconvolutes a mixed precursor without spiked references: for every fragment count in
/// [nbMinFragments, nbMaxFragments] each query spectrum is solved with
/// <c>SolveFromFragmentScore</c>, the solved ratios are turned into per-peptide elution curves,
/// and the curve sets (with their cumulated underflow) are combined by <c>ComputePeptideRatios</c>.
/// </summary>
/// <param name="mixedPrecursor">Mixed precursor whose queries are solved.</param>
/// <param name="precision">Precision value forwarded unchanged to <c>SolveFromFragmentScore</c>.</param>
/// <returns>The combined curve per peptide, as returned by <c>ComputePeptideRatios</c>.</returns>
private Dictionary<Peptide, MaxFlowElutionCurve> GetRatiosNoSpikes(MixedPrecursor mixedPrecursor, long precision)
{
    Dictionary<Dictionary<Peptide, MaxFlowElutionCurve>, double> DicOfCurveErrors = new Dictionary<Dictionary<Peptide, MaxFlowElutionCurve>, double>();

    // Unlike the spiked variant there is no isomer validation step here, so the former
    // always-true "validProducts" flag has been removed.
    for (int nbProductsToKeep = nbMinFragments; nbProductsToKeep <= nbMaxFragments; nbProductsToKeep++)
    {
        int nbIgnoredSpectrum = 0;
        double cumulError = 0;
        Dictionary<Peptide, MaxFlowElutionCurve> curves = new Dictionary<Peptide, MaxFlowElutionCurve>();

        foreach (Query query in mixedPrecursor.Queries)
        {
            double timeInMilliSeconds = query.spectrum.RetentionTimeInMin * 60.0 * 1000.0;
            double underFlow = 0;
            double percentError = 0;
            Dictionary<PeptideSpectrumMatch, SolvedResult> finalRatios = SolveFromFragmentScore(nbProductsToKeep, precision, query.spectrum.Peaks, dbOptions.productMassTolerance,
                mixedPrecursor.eCurveIntensityPerMS.GetLocalArea(timeInMilliSeconds, timeInMilliSeconds + query.spectrum.InjectionTime), query,
                out underFlow, out percentError, dbOptions.ConSole);

            // The underflow is cumulated even for spectra that end up being ignored below.
            cumulError += underFlow;

            if (percentError < 0.5)
            {
                foreach (PeptideSpectrumMatch psm in finalRatios.Keys)
                {
                    // Several matches may share a peptide; they all feed the same curve.
                    Peptide peptide = psm.Peptide;
                    MaxFlowElutionCurve curve;
                    if (!curves.TryGetValue(peptide, out curve))
                    {
                        curve = new MaxFlowElutionCurve(nbProductsToKeep);
                        curves.Add(peptide, curve);
                    }

                    curve.eCurveCount.AddPoint(timeInMilliSeconds, finalRatios[psm].Ratio * mixedPrecursor.eCurveIntensityCount.InterpolateIntensity(timeInMilliSeconds));
                    curve.eCurvePerMs.AddPoint(timeInMilliSeconds, finalRatios[psm].Ratio * mixedPrecursor.eCurveIntensityPerMS.InterpolateIntensity(timeInMilliSeconds));
                }
            }
            else
            {
                nbIgnoredSpectrum++;
            }

            // Stop early once more than half of the spectra have been ignored.
            if (nbIgnoredSpectrum * 2 > mixedPrecursor.Queries.Count)
            {
                break;
            }
        }//End of foreach query

        // Curves are only kept when fewer than half of the spectra were ignored.
        if (nbIgnoredSpectrum * 2 < mixedPrecursor.Queries.Count)
        {
            if (nbIgnoredSpectrum > 0)
            {
                Console.WriteLine("Ignored Spectrum : " + nbIgnoredSpectrum + " / " + mixedPrecursor.Queries.Count);
            }

            foreach (MaxFlowElutionCurve curve in curves.Values)
            {
                curve.Compute();
            }

            Dictionary<Peptide, MaxFlowElutionCurve> curvesToKeep = new Dictionary<Peptide, MaxFlowElutionCurve>();
            foreach (KeyValuePair<Peptide, MaxFlowElutionCurve> entry in curves)
            {
                if (entry.Value.eCurvePerMs.Area > 0)
                {
                    curvesToKeep.Add(entry.Key, entry.Value);
                }
            }

            if (curvesToKeep.Count > 0)
            {
                DicOfCurveErrors.Add(curvesToKeep, cumulError);
            }
        }
    }//End of for each nbProduct

    return ComputePeptideRatios(DicOfCurveErrors);
}
/// <summary>
/// Deconvolutes a mixed precursor against the spiked isomers using the solver's own options and
/// fragment range. For every fragment count each query spectrum is solved, the solved ratios are
/// scaled by the interpolated intensity of the mixed curve (negative values are raised to zero),
/// and the resulting curve sets are combined by <c>mixedPrecursor.ComputePeptideRatios</c>.
/// </summary>
/// <param name="spikes">Characterized (spiked) precursors, keyed by precursor m/z and then by sample.</param>
/// <param name="mixedPrecursor">Mixed precursor whose queries are solved.</param>
/// <returns>Whatever <c>mixedPrecursor.ComputePeptideRatios</c> returns for the collected curve sets.</returns>
private Dictionary<CharacterizedPrecursor, ElutionCurve> GetRatios(Dictionary<double, Dictionary<Sample, CharacterizedPrecursor>> spikes, MixedPrecursor mixedPrecursor)
{
    var errorPerCurveSet = new Dictionary<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>, double>();

    for (int fragmentCount = nbMinFragments; fragmentCount <= nbMaxFragments; fragmentCount++)
    {
        // Spiked isomers within the precursor tolerance; one unusable isomer rejects the fragment count.
        bool everySpikeUsable = true;
        var candidates = new List<CharacterizedPrecursor>();
        foreach (KeyValuePair<double, Dictionary<Sample, CharacterizedPrecursor>> spikesAtMz in spikes)
        {
            if (Math.Abs(Proteomics.Utilities.Numerics.CalculateMassError(spikesAtMz.Key, mixedPrecursor.MZ, dbOptions.precursorMassTolerance.Units)) <= dbOptions.precursorMassTolerance.Value)
            {
                foreach (CharacterizedPrecursor spike in spikesAtMz.Value.Values)
                {
                    if (spike.IsValid(fragmentCount))
                    {
                        candidates.Add(spike);
                    }
                    else
                    {
                        everySpikeUsable = false;
                    }
                }
            }
        }

        if (!everySpikeUsable)
        {
            continue;
        }

        int ignoredSpectra = 0;
        double totalUnderFlow = 0;
        var curves = new Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>();

        foreach (Query query in mixedPrecursor.Queries)
        {
            double scanTimeMs = query.spectrum.RetentionTimeInMin * 60.0 * 1000.0;
            double underFlow = 0;
            double percentError = 0;
            // NOTE(review): the literal 1000 is passed straight to SolveFromSpectrum; its meaning is not visible here.
            Dictionary<CharacterizedPrecursor, SolvedResult> solved = SolveFromSpectrum(
                candidates,
                fragmentCount,
                1000,
                query.spectrum.Peaks,
                dbOptions.productMassTolerance,
                query.spectrum.PrecursorIntensityPerMilliSecond * query.spectrum.InjectionTime,
                out underFlow,
                out percentError,
                dbOptions.ConSole);

            // The underflow is cumulated even for spectra that end up being ignored below.
            totalUnderFlow += underFlow;

            if (percentError < 0.5)
            {
                foreach (KeyValuePair<CharacterizedPrecursor, SolvedResult> solution in solved)
                {
                    MaxFlowElutionCurve curve;
                    if (!curves.TryGetValue(solution.Key, out curve))
                    {
                        curve = new MaxFlowElutionCurve(fragmentCount);
                        curves.Add(solution.Key, curve);
                    }

                    double scaled = solution.Value.Ratio * mixedPrecursor.eCurve.InterpolateIntensity(scanTimeMs);
                    curve.AddPoint(scanTimeMs, scaled < 0 ? 0 : scaled);
                }
            }
            else
            {
                ignoredSpectra++;
            }

            // Stop early once more than half of the spectra have been ignored.
            if (ignoredSpectra * 2 > mixedPrecursor.Queries.Count)
            {
                break;
            }
        }

        // Curves are only kept when fewer than half of the spectra were ignored.
        if (ignoredSpectra * 2 < mixedPrecursor.Queries.Count)
        {
            if (ignoredSpectra > 0)
            {
                Console.WriteLine("Ignored Spectrum : " + ignoredSpectra + " / " + mixedPrecursor.Queries.Count);
            }

            foreach (MaxFlowElutionCurve curve in curves.Values)
            {
                curve.Compute();
            }

            var curvesToKeep = new Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>();
            foreach (KeyValuePair<CharacterizedPrecursor, MaxFlowElutionCurve> entry in curves)
            {
                if (entry.Value.Area > 0)
                {
                    curvesToKeep.Add(entry.Key, entry.Value);
                }
            }

            if (curvesToKeep.Count > 0)
            {
                errorPerCurveSet.Add(curvesToKeep, totalUnderFlow);
            }
        }
    }

    return mixedPrecursor.ComputePeptideRatios(errorPerCurveSet);
}