/// <summary>
/// Runs the positional isomer solver on two hard-coded project files (spiked/synthetic samples and mixed samples),
/// then writes one row of statistics per condition of the mixed project to stats_Area.csv
/// (average area by count and per ms for each characterized precursor, plus standard deviations over replicates).
/// </summary>
/// <param name="console">Console object handed to <c>PositionnalIsomerSolver.CreateOptions</c>.</param>
public static void ProjectMerge(PeptidAce.Utilities.Interfaces.IConSol console)
{
    // Numbers written to CSV cells must not depend on regional settings: with a ',' decimal
    // separator a single value would be split across two columns.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    string strProjectAll = @"C:\_IRIC\Data\NB\ProjectFile_EverythingReplicates_Oct.csv";
    string project = @"C:\_IRIC\Data\NB\ProjectTest_AllAce_Spiked_19Oct.csv";
    string fastaFile = @"C:\_IRIC\Data\NB\peptide.fasta";

    DBOptions options = PositionnalIsomerSolver.CreateOptions(fastaFile, @"C:\_IRIC\Data\NB\Units\", 8, 0.05, console);
    Samples samplesMixed = new Samples(strProjectAll, 0, options);
    Samples samplesSynth = new Samples(project, 0, options);

    PositionnalIsomerSolver newSolver = new PositionnalIsomerSolver();
    newSolver.precTolPpm = 15;
    newSolver.prodTolDa = 0.05;
    newSolver.nbMinFragments = 5;
    newSolver.nbMaxFragments = 5;

    string[] synths = new string[samplesSynth.Count];
    for (int i = 0; i < synths.Length; i++)
    {
        synths[i] = samplesSynth[i].sSDF;
    }

    string[] mixed = new string[samplesMixed.Count];
    for (int i = 0; i < mixed.Length; i++)
    {
        mixed[i] = samplesMixed[i].sSDF;
    }

    // The output folder is derived from the first mixed file, so there must be at least one.
    if (mixed.Length == 0)
    {
        options.ConSole.WriteLine("No mixed samples were found in " + strProjectAll);
        return;
    }

    newSolver.Solve(synths, mixed, fastaFile, Utilities.vsCSV.GetFolder(mixed[0]), options.ConSole);

    // Precompute spiked and mixed peptide identifications
    Result SpikedResult = Ace.Start(options, samplesSynth, false, false);
    Result mixedResult = Ace.Start(options, samplesMixed, false, false);

    // Compute all usable spiked peptides
    Dictionary<double, Dictionary<Sample, CharacterizedPrecursor>> characterizedPeptides =
        CharacterizedPrecursor.GetSpikedPrecursors(samplesSynth, SpikedResult, options, newSolver.nbMinFragments, newSolver.nbMaxFragments);

    Dictionary<Sample, List<MixedPrecursor>> mixedPrecursors = new Dictionary<Sample, List<MixedPrecursor>>();
    foreach (Sample mixedSample in samplesMixed)
    {
        mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, options, characterizedPeptides));
    }

    // For every mixed sample, compute the ratios of each mixed precursor whose m/z is one of the characterized keys
    Dictionary<Sample, List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>>> results =
        new Dictionary<Sample, List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>>>();
    foreach (Sample mixedSample in samplesMixed)
    {
        foreach (double keyMz in characterizedPeptides.Keys)
        {
            foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
            {
                // Exact floating-point match, as in the original code
                if (mPrec.MZ == keyMz)
                {
                    // Compute Max Flow for this precursor
                    Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios =
                        PositionnalIsomerSolver.GetRatios(characterizedPeptides, mPrec, options, newSolver.nbMinFragments, newSolver.nbMaxFragments);

                    List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>> sampleRatios;
                    if (!results.TryGetValue(mixedSample, out sampleRatios))
                    {
                        sampleRatios = new List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>>();
                        results.Add(mixedSample, sampleRatios);
                    }
                    sampleRatios.Add(ratios);
                }
            }
        }
    }

    // Flat list of every characterized precursor; it fixes the column order of the CSV
    List<CharacterizedPrecursor> precursors = new List<CharacterizedPrecursor>();
    foreach (Dictionary<Sample, CharacterizedPrecursor> dic in characterizedPeptides.Values)
    {
        foreach (CharacterizedPrecursor cP in dic.Values)
        {
            precursors.Add(cP);
        }
    }

    // Create average of each characterized peptide plus standard deviation
    vsCSVWriter writerArea = new vsCSVWriter(@"C:\_IRIC\Data\NB\Merge\stats_Area.csv");

    // The same list of peptide sequences is repeated under each of the four section titles
    System.Text.StringBuilder sequenceColumns = new System.Text.StringBuilder();
    foreach (CharacterizedPrecursor cP in precursors)
    {
        sequenceColumns.Append(cP.Peptide.Sequence + ",");
    }
    string sequences = sequenceColumns.ToString();
    writerArea.AddLine("Count," + sequences
                     + "Intensity per ms," + sequences
                     + "Standard Deviation Count," + sequences
                     + "Standard Deviation per ms," + sequences);

    foreach (int cond in samplesMixed.GetConditions())
    {
        // For each precursor, keep one curve per replicate: the one with the largest area by count
        Dictionary<CharacterizedPrecursor, Dictionary<int, MaxFlowElutionCurve>> deconvoluted =
            new Dictionary<CharacterizedPrecursor, Dictionary<int, MaxFlowElutionCurve>>();
        string sampleName = "";
        foreach (Sample mixedSample in results.Keys)
        {
            if (mixedSample.PROJECT.CONDITION == cond)
            {
                // The row is labelled with the last sample seen for this condition
                sampleName = vsCSV.GetFileName_NoExtension(mixedSample.sSDF);
                int replicate = mixedSample.PROJECT.REPLICATE;

                foreach (Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratio in results[mixedSample])
                {
                    foreach (KeyValuePair<CharacterizedPrecursor, MaxFlowElutionCurve> entry in ratio)
                    {
                        if (entry.Value.eCurveCount.Area > 0)
                        {
                            Dictionary<int, MaxFlowElutionCurve> perReplicate;
                            if (!deconvoluted.TryGetValue(entry.Key, out perReplicate))
                            {
                                perReplicate = new Dictionary<int, MaxFlowElutionCurve>();
                                deconvoluted.Add(entry.Key, perReplicate);
                            }

                            MaxFlowElutionCurve best;
                            if (!perReplicate.TryGetValue(replicate, out best)
                                || best.eCurveCount.Area < entry.Value.eCurveCount.Area)
                            {
                                perReplicate[replicate] = entry.Value;
                            }
                        }
                    }
                }
            }
        }

        // Total area per replicate, summed over all precursors (used to normalize each replicate)
        Dictionary<int, double> totalIntensityCount = new Dictionary<int, double>();
        Dictionary<int, double> totalIntensityPerMs = new Dictionary<int, double>();
        foreach (CharacterizedPrecursor cP in precursors)
        {
            Dictionary<int, MaxFlowElutionCurve> perReplicate;
            if (deconvoluted.TryGetValue(cP, out perReplicate))
            {
                foreach (KeyValuePair<int, MaxFlowElutionCurve> rep in perReplicate)
                {
                    if (!totalIntensityCount.ContainsKey(rep.Key))
                    {
                        totalIntensityCount.Add(rep.Key, 0.0);
                        totalIntensityPerMs.Add(rep.Key, 0.0);
                    }
                    totalIntensityCount[rep.Key] += rep.Value.eCurveCount.Area;
                    totalIntensityPerMs[rep.Key] += rep.Value.eCurvePerMs.Area;
                }
            }
        }

        System.Text.StringBuilder lineArea = new System.Text.StringBuilder(sampleName + ",");
        System.Text.StringBuilder lineMS = new System.Text.StringBuilder(",");
        System.Text.StringBuilder stdDevCount = new System.Text.StringBuilder();
        System.Text.StringBuilder stdDevMS = new System.Text.StringBuilder();

        // 1) Compute an average out of the replicates
        foreach (CharacterizedPrecursor cP in precursors)
        {
            // A cell stays empty when the precursor has no usable curve in this condition
            string cellArea = "";
            string cellMS = "";
            string cellStdDevCount = "";
            string cellStdDevMS = "";

            Dictionary<int, MaxFlowElutionCurve> perReplicate;
            if (deconvoluted.TryGetValue(cP, out perReplicate))
            {
                double averageAreaCount = 0;
                double averageAreaMS = 0;
                foreach (MaxFlowElutionCurve curve in perReplicate.Values)
                {
                    averageAreaCount += curve.eCurveCount.Area;
                    averageAreaMS += curve.eCurvePerMs.Area;
                }

                if (averageAreaCount > 0)
                {
                    averageAreaCount = averageAreaCount / (double)perReplicate.Count;
                    averageAreaMS = averageAreaMS / (double)perReplicate.Count;

                    // Each replicate contributes its share of the replicate total, scaled by the average area
                    double deNormAverageCount = 0.0;
                    double deNormAveragePerMs = 0.0;
                    List<double> repAreaCount = new List<double>();
                    List<double> repAreaMS = new List<double>();
                    foreach (KeyValuePair<int, MaxFlowElutionCurve> rep in perReplicate)
                    {
                        double tmpCount = (rep.Value.eCurveCount.Area / totalIntensityCount[rep.Key]) * averageAreaCount;
                        deNormAverageCount += tmpCount;
                        repAreaCount.Add(tmpCount);

                        double tmpPerMs = (rep.Value.eCurvePerMs.Area / totalIntensityPerMs[rep.Key]) * averageAreaMS;
                        deNormAveragePerMs += tmpPerMs;
                        repAreaMS.Add(tmpPerMs);
                    }

                    cellArea = System.Convert.ToString(deNormAverageCount / (double)repAreaCount.Count, invariant);
                    cellMS = System.Convert.ToString(deNormAveragePerMs / (double)repAreaMS.Count, invariant);

                    // A standard deviation is only reported when there is more than one replicate
                    if (repAreaCount.Count > 1)
                    {
                        cellStdDevCount = System.Convert.ToString(
                            MathNet.Numerics.Statistics.ArrayStatistics.StandardDeviation(repAreaCount.ToArray()), invariant);
                        cellStdDevMS = System.Convert.ToString(
                            MathNet.Numerics.Statistics.ArrayStatistics.StandardDeviation(repAreaMS.ToArray()), invariant);
                    }
                }
            }

            lineArea.Append(cellArea).Append(',');
            lineMS.Append(cellMS).Append(',');
            stdDevCount.Append(cellStdDevCount).Append(',');
            stdDevMS.Append(cellStdDevMS).Append(',');
        }

        // The extra "," separators leave an empty cell under each section title of the header
        writerArea.AddLine(lineArea.ToString() + lineMS.ToString() + "," + stdDevCount.ToString() + "," + stdDevMS.ToString());

        // 2) TODO: add replicates results (to use for standard deviation)
    }
    writerArea.WriteToFile();
}
/// <summary>
/// Provides deconvoluted elution curves of mixed spectra from the provided raw files using the provided synthetic raw file.
/// Exports in CSV files (Results.csv, Results_SiteOccupancy.csv and the PSM exports) and stores everything in class objects.
/// </summary>
/// <param name="spikedRaws">Paths of the spiked (synthetic) raw files; one sample is created per entry.</param>
/// <param name="mixedRaws">Paths of the mixed raw files; one sample is created per entry.</param>
/// <param name="fastaFile">Fasta file used to build the options and to read the proteins for the site occupancy report.</param>
/// <param name="folderToOutputTo">Output folder handed to <c>CreateOptions</c>.</param>
/// <param name="conSol">Console used for progress and error messages.</param>
public void Solve(string[] spikedRaws, string[] mixedRaws, string fastaFile, string folderToOutputTo, IConSol conSol)
{
    // Numbers written to CSV cells must not depend on regional settings: with a ',' decimal
    // separator a single value would be split across two columns.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    dbOptions = CreateOptions(fastaFile, folderToOutputTo, precTolPpm, prodTolDa, conSol);

    SpikedSamples = new Samples(dbOptions);
    for (int i = 0; i < spikedRaws.Length; i++)
    {
        SpikedSamples.Add(new Sample(i + 1, 1, 1, spikedRaws[i], spikedRaws[i], 0, ""));
    }

    // Precompute Spiked peptide identifications
    SpikedResult = Ace.Start(dbOptions, SpikedSamples, false, false);
    if (SpikedResult == null)
    {
        // Same handling as for the mixed samples below, instead of a null dereference
        conSol.WriteLine("OOPS! No queries could be extracted from the list of spiked spectrum files...");
        return;
    }
    SpikedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "SpikedSamplesPSMs.csv");

    MixedSamples = new Samples(dbOptions);
    for (int i = 0; i < mixedRaws.Length; i++)
    {
        MixedSamples.Add(new Sample(i + 1, 1, 1, mixedRaws[i], mixedRaws[i], 0, ""));
    }

    // Precompute Mixed peptide identifications
    mixedResult = Ace.Start(dbOptions, MixedSamples, false, false);
    if (mixedResult == null)
    {
        conSol.WriteLine("OOPS! No queries could be extracted from the list of mixed spectrum files...");
        return;
    }
    mixedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "MixedSamplesPSMs.csv");

    conSol.WriteLine("Computing gradient descents...");

    // Compute all usable spiked peptides
    characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(SpikedSamples, SpikedResult, dbOptions, nbMinFragments, nbMaxFragments);
    ExportSpikedSampleResult(characterizedPeptides, dbOptions);

    // ---- Results.csv: three header rows, then one row per (mixed sample, precursor m/z) ----
    vsCSVWriter writerCumul = new vsCSVWriter(OutputFolder + "Results.csv");
    System.Text.StringBuilder titleCombined = new System.Text.StringBuilder("Mixed Sample,Precursor");
    // The trailing comma keeps these two rows aligned with the two leading title columns
    System.Text.StringBuilder curveStr = new System.Text.StringBuilder("Polynomial Curve,");
    System.Text.StringBuilder spikedIntensityStr = new System.Text.StringBuilder("Area under the curve,");
    foreach (double precursor in characterizedPeptides.Keys)
    {
        foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
        {
            titleCombined.Append("," + charPrec.Peptide.Sequence + " Charge " + charPrec.Charge);

            if (charPrec.eCurveIntensityCount.Coefficients != null && charPrec.eCurveIntensityCount.Coefficients.Length == 3)
            {
                // Rendered as "a x^2 + b x + c"; the separator before the constant term used to be missing
                curveStr.Append("," + System.Convert.ToString(charPrec.eCurveIntensityCount.Coefficients[0], invariant)
                              + "x^2 + " + System.Convert.ToString(charPrec.eCurveIntensityCount.Coefficients[1], invariant)
                              + "x + " + System.Convert.ToString(charPrec.eCurveIntensityCount.Coefficients[2], invariant));
            }
            else
            {
                curveStr.Append(",NA");
            }

            spikedIntensityStr.Append("," + System.Convert.ToString(charPrec.eCurveIntensityCount.Area, invariant));
        }
    }
    writerCumul.AddLine(titleCombined.ToString());
    writerCumul.AddLine(curveStr.ToString());
    writerCumul.AddLine(spikedIntensityStr.ToString());

    mixedPrecursors = new Dictionary<Sample, List<MixedPrecursor>>();
    foreach (Sample mixedSample in MixedSamples)
    {
        mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, dbOptions, characterizedPeptides));
    }

    // Get the list of precursors to characterize
    foreach (Sample mixedSample in MixedSamples)
    {
        foreach (double keyMz in characterizedPeptides.Keys)
        {
            List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>> listOfRatios =
                new List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>>();
            foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
            {
                // Exact floating-point match, as in the original code
                if (mPrec.MZ == keyMz)
                {
                    // Compute Max Flow for this precursor
                    Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios =
                        GetRatios(characterizedPeptides, mPrec, dbOptions, nbMinFragments, nbMaxFragments);
                    listOfRatios.Add(ratios);
                    ExportMixedSampleResult(ratios, mixedSample, mPrec, keyMz, dbOptions);
                }
            }

            // One cell per characterized precursor: cumulated per-ms area over all matching mixed precursors
            bool isEmpty = true;
            System.Text.StringBuilder resultStr = new System.Text.StringBuilder(
                vsCSV.GetFileName(mixedSample.sSDF) + "," + System.Convert.ToString(keyMz, invariant));
            foreach (double precursor in characterizedPeptides.Keys)
            {
                foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
                {
                    double cumulArea = 0.0;
                    foreach (Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios in listOfRatios)
                    {
                        MaxFlowElutionCurve curve;
                        if (ratios.TryGetValue(charPrec, out curve))
                        {
                            cumulArea += curve.eCurvePerMs.Area;
                        }
                    }
                    resultStr.Append("," + System.Convert.ToString(cumulArea, invariant));
                    if (cumulArea > 0)
                    {
                        isEmpty = false;
                    }
                }
            }

            // Rows without any positive area are skipped
            if (!isEmpty)
            {
                writerCumul.AddLine(resultStr.ToString());
            }
        }
    }
    writerCumul.WriteToFile();

    // List Modifications (only the keys of this dictionary are used; the values stay at 0.0)
    Dictionary<Modification, double> dicOfIntensityPerMod = new Dictionary<Modification, double>();
    foreach (Sample sample in mixedPrecursors.Keys)
    {
        foreach (MixedPrecursor mP in mixedPrecursors[sample])
        {
            // NOTE(review): PeptideRatios is presumably filled by GetRatios above - confirm
            foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
            {
                if (cP.Peptide.VariableModifications != null)
                {
                    foreach (Modification mod in cP.Peptide.VariableModifications.Values)
                    {
                        if (!dicOfIntensityPerMod.ContainsKey(mod))
                        {
                            dicOfIntensityPerMod.Add(mod, 0.0);
                        }
                    }
                }
            }
        }
    }

    // ---- Results_SiteOccupancy.csv ----
    // Compute site occupancy for identical sequences (real positional isomers)
    vsCSVWriter writerSitesOccupancy = new vsCSVWriter(OutputFolder + "Results_SiteOccupancy.csv");
    List<Protein> AllProteins = Ace.ReadProteomeFromFasta(fastaFile, false, dbOptions);
    foreach (Protein protein in AllProteins)
    {
        int nbResidues = protein.Sequence.Length;

        // Title row: description, full sequence, then one column per residue
        System.Text.StringBuilder titleProtein = new System.Text.StringBuilder(
            protein.Description.Replace(',', ' ') + "," + protein.Sequence);
        for (int i = 0; i < nbResidues; i++)
        {
            titleProtein.Append("," + protein[i].ToString());
        }
        writerSitesOccupancy.AddLine(titleProtein.ToString());

        // Coverage rows: per sample, cumulated per-ms area of every peptide spanning each residue
        foreach (Sample mixedSample in mixedPrecursors.Keys)
        {
            System.Text.StringBuilder coverage = new System.Text.StringBuilder("Coverage," + mixedSample.Name);
            for (int i = 0; i < nbResidues; i++)
            {
                double cumulSite = 0.0;
                foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                {
                    foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                    {
                        // i is 0-based, residue numbers are compared as i + 1
                        if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber)
                        {
                            cumulSite += mP.PeptideRatios[cP].eCurvePerMs.Area;
                        }
                    }
                }
                coverage.Append("," + System.Convert.ToString(cumulSite, invariant));
            }
            writerSitesOccupancy.AddLine(coverage.ToString());
        }

        // Modification rows: per modification and sample, cumulated per-ms area at each residue.
        // Nothing is written for a protein with an empty sequence.
        if (nbResidues > 0)
        {
            foreach (Modification mod in dicOfIntensityPerMod.Keys)
            {
                foreach (Sample mixedSample in mixedPrecursors.Keys)
                {
                    System.Text.StringBuilder line = new System.Text.StringBuilder(mod.Description + "," + mixedSample.Name);
                    for (int i = 0; i < nbResidues; i++)
                    {
                        double cumulModArea = 0.0;
                        foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                        {
                            foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                            {
                                if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber
                                    && cP.Peptide.VariableModifications != null)
                                {
                                    foreach (int pos in cP.Peptide.VariableModifications.Keys)
                                    {
                                        // NOTE(review): the "- 2" offset presumably maps the modification index
                                        // to a protein residue number - confirm against Peptide
                                        if (cP.Peptide.StartResidueNumber + pos - 2 == i + 1
                                            && cP.Peptide.VariableModifications[pos] == mod)
                                        {
                                            cumulModArea += mP.PeptideRatios[cP].eCurvePerMs.Area;
                                        }
                                    }
                                }
                            }
                        }
                        line.Append("," + System.Convert.ToString(cumulModArea, invariant));
                    }
                    writerSitesOccupancy.AddLine(line.ToString());
                }
            }
        }
    }
    writerSitesOccupancy.WriteToFile();
}
/// <summary>
/// Variant of Solve that works without spiked samples: the mixed samples themselves are used to
/// characterize the precursors. Writes the three header rows of Results.csv and exports the ratios
/// of every matching mixed precursor through <c>ExportMixedSampleResult</c>.
/// </summary>
/// <param name="mixedRaws">Paths of the mixed raw files; one sample is created per entry.</param>
/// <param name="fastaFile">Fasta file handed to <c>CreateOptions</c>.</param>
/// <param name="folderToOutputTo">Output folder handed to <c>CreateOptions</c>.</param>
/// <param name="conSol">Console used for progress and error messages.</param>
public void Solve(string[] mixedRaws, string fastaFile, string folderToOutputTo, IConSol conSol)
{
    // Numbers written to CSV cells must not depend on regional settings: with a ',' decimal
    // separator a single value would be split across two columns.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    dbOptions = CreateOptions(fastaFile, folderToOutputTo, conSol);

    MixedSamples = new Samples(dbOptions);
    for (int i = 0; i < mixedRaws.Length; i++)
    {
        MixedSamples.Add(new Sample(i + 1, 1, 1, mixedRaws[i], mixedRaws[i], 0, ""));
    }

    // Precompute Mixed peptide identifications
    mixedResult = Ace.Start(dbOptions, MixedSamples, false, false);
    if (mixedResult == null)
    {
        // Same handling as the overload that takes spiked samples
        conSol.WriteLine("OOPS! No queries could be extracted from the list of mixed spectrum files...");
        return;
    }

    conSol.WriteLine("Computing gradient descents...");

    // Compute all usable precursors (taken from the mixed samples in this variant)
    characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(MixedSamples, mixedResult, dbOptions, nbMinFragments, nbMaxFragments);
    ExportSpikedSampleResult(characterizedPeptides, dbOptions);

    vsCSVWriter writerCumul = new vsCSVWriter(OutputFolder + "Results.csv");
    System.Text.StringBuilder titleCombined = new System.Text.StringBuilder("Mixed Sample,Precursor");
    // The trailing comma keeps these two rows aligned with the two leading title columns
    System.Text.StringBuilder curveStr = new System.Text.StringBuilder("Polynomial Curve,");
    System.Text.StringBuilder spikedIntensityStr = new System.Text.StringBuilder("Area under the curve,");
    foreach (double precursor in characterizedPeptides.Keys)
    {
        foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
        {
            titleCombined.Append("," + charPrec.Peptide.Sequence + " Charge " + charPrec.Charge);

            if (charPrec.eCurveIntensityCount.Coefficients != null && charPrec.eCurveIntensityCount.Coefficients.Length == 3)
            {
                // Rendered as "a x^2 + b x + c"; the separator before the constant term used to be missing
                curveStr.Append("," + System.Convert.ToString(charPrec.eCurveIntensityCount.Coefficients[0], invariant)
                              + "x^2 + " + System.Convert.ToString(charPrec.eCurveIntensityCount.Coefficients[1], invariant)
                              + "x + " + System.Convert.ToString(charPrec.eCurveIntensityCount.Coefficients[2], invariant));
            }
            else
            {
                curveStr.Append(",NA");
            }

            spikedIntensityStr.Append("," + System.Convert.ToString(charPrec.eCurveIntensityCount.Area, invariant));
        }
    }
    writerCumul.AddLine(titleCombined.ToString());
    writerCumul.AddLine(curveStr.ToString());
    writerCumul.AddLine(spikedIntensityStr.ToString());

    mixedPrecursors = new Dictionary<Sample, List<MixedPrecursor>>();
    foreach (Sample mixedSample in MixedSamples)
    {
        mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, dbOptions, characterizedPeptides));
    }

    // Get the list of precursors to characterize
    foreach (Sample mixedSample in MixedSamples)
    {
        foreach (double keyMz in characterizedPeptides.Keys)
        {
            foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
            {
                // Exact floating-point match, as in the original code
                if (mPrec.MZ == keyMz)
                {
                    // Compute Max Flow for this precursor
                    Dictionary<Peptide, MaxFlowElutionCurve> ratios = GetRatiosNoSpikes(mPrec, precision);
                    ExportMixedSampleResult(ratios, mixedSample, mPrec, keyMz, dbOptions);
                }
            }
            // No per-sample row is appended to Results.csv in this variant; only the header rows are written.
        }
    }
    writerCumul.WriteToFile();
}