/// <summary>
/// Runs the positional isomer solver on the MonoAce test data sets: the spiked
/// (synthetic-peptide) runs are used to deconvolute the varied-ratio mixed runs.
/// </summary>
/// <param name="console">Output sink for progress/log messages.</param>
public static void MonoAce(IConSol console)
{
    DBOptions dbOptions = GetDBOptions(false, false, console);

    // Sample lists: spiked runs (pure isomers), varied-ratio mixed runs, and a stable mix.
    Samples spiked = new Samples(@"C:\_IRIC\DATA\NB\ProjectTest_MonoAce_Spiked_19Oct.csv", 0, dbOptions);//Group 2 (all)
    Samples mixed = new Samples(@"C:\_IRIC\DATA\NB\ProjectTest_MonoAce_Varied_19Oct.csv", 0, dbOptions);
    Samples stable = new Samples(@"C:\_IRIC\DATA\NB\ProjectTest_StableMix_MonoAce_19Oct.csv", 0, dbOptions);

    // Collect the raw-file paths of each sample set.
    string[] spikedRaws = new string[spiked.Count];
    for (int i = 0; i < spiked.Count; i++)
    {
        spikedRaws[i] = spiked[i].sSDF;
    }
    string[] mixedRaws = new string[mixed.Count];
    for (int i = 0; i < mixed.Count; i++)
    {
        mixedRaws[i] = mixed[i].sSDF;
    }
    // NOTE(review): the "stable mix" samples are loaded but never used here (historical).

    new PositionnalIsomerSolver().Solve(spikedRaws, mixedRaws, dbOptions.FastaDatabaseFilepath, dbOptions.OutputFolder, console);
}
/// <summary>
/// Runs a search on the MonoAce spiked project, exports the results to Neo4j,
/// then returns an annotated spectrum for the highest-scoring PSM matching a
/// specific acetylated/propionylated peptide sequence.
/// </summary>
/// <param name="console">Output sink for progress/log messages.</param>
/// <returns>The annotated spectrum of the best matching PSM.</returns>
/// <exception cref="System.InvalidOperationException">No PSM matched the target sequence.</exception>
public static AnnotatedSpectrum TestNeo4j(IConSol console)
{
    DBOptions dbOptions = GetDBOptions(false, false, console);
    Samples ProjectRatios = new Samples(@"C:\_IRIC\DATA\NB\ProjectTest_MonoAce_Spiked_19Oct.csv", 0, dbOptions);//Group 2 (all)

    Result rez = Propheus.Start(dbOptions, ProjectRatios, false, false, false);
    Database.Neo4j.ResultsToNeo4j.Export(rez);

    // Keep the highest-scoring PSM whose sequence matches the target peptide.
    PeptideSpectrumMatch bestPsm = null;
    foreach (Query query in rez.queries)
    {
        foreach (PeptideSpectrumMatch psm in query.psms)
        {
            // BUGFIX: compare against the candidate psm's own score (the original
            // compared against query.psms[0], so later, better-scoring matches
            // within a query could be skipped).
            if (psm.Peptide.Sequence.CompareTo("GK(acetylation of K)GGK(propionylation of K)GLGK(propionylation of K)GGAK(propionylation of K)R") == 0 &&
                (bestPsm == null || bestPsm.ProbabilityScore() < psm.ProbabilityScore()))
            {
                bestPsm = psm;
            }
        }
    }
    if (bestPsm == null)
    {
        // Previously fell through to a NullReferenceException below; fail explicitly.
        throw new InvalidOperationException("No PSM matched the expected MonoAce peptide sequence.");
    }
    AnnotatedSpectrum aSpec = new AnnotatedSpectrum(ProjectRatios[0], bestPsm.Query.spectrum, bestPsm.Peptide);
    return aSpec;
}
/// <summary>
/// Loads the requested spectrum from its raw file, scores the stored peptides
/// against it, and returns a view pairing the spectrum with its candidates and
/// the best-scoring one.
/// </summary>
/// <param name="rawName">Key of the raw file in the source dictionary.</param>
/// <param name="scan">Scan identifier within the raw file.</param>
/// <param name="consol">Output sink for progress/log messages.</param>
/// <returns>A Data.SpecView wrapping the spectrum, its candidates, and the best match.</returns>
public static object CreateView(string rawName, string scan, IConSol consol)
{
    // Lazily build the shared search options on first use.
    if (options == null)
    {
        options = DotNetMHC.MHCSearcher.CreateOptions(new string[] { "" }, "", 15, 0.05, consol);
    }

    // Resolve the raw file and load the requested spectrum.
    Numerics.SequenceStore store = Numerics.SequenceStore.GetStore(0.05);
    string rawFile = Numerics.SourceStore.GetDictionary()[rawName];
    DotNetMHC.Data.Spectrum spectrum = DotNetMHC.RawExtractor.LoadSpectrum(rawFile, scan, options);

    // Pick the candidate with the highest spectrum score.
    // NOTE(review): assumes GetPeptides returns at least one candidate — confirm.
    List <Data.PeptideView> candidates = store.GetPeptides(spectrum, options);
    Data.PeptideView best = candidates[0];
    double topScore = 0.0;
    foreach (Data.PeptideView candidate in candidates)
    {
        if (candidate.SpectrumScore > topScore)
        {
            topScore = candidate.SpectrumScore;
            best = candidate;
        }
    }
    return new Data.SpecView(spectrum, candidates, best);
}
/// <summary>
/// Builds the database/search options used by the MonoAce test routines.
/// </summary>
/// <param name="loadFromRaw">When true, spectra are re-extracted from raw files instead of loaded from cache.</param>
/// <param name="onlyYions">When true, only the Y fragment series is searched; otherwise A/B/C/X/Z are added as well.</param>
/// <param name="console">Output sink for progress/log messages.</param>
/// <returns>A fully populated DBOptions instance.</returns>
public static DBOptions GetDBOptions(bool loadFromRaw, bool onlyYions, IConSol console)
{
    string outputDir = @"C:\_IRIC\DATA\Test\testNB\Iso3\";
    string fastaFile = @"C:\_IRIC\Data\NB\peptide.fasta";

    DBOptions dbOptions = new DBOptions(fastaFile, console);
    dbOptions.precursorMassTolerance = new MassTolerance(8, MassToleranceUnits.ppm);
    dbOptions.productMassTolerance = new MassTolerance(20, MassToleranceUnits.ppm);
    //dbOptions.productMassTolerance = new MassTolerance(0.05, MassToleranceUnits.Da);//0.034 is a 60 000 resolution over 2000 range in mz
    dbOptions.MaximumPeptideMass = 200000;
    dbOptions.OutputFolder = outputDir;

    // Digestion: effectively unconstrained ("no enzyme" plus very generous missed cleavages).
    ProteaseDictionary proteases = ProteaseDictionary.Instance;
    dbOptions.DigestionEnzyme = proteases["no enzyme"]; //trypsin (no proline rule)"];
    dbOptions.NoEnzymeSearch = false;
    dbOptions.DecoyFusion = false;
    dbOptions.MaximumNumberOfFragmentsPerSpectrum = 400;
    dbOptions.ToleratedMissedCleavages = 200;
    dbOptions.MinimumPeptideLength = 5;
    dbOptions.MaximumPeptideLength = 300;

    // Modifications: no fixed mods; acetyl/propionyl on K as variable mods.
    GraphML_List <Modification> fixMods = new GraphML_List <Modification>();
    //fixMods.Add(ModificationDictionary.Instance["propionylation of K"]);
    dbOptions.fixedModifications = fixMods;
    GraphML_List <Modification> varMods = new GraphML_List <Modification>();
    varMods.Add(ModificationDictionary.Instance["acetylation of K"]);
    varMods.Add(ModificationDictionary.Instance["propionylation of K"]);
    dbOptions.maximumVariableModificationIsoforms = 1024;
    dbOptions.variableModifications = varMods;

    dbOptions.addFragmentLoss = false;
    dbOptions.addFragmentMods = false;

    // Fragment ion series: Y only, or the full A/B/C/X/Y/Z set.
    dbOptions.fragments = new Fragments();
    if (!onlyYions)
    {
        dbOptions.fragments.Add(new FragmentA());
        dbOptions.fragments.Add(new FragmentB());
        dbOptions.fragments.Add(new FragmentC());
        dbOptions.fragments.Add(new FragmentX());
        dbOptions.fragments.Add(new FragmentZ());
    }
    dbOptions.fragments.Add(new FragmentY());

    dbOptions.SaveMS1Peaks = true;
    dbOptions.SaveMSMSPeaks = true;
    dbOptions.LoadSpectraIfFound = !loadFromRaw;
    dbOptions.NbPSMToKeep = 100;
    return dbOptions;
}
/// <summary>
/// Builds the per-instance search options: ppm tolerances from this solver's
/// fields, every known modification as variable, and all six fragment series.
/// </summary>
/// <param name="fastaFile">Path to the FASTA database.</param>
/// <param name="outputFolder">Folder receiving result files.</param>
/// <param name="consol">Output sink for progress/log messages.</param>
/// <returns>A fully populated DBOptions instance.</returns>
private DBOptions CreateOptions(string fastaFile, string outputFolder, IConSol consol)
{
    DBOptions options = new DBOptions(fastaFile, consol);
    options.precursorMassTolerance = new MassTolerance(precTolPpm, MassToleranceUnits.ppm);
    options.productMassTolerance = new MassTolerance(prodTolPpm, MassToleranceUnits.ppm);
    options.MaximumPeptideMass = 200000;
    options.OutputFolder = outputFolder;

    // Digestion: effectively unconstrained ("no enzyme" plus many missed cleavages).
    ProteaseDictionary proteaseDictionary = ProteaseDictionary.Instance;
    options.DigestionEnzyme = proteaseDictionary["no enzyme"];
    //options.DigestionEnzyme = proteaseDictionary["top-down"];
    options.NoEnzymeSearch = false;
    options.DecoyFusion = false;
    options.MaximumNumberOfFragmentsPerSpectrum = 400;
    options.ToleratedMissedCleavages = 200;
    options.MinimumPeptideLength = 5;
    options.MaximumPeptideLength = 300;

    // No fixed modifications; search every known modification as variable.
    options.fixedModifications = new GraphML_List <Modification>();
    GraphML_List <Modification> variableMods = new GraphML_List <Modification>();
    foreach (string modName in ModificationDictionary.Instance.Keys)
    {
        variableMods.Add(ModificationDictionary.Instance[modName]);
    }
    options.maximumVariableModificationIsoforms = 1024;
    options.variableModifications = variableMods;

    options.addFragmentLoss = false;
    options.addFragmentMods = false;//gives very bad results... might be off

    // Search every fragment ion series.
    options.fragments = new Fragments();
    options.fragments.Add(new FragmentA());
    options.fragments.Add(new FragmentB());
    options.fragments.Add(new FragmentC());
    options.fragments.Add(new FragmentX());
    options.fragments.Add(new FragmentY());
    options.fragments.Add(new FragmentZ());

    options.SaveMS1Peaks = true;
    options.SaveMSMSPeaks = true;
    options.LoadSpectraIfFound = true;
    options.NbPSMToKeep = 100;
    return options;
}
/// <summary>
/// Parameterless constructor used when restoring saved states.
/// </summary>
/// <param name="console">Optional console; a command-line console is created when omitted.</param>
public DBOptions(IConSol console = null)
{
    fixedModifications = new GraphML_List <Modification>();
    variableModifications = new GraphML_List <Modification>();
    fragments = new Fragments();
    // Fall back to the default command-line console when none is supplied.
    ConSole = (console == null) ? new ConSolCommandLine() : console;
}
/// <summary>
/// Creates search options for the given FASTA database, populating every
/// setting with its default value.
/// </summary>
/// <param name="fasta">Path to the FASTA database.</param>
/// <param name="console">Optional console; a command-line console is created when omitted.</param>
public DBOptions(string fasta, IConSol console = null)
{
    // Fall back to the default command-line console when none is supplied.
    ConSole = (console == null) ? new ConSolCommandLine() : console;

    // Defaults.
    DecoyFusion = true;
    FastaDatabaseFilepath = fasta;
    MaximumPeptideMass = 10000;
    DigestionEnzyme = ProteaseDictionary.Instance["no enzyme"]; // proteases["trypsin (no proline rule)"];
    NoEnzymeSearch = true;
    ToleratedMissedCleavages = 100; // 3; determines the length of peptides with the no-enzyme option
    initiatorMethionineBehavior = InitiatorMethionineBehavior.Variable;
    fixedModifications = new GraphML_List <Modification>();
    variableModifications = new GraphML_List <Modification>();
    maximumVariableModificationIsoforms = 1024;
    MinimumPrecursorChargeState = 1;
    MaximumPrecursorChargeState = 4;
    MaximumNumberOfFragmentsPerSpectrum = 400;
    //TODO Add precision to the precursor by reading the MS part of the file
    precursorMassTolerance = new MassTolerance(0.005, MassToleranceUnits.Da);//2.1
    //TODO Add precision to the product masses by reading the corresponding MS part of the raw file
    productMassTolerance = new MassTolerance(0.005, MassToleranceUnits.Da);
    PSMFalseDiscoveryRate = 0.25; // 0.05;
    OutputFolder = @"C:\_IRIC\DATA\Test2";
    MinimumPSMScore = 0.0001;
}
/// <summary>
/// Decomposes the observed <paramref name="mixed"/> spectrum into per-unit
/// contributions. Each unit starts at its local-maximum flow; the loop then
/// repeatedly reduces the unit with the largest local minima until the
/// overshoot error drops below 1 (or no reducible unit remains).
/// </summary>
/// <param name="units">One intensity dictionary per candidate (pure) spectrum.</param>
/// <param name="mixed">Observed mixed spectrum to decompose.</param>
/// <param name="solution">Output: flow per unit; all zeros when no acceptable fit was reached.</param>
/// <param name="underflow">Output: residual under-estimation error.</param>
/// <param name="ConSole">Console sink (not used by this routine).</param>
public static void SolveMinimaStyle(List <Dictionary <double, double> > units, Dictionary <double, double> mixed, out List <double> solution, out double underflow, IConSol ConSole)
{
    // Initialize each unit at its maximal plausible contribution.
    List <double> localFlows = new List <double>();
    foreach (Dictionary <double, double> unit in units)
    {
        localFlows.Add(FindLocalMaxima(unit, mixed));
    }

    // Errors of the current candidate mix vs the observed mix.
    Dictionary <double, double> virtualMixed = BuildVirtualDic(localFlows, units, mixed.Count);
    double overError = ComputeOver(virtualMixed, mixed);
    double underError = ComputeUnder(virtualMixed, mixed);

    int bestUnit = 0; // sentinel >= 0 so the loop can run at least once
    while (overError >= 1 && bestUnit >= 0)
    {
        //double bestFlow = 0;
        double bestMinima = 0;
        bestUnit = -1;
        // Find the reducible unit whose local minima is largest.
        for (int i = 0; i < units.Count; i++)
        {
            if (localFlows[i] > 0)
            {
                double minima = FindLocalMinima(units[i], mixed, virtualMixed);
                //double currentFlow = localFlows[i];
                //localFlows[i] = minima;
                //Dictionary<double, double> tmpDic = BuildVirtualDic(localFlows, units);
                //double tmpUnderError = ComputeUnder(tmpDic, mixed);
                //double tmpOverError = ComputeOver(tmpDic, mixed);
                //double tmpFlowRate = Math.Abs(overError - tmpOverError);
                //if (tmpUnderError > underError)
                //    tmpFlowRate /= tmpUnderError - underError;
                //if (tmpFlowRate > bestFlow)
                if (minima > bestMinima)
                {
                    //bestFlow = tmpFlowRate;
                    bestMinima = minima;
                    bestUnit = i;
                }
                //localFlows[i] = currentFlow;
            }
        }
        if (bestUnit >= 0)
        {
            // Reduce the chosen unit by at most one step, clamping at zero.
            if (bestMinima > 1)
            {
                localFlows[bestUnit] -= 1.0;// *0.01
            }
            else
            {
                localFlows[bestUnit] -= bestMinima;// *0.01;
            }
            if (localFlows[bestUnit] < 0)
            {
                localFlows[bestUnit] = 0.0;
            }
            // Recompute the candidate mix and its errors after the reduction.
            virtualMixed = BuildVirtualDic(localFlows, units, mixed.Count);
            overError = ComputeOver(virtualMixed, mixed);
            underError = ComputeUnder(virtualMixed, mixed);
        }
    }//End of while overflow > 1

    // Only report the flows when the fit converged (overshoot below 1).
    solution = new List <double>();
    foreach (double localFlow in localFlows)
    {
        if (overError <= 1.0)
        {
            solution.Add(localFlow);
        }
        else
        {
            solution.Add(0);
        }
    }
    underflow = underError;
}//*/
}//*/
/// <summary>
/// Gradient-descent decomposition of <paramref name="mixed"/> into per-unit
/// flows. Each unit starts at its local-maximum contribution; each iteration
/// probes a one-step reduction of every active unit, scores it by overshoot
/// removed per undershoot introduced, and applies the reduction to one of the
/// best-scoring units (chosen at random among ties). The step size grows when
/// no probe helps, and the loop stops once overshoot is acceptable (&lt; 1) or
/// the step size reaches 10000.
/// </summary>
/// <param name="units">One intensity dictionary per candidate (pure) spectrum.</param>
/// <param name="mixed">Observed mixed spectrum to decompose.</param>
/// <param name="solution">Output: final flow per unit.</param>
/// <param name="underflow">Output: residual under-estimation error.</param>
/// <param name="ConSole">Console sink (not used by this routine).</param>
public static void SolveMaxFlowStyle(List <Dictionary <double, double> > units, Dictionary <double, double> mixed, out List <double> solution, out double underflow, IConSol ConSole)
{
    // Start each unit at its maximal plausible contribution.
    List <double> localFlows = new List <double>();
    foreach (Dictionary <double, double> unit in units)
    {
        localFlows.Add(FindLocalMaxima(unit, mixed));
    }

    Dictionary <double, double> virtualMixed = BuildVirtualDic(localFlows, units, mixed.Count);
    double overError = ComputeOver(virtualMixed, mixed);
    double underError = ComputeUnder(virtualMixed, mixed);

    double[] bestIndexes = new double[units.Count];
    int iterSize = 1;
    double bestOverallError = double.MaxValue;
    List <double> bestLocalFlows = new List <double>();
    Random rnd = new Random();
    while (overError >= 1 && iterSize < 10000)//anything less than 1 is an acceptable solution
    {
        for (int index = 0; index < bestIndexes.Length; index++)
        {
            bestIndexes[index] = -1;
        }
        // Probe a one-step reduction of every active unit and score it.
        for (int i = 0; i < units.Count; i++)
        {
            if (localFlows[i] > 0)
            {
                localFlows[i] -= iterSize;
                virtualMixed = BuildVirtualDic(localFlows, units, mixed.Count);
                double tmpErrorOver = ComputeOver(virtualMixed, mixed);
                double tmpErrorUnder = ComputeUnder(virtualMixed, mixed);

                // Overshoot removed, penalized by any undershoot introduced.
                double tmpFlowRate = Math.Abs(overError - tmpErrorOver);
                double underDiff = 0;
                if (tmpErrorUnder > underError)
                {
                    underDiff = tmpErrorUnder - underError;
                }
                if (underDiff >= 1)
                {
                    tmpFlowRate /= underDiff;
                }
                bestIndexes[i] = tmpFlowRate;
                localFlows[i] += iterSize; // undo the probe
            }
        }

        // Find the best probe score.
        double worstFlowRate = 0.0;
        for (int index = 0; index < bestIndexes.Length; index++)
        {
            if (bestIndexes[index] > worstFlowRate)
            {
                worstFlowRate = bestIndexes[index];
            }
        }
        if (worstFlowRate > 0)
        {
            // Apply the reduction to one of the tied best units, picked pseudo-randomly.
            int nbMatching = 0;
            for (int index = 0; index < bestIndexes.Length; index++)
            {
                if (bestIndexes[index] >= worstFlowRate)
                {
                    nbMatching++;
                }
            }
            // BUGFIX: Random.Next's upper bound is exclusive; the original
            // rnd.Next(0, nbMatching - 1) could never select the last tied unit.
            int iterChoice = rnd.Next(0, nbMatching);
            int iterNb = 0;
            for (int index = 0; index < bestIndexes.Length; index++)
            {
                if (bestIndexes[index] >= worstFlowRate)
                {
                    if (iterChoice == iterNb)
                    {
                        localFlows[index] -= iterSize;
                        if (localFlows[index] < 0)
                        {
                            localFlows[index] = 0.0;
                        }
                    }
                    iterNb++;
                }
            }
            iterSize = 1;
        }
        else
        {
            // No probe helped; try a bigger step next round.
            iterSize++;
        }

        virtualMixed = BuildVirtualDic(localFlows, units, mixed.Count);
        overError = ComputeOver(virtualMixed, mixed);
        underError = ComputeUnder(virtualMixed, mixed);
        if (overError + underError < bestOverallError)
        {
            bestLocalFlows = new List <double>(localFlows);
            bestOverallError = overError + underError;
        }
    }//End of while overflow > 1

    // NOTE(review): bestLocalFlows tracks the best solution seen, but the
    // returned solution uses the FINAL localFlows — confirm whether the best
    // snapshot was meant to be returned instead.
    solution = new List <double>();
    foreach (double localFlow in localFlows)
    {
        solution.Add(localFlow);
    }
    underflow = underError;
}
/// <summary>
/// Loads the Via Science assignment CSV, interpolates missing values, computes
/// each variable's Pearson correlation with "price", writes a naive
/// correlation-weighted prediction per time point to predictions.csv, and
/// exports all variables ordered by date.
/// </summary>
/// <param name="console">Output sink (unused; progress goes to Console).</param>
public static void Launch(IConSol console)
{
    vsCSV csv = new vsCSV(@"C:\Users\caronlio\Downloads\Via.Science.Pre.Interview.Assignment.Data.2013.10.18.csv");
    Dictionary <DateTime, List <Variable> > DicOfTime = new Dictionary <DateTime, List <Variable> >();
    Dictionary <string, List <Variable> > DicOfVar = new Dictionary <string, List <Variable> >();

    // Index every data row (skipping the header line) by timestamp and by variable name.
    for (int i = 1; i < csv.LINES_LIST.Count; i++)
    {
        Variable tmpVar = new Variable(csv.LINES_LIST[i]);
        if (!DicOfTime.ContainsKey(tmpVar.time))
        {
            DicOfTime.Add(tmpVar.time, new List <Variable>());
        }
        DicOfTime[tmpVar.time].Add(tmpVar);

        if (!DicOfVar.ContainsKey(tmpVar.name))
        {
            DicOfVar.Add(tmpVar.name, new List <Variable>());
        }
        DicOfVar[tmpVar.name].Add(tmpVar);
    }

    // Fill gaps per variable. NOTE(review): assumes InterpolateMissingValues
    // adds entries to DicOfTime only; adding keys to DicOfVar here would
    // invalidate this enumeration.
    foreach (string name in DicOfVar.Keys)
    {
        InterpolateMissingValues(name, DicOfTime, DicOfVar);
    }

    // Rebuild DicOfVar so it includes the interpolated entries.
    DicOfVar.Clear();
    foreach (List <Variable> list in DicOfTime.Values)
    {
        foreach (Variable variable in list)
        {
            if (!DicOfVar.ContainsKey(variable.name))
            {
                DicOfVar.Add(variable.name, new List <Variable>());
            }
            DicOfVar[variable.name].Add(variable);
        }
    }

    // Compute normalized values, then correlate each variable against "price".
    NormalizeVariables(DicOfVar);
    List <double> prices = GetArrayofNormed("price", DicOfTime, DicOfVar);
    Dictionary <string, double> DicOfCorrelation = new Dictionary <string, double>();
    foreach (string name in DicOfVar.Keys)
    {
        List <double> normedVals = GetArrayofNormed(name, DicOfTime, DicOfVar);
        double corr = MathNet.Numerics.Statistics.Correlation.Pearson(prices, normedVals);
        // (BUGFIX: removed leftover debug Console.WriteLine("test") that fired
        // for the "price" self-correlation)
        DicOfCorrelation.Add(name, corr);
    }

    // Prediction: correlation-weighted sum of all non-price variables, scaled.
    vsCSVWriter output = new vsCSVWriter(@"C:\_IRIC\predictions.csv");
    output.AddLine("Time,Price,Prediction");
    foreach (DateTime time in DicOfTime.Keys)
    {
        double pred = 0;
        foreach (string name in DicOfCorrelation.Keys)
        {
            if (name.CompareTo("price") != 0)
            {
                foreach (Variable v in DicOfTime[time])
                {
                    if (v.name.CompareTo(name) == 0)
                    {
                        pred += DicOfCorrelation[name] * v.normValue;
                    }
                }
            }
        }
        pred *= 100000;
        // TODO(review): the header declares "Time,Price,Prediction" but only the
        // prediction is written, so rows have a single column — confirm intent.
        output.AddLine(pred.ToString());
    }
    output.WriteToFile();

    // Export a csv of the variables, ordered by date.
    ExportAllVariables(@"C:\_IRIC\assignOut.csv", DicOfTime, DicOfVar, DicOfCorrelation);
    Console.WriteLine("Done!");
}
/// <summary>
/// Creates the options used through the system for peptide identification.
/// Every modification in the dictionary is searched as variable, and the score
/// weights are the values tuned on 18 March 2014.
/// </summary>
/// <param name="fastaFile">Path to the FASTA database.</param>
/// <param name="outputFolder">Folder receiving result files.</param>
/// <param name="precTolPpm">Precursor mass tolerance, in ppm.</param>
/// <param name="prodTolDa">Product mass tolerance, in Daltons.</param>
/// <param name="consol">Output sink for progress/log messages.</param>
/// <returns>A fully populated DBOptions instance.</returns>
public static DBOptions CreateOptions(string fastaFile, string outputFolder, double precTolPpm, double prodTolDa, IConSol consol)
{
    DBOptions options = new DBOptions(fastaFile, consol);
    options.precursorMassTolerance = new MassTolerance(precTolPpm, MassToleranceUnits.ppm);
    options.productMassTolerance = new MassTolerance(prodTolDa, MassToleranceUnits.Da);
    options.MaximumPeptideMass = 200000;
    options.OutputFolder = outputFolder;

    // Digestion: effectively unconstrained ("no enzyme" plus many missed cleavages).
    ProteaseDictionary proteaseDictionary = ProteaseDictionary.Instance;
    options.DigestionEnzyme = proteaseDictionary["no enzyme"];
    //options.DigestionEnzyme = proteaseDictionary["top-down"];
    options.NoEnzymeSearch = false;
    options.DecoyFusion = false;
    options.MaximumNumberOfFragmentsPerSpectrum = 400;
    options.ToleratedMissedCleavages = 200;
    options.MinimumPeptideLength = 5;
    options.MaximumPeptideLength = 300;

    // No fixed modifications; search every known modification as variable.
    options.fixedModifications = new List <Modification>();
    List <Modification> variableMods = new List <Modification>();
    foreach (string modName in ModificationDictionary.Instance.Keys)
    {
        variableMods.Add(ModificationDictionary.Instance[modName]);
    }
    options.maximumVariableModificationIsoforms = 4096;// 2048;// 1024;
    options.variableModifications = variableMods;

    options.addFragmentLoss = false;
    options.addFragmentMods = false;
    options.SaveMS1Peaks = true;
    options.SaveMSMSPeaks = true;
    options.LoadSpectraIfFound = true;
    options.NbPSMToKeep = 16;
    options.fullFragment = new FullFragments(false);//true by default

    // 18 March 2014 optimized score weights. (Older experimental weight sets
    // were removed here; see version-control history if needed.)
    options.dProduct = 0.0917981081138356;
    options.dPrecursor = 0.345789190542786;
    options.dMatchingProductFraction = 0.427418045898628;
    options.dMatchingProduct = 0;
    options.dIntensityFraction = 0.429418127252449;
    options.dIntensity = 0;
    options.dProtein = 0.692270441303156;
    options.dPeptideScore = 0.636739763262095;
    options.dFragmentScore = 0.0229058195943506;
    return options;
}
/// <summary>
/// Fits characterized precursors to an observed spectrum via max-flow (backup
/// implementation). Peak intensities are normalized to the precursor intensity
/// and scaled by <paramref name="precision"/> before solving.
/// </summary>
/// <param name="ratiosToFit">Characterized precursors to fit.</param>
/// <param name="nbProductsToKeep">Fragment-set size to read from each precursor.</param>
/// <param name="precision">Integer scale applied to normalized intensities.</param>
/// <param name="capacity">Observed MS/MS peaks.</param>
/// <param name="tolerance">Mass tolerance forwarded to the max-flow solver.</param>
/// <param name="returnType">0 = max flow, 1 = best flow, 2 = average of both.</param>
/// <param name="PrecursorIntensityInCTrap">Precursor intensity used to normalize peaks.</param>
/// <param name="overFlow">Output: always 0.</param>
/// <param name="underFlow">Output: error reported by the max-flow computation.</param>
/// <param name="errorInPercent">Output: percentage error from the max-flow computation.</param>
/// <param name="ConSole">Console sink forwarded to the solver.</param>
/// <returns>One SolvedResult per precursor, in ratiosToFit order.</returns>
public static Dictionary <CharacterizedPrecursor, SolvedResult> SolveFromSpectrumBKP(IEnumerable <CharacterizedPrecursor> ratiosToFit, int nbProductsToKeep, long precision, IEnumerable <MsMsPeak> capacity, MassTolerance tolerance, int returnType, double PrecursorIntensityInCTrap, ref double overFlow, ref double underFlow, ref double errorInPercent, IConSol ConSole)
{
    // Normalize observed peaks to the precursor intensity, scaled by 'precision'.
    List <MsMsPeak> scaledCapacity = new List <MsMsPeak>();
    double sumOfProducts = 0; // kept for parity with the original; not used afterwards
    foreach (MsMsPeak peak in capacity)
    {
        double normed = peak.Intensity / PrecursorIntensityInCTrap;
        scaledCapacity.Add(new MsMsPeak(peak.MZ, normed * precision, peak.Charge));
        sumOfProducts += peak.Intensity;
    }

    // Copy each precursor's fragments, using the normalized intensity as observed.
    List <List <ProductMatch> > fitCandidates = new List <List <ProductMatch> >();
    foreach (CharacterizedPrecursor prec in ratiosToFit)
    {
        List <ProductMatch> copies = new List <ProductMatch>();
        foreach (ProductMatch pm in prec.Fragments[nbProductsToKeep])
        {
            ProductMatch copy = new ProductMatch(pm);
            copy.obsIntensity = copy.normalizedIntensity;
            copies.Add(copy);
        }
        fitCandidates.Add(copies);
    }

    List <List <double> > solutions = new List <List <double> >();
    List <long> average = new List <long>();
    double error = ComputeMaxFlow(fitCandidates, scaledCapacity, tolerance, ref solutions, ref errorInPercent, ref average, ConSole);

    double sumOfIntensities = 0;
    foreach (MsMsPeak peak in scaledCapacity)
    {
        sumOfIntensities += peak.Intensity;
    }

    overFlow = 0;
    underFlow = error;

    // Select which solver output to convert into results.
    List <SolvedResult> result = null;
    switch (returnType)
    {
        case 0:
            result = GetResultList(solutions[0], precision, underFlow, sumOfIntensities);
            break;
        case 1:
            result = GetResultList(solutions[1], precision, underFlow, sumOfIntensities);
            break;
        case 2:
            List <double> averaged = new List <double>();
            foreach (long val in average)
            {
                averaged.Add(val);
            }
            result = GetResultList(averaged, precision, underFlow, sumOfIntensities);
            break;
    }
    // NOTE(review): any other returnType leaves 'result' null and throws below — confirm intended.

    // Pair results back with their precursors (same iteration order as input).
    Dictionary <CharacterizedPrecursor, SolvedResult> resultPerSample = new Dictionary <CharacterizedPrecursor, SolvedResult>();
    int idx = 0;
    foreach (CharacterizedPrecursor key in ratiosToFit)
    {
        resultPerSample.Add(key, result[idx]);
        idx++;
    }
    return resultPerSample;
}
/// <summary>
/// Distributes a query's per-residue fragment coverage among its candidate
/// PSMs by gradient descent, indexing coverage by residue position.
/// </summary>
/// <param name="nbProductsToKeep">Unused by this routine.</param>
/// <param name="precision">Weight placed at every residue position of the mixed spectrum.</param>
/// <param name="capacity">Unused by this routine.</param>
/// <param name="tolerance">Unused by this routine.</param>
/// <param name="PrecursorIntensityInCTrap">Unused by this routine.</param>
/// <param name="query">Query whose PSMs are deconvoluted.</param>
/// <param name="underFlow">Output: residual under-estimation error.</param>
/// <param name="percentError">Output: underflow as a fraction of the mixed spectrum's total.</param>
/// <param name="ConSole">Console sink forwarded to the gradient descent.</param>
/// <returns>One SolvedResult per PSM, in query.psms order.</returns>
public static Dictionary <PeptideSpectrumMatch, SolvedResult> SolveFromFragmentScore(int nbProductsToKeep, double precision, IEnumerable <MsMsPeak> capacity, MassTolerance tolerance, double PrecursorIntensityInCTrap, Query query, out double underFlow, out double percentError, IConSol ConSole)
{
    // One per-residue coverage "spectrum" per candidate PSM.
    List <Dictionary <double, double> > unitSpectrum = new List <Dictionary <double, double> >();
    foreach (PeptideSpectrumMatch psm in query.psms)
    {
        double[] coverage = psm.ComputeAACoverage();
        Dictionary <double, double> perResidue = new Dictionary <double, double>();
        for (int pos = 0; pos < coverage.Length; pos++)
        {
            perResidue.Add(pos, coverage[pos]);
        }
        unitSpectrum.Add(perResidue);
    }

    // Mixed spectrum: uniform 'precision' weight at every residue position.
    Dictionary <double, double> mixedSpectrum = new Dictionary <double, double>();
    string baseSequence = query.psms[0].Peptide.BaseSequence;
    for (int pos = 0; pos < baseSequence.Length; pos++)
    {
        mixedSpectrum.Add(pos, precision);
    }

    List <double> solution = new List <double>();
    double tmpUnderflow = 0;
    Utilities.Methods.GradientDescent.SolveMaxFlowStyle(unitSpectrum, mixedSpectrum, out solution, out tmpUnderflow, ConSole, 1);

    double sumOfIntensities = 0;
    foreach (double val in mixedSpectrum.Values)
    {
        sumOfIntensities += val;
    }

    underFlow = tmpUnderflow;
    List <SolvedResult> result = GetResultList(solution, precision, underFlow, sumOfIntensities);

    // Pair results back with their PSMs (same iteration order as above).
    Dictionary <PeptideSpectrumMatch, SolvedResult> resultPerSample = new Dictionary <PeptideSpectrumMatch, SolvedResult>();
    int idx = 0;
    foreach (PeptideSpectrumMatch psm in query.psms)
    {
        resultPerSample.Add(psm, result[idx]);
        idx++;
    }
    percentError = underFlow / sumOfIntensities;
    return resultPerSample;
}
/// <summary>
/// Distributes a query's per-residue fragment coverage among its candidate
/// PSMs, keyed by theoretical fragment m/z. Each PSM's coverage is projected
/// onto its product matches' theoretical m/z values, normalized to sum to 1,
/// and the resulting unit spectra are fitted against a uniform mixed spectrum
/// by gradient descent.
/// </summary>
/// <param name="nbProductsToKeep">Unused by this routine.</param>
/// <param name="precision">Weight initially placed at every m/z of the mixed spectrum.</param>
/// <param name="capacity">Unused by this routine.</param>
/// <param name="tolerance">Unused by this routine.</param>
/// <param name="PrecursorIntensityInCTrap">Unused by this routine.</param>
/// <param name="query">Query whose PSMs are deconvoluted.</param>
/// <param name="underFlow">Output: residual under-estimation error.</param>
/// <param name="percentError">Output: underflow as a fraction of the mixed spectrum's total.</param>
/// <param name="ConSole">Console sink forwarded to the gradient descent.</param>
/// <returns>One SolvedResult per PSM, in query.psms order.</returns>
public static Dictionary <PeptideSpectrumMatch, SolvedResult> SolveFromFragmentScoreTheoMZ(int nbProductsToKeep, double precision, IEnumerable <MsMsPeak> capacity, MassTolerance tolerance, double PrecursorIntensityInCTrap, Query query, out double underFlow, out double percentError, IConSol ConSole)
{
    // Build one normalized "unit spectrum" (theoMz -> weight) per candidate PSM.
    List <Dictionary <double, double> > unitSpectrum = new List <Dictionary <double, double> >();
    foreach (PeptideSpectrumMatch psm in query.psms)
    {
        double[] arrayFrag = psm.ComputeAACoverage();
        Dictionary <double, double> individualSpectrum = new Dictionary <double, double>();
        foreach (ProductMatch match in psm.AllProductMatches)
        {
            if (!individualSpectrum.ContainsKey(match.theoMz))
            {
                individualSpectrum.Add(match.theoMz, 0);
            }
            // NOTE(review): addedFactor starts from the value already stored at
            // this m/z and is then added back on top of it below, so duplicate
            // theoMz entries compound the prior value — confirm this is intended.
            double addedFactor = individualSpectrum[match.theoMz];
            if (match.Fragment == null)
            {
                addedFactor += arrayFrag[match.fragmentPos];
            }
            else if (match.Fragment.IsReverse)
            {
                // Reverse series: accumulate coverage from the cut point to the C-terminus.
                for (int i = match.fragmentPos - 1; i < arrayFrag.Length; i++)
                {
                    addedFactor += arrayFrag[i];
                }
            }
            else
            {
                // Forward series: accumulate coverage from the cut point back to the N-terminus.
                for (int i = match.fragmentPos - 1; i >= 0; i--)
                {
                    addedFactor += arrayFrag[i];
                }
            }
            individualSpectrum[match.theoMz] += addedFactor;
        }
        // Normalize this PSM's spectrum so its weights sum to 1.
        // NOTE(review): divides by zero when no product match contributed — confirm inputs.
        double sum = 0.0;
        foreach (double val in individualSpectrum.Values)
        {
            sum += val;
        }
        foreach (double key in new List <double>(individualSpectrum.Keys))
        {
            individualSpectrum[key] /= sum;
        }
        unitSpectrum.Add(individualSpectrum);
    }

    // Mixed spectrum: uniform weight over every theoMz seen in any unit spectrum,
    // then divided by the number of keys so the total stays at 'precision'.
    Dictionary <double, double> mixedSpectrum = new Dictionary <double, double>();
    foreach (Dictionary <double, double> dic in unitSpectrum)
    {
        foreach (double key in dic.Keys)
        {
            if (!mixedSpectrum.ContainsKey(key))
            {
                mixedSpectrum.Add(key, precision);
            }
        }
    }
    double nbKeys = (double)mixedSpectrum.Count;
    foreach (double key in new List <double>(mixedSpectrum.Keys))
    {
        mixedSpectrum[key] /= nbKeys;
    }
    //string seq = query.psms[0].Peptide.BaseSequence;
    //for (int i = 0; i < seq.Length; i++)
    //    mixedSpectrum.Add(i, precision);

    // Fit the unit spectra to the mixed spectrum.
    List <double> solution = new List <double>();
    double tmpUnderflow = 0;
    Utilities.Methods.GradientDescent.SolveMaxFlowStyle(unitSpectrum, mixedSpectrum, out solution, out tmpUnderflow, ConSole, 1);

    double sumOfIntensities = 0;
    foreach (double val in mixedSpectrum.Values)
    {
        sumOfIntensities += val;
    }
    underFlow = tmpUnderflow;
    List <SolvedResult> result = GetResultList(solution, precision, underFlow, sumOfIntensities);

    // Pair results back with their PSMs (same iteration order as above).
    Dictionary <PeptideSpectrumMatch, SolvedResult> resultPerSample = new Dictionary <PeptideSpectrumMatch, SolvedResult>();
    int k = 0;
    foreach (PeptideSpectrumMatch key in query.psms)
    {
        resultPerSample.Add(key, result[k]);
        k++;
    }
    percentError = (underFlow / sumOfIntensities);
    return (resultPerSample);
}
/// <summary>
/// Fits characterized (spiked) precursors to an observed mixed spectrum using
/// max-flow style gradient descent. Returns an empty dictionary when any
/// precursor lacks a normalized fragment set of the requested size.
/// </summary>
/// <param name="ratiosToFit">Characterized precursors to fit.</param>
/// <param name="nbProductsToKeep">Fragment-set size to read from each precursor.</param>
/// <param name="precision">Scale applied to the normalized observed intensities.</param>
/// <param name="capacity">Observed MS/MS peaks.</param>
/// <param name="tolerance">Mass tolerance for matching peaks to theoretical fragments.</param>
/// <param name="PrecursorIntensityInCTrap">Precursor intensity used to normalize peaks.</param>
/// <param name="underFlow">Output: residual under-estimation error (0 when aborted).</param>
/// <param name="percentError">Output: underflow fraction (1.0 when aborted).</param>
/// <param name="ConSole">Console sink forwarded to the gradient descent.</param>
/// <returns>One SolvedResult per precursor, or an empty dictionary when aborted.</returns>
public static Dictionary <CharacterizedPrecursor, SolvedResult> SolveFromSpectrum(IEnumerable <CharacterizedPrecursor> ratiosToFit, int nbProductsToKeep, double precision, IEnumerable <MsMsPeak> capacity, MassTolerance tolerance, double PrecursorIntensityInCTrap, out double underFlow, out double percentError, IConSol ConSole)
{
    Dictionary <double, double> mixedSpectrum = new Dictionary <double, double>();
    List <Dictionary <double, double> > unitSpectrum = new List <Dictionary <double, double> >();
    bool allIsomersUsable = true;
    foreach (CharacterizedPrecursor isomer in ratiosToFit)
    {
        // NOTE(review): the indexer below runs before the ContainsKey check, exactly
        // as in the original — order preserved in case the indexer has side effects.
        foreach (double fragmentMz in isomer.NormalizedFragments[nbProductsToKeep].Keys)
        {
            if (mixedSpectrum.ContainsKey(fragmentMz))
            {
                continue;
            }
            // Sum every observed peak within tolerance of this theoretical fragment.
            double cumulIntensity = 0.0;
            foreach (MsMsPeak peak in capacity)
            {
                if (Math.Abs(Proteomics.Utilities.Numerics.CalculateMassError(peak.MZ, fragmentMz, tolerance.Units)) <= tolerance.Value)
                {
                    cumulIntensity += peak.Intensity;
                }
            }
            mixedSpectrum.Add(fragmentMz, cumulIntensity * precision / PrecursorIntensityInCTrap);
        }
        if (isomer.NormalizedFragments.ContainsKey(nbProductsToKeep))
        {
            unitSpectrum.Add(isomer.NormalizedFragments[nbProductsToKeep]);
        }
        else
        {
            allIsomersUsable = false;
        }
    }

    if (!allIsomersUsable)
    {
        // At least one precursor had no fragment set of the requested size.
        percentError = 1.0;
        underFlow = 0;
        return new Dictionary <CharacterizedPrecursor, SolvedResult>();
    }

    List <double> solution = new List <double>();
    double tmpUnderflow = 0;
    Proteomics.Utilities.Methods.GradientDescent.SolveMaxFlowStyle(unitSpectrum, mixedSpectrum, out solution, out tmpUnderflow, ConSole);

    double sumOfIntensities = 0;
    foreach (double val in mixedSpectrum.Values)
    {
        sumOfIntensities += val;
    }

    underFlow = tmpUnderflow;
    List <SolvedResult> result = GetResultList(solution, precision, underFlow, sumOfIntensities);

    // Pair results back with their precursors (same iteration order as input).
    Dictionary <CharacterizedPrecursor, SolvedResult> resultPerSample = new Dictionary <CharacterizedPrecursor, SolvedResult>();
    int idx = 0;
    foreach (CharacterizedPrecursor key in ratiosToFit)
    {
        resultPerSample.Add(key, result[idx]);
        idx++;
    }
    percentError = underFlow / sumOfIntensities;
    return resultPerSample;
}
/// <summary>
/// Runs a two-pass ProPheus search on the Yeast SwissProt test project: the
/// first-pass result is exported as "FirstPass_02_", spectra are then reloaded
/// and searched again, and results are exported at 2% FDR.
/// </summary>
/// <param name="console">Output sink for progress/log messages.</param>
public static void Launch(IConSol console)
{
    //@"G:\Thibault\Olivier\MnR\Databases\BD_RefGenome_WithReverse_2012-06-20.fasta"; //Trypsin
    string outputDir = @"C:\_IRIC\DATA\Yeast\Results\";
    string fastaFile = @"C:\_IRIC\DATA\Yeast\Yeast_SwissProt.fasta";//Yeast
    //@"G:\Thibault\Olivier\MQ_vs_Morpheus\Yeast_SwissProt.fasta";//Yeast
    //@"G:\Thibault\Olivier\Databases\SProHNoIso_20130430\current\sequences_2013-05-30.fa";
    //G:\Thibault\Olivier\MnR\Databases\mini_human_reference_2013-26-03.fasta";//Yeast
    string projectFile = @"C:\_IRIC\DATA\Yeast\project.csv";//Yeast
    //@"G:\Thibault\Olivier\MQ_vs_Morpheus\project.csv";//Yeast
    //@"G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JAN22_2013\_Project_FL_Single.csv";
    //G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\JUN27_2012\MR 4Rep DS\MassSense\_Test_ProjectFile_MF3.csv";
    //G:\Thibault\-=Proteomics_Raw_Data=-\ELITE\MAR18_2013\ProjectFile_TestForProPheus.csv";
    DBOptions dbOptions = new DBOptions(fastaFile, console);
    Samples Project = new Samples(projectFile, 0, dbOptions);

    // Tolerances: 8 ppm precursor; 0.034 Da product (60 000 resolution over a 2000 m/z range).
    dbOptions.precursorMassTolerance = new MassTolerance(8, MassToleranceUnits.ppm);
    dbOptions.productMassTolerance = new MassTolerance(0.034, MassToleranceUnits.Da);
    //dbOptions.productMassTolerance = new MassTolerance(20, MassToleranceUnits.ppm);
    dbOptions.MaximumPeptideMass = 200000;
    dbOptions.OutputFolder = outputDir;

    // Tryptic digestion, up to 2 missed cleavages.
    ProteaseDictionary proteases = ProteaseDictionary.Instance;
    dbOptions.DigestionEnzyme = proteases["trypsin (no proline rule)"]; //"no enzyme"];
    dbOptions.NoEnzymeSearch = false; // true;
    dbOptions.DecoyFusion = false;
    //dbOptions.protease = proteases["trypsin (no proline rule)"];
    dbOptions.ToleratedMissedCleavages = 2;
    dbOptions.MinimumPeptideLength = 5;
    dbOptions.MaximumPeptideLength = 300;

    // Fixed: carbamidomethyl C. Variable (Yeast set): oxidation M, N-term acetyl, phospho S/T/Y.
    GraphML_List <Modification> fixMods = new GraphML_List <Modification>();
    fixMods.Add(ModificationDictionary.Instance["carbamidomethylation of C"]);
    dbOptions.fixedModifications = fixMods;
    GraphML_List <Modification> varMods = new GraphML_List <Modification>();
    varMods.Add(ModificationDictionary.Instance["oxidation of M"]);
    varMods.Add(ModificationDictionary.Instance["acetylation of protein N-terminus"]);
    varMods.Add(ModificationDictionary.Instance["phosphorylation of S"]);
    varMods.Add(ModificationDictionary.Instance["phosphorylation of T"]);
    varMods.Add(ModificationDictionary.Instance["phosphorylation of Y"]); //*/
    dbOptions.maximumVariableModificationIsoforms = 2 * (varMods.Count + fixMods.Count); //TODO Evaluate the viability of this parameter
    dbOptions.variableModifications = varMods;

    dbOptions.NbPSMToKeep = 16;
    dbOptions.addFragmentLoss = false;
    dbOptions.addFragmentMods = false;

    // Search every fragment ion series.
    dbOptions.fragments = new Fragments();
    dbOptions.fragments.Add(new FragmentA());
    dbOptions.fragments.Add(new FragmentB());
    dbOptions.fragments.Add(new FragmentC());
    dbOptions.fragments.Add(new FragmentX());
    dbOptions.fragments.Add(new FragmentY());
    dbOptions.fragments.Add(new FragmentZ());

    // Score weights (older experimental values kept in trailing comments).
    dbOptions.dProduct = 0.0;
    dbOptions.dPrecursor = 0.1; // 0.12;
    dbOptions.dMatchingProductFraction = 0.8; // 0.45;
    dbOptions.dMatchingProduct = 0.0; // 0.5;
    dbOptions.dIntensityFraction = 0.1; // 45;// 0.0;//0.13;
    dbOptions.dIntensity = 0;
    dbOptions.dProtein = 0;
    dbOptions.dPeptideScore = 0.0; // 0.3;
    dbOptions.dFragmentScore = 0.0; // 0.5;

    //ClusterOptions clusterOptions = new ClusterOptions(Project, outputDir, 5, true, 90, true);//TODO validate its in seconds for all file types
    dbOptions.SaveMS1Peaks = true;
    dbOptions.SaveMSMSPeaks = true;
    dbOptions.LoadSpectraIfFound = true;

    Propheus propheus = new Propheus(dbOptions, Project);
    propheus.Preload(false, false);
    propheus.PrepareQueries();

    //To beat : 4653 (MaxQuant) Psm at 2%FDR
    //First pass (used to optimize parameters and score weights)
    Result tmp = propheus.SearchLatestVersion(propheus.AllQueries, true, false);//, 1.0, false, false, null);
    tmp.WriteInfoToCsv(true);
    tmp.Export(0.02, "FirstPass_02_");

    //Second search
    propheus.Preload(true);
    propheus.PrepareQueries();
    // NOTE(review): finalRez (the second-pass result) is never exported; the
    // exports below still use 'tmp' from the first pass — confirm intended.
    Result finalRez = propheus.SearchLatestVersion(propheus.AllQueries, false);//, 1.0, false, false, null);
    //tmp.Export(0.05, "05_");
    tmp.Export(0.02, "02_");
    //tmp.Export(0.05, "05_AllFragments");
    // tmp.Export(0.01, "01_");
    //tmp.Export(double.MaxValue, "All_");
    //tmp.WriteInfoToConsole();
    /*
     * Optimizer op = new Optimizer(propheus);
     * op.LaunchBestPSMOptimization(tmp);//.proteins, propheus.AllQueries);
     * //*/
    //Optimizer op = new Optimizer(propheus);
    //MSSearcher.Export(dbOptions.outputFolder + "5PercentOptimized_precursors.csv", Optimizer.PrecursorOptimizer(tmp.precursors, 0.05));
    //op.LaunchBestPSMOptimization(tmp);//.proteins, propheus.AllQueries);
    //op.LaunchPrecursorScoreOptimization(tmp);//.proteins, propheus.AllQueries);
    //op.Launch(tmp.proteins, propheus.AllQueries);
    /*
     * propheus.Align(tmp);
     *
     * Result tmp2 = propheus.Search(1.0, false, null, propheus.CreateQueries(propheus.AllSpectras));
     * tmp2.Export(0.05, "Aligned_05_");
     * tmp2.Export(double.MaxValue, "Aligned_All_");
     * MSSearcher.Export(dbOptions.outputFolder + "Aligned_5PercentOptimized_precursors.csv", Optimizer.PrecursorOptimizer(tmp2.precursors, 0.05));
     * tmp.WriteInfoToConsole();//*/
}
/// <summary>
/// Provides deconvoluted elution curves of mixed spectra from the provided raw files, using the
/// provided synthetic (spiked) raw files as per-isomer references.
/// Exports results to CSV files and stores everything in class members
/// (SpikedSamples/SpikedResult, MixedSamples/mixedResult, characterizedPeptides, mixedPrecursors).
/// </summary>
/// <param name="spikedRaws">Raw files of individually spiked (synthetic) peptide samples</param>
/// <param name="mixedRaws">Raw files of the mixed samples to deconvolute</param>
/// <param name="fastaFile">Protein database used for identification and site-occupancy reporting</param>
/// <param name="folderToOutputTo">Output folder for all CSV reports</param>
/// <param name="conSol">Console abstraction for progress/error messages</param>
public void Solve(string[] spikedRaws, string[] mixedRaws, string fastaFile, string folderToOutputTo, IConSol conSol)
{
    dbOptions = CreateOptions(fastaFile, folderToOutputTo, precTolPpm, prodTolDa, conSol);

    // One Sample object per spiked raw file (1-based indexes, default fractions).
    SpikedSamples = new Samples(dbOptions);
    for (int i = 0; i < spikedRaws.Length; i++)
    {
        SpikedSamples.Add(new Sample(i + 1, 1, 1, spikedRaws[i], spikedRaws[i], 0, ""));
    }

    // Precompute Spiked peptide identifications
    SpikedResult = Ace.Start(dbOptions, SpikedSamples, false, false);
    SpikedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "SpikedSamplesPSMs.csv");

    // One Sample object per mixed raw file.
    MixedSamples = new Samples(dbOptions);
    for (int i = 0; i < mixedRaws.Length; i++)
    {
        MixedSamples.Add(new Sample(i + 1, 1, 1, mixedRaws[i], mixedRaws[i], 0, ""));
    }

    // Precompute Mixed peptide identifications
    mixedResult = Ace.Start(dbOptions, MixedSamples, false, false);
    if (mixedResult == null)
    {
        conSol.WriteLine("OOPS! No queries could be extracted from the list of mixed spectrum files...");
    }
    else
    {
        mixedResult.ExportPSMs(1, dbOptions.OutputFolder + "Identifications" + System.IO.Path.DirectorySeparatorChar + "MixedSamplesPSMs.csv");
        conSol.WriteLine("Computing gradient descents...");

        // Compute all usable spiked peptides (per precursor m/z, per charge).
        characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(SpikedSamples, SpikedResult, dbOptions, nbMinFragments, nbMaxFragments);
        ExportSpikedSampleResult(characterizedPeptides, dbOptions);

        // Cumulative report: one column per characterized precursor.
        vsCSVWriter writerCumul = new vsCSVWriter(OutputFolder + "Results.csv");
        string titleCombined = "Mixed Sample,Precursor";
        string curveStr = "Polynomial Curve,";
        string spikedIntensityStr = "Area under the curve,";
        foreach (double precursor in characterizedPeptides.Keys)
        {
            foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
            {
                titleCombined += "," + charPrec.Peptide.Sequence + " Charge " + charPrec.Charge;

                // A 3-coefficient fit is reported as a quadratic; anything else as NA.
                if (charPrec.eCurveIntensityCount.Coefficients != null && charPrec.eCurveIntensityCount.Coefficients.Length == 3)
                {
                    curveStr += "," + charPrec.eCurveIntensityCount.Coefficients[0] + "x^2 + " + charPrec.eCurveIntensityCount.Coefficients[1] + "x" + charPrec.eCurveIntensityCount.Coefficients[2];
                }
                else
                {
                    curveStr += ",NA";
                }
                spikedIntensityStr += "," + charPrec.eCurveIntensityCount.Area;
            }
        }
        writerCumul.AddLine(titleCombined);
        writerCumul.AddLine(curveStr);
        writerCumul.AddLine(spikedIntensityStr);

        // Gather candidate mixed precursors per sample, matched against the characterized peptides.
        mixedPrecursors = new Dictionary<Sample, List<MixedPrecursor>>();
        foreach (Sample mixedSample in MixedSamples)
        {
            mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, dbOptions, characterizedPeptides));
        }

        // Deconvolute each characterized precursor m/z found in each mixed sample.
        foreach (Sample mixedSample in MixedSamples)
        {
            foreach (double keyMz in characterizedPeptides.Keys)
            {
                List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>> listOfRatios = new List<Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve>>();
                foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
                {
                    if (mPrec.MZ == keyMz)
                    {
                        // Compute Max Flow for this precursor
                        Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios = GetRatios(characterizedPeptides, mPrec, dbOptions, nbMinFragments, nbMaxFragments);
                        listOfRatios.Add(ratios);
                        ExportMixedSampleResult(ratios, mixedSample, mPrec, keyMz, dbOptions);
                    }
                }

                // One cumulative-area row per (sample, precursor m/z); skipped when all areas are zero.
                bool isEmpty = true;
                string resultStr = vsCSV.GetFileName(mixedSample.sSDF) + "," + keyMz;
                foreach (double precursor in characterizedPeptides.Keys)
                {
                    foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
                    {
                        double cumulArea = 0.0;
                        foreach (Dictionary<CharacterizedPrecursor, MaxFlowElutionCurve> ratios in listOfRatios)
                        {
                            if (ratios.ContainsKey(charPrec))
                            {
                                cumulArea += ratios[charPrec].eCurvePerMs.Area;
                            }
                        }
                        resultStr += "," + cumulArea;
                        if (cumulArea > 0)
                        {
                            isEmpty = false;
                        }
                    }
                }
                if (!isEmpty)
                {
                    writerCumul.AddLine(resultStr);
                }
            }
        }
        writerCumul.WriteToFile();

        // Collect the set of variable modifications seen in the fitted peptides.
        // NOTE(review): the double values stay at 0.0 — the dictionary is effectively used as a set of keys.
        Dictionary<Modification, double> dicOfIntensityPerMod = new Dictionary<Modification, double>();
        foreach (Sample sample in mixedPrecursors.Keys)
        {
            foreach (MixedPrecursor mP in mixedPrecursors[sample])
            {
                foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                {
                    if (cP.Peptide.VariableModifications != null)
                    {
                        foreach (Modification mod in cP.Peptide.VariableModifications.Values)
                        {
                            if (!dicOfIntensityPerMod.ContainsKey(mod))
                            {
                                dicOfIntensityPerMod.Add(mod, 0.0);
                            }
                        }
                    }
                }
            }
        }

        // Compute site occupancy for identical sequences (real positionnal isomers)
        vsCSVWriter writerSitesOccupancy = new vsCSVWriter(OutputFolder + "Results_SiteOccupancy.csv");
        List<Protein> AllProteins = Ace.ReadProteomeFromFasta(fastaFile, false, dbOptions);
        foreach (Protein protein in AllProteins)
        {
            // Header row: protein description, full sequence, then one column per residue.
            string newTitleProtein = protein.Description.Replace(',', ' ') + "," + protein.Sequence;
            for (int i = 0; i < protein.Sequence.Length; i++)
            {
                newTitleProtein += "," + protein[i].ToString();
            }
            writerSitesOccupancy.AddLine(newTitleProtein);

            // Per-sample coverage: sum of elution-curve areas of every fitted peptide spanning residue i+1.
            foreach (Sample mixedSample in mixedPrecursors.Keys)
            {
                string coverage = "Coverage," + mixedSample.Name;
                for (int i = 0; i < protein.Sequence.Length; i++)
                {
                    double cumulSite = 0.0;
                    // (A stray "newTitleProtein += ..." append was removed here: the header line
                    //  was already written above and the variable is never read again.)
                    foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                    {
                        foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                        {
                            if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber)
                            {
                                cumulSite += mP.PeptideRatios[cP].eCurvePerMs.Area;
                            }
                        }
                    }
                    coverage += "," + cumulSite;
                }
                writerSitesOccupancy.AddLine(coverage);
            }

            // Per-modification occupancy: one row per (modification, sample), one column per residue.
            foreach (Modification mod in dicOfIntensityPerMod.Keys)
            {
                Dictionary<Sample, string> dicOfLines = new Dictionary<Sample, string>();
                for (int i = 0; i < protein.Sequence.Length; i++)
                {
                    foreach (Sample mixedSample in mixedPrecursors.Keys)
                    {
                        double cumulModArea = 0.0;
                        foreach (MixedPrecursor mP in mixedPrecursors[mixedSample])
                        {
                            foreach (CharacterizedPrecursor cP in mP.PeptideRatios.Keys)
                            {
                                if (i + 1 >= cP.Peptide.StartResidueNumber && i + 1 <= cP.Peptide.EndResidueNumber && cP.Peptide.VariableModifications != null)
                                {
                                    foreach (int pos in cP.Peptide.VariableModifications.Keys)
                                    {
                                        // NOTE(review): "pos - 2" maps the 1-based modification position
                                        // (offset by the N-terminus slot) onto the protein residue index — confirm
                                        // against Peptide.VariableModifications' indexing convention.
                                        if (cP.Peptide.StartResidueNumber + pos - 2 == i + 1 && cP.Peptide.VariableModifications[pos] == mod)
                                        {
                                            cumulModArea += mP.PeptideRatios[cP].eCurvePerMs.Area;
                                        }
                                    }
                                }
                            }
                        }
                        // First residue creates the row (with its label); later residues append columns.
                        if (!dicOfLines.ContainsKey(mixedSample))
                        {
                            dicOfLines.Add(mixedSample, mod.Description + "," + mixedSample.Name + "," + cumulModArea);
                        }
                        else
                        {
                            dicOfLines[mixedSample] += "," + cumulModArea;
                        }
                    }
                }
                foreach (string line in dicOfLines.Values)
                {
                    writerSitesOccupancy.AddLine(line);
                }
            }
        }
        writerSitesOccupancy.WriteToFile();
    }
}
/// <summary>
/// Iteratively decreases per-isomer flows (starting from each isomer's local maximum flow) to
/// minimize the overflow/underflow between the virtual spectrum built from the spiked fragment
/// ratios and the observed mixed spectrum.
/// </summary>
/// <param name="spikedMatches">Per-isomer lists of reference fragment matches</param>
/// <param name="mixedSpectrum">Observed peaks of the mixed MSMS spectrum</param>
/// <param name="tolerance">Fragment m/z matching tolerance</param>
/// <param name="optimalSolutions">Out: [0] = final flows, [1] = best flows seen (lowest over+under error)</param>
/// <param name="percentError">Out: final underflow divided by the summed matched intensities</param>
/// <param name="average">Out: element-wise average of the two solutions (only set when a best solution exists)</param>
/// <param name="ConSole">Console abstraction (only used by commented-out diagnostics)</param>
/// <returns>Underflow of the virtual spectrum built from the final flows</returns>
private static double ComputeMaxFlow(List<List<ProductMatch>> spikedMatches, List<MsMsPeak> mixedSpectrum, MassTolerance tolerance, ref List<List<double>> optimalSolutions, ref double percentError, ref List<long> average, IConSol ConSole)
{
    // Create dictionnary of usefull peaks: one bin per distinct theoretical fragment m/z.
    // Fragments closer than the tolerance to an existing bin are merged into it
    // (the match's theoMz is rewritten so later passes reuse the same bin).
    Dictionary<float, double> mixedFragDic = new Dictionary<float, double>();
    foreach (List<ProductMatch> fragmentRatio in spikedMatches)
    {
        foreach (ProductMatch match in fragmentRatio)
        {
            if (!mixedFragDic.ContainsKey((float)match.theoMz))
            {
                float closest = -1; // sentinel: no bin within tolerance
                foreach (float key in mixedFragDic.Keys)
                {
                    if (Math.Abs(Utilities.Numerics.CalculateMassError(match.theoMz, key, tolerance.Units)) <= tolerance.Value)
                    {
                        closest = key;
                    }
                }
                if (closest > 0)
                {
                    //ConSole.WriteLine("Potential problem with selected fragment masses ");
                    match.theoMz = closest;
                }
                else
                {
                    mixedFragDic.Add((float)match.theoMz, 0);
                }
            }
        }
    }

    // Fill dictionnary with Intensities: accumulate every observed peak within tolerance of a bin.
    List<float> keys = new List<float>(mixedFragDic.Keys);
    foreach (MsMsPeak peak in mixedSpectrum)
    {
        foreach (float mz in keys)
        {
            if (Math.Abs(Utilities.Numerics.CalculateMassError(peak.MZ, mz, tolerance.Units)) <= tolerance.Value)
            {
                mixedFragDic[mz] += peak.Intensity;
            }
        }
    }

    // Start each isomer at its local maximum flow; the loop below only ever decreases flows.
    List<long> localFlows = new List<long>();
    foreach (List<ProductMatch> fragmentRatio in spikedMatches)
    {
        localFlows.Add(FindLocalMaximumFlow(fragmentRatio, mixedFragDic));
    }

    Dictionary<float, double> virtualSpectrum = BuildVirtualSpectrum(spikedMatches, localFlows, mixedFragDic);
    double overError = MaxFlowHelper.ComputeOverflow(virtualSpectrum, mixedFragDic);
    double underError = MaxFlowHelper.ComputeUnderflow(virtualSpectrum, mixedFragDic);

    double[] bestIndexes = new double[spikedMatches.Count];
    int iterSize = 1;
    double bestOverallError = double.MaxValue;
    List<long> bestLocalFlows = new List<long>();
    Random rnd = new Random();

    while (overError >= 1 && iterSize < 10000) //anything less than 1 is an acceptable solution
    {
        for (int index = 0; index < bestIndexes.Length; index++)
        {
            bestIndexes[index] = -1;
        }

        // Probe: for each isomer still carrying flow, rate the benefit of decreasing it by iterSize
        // (overflow reduction per unit of underflow increase).
        for (int i = 0; i < spikedMatches.Count; i++)
        {
            if (localFlows[i] > 0)
            {
                localFlows[i] -= iterSize;
                virtualSpectrum = BuildVirtualSpectrum(spikedMatches, localFlows, mixedFragDic);
                double tmpErrorMinus = MaxFlowHelper.ComputeUnderflow(virtualSpectrum, mixedFragDic);
                double tmpErrorPlus = MaxFlowHelper.ComputeOverflow(virtualSpectrum, mixedFragDic);
                double tmpFlowRate = Math.Abs(overError - tmpErrorPlus);
                double underDiff = Math.Abs(underError - tmpErrorMinus);
                if (underDiff >= 1)
                {
                    tmpFlowRate /= underDiff;
                }
                bestIndexes[i] = tmpFlowRate;
                localFlows[i] += iterSize; // restore: this was only a probe
            }
        }

        // Pick pseudo randomly best index among all candidates tied at the best rate.
        double worstFlowRate = 0.0;
        for (int index = 0; index < bestIndexes.Length; index++)
        {
            if (bestIndexes[index] > worstFlowRate)
            {
                worstFlowRate = bestIndexes[index];
            }
        }
        if (worstFlowRate > 0)
        {
            int nbMatching = 0;
            for (int index = 0; index < bestIndexes.Length; index++)
            {
                if (bestIndexes[index] >= worstFlowRate)
                {
                    nbMatching++;
                }
            }
            // BUGFIX: Random.Next's upper bound is exclusive; the original rnd.Next(0, nbMatching - 1)
            // could never select the last tied candidate.
            int iterChoice = rnd.Next(0, nbMatching);
            int iterNb = 0;
            for (int index = 0; index < bestIndexes.Length; index++)
            {
                if (bestIndexes[index] >= worstFlowRate)
                {
                    if (iterChoice == iterNb)
                    {
                        localFlows[index] -= iterSize;
                    }
                    iterNb++;
                }
            }
            iterSize = 1;
        }
        else
        {
            // No productive single step found: widen the step and retry.
            iterSize++;
        }

        virtualSpectrum = BuildVirtualSpectrum(spikedMatches, localFlows, mixedFragDic);
        overError = MaxFlowHelper.ComputeOverflow(virtualSpectrum, mixedFragDic);
        underError = MaxFlowHelper.ComputeUnderflow(virtualSpectrum, mixedFragDic);
        if (overError + underError < bestOverallError)
        {
            bestLocalFlows = new List<long>(localFlows);
            bestOverallError = overError + underError;
        }
    } //End of while overflow > 1

    optimalSolutions.Clear();
    List<double> newList = new List<double>();
    foreach (long localFlow in localFlows)
    {
        newList.Add(localFlow);
    }
    optimalSolutions.Add(newList);
    newList = new List<double>();
    foreach (long localFlow in bestLocalFlows)
    {
        newList.Add(localFlow);
    }
    optimalSolutions.Add(newList);

    // Compute average of the final and the best solutions, element-wise (truncated to long).
    if (bestOverallError < double.MaxValue)
    {
        average.Clear();
        for (int i = 0; i < optimalSolutions[0].Count; i++)
        {
            double sum = 0.0;
            foreach (List<double> solution in optimalSolutions)
            {
                sum += solution[i];
            }
            double avg = sum / (double)optimalSolutions.Count;
            average.Add((long)avg);
        }
    }

    // Compute expected error in percentage
    double sumOfIntensities = 0;
    foreach (double val in mixedFragDic.Values)
    {
        sumOfIntensities += val;
    }
    percentError = underError / sumOfIntensities;

    // NOTE(review): the returned underflow is recomputed from the FINAL flows (localFlows),
    // not from bestLocalFlows — confirm this is intentional.
    virtualSpectrum = BuildVirtualSpectrum(spikedMatches, localFlows, mixedFragDic);
    return(MaxFlowHelper.ComputeUnderflow(virtualSpectrum, mixedFragDic));
}
/// <summary>
/// Extract isomer ratios from a given spectrum (transformed into capacity vector)
/// </summary>
/// <param name="ratiosToFit">Characterized (spiked) precursors to fit against the spectrum</param>
/// <param name="nbProductsToKeep">Number of products defining which normalized-fragment set to use</param>
/// <param name="capacity">Observed MSMS peaks of the mixed spectrum</param>
/// <param name="tolerance">Fragment m/z matching tolerance</param>
/// <param name="PrecursorIntensityInCTrap">Precursor intensity in the C-trap; scales normalization and step size</param>
/// <param name="PrecursorIntensity">Precursor intensity (unused in the current implementation)</param>
/// <param name="underFlow">Out: unexplained intensity left after the fit (0 when no fit was attempted)</param>
/// <param name="percentError">Out: underFlow / total matched intensity (1.0 when no fit was attempted)</param>
/// <param name="ConSole">Console abstraction forwarded to the gradient descent</param>
/// <param name="fileOut">Optional CSV path for per-fragment diagnostics</param>
/// <returns>Fit result per characterized precursor; empty when any isomer lacks the requested fragment set</returns>
public static Dictionary<CharacterizedPrecursor, SolvedResult> SolveFromSpectrum(IEnumerable<CharacterizedPrecursor> ratiosToFit, int nbProductsToKeep, IEnumerable<MsMsPeak> capacity, MassTolerance tolerance, double PrecursorIntensityInCTrap, double PrecursorIntensity, out double underFlow, out double percentError, IConSol ConSole, string fileOut = null)
{
    bool keepGoing = true;
    Dictionary<double, double> mixedSpectrum = new Dictionary<double, double>();
    List<Dictionary<double, double>> unitSpectrum = new List<Dictionary<double, double>>();
    foreach (CharacterizedPrecursor isomer in ratiosToFit)
    {
        // BUGFIX: the original accessed NormalizedFragments[nbProductsToKeep] before the
        // ContainsKey guard below, throwing KeyNotFoundException for the very case the
        // keepGoing flag was meant to handle. All access is now under the guard.
        if (isomer.NormalizedFragments.ContainsKey(nbProductsToKeep))
        {
            // Accumulate the observed intensity within tolerance of each reference fragment m/z.
            foreach (double key in isomer.NormalizedFragments[nbProductsToKeep].Keys)
            {
                if (!mixedSpectrum.ContainsKey(key))
                {
                    double cumulIntensity = 0.0;
                    foreach (MsMsPeak peak in capacity)
                    {
                        if (Math.Abs(Utilities.Numerics.CalculateMassError(peak.MZ, key, tolerance.Units)) <= tolerance.Value)
                        {
                            cumulIntensity += peak.Intensity;
                        }
                    }
                    mixedSpectrum.Add(key, cumulIntensity);// / PrecursorIntensityInCTrap);
                }
            }

            if (isomer.FragmentNormalizor.ContainsKey(nbProductsToKeep))
            {
                // Rescale the unit spectrum by the intensity-dependent normalization curve.
                Dictionary<double, double> dic = new Dictionary<double, double>();
                foreach (double key in isomer.NormalizedFragments[nbProductsToKeep].Keys)
                {
                    dic.Add(key, isomer.NormalizedFragments[nbProductsToKeep][key] * isomer.FragmentNormalizor[nbProductsToKeep].InterpolateIntensity(PrecursorIntensityInCTrap));
                }
                unitSpectrum.Add(dic);
            }
            else
            {
                unitSpectrum.Add(isomer.NormalizedFragments[nbProductsToKeep]);
            }
        }
        else
        {
            // This nbProduct cannot be used: at least one isomer has no fragment set for it.
            keepGoing = false;
        }
    }

    // Optional diagnostics: fragment keys and the observed (mixed) intensities.
    vsCSVWriter writerFrag = null;
    if (!string.IsNullOrEmpty(fileOut))
    {
        writerFrag = new vsCSVWriter(fileOut);
        string line = "Fragments:";
        foreach (double key in mixedSpectrum.Keys)
        {
            line += "," + key;
        }
        writerFrag.AddLine(line);
        line = "Mixed:";
        foreach (double val in mixedSpectrum.Values)
        {
            line += "," + val;
        }
        writerFrag.AddLine(line);
    }

    //This nbProduct seems relevant, try to use isomer to get ratios for this spectrum
    if (keepGoing)
    {
        List<double> solution = new List<double>();
        double stepSize = PrecursorIntensityInCTrap / 1000.0;
        if (stepSize < 1)
        {
            stepSize = 1;
        }
        double tmpUnderflow = 0;
        Utilities.Methods.GradientDescent.SolveMaxFlowStyle(unitSpectrum, mixedSpectrum, out solution, out tmpUnderflow, ConSole, stepSize);

        double sumOfIntensities = 0;
        foreach (double val in mixedSpectrum.Values)
        {
            sumOfIntensities += val;
        }
        underFlow = tmpUnderflow;

        // Pair each solved coefficient back to its precursor (same enumeration order as the fit).
        List<SolvedResult> result = GetResultList(solution, underFlow, sumOfIntensities);
        Dictionary<CharacterizedPrecursor, SolvedResult> resultPerSample = new Dictionary<CharacterizedPrecursor, SolvedResult>();
        int i = 0;
        foreach (CharacterizedPrecursor key in ratiosToFit)
        {
            resultPerSample.Add(key, result[i]);
            i++;
        }

        if (writerFrag != null)
        {
            foreach (CharacterizedPrecursor cPrec in ratiosToFit)
            {
                string line = cPrec.Peptide.Sequence;
                foreach (double key in mixedSpectrum.Keys)
                {
                    // mixedSpectrum keys are the union over all isomers; this isomer may lack
                    // some of them, so report 0 instead of throwing KeyNotFoundException.
                    double normIntensity;
                    cPrec.NormalizedFragments[nbProductsToKeep].TryGetValue(key, out normIntensity);
                    line += "," + normIntensity * resultPerSample[cPrec].NbFitTimes;
                }
                writerFrag.AddLine(line);
            }
            writerFrag.WriteToFile();
        }

        percentError = (underFlow / sumOfIntensities);
        return(resultPerSample);
    }
    else
    {
        // No fit attempted; the diagnostics writer (if any) is deliberately never flushed.
        percentError = 1.0;
        underFlow = 0;
        return(new Dictionary<CharacterizedPrecursor, SolvedResult>());
    }
}
/// <summary>
/// Deconvolutes mixed spectra without synthetic (spiked) reference files: the mixed samples
/// themselves are used to characterize precursors. Exports a cumulative CSV report and stores
/// intermediate objects in class members (MixedSamples, mixedResult, characterizedPeptides, mixedPrecursors).
/// </summary>
/// <param name="mixedRaws">Raw files of the mixed samples to deconvolute</param>
/// <param name="fastaFile">Protein database used for identification</param>
/// <param name="folderToOutputTo">Output folder for all CSV reports</param>
/// <param name="conSol">Console abstraction for progress/error messages</param>
public void Solve(string[] mixedRaws, string fastaFile, string folderToOutputTo, IConSol conSol)
{
    dbOptions = CreateOptions(fastaFile, folderToOutputTo, conSol);

    // One Sample object per mixed raw file (1-based indexes, default fractions).
    MixedSamples = new Samples(dbOptions);
    for (int i = 0; i < mixedRaws.Length; i++)
    {
        MixedSamples.Add(new Sample(i + 1, 1, 1, mixedRaws[i], mixedRaws[i], 0, ""));
    }

    // Precompute Mixed peptide identifications
    mixedResult = Ace.Start(dbOptions, MixedSamples, false, false);
    // BUGFIX: guard against a null result, as the spiked-reference Solve overload already does;
    // proceeding would throw NullReferenceException inside GetSpikedPrecursors.
    if (mixedResult == null)
    {
        conSol.WriteLine("OOPS! No queries could be extracted from the list of mixed spectrum files...");
        return;
    }

    conSol.WriteLine("Computing gradient descents...");

    // Compute all usable spiked peptides (here characterized from the mixed samples themselves).
    characterizedPeptides = CharacterizedPrecursor.GetSpikedPrecursors(MixedSamples, mixedResult, dbOptions, nbMinFragments, nbMaxFragments);
    ExportSpikedSampleResult(characterizedPeptides, dbOptions);

    // Cumulative report: one column per characterized precursor.
    vsCSVWriter writerCumul = new vsCSVWriter(OutputFolder + "Results.csv");
    string titleCombined = "Mixed Sample,Precursor";
    string curveStr = "Polynomial Curve,";
    string spikedIntensityStr = "Area under the curve,";
    foreach (double precursor in characterizedPeptides.Keys)
    {
        foreach (CharacterizedPrecursor charPrec in characterizedPeptides[precursor].Values)
        {
            titleCombined += "," + charPrec.Peptide.Sequence + " Charge " + charPrec.Charge;

            // A 3-coefficient fit is reported as a quadratic; anything else as NA.
            if (charPrec.eCurveIntensityCount.Coefficients != null && charPrec.eCurveIntensityCount.Coefficients.Length == 3)
            {
                curveStr += "," + charPrec.eCurveIntensityCount.Coefficients[0] + "x^2 + " + charPrec.eCurveIntensityCount.Coefficients[1] + "x" + charPrec.eCurveIntensityCount.Coefficients[2];
            }
            else
            {
                curveStr += ",NA";
            }
            spikedIntensityStr += "," + charPrec.eCurveIntensityCount.Area;
        }
    }
    writerCumul.AddLine(titleCombined);
    writerCumul.AddLine(curveStr);
    writerCumul.AddLine(spikedIntensityStr);

    // Gather candidate mixed precursors per sample, matched against the characterized peptides.
    mixedPrecursors = new Dictionary<Sample, List<MixedPrecursor>>();
    foreach (Sample mixedSample in MixedSamples)
    {
        mixedPrecursors.Add(mixedSample, MixedPrecursor.GetMixedPrecursors(mixedSample, mixedResult, dbOptions, characterizedPeptides));
    }

    // Get the list of precursors to characterize, and export per-precursor ratios.
    // (Unlike the spiked-reference overload, no cumulative per-precursor row is written here.)
    foreach (Sample mixedSample in MixedSamples)
    {
        foreach (double keyMz in characterizedPeptides.Keys)
        {
            foreach (MixedPrecursor mPrec in mixedPrecursors[mixedSample])
            {
                if (mPrec.MZ == keyMz)
                {
                    // Compute Max Flow for this precursor
                    Dictionary<Peptide, MaxFlowElutionCurve> ratios = GetRatiosNoSpikes(mPrec, precision);
                    ExportMixedSampleResult(ratios, mixedSample, mPrec, keyMz, dbOptions);
                }
            }
        }
    }
    writerCumul.WriteToFile();
}