/// <summary>
/// Adds the predicted Lys0/LysTotal curve for the given pool parameters to the ratio comparison plot.
/// </summary>
/// <param name="poolParams">Supplies the Kst, Kbt and Kao handed to the three-compartment model.</param>
/// <param name="label">Legend label for the plotted curve.</param>
/// <param name="kbi">Value passed as the model's kbi. The default is just a very high number to simulate
/// instantaneous turnover (proxy for the available amino acid pool).</param>
private void PlotFit(PoolParameters poolParams, string label, double kbi = 1000000)
{
    //one sample every tenth of a day: 0.0, 0.1, ..., 99.9
    double[] timepoints = Enumerable.Range(0, 1000).Select(step => step / 10.0).ToArray();

    //half-life = ln(2)/kbt, make half life 0, kbt = infinity
    double[] rfs = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(
        poolParams.Kst, poolParams.Kbt, poolParams.Kao, kbi, timepoints);

    //the plot control is touched through the dispatcher
    Dispatcher.Invoke(() =>
    {
        var plot = RatioComparisonPlot.plt;
        plot.Layout(titleHeight: 20, xLabelHeight: 40, y2LabelWidth: 20);
        plot.XLabel("Time (Days)", fontSize: 20);
        plot.YLabel("Lys0 / LysTotal", fontSize: 20);
        plot.Axis(0, 100, 0, 1);
        plot.Ticks(fontSize: 18);
        plot.PlotScatter(timepoints, rfs, label: label, markerShape: ScottPlot.MarkerShape.none);

        bool showLegend = DisplayLegendCheckBox.IsChecked.Value;
        if (showLegend)
        {
            plot.Legend();
        }

        RatioComparisonPlot.Render();
    });
}
/// <summary>
/// Redraws the precision scatter plot and the half-life histogram for the file currently selected
/// in the analyzed-files grid, at peptide or protein level depending on the radio button.
/// </summary>
/// <param name="customParams">Pool parameters to plot with. When null, the parameters stored for the
/// selected file are used; if none are stored, nothing is drawn.</param>
private void UpdateGlobalVisualization(PoolParameters customParams = null)
{
    var selectedRow = DisplayAnalyzedFilesDataGrid.SelectedItem;
    if (selectedRow == null)
    {
        return;
    }

    string dataFile = ((RawDataForDataGrid)selectedRow).FilePath;
    bool plottingPeptides = peptideRadioButton.IsChecked.Value;

    //collect the entries that belong to the selected file
    List<PeptideTurnoverObject> entriesForThisFile;
    if (plottingPeptides)
    {
        entriesForThisFile = AllPeptides.Where(entry => entry.FileName.Equals(dataFile)).ToList();
    }
    else
    {
        entriesForThisFile = AnalyzedProteins.Where(entry => entry.FileName.Equals(dataFile)).ToList();

        //hidden code for producing paper figures with both blood types in a single histogram
        if (dataFile.Contains("lood"))
        {
            //combine the two
            entriesForThisFile = AnalyzedProteins.Where(entry => entry.FileName.Contains("lood")).ToList();
        }
    }

    //fall back to the stored parameters; a file without any has not been analyzed yet
    bool usingStoredParams = customParams == null;
    if (usingStoredParams && !PoolParameterDictionary.TryGetValue(dataFile, out customParams))
    {
        return;
    }

    if (plottingPeptides)
    {
        //the flag is lowered while the text boxes are written programmatically and raised again afterwards
        ChangeParamTextBox = false;
        if (usingStoredParams)
        {
            KstTB.Text = customParams.Kst.ToString();
            KbtTB.Text = customParams.Kbt.ToString();
            KaoTB.Text = customParams.Kao.ToString();
        }
        MseTB.Text = entriesForThisFile.Average(entry => entry.Error).ToString();
        ChangeParamTextBox = true;
    }

    PlotPrecisionScatterPlot(entriesForThisFile, customParams);
    PlotHalfLifeHistogram(entriesForThisFile);
}
/// <summary>
/// Applies the Kst/Kbt/Kao values typed into the parameter text boxes to the selected file:
/// stores them, refits the file's peptides against them, and refreshes the global plots.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void ParamApply_Click(object sender, RoutedEventArgs e)
{
    if (DisplayAnalyzedFilesDataGrid.SelectedItem == null)
    {
        return;
    }

    //validate the user-typed values up front; Convert.ToDouble would throw on empty or malformed text
    if (!double.TryParse(KstTB.Text, out double kst)
        || !double.TryParse(KbtTB.Text, out double kbt)
        || !double.TryParse(KaoTB.Text, out double kao))
    {
        MessageBox.Show("Kst, Kbt, and Kao must all be valid numbers.");
        return;
    }

    assignParamsButton.IsEnabled = false;
    try
    {
        string dataFile = ((RawDataForDataGrid)DisplayAnalyzedFilesDataGrid.SelectedItem).FilePath;
        PoolParameters customParams = new PoolParameters(kst, kbt, kao);
        PoolParameterDictionary[dataFile] = customParams;

        List<PeptideTurnoverObject> peptidesForThisFile = AllPeptides.Where(x => x.FileName.Equals(dataFile)).ToList();

        //refit with progressively smaller shift values
        foreach (double shift in new[] { 0.001, 0.0001, 0.00001 })
        {
            NonLinearRegression.UpdateKbi(customParams.Kst, customParams.Kbt, customParams.Kao, peptidesForThisFile, shift);
        }

        //NOTE(review): UpdateGlobalVisualization writes the *average* error into this same box when the
        //peptide view is active, overwriting this sum - confirm which of the two is intended.
        MseTB.Text = peptidesForThisFile.Sum(x => x.Error).ToString();
        UpdateGlobalVisualization();
    }
    finally
    {
        //always re-enable the button, even if the refit or the redraw throws
        assignParamsButton.IsEnabled = true;
    }
}
/// <summary>
/// Determines the pool rate constants (kst, kbt, kao) for one file, refits every peptide's kbi against
/// them, estimates per-peptide confidence intervals, and writes the peptide turnover results table.
/// </summary>
/// <param name="peptides">Peptides of one file. The list is mutated: when
/// <c>settings.RemoveMessyPeptides</c> is set, peptides with any point more than 0.1 away from the
/// model prediction are removed.</param>
/// <param name="filePath">Path of the input file. Results are written to a folder named after the file
/// (without extension) plus "_Results", next to the input file.</param>
/// <param name="settings">User settings; only <c>RemoveMessyPeptides</c> is read here.</param>
/// <returns>The pool parameters that were selected.</returns>
public static PoolParameters RegressionAnalysis(List<PeptideTurnoverObject> peptides, string filePath, Settings settings)
{
    //middle half of the peptides when ordered by kbi
    List<PeptideTurnoverObject> SelectInnerQuartile()
    {
        return peptides.OrderBy(x => x.Kbi).ToList().GetRange(peptides.Count / 4, peptides.Count / 2);
    }

    //coarse pass followed by two finer passes for the given pool constants
    void RefineKbi(double kst, double kbt, double kao, List<PeptideTurnoverObject> targets)
    {
        UpdateKbi(kst, kbt, kao, targets, ITERATIVE_SHIFT);
        UpdateKbi(kst, kbt, kao, targets, ITERATIVE_SHIFT / 10);
        UpdateKbi(kst, kbt, kao, targets, ITERATIVE_SHIFT / 100);
    }

    string directory = Directory.GetParent(filePath).FullName;
    string filename = Path.GetFileNameWithoutExtension(filePath);

    //starting values taken from figure 6b of Guan et al., 2011, Anal. Chem.
    //"Compartment Modeling for Mammalian Protein Turnover Studies by Stable Isotope Metabolic Labeling"
    double ksto = 0.7;
    double kbto = 0.026;
    double kaoo = 2.0;

    //get approximate half-lives for each peptide
    UpdateKbi(ksto, kbto, kaoo, peptides);

    //training set: from the inner quartiles, prefer peptides with the most timepoints, then the highest intensity
    List<PeptideTurnoverObject> innerQuartilePeptides = SelectInnerQuartile();
    List<PeptideTurnoverObject> peptidesToDetermineStartingParameters = innerQuartilePeptides
        .OrderByDescending(x => x.Timepoints.Length)
        .ThenByDescending(x => x.TotalIntensity)
        .ToList();
    peptidesToDetermineStartingParameters = peptidesToDetermineStartingParameters.GetRange(
        0, Math.Min(NUM_TRAINING_POINTS, peptidesToDetermineStartingParameters.Count));

    //fit each training peptide on its own, starting from the literature values
    List<double> kstList = new List<double>();
    List<double> kbtList = new List<double>();
    List<double> kaoList = new List<double>();
    foreach (PeptideTurnoverObject peptide in peptidesToDetermineStartingParameters)
    {
        PoolParameters parameters = new PoolParameters(ksto, kbto, kaoo);
        OptimizeFit(parameters, new List<PeptideTurnoverObject> { peptide });
        kstList.Add(parameters.Kst);
        kbtList.Add(parameters.Kbt);
        kaoList.Add(parameters.Kao);
    }

    //start the shared fit from the median of the individual fits
    PoolParameters bestVariables = new PoolParameters(kstList.Median(), kbtList.Median(), kaoList.Median());

    //train on test peptides
    OptimizeFit(bestVariables, peptidesToDetermineStartingParameters);

    //train on inner quartile peptides
    OptimizeFit(bestVariables, innerQuartilePeptides);

    //NOTE(review): the values are captured BEFORE the next OptimizeFit call, so the refit and the
    //messy-peptide filter below use the pre-call parameters. Kept as in the original; confirm intent.
    double bestKst = bestVariables.Kst;
    double bestKbt = bestVariables.Kbt;
    double bestKao = bestVariables.Kao;
    OptimizeFit(bestVariables, innerQuartilePeptides);

    //For each peptide, apply the kst, kbt, and kao, but optimize for the kbi
    RefineKbi(bestKst, bestKbt, bestKao, peptides);

    //remove messy peptides
    if (settings.RemoveMessyPeptides)
    {
        //iterate backwards so RemoveAt does not shift the items still to be visited
        for (int i = peptides.Count - 1; i >= 0; i--)
        {
            PeptideTurnoverObject peptide = peptides[i];
            double[] predictions = PredictRelativeFractionUsingThreeCompartmentModel(bestKst, bestKbt, bestKao, peptide.Kbi, peptide.Timepoints);
            double[] actualValues = peptide.RelativeFractions;
            for (int j = 0; j < predictions.Length; j++)
            {
                if (Math.Abs(actualValues[j] - predictions[j]) > 0.1)
                {
                    peptides.RemoveAt(i);
                    break;
                }
            }
        }

        //update inner quartile peptides
        innerQuartilePeptides = SelectInnerQuartile();
    }

    //train on inner quartile peptides again
    OptimizeFit(bestVariables, innerQuartilePeptides);

    //grid analysis
    //scale kst, kbt, and kao by each ratio to see if we're in a local minimum;
    //repeat until the unscaled point (ratio 1) is the best one in the grid
    double[] ratiosForIteration = { 0.1, 0.2, 0.33, 0.5, 0.75, 0.9, 1, 1.1, 1.25, 1.5, 2, 4 };
    while (true)
    {
        double bestError = double.PositiveInfinity;
        foreach (double kstRatio in ratiosForIteration)
        {
            double kstCurrent = bestVariables.Kst * kstRatio;
            if (!(kstCurrent < MAX_KST_VALUE && kstCurrent > MIN_PARAMETER_VALUE))
            {
                continue;
            }

            foreach (double kbtRatio in ratiosForIteration)
            {
                double kbtCurrent = bestVariables.Kbt * kbtRatio;
                if (!(kbtCurrent < MAX_KBT_VALUE && kbtCurrent > MIN_PARAMETER_VALUE))
                {
                    continue;
                }

                foreach (double kaoRatio in ratiosForIteration)
                {
                    double kaoCurrent = bestVariables.Kao * kaoRatio;
                    if (!(kaoCurrent < MAX_KAO_VALUE && kaoCurrent > MIN_PARAMETER_VALUE))
                    {
                        continue;
                    }

                    RefineKbi(kstCurrent, kbtCurrent, kaoCurrent, innerQuartilePeptides);
                    double currentError = innerQuartilePeptides.Sum(x => x.Error);
                    if (currentError < bestError)
                    {
                        bestError = currentError;
                        bestKst = kstCurrent;
                        bestKbt = kbtCurrent;
                        bestKao = kaoCurrent;
                    }
                }
            }
        }

        if (bestVariables.Kst.Equals(bestKst) && bestVariables.Kbt.Equals(bestKbt) && bestVariables.Kao.Equals(bestKao))
        {
            break;
        }

        bestVariables.Kst = bestKst;
        bestVariables.Kbt = bestKbt;
        bestVariables.Kao = bestKao;

        //train on all inner quartile peptides again
        OptimizeFit(bestVariables, innerQuartilePeptides);
    }

    bestKst = bestVariables.Kst;
    bestKbt = bestVariables.Kbt;
    bestKao = bestVariables.Kao;

    //For each peptide, apply the kst, kbt, and kao, and optimize for the kbi
    RefineKbi(bestKst, bestKbt, bestKao, peptides);

    //use the monte carlo method to estimate the 95% confidence interval
    Parallel.ForEach(Partitioner.Create(0, peptides.Count),
        new ParallelOptions { MaxDegreeOfParallelism = 8 },
        (partitionRange, loopState) =>
        {
            for (int i = partitionRange.Item1; i < partitionRange.Item2; i++)
            {
                UpdateKbiConfidenceInterval(bestKst, bestKbt, bestKao, peptides[i], ITERATIVE_SHIFT);
            }
        });

    //output each peptide with its sequence, half-life, 95% confidence interval, and protein
    List<string> linesToWrite = new List<string>
    {
        "Peptide\tProtein\tProteoform\tHalf-life\tLowerConfidenceInterval\tUpperConfidenceInterval\tError (MSE)\tSummed Intensity\tNumber of Ratios"
    };

    //half-life = ln(2)/kbi, so the high kbi bound gives the lower half-life bound and vice versa
    double ln2 = Math.Log(2, Math.E);
    string previousFullSeq = "";
    foreach (PeptideTurnoverObject peptide in peptides.OrderBy(x => x.FullSequence))
    {
        //after sorting, duplicates are adjacent; only the first entry per sequence is written
        if (previousFullSeq.Equals(peptide.FullSequence))
        {
            continue;
        }

        previousFullSeq = peptide.FullSequence;
        linesToWrite.Add(string.Join("\t",
            peptide.FullSequence,
            peptide.Protein,
            peptide.Proteoform,
            (ln2 / peptide.Kbi).ToString(),
            (ln2 / peptide.HighKbi).ToString(),
            (ln2 / peptide.LowKbi).ToString(),
            peptide.Error.ToString(),
            peptide.TotalIntensity.ToString(),
            peptide.Timepoints.Length.ToString()));
    }

    //make sure the results folder exists; File.WriteAllLines does not create missing directories
    string resultsDirectory = Path.Combine(directory, filename + "_Results");
    Directory.CreateDirectory(resultsDirectory);
    File.WriteAllLines(Path.Combine(resultsDirectory, filename + "_PeptideTurnoverResults.tsv"), linesToWrite);

    return new PoolParameters(bestKst, bestKbt, bestKao);
}
/// <summary>
/// Iteratively adjusts Kst, Kbt and Kao on <paramref name="bestVariables"/> to lower the summed
/// peptide Error. Each pass probes one ITERATIVE_SHIFT step up (then down) for each parameter in turn,
/// optionally tries a larger step scaled by the size of the improvement, and finally tests all
/// per-parameter moves combined. The loop ends after a pass in which no single-parameter probe improved.
/// </summary>
/// <param name="bestVariables">Starting parameters; overwritten in place with the best set found.</param>
/// <param name="peptides">Peptides whose Error values are summed after every UpdateKbi call.
/// On exit they were last updated by an UpdateKbi call inside the loop (if the loop ran at all).</param>
public static void OptimizeFit(PoolParameters bestVariables, List <PeptideTurnoverObject> peptides)
{
    bool optimizing = true;
    double updatedError = 0;
    bool increaseParameter = true;
    double kst = bestVariables.Kst;
    double kbt = bestVariables.Kbt;
    double kao = bestVariables.Kao;

    //baseline error at the starting parameters
    UpdateKbi(kst, kbt, kao, peptides, ITERATIVE_SHIFT);
    double previousError = peptides.Sum(x => x.Error);
    double bestError = previousError;

    while (optimizing)
    {
        optimizing = false;

        //every probe in this pass starts from the best parameters found so far
        kst = bestVariables.Kst;
        kbt = bestVariables.Kbt;
        kao = bestVariables.Kao;

        //accumulates the chosen move for each parameter so the combination can be tested at the end
        PoolParameters variables = new PoolParameters(kst, kbt, kao);

        //KST
        if (kst + ITERATIVE_SHIFT < MAX_KST_VALUE) // Math.Min(MAX_KST_VALUE, kao))
        {
            UpdateKbi(kst + ITERATIVE_SHIFT, kbt, kao, peptides, ITERATIVE_SHIFT);
            updatedError = peptides.Sum(x => x.Error);
            increaseParameter = true;
        }
        else
        {
            updatedError = double.PositiveInfinity;
        }

        //if not a good move, try going the other way
        if (!(updatedError < previousError) && kst - ITERATIVE_SHIFT > MIN_PARAMETER_VALUE)//Math.Max(MIN_PARAMETER_VALUE, kbt))
        {
            UpdateKbi(kst - ITERATIVE_SHIFT, kbt, kao, peptides, ITERATIVE_SHIFT);
            updatedError = peptides.Sum(x => x.Error);
            increaseParameter = false;
        }

        if (previousError > updatedError)
        {
            optimizing = true;

            //improvement per unit shift, used as the size of a candidate larger step
            double diff = (previousError - updatedError) / ITERATIVE_SHIFT;
            if (diff > ITERATIVE_SHIFT * 3)
            {
                //snap the larger step to a multiple of ITERATIVE_SHIFT
                diff = Math.Round(diff / ITERATIVE_SHIFT) * ITERATIVE_SHIFT;
                double tempError = double.PositiveInfinity;
                if (increaseParameter)
                {
                    if (kst + diff < kao)//Math.Min(MAX_KST_VALUE, kao)) //kst should be less than kao
                    {
                        UpdateKbi(kst + diff, kbt, kao, peptides, ITERATIVE_SHIFT);
                        tempError = peptides.Sum(x => x.Error);
                    }
                }
                else
                {
                    if (kst - diff > MIN_PARAMETER_VALUE)//Math.Max(MIN_PARAMETER_VALUE, kbt))
                    {
                        UpdateKbi(kst - diff, kbt, kao, peptides, ITERATIVE_SHIFT);
                        tempError = peptides.Sum(x => x.Error);
                    }
                }

                //keep the larger step only if it is at least as good as the single-shift step
                if (tempError > updatedError)
                {
                    diff = ITERATIVE_SHIFT;
                }
                else
                {
                    updatedError = tempError;
                }
            }
            else
            {
                diff = ITERATIVE_SHIFT;
            }

            if (increaseParameter)
            {
                variables.Kst += diff;
            }
            else
            {
                variables.Kst -= diff;
            }

            if (updatedError < bestError)
            {
                bestError = updatedError;
                bestVariables.Kst = variables.Kst;
                bestVariables.Kbt = kbt;
                bestVariables.Kao = kao;
            }
        }

        //KBT (probed from the pass-start kst/kao, not from the Kst move chosen above)
        if (kbt + ITERATIVE_SHIFT < MAX_KBT_VALUE)//Math.Min(MAX_KBT_VALUE, kst))
        {
            UpdateKbi(kst, kbt + ITERATIVE_SHIFT, kao, peptides, ITERATIVE_SHIFT);
            updatedError = peptides.Sum(x => x.Error);
            increaseParameter = true;
        }
        else
        {
            updatedError = double.PositiveInfinity;
        }

        if (!(updatedError < previousError) && kbt - ITERATIVE_SHIFT > MIN_PARAMETER_VALUE)
        {
            UpdateKbi(kst, kbt - ITERATIVE_SHIFT, kao, peptides, ITERATIVE_SHIFT);
            updatedError = peptides.Sum(x => x.Error);
            increaseParameter = false;
        }

        if (previousError > updatedError)
        {
            optimizing = true;
            double diff = (previousError - updatedError) / ITERATIVE_SHIFT;
            if (diff > ITERATIVE_SHIFT * 3)
            {
                diff = Math.Round(diff / ITERATIVE_SHIFT) * ITERATIVE_SHIFT;
                double tempError = double.PositiveInfinity;
                if (increaseParameter)
                {
                    if (kbt + diff < Math.Min(MAX_KBT_VALUE, kst)) //kbt should be less than kst
                    {
                        UpdateKbi(kst, kbt + diff, kao, peptides, ITERATIVE_SHIFT);
                        tempError = peptides.Sum(x => x.Error);
                    }
                }
                else
                {
                    if (kbt - diff > MIN_PARAMETER_VALUE)
                    {
                        UpdateKbi(kst, kbt - diff, kao, peptides, ITERATIVE_SHIFT);
                        tempError = peptides.Sum(x => x.Error);
                    }
                }

                if (tempError > updatedError)
                {
                    diff = ITERATIVE_SHIFT;
                }
                else
                {
                    updatedError = tempError;
                }
            }
            else
            {
                diff = ITERATIVE_SHIFT;
            }

            if (increaseParameter)
            {
                variables.Kbt += diff;
            }
            else
            {
                variables.Kbt -= diff;
            }

            //a better Kbt-only move replaces any Kst-only move recorded above (Kst goes back to the pass-start value)
            if (updatedError < bestError)
            {
                bestError = updatedError;
                bestVariables.Kst = kst;
                bestVariables.Kbt = variables.Kbt;
                bestVariables.Kao = kao;
            }
        }

        //KAO (probed from the pass-start kst/kbt)
        if (kao + ITERATIVE_SHIFT < MAX_KAO_VALUE)
        {
            UpdateKbi(kst, kbt, kao + ITERATIVE_SHIFT, peptides, ITERATIVE_SHIFT);
            updatedError = peptides.Sum(x => x.Error);
            increaseParameter = true;
        }
        else
        {
            updatedError = double.PositiveInfinity;
        }

        if (!(updatedError < previousError) && kao - ITERATIVE_SHIFT > MIN_PARAMETER_VALUE)//Math.Max(MIN_PARAMETER_VALUE, kst)) //kao should be greater than kst
        {
            UpdateKbi(kst, kbt, kao - ITERATIVE_SHIFT, peptides, ITERATIVE_SHIFT);
            updatedError = peptides.Sum(x => x.Error);
            increaseParameter = false;
        }

        if (previousError > updatedError)
        {
            optimizing = true;
            double diff = (previousError - updatedError) / ITERATIVE_SHIFT;
            if (diff > ITERATIVE_SHIFT * 3)
            {
                diff = Math.Round(diff / ITERATIVE_SHIFT) * ITERATIVE_SHIFT;
                double tempError = double.PositiveInfinity;
                if (increaseParameter)
                {
                    if (kao + diff < MAX_KAO_VALUE)
                    {
                        UpdateKbi(kst, kbt, kao + diff, peptides, ITERATIVE_SHIFT);
                        tempError = peptides.Sum(x => x.Error);
                    }
                }
                else
                {
                    if (kao - diff > MIN_PARAMETER_VALUE)//Math.Max(MIN_PARAMETER_VALUE, kst)) //kao should be greater than kst
                    {
                        UpdateKbi(kst, kbt, kao - diff, peptides, ITERATIVE_SHIFT);
                        tempError = peptides.Sum(x => x.Error);
                    }
                }

                if (tempError > updatedError)
                {
                    diff = ITERATIVE_SHIFT;
                }
                else
                {
                    updatedError = tempError;
                }
            }
            else
            {
                diff = ITERATIVE_SHIFT;
            }

            if (increaseParameter)
            {
                variables.Kao += diff;
            }
            else
            {
                variables.Kao -= diff;
            }

            if (updatedError < bestError)
            {
                bestError = updatedError;
                bestVariables.Kst = kst;
                bestVariables.Kbt = kbt;
                bestVariables.Kao = variables.Kao;
            }
        }

        //test the new gradient to ensure we don't get trapped
        //(all per-parameter moves applied together)
        UpdateKbi(variables.Kst, variables.Kbt, variables.Kao, peptides, ITERATIVE_SHIFT);
        updatedError = peptides.Sum(x => x.Error);
        if (updatedError < bestError)
        {
            bestError = updatedError;
            bestVariables.Kst = variables.Kst;
            bestVariables.Kbt = variables.Kbt;
            bestVariables.Kao = variables.Kao;
        }
        else //reset to the old
        {
            //re-run UpdateKbi so the peptides reflect the best parameters rather than the rejected combination
            UpdateKbi(bestVariables.Kst, bestVariables.Kbt, bestVariables.Kao, peptides, ITERATIVE_SHIFT);
        }

        //the next pass measures improvement against the best error so far
        previousError = bestError;
    }
}
/// <summary>
/// Writes a tab-separated session file that LoadExistingResults can read back: one line of pool
/// parameters, then the peptide rows, a "-" line, the protein rows, a "-" line, and the proteoform rows.
/// </summary>
/// <param name="fileToWrite">Destination path.</param>
/// <param name="poolParams">Kst, Kbt and Kao written as the first line.</param>
/// <param name="peptides">Rows of the first section (these also carry Proteoform and Error columns).</param>
/// <param name="proteins">Rows of the second section.</param>
/// <param name="proteoforms">Rows of the third section.</param>
public static void WriteQuickLoadFile(string fileToWrite, PoolParameters poolParams, List<PeptideTurnoverObject> peptides, List<PeptideTurnoverObject> proteins, List<PeptideTurnoverObject> proteoforms)
{
    //the eight leading columns are identical for all three sections; array values are ';'-joined
    string LeadingColumns(PeptideTurnoverObject entry)
    {
        return string.Join("\t",
            entry.FullSequence,
            string.Join(';', entry.Timepoints),
            string.Join(';', entry.RelativeFractions),
            string.Join(';', entry.Filenames),
            string.Join(';', entry.Intensities),
            entry.TotalIntensity.ToString(),
            entry.FileName,
            entry.Protein);
    }

    //protein and proteoform rows end with kbi and its two bounds
    string SummaryRow(PeptideTurnoverObject entry)
    {
        return LeadingColumns(entry) + '\t' + entry.Kbi.ToString() + '\t' + entry.LowKbi.ToString() + '\t' + entry.HighKbi.ToString();
    }

    const string sectionSeparator = "-";
    List<string> output = new List<string>();

    //-pool parameters
    output.Add(string.Join("\t", poolParams.Kst.ToString(), poolParams.Kbt.ToString(), poolParams.Kao.ToString()));

    //-Peptides
    foreach (PeptideTurnoverObject peptide in peptides)
    {
        output.Add(LeadingColumns(peptide)
            + '\t' + peptide.Proteoform
            + '\t' + peptide.Kbi.ToString()
            + '\t' + peptide.Error.ToString()
            + '\t' + peptide.LowKbi.ToString()
            + '\t' + peptide.HighKbi.ToString());
    }
    output.Add(sectionSeparator);

    //-Proteins
    output.AddRange(proteins.Select(SummaryRow));
    output.Add(sectionSeparator);

    //-Proteoforms
    output.AddRange(proteoforms.Select(SummaryRow));

    File.WriteAllLines(fileToWrite, output);
}
/// <summary>
/// Reads a saved-session file (pool parameters, then peptide, protein and proteoform sections separated
/// by single-column lines) and adds its contents to the supplied collections.
/// </summary>
/// <param name="inputFile">Original input file; used as the dictionary key and as the file name of every loaded object.</param>
/// <param name="fileToLoad">Path of the saved-session file to read.</param>
/// <param name="poolParameterDictionary">Receives the pool parameters under <paramref name="inputFile"/>.</param>
/// <param name="peptides">Receives the loaded peptides.</param>
/// <param name="proteins">Receives the loaded proteins.</param>
/// <param name="proteoforms">Receives the loaded proteoforms.</param>
/// <returns>True on success; false if the file could not be read or parsed. The whole file is parsed
/// before anything is added, so a parse failure leaves the supplied collections untouched.</returns>
public static bool LoadExistingResults(string inputFile, string fileToLoad, Dictionary<string, PoolParameters> poolParameterDictionary, ObservableCollection<PeptideTurnoverObject> peptides, List<PeptideTurnoverObject> proteins, List<PeptideTurnoverObject> proteoforms)
{
    //parses a ';'-separated list of numbers
    double[] ParseDoubles(string joined)
    {
        return joined.Split(';').Select(x => Convert.ToDouble(x)).ToArray();
    }

    //builds a protein/proteoform entry (both sections share one column layout)
    PeptideTurnoverObject ParseSummaryRow(string[] line)
    {
        PeptideTurnoverObject entry = new PeptideTurnoverObject(
            line[0],
            ParseDoubles(line[1]),
            ParseDoubles(line[2]),
            line[3].Split(';'),
            ParseDoubles(line[4]),
            Convert.ToDouble(line[5]),
            inputFile, //file (column 6 of the saved row is ignored in favor of the caller's path)
            line[7]);
        entry.Kbi = Convert.ToDouble(line[8]);
        entry.LowKbi = Convert.ToDouble(line[9]);
        entry.HighKbi = Convert.ToDouble(line[10]);
        return entry;
    }

    try
    {
        string[] lines = File.ReadAllLines(fileToLoad);

        //-pool parameters
        double[] poolParams = lines[0].Split('\t').Select(x => Convert.ToDouble(x)).ToArray();
        PoolParameters loadedParams = new PoolParameters(poolParams[0], poolParams[1], poolParams[2]);

        //parse into temporary lists so a malformed line cannot leave the caller's collections half filled
        List<PeptideTurnoverObject> loadedPeptides = new List<PeptideTurnoverObject>();
        List<PeptideTurnoverObject> loadedProteins = new List<PeptideTurnoverObject>();
        List<PeptideTurnoverObject> loadedProteoforms = new List<PeptideTurnoverObject>();

        //-peptides
        int i = 1;
        for (; i < lines.Length; i++)
        {
            string[] line = lines[i].Split('\t');
            if (line.Length == 1)
            {
                //single-column line marks the end of the section; step past it
                i++;
                break;
            }

            PeptideTurnoverObject peptide = new PeptideTurnoverObject(
                line[0],
                ParseDoubles(line[1]),
                ParseDoubles(line[2]),
                line[3].Split(';'),
                ParseDoubles(line[4]),
                Convert.ToDouble(line[5]),
                inputFile, //file
                line[7],
                line[8]);
            peptide.Kbi = Convert.ToDouble(line[9]);
            peptide.Error = Convert.ToDouble(line[10]);
            peptide.LowKbi = Convert.ToDouble(line[11]);
            peptide.HighKbi = Convert.ToDouble(line[12]);
            loadedPeptides.Add(peptide);
        }

        //-Proteins
        for (; i < lines.Length; i++)
        {
            string[] line = lines[i].Split('\t');
            if (line.Length == 1)
            {
                i++;
                break;
            }

            loadedProteins.Add(ParseSummaryRow(line));
        }

        //-Proteoforms (runs to the end of the file)
        for (; i < lines.Length; i++)
        {
            loadedProteoforms.Add(ParseSummaryRow(lines[i].Split('\t')));
        }

        //everything parsed; commit in the same order the data was read
        poolParameterDictionary[inputFile] = loadedParams;
        foreach (PeptideTurnoverObject peptide in loadedPeptides)
        {
            peptides.Add(peptide);
        }
        proteins.AddRange(loadedProteins);
        proteoforms.AddRange(loadedProteoforms);

        return true;
    }
    catch
    {
        //deliberately best-effort: any read/parse problem is reported to the caller as "could not load"
        return false;
    }
}
/// <summary>
/// Redraws the precision plot: measured relative fractions against half-life (one scatter series per
/// timepoint), overlaid with the model's predicted curve for each timepoint.
/// </summary>
/// <param name="peptidesToPlot">Entries to plot; when empty the plot is only cleared.</param>
/// <param name="poolParams">Kst, Kbt and Kao used for the predicted curves.</param>
private void PlotPrecisionScatterPlot(List<PeptideTurnoverObject> peptidesToPlot, PoolParameters poolParams)
{
    PrecisionPlot.plt.Clear();
    PrecisionPlot.plt.GetPlottables().Clear();
    if (peptidesToPlot.Count == 0)
    {
        return;
    }

    //bucket every (half-life, relative fraction) measurement by its timepoint
    var measurementsByTimepoint = new Dictionary<double, List<(double halfLife, double relativeFraction)>>();
    foreach (PeptideTurnoverObject peptide in peptidesToPlot)
    {
        double halfLife = Math.Log(2, Math.E) / peptide.Kbi;
        for (int t = 0; t < peptide.Timepoints.Length; t++)
        {
            double timepoint = peptide.Timepoints[t];
            if (!measurementsByTimepoint.TryGetValue(timepoint, out var bucket))
            {
                bucket = new List<(double halfLife, double relativeFraction)>();
                measurementsByTimepoint[timepoint] = bucket;
            }
            bucket.Add((halfLife, peptide.RelativeFractions[t]));
        }
    }

    //plot all peptide data, one series per timepoint in ascending order
    double[] timepoints = measurementsByTimepoint.Keys.OrderBy(x => x).ToArray();
    foreach (double timepoint in timepoints)
    {
        List<(double halfLife, double relativeFraction)> measurements = measurementsByTimepoint[timepoint];
        double[] xs = measurements.Select(m => m.halfLife).ToArray();
        double[] ys = measurements.Select(m => m.relativeFraction).ToArray();
        PrecisionPlot.plt.PlotScatter(xs, ys, lineWidth: 0, markerSize: 3, label: timepoint.ToString(), markerShape: ScottPlot.MarkerShape.openCircle);
    }

    //plot fits for each timepoint on top of the peptide data
    //half-life axis: 0.2, 0.4, ... in steps of 0.2 (never zero, so every point is evaluated)
    double[] halflives = new double[2499];
    for (int h = 0; h < halflives.Length; h++)
    {
        halflives[h] = h / 5.0 + 0.2;
    }

    //curves[t][h] = predicted relative fraction at timepoint t for half-life h
    double[][] curves = new double[timepoints.Length][];
    for (int t = 0; t < timepoints.Length; t++)
    {
        curves[t] = new double[halflives.Length];
    }

    for (int h = 0; h < halflives.Length; h++)
    {
        //halflife = ln(2)/kbi
        //kbi = ln(2)/halflife
        double[] predicted = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(
            poolParams.Kst, poolParams.Kbt, poolParams.Kao, Math.Log(2, Math.E) / halflives[h], timepoints);
        for (int t = 0; t < timepoints.Length; t++)
        {
            curves[t][h] = predicted[t];
        }
    }

    PrecisionPlot.plt.Axis(0, 50, 0, 1);
    foreach (double[] curve in curves)
    {
        PrecisionPlot.plt.PlotScatter(halflives, curve, Color.Black, markerSize: 0);
    }

    bool showLegend = DisplayLegendCheckBox.IsChecked.Value;
    if (showLegend)
    {
        PrecisionPlot.plt.Legend(location: ScottPlot.legendLocation.upperRight);
    }
    else
    {
        PrecisionPlot.plt.Legend(false);
    }

    PrecisionPlot.plt.YLabel("Lys0 / LysTotal");
    PrecisionPlot.plt.XLabel("Half-life (Days)");
    PrecisionPlot.plt.Axis(0, 50, 0, 1);

    //rendered twice, as in the original
    PrecisionPlot.Render();
    PrecisionPlot.Render();
}
/// <summary>
/// Background-worker entry point. First tries to fast-load a saved session for every input file; if any
/// file has no usable saved session, all files are analyzed from scratch (read, fit, summarize, save,
/// compare across files). Progress and the final state are reported through the worker.
/// </summary>
/// <param name="sender">The BackgroundWorker running this handler; used for progress reporting.</param>
/// <param name="e">Event data (unused).</param>
private void Worker_DoWork(object sender, DoWorkEventArgs e)
{
    //cast once; a direct cast fails loudly instead of producing a null dereference later
    BackgroundWorker worker = (BackgroundWorker)sender;

    Dispatcher.Invoke(() =>
    {
        FilesToDisplayObservableCollection.Clear();
        FilesToHideObservableCollection.Clear();
    });

    //check if we can load old results
    List<string> quantifiedPeptideInputFiles = DataFilesObservableCollection.Select(x => x.FilePath).ToList();
    foreach (string file in DataFilesObservableCollection.Select(x => x.FilePath))
    {
        string inputFile = file;
        string directory = Directory.GetParent(inputFile).FullName;
        string filename = Path.GetFileNameWithoutExtension(inputFile);

        //can be either the fast load file or the original input
        string fastLoadFile = Path.Combine(directory, filename + "_ApplETurnoverSavedSession.tsv");
        if (!File.Exists(fastLoadFile))
        {
            //something went wrong. Bail, reset, and run normally
            quantifiedPeptideInputFiles = ResetFastLoadAttempt();
            break;
        }

        Dispatcher.Invoke(() =>
        {
            FilesToDisplayObservableCollection.Add(inputFile);
        });
        quantifiedPeptideInputFiles.Remove(inputFile); //remove so we don't analyze it again
        quantifiedPeptideInputFiles.Remove(fastLoadFile); //remove so we don't analyze it again

        if (!DataPreparation.LoadExistingResults(inputFile, fastLoadFile, PoolParameterDictionary, AllPeptides, AnalyzedProteins, AnalyzedProteoforms))
        {
            //something went wrong. Bail, reset, and run normally
            quantifiedPeptideInputFiles = ResetFastLoadAttempt();
            break;
        }
    }

    if (quantifiedPeptideInputFiles.Count != 0 && DatabasesObservableCollection.Count != 0)
    {
        AllowFileDrop = false;
        Dispatcher.Invoke(() =>
        {
            ToggleButtons();
            FilesToDisplayObservableCollection.Clear();
            FilesToHideObservableCollection.Clear();
        });

        try
        {
            worker.ReportProgress(0, "Starting");
            Settings settings = GetUserSpecifiedSettings();
            string maxStatus = quantifiedPeptideInputFiles.Count.ToString();
            int status = 1;

            //reading database
            worker.ReportProgress(0, "Reading Database");
            List<Protein> theoreticalProteins = DataPreparation.LoadProteins(DatabasesObservableCollection.Select(x => x.FilePath).ToList()).OrderBy(x => x.Accession).ToList();

            Dispatcher.Invoke(() =>
            {
                PeptidesToDisplay.Clear();
                AllPeptides.Clear();
            });

            foreach (string originalFile in quantifiedPeptideInputFiles)
            {
                //remove the saved-session suffix if there was a failed load in a multi-file analysis
                string file = originalFile.Replace("_ApplETurnoverSavedSession", "");

                //Load data, filter, process, parsimony
                worker.ReportProgress(0, "Reading File " + status.ToString() + "/" + maxStatus + "...");
                List<PeptideTurnoverObject> peptides = DataPreparation.ReadData(file, settings, theoreticalProteins);
                if (peptides.Count == 0)
                {
                    //caught below and shown to the user
                    throw new InvalidOperationException("No peptides were found for file: " + file + "; did you select the correct search engine?");
                }

                //Fit data to model, get half lives, confidence intervals
                worker.ReportProgress(0, "Analyzing File " + status.ToString() + "/" + maxStatus + "...");
                peptides = peptides.OrderBy(x => x.FullSequence).ToList();
                PoolParameters poolParams = NonLinearRegression.RegressionAnalysis(peptides, file, settings);

                //get protein info
                var proteinGroups = peptides.GroupBy(x => x.Protein).ToList();
                List<PeptideTurnoverObject> proteins = NonLinearRegression.GetProteinInfo(peptides, file, proteinGroups, "Protein");

                //get proteoform info
                var proteoformGroups = peptides.GroupBy(x => x.Proteoform).ToList();
                List<PeptideTurnoverObject> proteoforms = NonLinearRegression.GetProteinInfo(peptides, file, proteoformGroups, "Proteoform");

                AnalyzedProteins.AddRange(proteins);
                AnalyzedProteoforms.AddRange(proteoforms);

                //indexer rather than Add: an entry left over for this file (e.g. from a fast-load attempt)
                //is replaced instead of aborting the run with a duplicate-key exception
                PoolParameterDictionary[file] = poolParams;
                PlotFit(poolParams, Path.GetFileNameWithoutExtension(file) + " Free Amino Acids");

                //save results to allow for quick loading in the future
                string directory = Directory.GetParent(file).FullName;
                string filename = Path.GetFileNameWithoutExtension(file);
                string resultFile = Path.Combine(directory, filename + "_ApplETurnoverSavedSession.tsv");
                DataPreparation.WriteQuickLoadFile(resultFile, poolParams, peptides, proteins, proteoforms);

                //Add the peptides/proteins to the collection for viewing in the GUI
                Dispatcher.Invoke(() =>
                {
                    foreach (PeptideTurnoverObject peptide in peptides)
                    {
                        AllPeptides.Add(peptide);
                        PeptidesToDisplay.Add(peptide);
                    }
                    DisplayPeptidesDataGrid.Items.Refresh();
                    FilesToDisplayObservableCollection.Add(file);
                });
                status++;
            }

            worker.ReportProgress(0, "Running Statistics");
            TTest.CompareProteinsAcrossFiles(quantifiedPeptideInputFiles, AnalyzedProteins, PoolParameterDictionary);
            TTest.CompareProteoformsWithinFiles(quantifiedPeptideInputFiles, AnalyzedProteoforms, PoolParameterDictionary);
            worker.ReportProgress(0, "Finished!");
        }
        catch (Exception ex)
        {
            MessageBox.Show("Task failed: " + ex.Message);
            worker.ReportProgress(0, "Task failed");
        }

        AllowFileDrop = true;
        Dispatcher.Invoke(() =>
        {
            ToggleButtons();
        });
    }
    else if (DataFilesObservableCollection.Count() != 0 && AllPeptides.Count != 0) //we had files to analyze but we were able to fast load them
    {
        Dispatcher.Invoke(() =>
        {
            foreach (PeptideTurnoverObject peptide in AllPeptides)
            {
                PeptidesToDisplay.Add(peptide);
            }
        });
        worker.ReportProgress(0, "Finished!");
    }
    else
    {
        MessageBox.Show("Input files are missing. The run has been stopped.");
        worker.ReportProgress(0, "Task failed: Input missing files.");
    }
}
/// <summary>
/// Compares every pair of files protein-by-protein and writes one tab-separated report per pair
/// into a "StatisticalComparisons" folder located in the directory of the first file.
/// </summary>
/// <remarks>
/// For each protein found in both files of a pair, the relative fractions of each file are shifted to a
/// common timepoint (ln(2) divided by the average of the two Kbi values) and the two resulting sets are
/// compared with a Student t-test. Returns without doing anything when fewer than two files are supplied.
/// </remarks>
/// <param name="filenames">Files to compare; each one is used as a key into <paramref name="poolParameterDictionary"/>.</param>
/// <param name="allProteins">Protein-level results for all files; matched to a file through their FileName.</param>
/// <param name="poolParameterDictionary">Pool parameters (Kst, Kbt, Kao) for each file.</param>
public static void CompareProteinsAcrossFiles(List<string> filenames, List<PeptideTurnoverObject> allProteins, Dictionary<string, PoolParameters> poolParameterDictionary)
{
    if (filenames.Count < 2)
    {
        return;
    }

    string outputDirectory = Path.GetDirectoryName(filenames.First());
    string comparisonDirectory = Path.Combine(outputDirectory, "StatisticalComparisons");
    Directory.CreateDirectory(comparisonDirectory);

    // Shifts the measured relative fractions of one protein to a single common timepoint:
    // measured value - fit at the measured timepoint + fit at the common timepoint (both fits use the averaged kbi).
    double[] NormalizeRatios(PeptideTurnoverObject protein, PoolParameters pool, double kbi, double commonTimepoint)
    {
        // a timepoint array of the same length as the original, filled with the common timepoint
        double[] commonTimepoints = new double[protein.Timepoints.Length];
        for (int index = 0; index < commonTimepoints.Length; index++)
        {
            commonTimepoints[index] = commonTimepoint;
        }

        double[] expectedOriginalRatios = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(pool.Kst, pool.Kbt, pool.Kao, kbi, protein.Timepoints);
        double[] expectedUpdatedRatios = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(pool.Kst, pool.Kbt, pool.Kao, kbi, commonTimepoints);

        double[] normalizedRatios = new double[expectedOriginalRatios.Length];
        for (int index = 0; index < protein.RelativeFractions.Length; index++)
        {
            normalizedRatios[index] = protein.RelativeFractions[index] - expectedOriginalRatios[index] + expectedUpdatedRatios[index];
        }

        return normalizedRatios;
    }

    for (int i = 0; i < filenames.Count; i++)
    {
        string fileOne = filenames[i];
        PoolParameters paramsOne = poolParameterDictionary[fileOne];

        // proteins of the first file, sorted by name so the two lists can be merged in one pass
        List<PeptideTurnoverObject> proteinsForFileOne = allProteins.Where(x => fileOne.Equals(x.FileName)).OrderBy(x => x.Protein).ToList();

        for (int j = i + 1; j < filenames.Count; j++)
        {
            string fileTwo = filenames[j];
            PoolParameters paramsTwo = poolParameterDictionary[fileTwo];

            List<string> linesToWrite = new List<string>();
            // header
            linesToWrite.Add("Protein\tFold Change\tNeg. log(p-Value)\tHalf-life " + fileOne + "\tHalf-life " + fileTwo);

            List<PeptideTurnoverObject> proteinsForFileTwo = allProteins.Where(x => fileTwo.Equals(x.FileName)).OrderBy(x => x.Protein).ToList();

            // walk both sorted lists in parallel to find the proteins present in both files
            int a = 0;
            int b = 0;
            while (a < proteinsForFileOne.Count && b < proteinsForFileTwo.Count)
            {
                PeptideTurnoverObject proteinOne = proteinsForFileOne[a];
                PeptideTurnoverObject proteinTwo = proteinsForFileTwo[b];
                int comparison = proteinOne.Protein.CompareTo(proteinTwo.Protein);

                if (comparison < 0)
                {
                    a++;
                    continue;
                }
                if (comparison > 0)
                {
                    b++;
                    continue;
                }

                // Same protein in both files: t-test of the normalized ratios for all timepoints.
                // (A t-test of the Monte Carlo values was used previously, but it dramatically
                // overestimates the sample size.)
                double averageKbi = (proteinOne.Kbi + proteinTwo.Kbi) / 2;
                double normalizedHalfLife = Math.Log(2) / averageKbi; //this is the day all relative fractions are normalized to

                double[] normalizedRatiosOne = NormalizeRatios(proteinOne, paramsOne, averageKbi, normalizedHalfLife);
                double[] normalizedRatiosTwo = NormalizeRatios(proteinTwo, paramsTwo, averageKbi, normalizedHalfLife);

                double halfLifeOne = Math.Log(2) / proteinOne.Kbi;
                double halfLifeTwo = Math.Log(2) / proteinTwo.Kbi;
                double logFoldChange = Math.Log2(halfLifeTwo) - Math.Log2(halfLifeOne);

                // The sibling CompareProteoformsWithinFiles notes that the test "sometimes crashes if stdev is zero".
                // Report "NA" for that protein instead of aborting the comparison of every remaining protein and file.
                string negativeLogPValue;
                try
                {
                    Sample sampleOne = new Sample(normalizedRatiosOne);
                    Sample sampleTwo = new Sample(normalizedRatiosTwo);
                    TestResult result = Sample.StudentTTest(sampleOne, sampleTwo);
                    negativeLogPValue = (-1 * Math.Log10(result.Probability)).ToString();
                }
                catch (Exception)
                {
                    negativeLogPValue = "NA";
                }

                linesToWrite.Add(proteinOne.Protein + "\t" + logFoldChange.ToString() + '\t' + negativeLogPValue + '\t' + halfLifeOne.ToString() + '\t' + halfLifeTwo.ToString());

                a++;
                b++;
            }

            string comparisonFileName = "Comparison_" + Path.GetFileNameWithoutExtension(fileOne) + "vs" + Path.GetFileNameWithoutExtension(fileTwo) + ".tsv";
            File.WriteAllLines(Path.Combine(comparisonDirectory, comparisonFileName), linesToWrite);
        }
    }
}
/// <summary>
/// For each file, compares proteoforms of the same family that share a localized site and writes the
/// results to "&lt;file&gt;_Results/&lt;file&gt;_ProteoformAnalysis.tsv" next to the input file.
/// </summary>
/// <remarks>
/// A family is every entry whose Proteoform has the same text before the first '_'. Within a family, two
/// entries are compared when both Proteoform strings split on '@' into exactly two parts and the second
/// parts are equal. The comparison is a Student t-test of the relative fractions after they are shifted to
/// a common timepoint (ln(2) divided by the average of the two Kbi values).
/// </remarks>
/// <param name="filenames">Files to analyze; each one is used as a key into <paramref name="poolParameterDictionary"/>.</param>
/// <param name="allProteins">Proteoform-level results for all files; matched to a file through their FileName.</param>
/// <param name="poolParameterDictionary">Pool parameters (Kst, Kbt, Kao) for each file.</param>
public static void CompareProteoformsWithinFiles(List<string> filenames, List<PeptideTurnoverObject> allProteins, Dictionary<string, PoolParameters> poolParameterDictionary)
{
    // Shifts the measured relative fractions of one proteoform to a single common timepoint:
    // measured value - fit at the measured timepoint + fit at the common timepoint (both fits use the averaged kbi).
    double[] NormalizeRatios(PeptideTurnoverObject proteoform, PoolParameters pool, double kbi, double commonTimepoint)
    {
        // a timepoint array of the same length as the original, filled with the common timepoint
        double[] commonTimepoints = new double[proteoform.Timepoints.Length];
        for (int index = 0; index < commonTimepoints.Length; index++)
        {
            commonTimepoints[index] = commonTimepoint;
        }

        double[] expectedOriginalRatios = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(pool.Kst, pool.Kbt, pool.Kao, kbi, proteoform.Timepoints);
        double[] expectedUpdatedRatios = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(pool.Kst, pool.Kbt, pool.Kao, kbi, commonTimepoints);

        double[] normalizedRatios = new double[expectedOriginalRatios.Length];
        for (int index = 0; index < proteoform.RelativeFractions.Length; index++)
        {
            normalizedRatios[index] = proteoform.RelativeFractions[index] - expectedOriginalRatios[index] + expectedUpdatedRatios[index];
        }

        return normalizedRatios;
    }

    for (int fileIndex = 0; fileIndex < filenames.Count; fileIndex++)
    {
        string filename = filenames[fileIndex];
        PoolParameters poolParams = poolParameterDictionary[filename];

        // sorted by proteoform name so that members of one family sit next to each other
        List<PeptideTurnoverObject> proteinsForThisFile = allProteins.Where(x => filename.Equals(x.FileName)).OrderBy(x => x.Proteoform).ToList();

        List<string> linesToWrite = new List<string>();
        linesToWrite.Add("Proteoform A\tProteoform B\tHalf-life A\tHalf-life B\tLog2(Fold Change)\tNeg. log(p-Value)");

        int familyStart = 0;
        while (familyStart < proteinsForThisFile.Count)
        {
            string currentProtein = proteinsForThisFile[familyStart].Proteoform.Split('_')[0];

            // find the first index that no longer belongs to this proteoform family
            int familyEnd = familyStart + 1;
            while (familyEnd < proteinsForThisFile.Count
                && currentProtein.Equals(proteinsForThisFile[familyEnd].Proteoform.Split('_')[0]))
            {
                familyEnd++;
            }

            for (int i = familyStart; i < familyEnd; i++)
            {
                PeptideTurnoverObject proteinOne = proteinsForThisFile[i];

                //see if it has a localized mod (or localized unmodified site), otherwise skip
                string[] proteoformOne = proteinOne.Proteoform.Split('@');
                if (proteoformOne.Length != 2)
                {
                    continue;
                }

                for (int j = i + 1; j < familyEnd; j++)
                {
                    PeptideTurnoverObject proteinTwo = proteinsForThisFile[j];
                    string[] proteoformTwo = proteinTwo.Proteoform.Split('@');

                    //only a pair for the same modification site is compared
                    if (proteoformTwo.Length != 2 || !proteoformOne[1].Equals(proteoformTwo[1]))
                    {
                        continue;
                    }

                    //do the comparison (t-test of normalized ratios for all timepoints)
                    double averageKbi = (proteinOne.Kbi + proteinTwo.Kbi) / 2;
                    double normalizedHalfLife = Math.Log(2) / averageKbi; //this is the day all relative fractions are normalized to

                    double[] normalizedRatiosOne = NormalizeRatios(proteinOne, poolParams, averageKbi, normalizedHalfLife);
                    double[] normalizedRatiosTwo = NormalizeRatios(proteinTwo, poolParams, averageKbi, normalizedHalfLife);

                    double halfLifeOne = Math.Log(2) / proteinOne.Kbi;
                    double halfLifeTwo = Math.Log(2) / proteinTwo.Kbi;
                    // The fold change depends only on the half-lives, so it is reported the same way
                    // whether or not the t-test succeeds.
                    double logFoldChange = Math.Log2(halfLifeTwo) - Math.Log2(halfLifeOne);

                    // Base-10 log, matching CompareProteinsAcrossFiles which writes the same column.
                    string negativeLogPValue;
                    try //sometimes crashes if stdev is zero
                    {
                        Sample sampleOne = new Sample(normalizedRatiosOne);
                        Sample sampleTwo = new Sample(normalizedRatiosTwo);
                        TestResult result = Sample.StudentTTest(sampleOne, sampleTwo);
                        negativeLogPValue = (-1 * Math.Log10(result.Probability)).ToString();
                    }
                    catch (Exception)
                    {
                        negativeLogPValue = "NA";
                    }

                    linesToWrite.Add(proteinOne.Proteoform + "\t" + proteinTwo.Proteoform + '\t' + halfLifeOne.ToString() + '\t' + halfLifeTwo.ToString() + '\t' + logFoldChange.ToString() + '\t' + negativeLogPValue);
                }
            }

            familyStart = familyEnd;
        }

        string baseName = Path.GetFileNameWithoutExtension(filename);
        string resultsDirectory = Path.Combine(Path.GetDirectoryName(filename), baseName + "_Results");
        // WriteAllLines does not create missing folders; CreateDirectory is a no-op when the folder already exists
        Directory.CreateDirectory(resultsDirectory);
        File.WriteAllLines(Path.Combine(resultsDirectory, baseName + "_ProteoformAnalysis.tsv"), linesToWrite);
    }
}