/// <summary>
/// Draws the relative-fraction curve predicted by the three-compartment model for the given
/// pool parameters on <c>RatioComparisonPlot</c>.
/// </summary>
/// <param name="poolParams">Pool parameters whose Kst, Kbt and Kao are fed to the model.</param>
/// <param name="label">Legend label for the plotted curve.</param>
/// <param name="kbi">
/// Rate constant passed to the model. The default is just a very high number to simulate
/// instantaneous turnover (proxy for the available amino acid pool).
/// </param>
private void PlotFit(PoolParameters poolParams, string label, double kbi = 1000000)
{
    // 1000 samples spaced 0.1 apart: 0.0, 0.1, ..., 99.9 (the x axis is labelled in days).
    const int sampleCount = 1000;
    double[] days = new double[sampleCount];
    int sample = 0;
    while (sample < sampleCount)
    {
        days[sample] = sample / 10.0;
        sample++;
    }

    // half-life = ln(2)/k, so a very large rate constant corresponds to a half-life of ~0.
    double[] predictedFractions = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(
        poolParams.Kst,
        poolParams.Kbt,
        poolParams.Kao,
        kbi,
        days);

    // All plot manipulation is marshalled through the dispatcher.
    Dispatcher.Invoke(() =>
    {
        var plot = RatioComparisonPlot.plt;

        plot.Layout(titleHeight: 20, xLabelHeight: 40, y2LabelWidth: 20);
        plot.XLabel("Time (Days)", fontSize: 20);
        plot.YLabel("Lys0 / LysTotal", fontSize: 20);
        plot.Axis(0, 100, 0, 1);
        plot.Ticks(fontSize: 18);
        plot.PlotScatter(days, predictedFractions, label: label, markerShape: ScottPlot.MarkerShape.none);

        bool legendRequested = DisplayLegendCheckBox.IsChecked.Value;
        if (legendRequested)
        {
            plot.Legend();
        }

        RatioComparisonPlot.Render();
    });
}
/// <summary>
/// Applies the Kst/Kbt/Kao values typed by the user to the file selected in the analyzed-files
/// grid, refits Kbi for every peptide of that file, shows the summed error, and refreshes the
/// global visualization.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void ParamApply_Click(object sender, RoutedEventArgs e)
{
    // Nothing to do when no data-file row is selected (or the selection is not a data-file row).
    if (!(DisplayAnalyzedFilesDataGrid.SelectedItem is RawDataForDataGrid selectedRow))
    {
        return;
    }

    // Validate the text boxes before touching any state so that a typo neither throws out of
    // the event handler nor leaves the button disabled.
    if (!double.TryParse(KstTB.Text, out double kst)
        || !double.TryParse(KbtTB.Text, out double kbt)
        || !double.TryParse(KaoTB.Text, out double kao))
    {
        MessageBox.Show("Kst, Kbt, and Kao must all be valid numbers.");
        return;
    }

    assignParamsButton.IsEnabled = false;
    try
    {
        string dataFile = selectedRow.FilePath;
        PoolParameters customParams = new PoolParameters(kst, kbt, kao);
        PoolParameterDictionary[dataFile] = customParams;

        List<PeptideTurnoverObject> peptidesForThisFile = AllPeptides.Where(x => x.FileName.Equals(dataFile)).ToList();

        // Three passes with a progressively smaller final argument
        // (presumably a search step/tolerance -- confirm against NonLinearRegression.UpdateKbi).
        foreach (double increment in new[] { 0.001, 0.0001, 0.00001 })
        {
            NonLinearRegression.UpdateKbi(customParams.Kst, customParams.Kbt, customParams.Kao, peptidesForThisFile, increment);
        }

        MseTB.Text = peptidesForThisFile.Sum(x => x.Error).ToString();
        UpdateGlobalVisualization();
    }
    finally
    {
        // Always hand control back to the user, even if the refit throws.
        assignParamsButton.IsEnabled = true;
    }
}
/// <summary>
/// Redraws <c>PrecisionPlot</c>: one scatter series of (half-life, relative fraction) points per
/// measured timepoint, with the model curve for each timepoint drawn on top in black.
/// </summary>
/// <param name="peptidesToPlot">Peptides whose measurements are plotted; an empty list only clears the plot.</param>
/// <param name="poolParams">Pool parameters (Kst, Kbt, Kao) used for the model curves.</param>
private void PlotPrecisionScatterPlot(List<PeptideTurnoverObject> peptidesToPlot, PoolParameters poolParams)
{
    var plot = PrecisionPlot.plt;
    plot.Clear();
    plot.GetPlottables().Clear();

    if (peptidesToPlot.Count == 0)
    {
        return;
    }

    double ln2 = Math.Log(2, Math.E);

    // Bucket every measurement by the timepoint it was taken at.
    var measurementsByTimepoint = new Dictionary<double, List<(double halfLife, double relativeFraction)>>();
    foreach (PeptideTurnoverObject peptide in peptidesToPlot)
    {
        double peptideHalfLife = ln2 / peptide.Kbi;
        for (int m = 0; m < peptide.Timepoints.Length; m++)
        {
            double day = peptide.Timepoints[m];
            var point = (peptideHalfLife, peptide.RelativeFractions[m]);
            if (!measurementsByTimepoint.ContainsKey(day))
            {
                measurementsByTimepoint[day] = new List<(double halfLife, double relativeFraction)>();
            }
            measurementsByTimepoint[day].Add(point);
        }
    }

    // Plot all peptide data, one series per timepoint in ascending order.
    double[] timepoints = measurementsByTimepoint.Keys.OrderBy(x => x).ToArray();
    foreach (double timepoint in timepoints)
    {
        List<(double halfLife, double relativeFraction)> points = measurementsByTimepoint[timepoint];
        double[] xs = points.Select(p => p.halfLife).ToArray();
        double[] ys = points.Select(p => p.relativeFraction).ToArray();
        plot.PlotScatter(xs, ys, lineWidth: 0, markerSize: 3, label: timepoint.ToString(), markerShape: ScottPlot.MarkerShape.openCircle);
    }

    // Model curves: half-lives 0.2, 0.4, ... (2499 values). Because the series starts at 0.2 the
    // derived rate constant kbi = ln(2)/half-life is always finite.
    const int curvePointCount = 2499;
    double[] halflives = new double[curvePointCount];
    double[][] fitCurves = new double[timepoints.Length][];
    for (int t = 0; t < timepoints.Length; t++)
    {
        fitCurves[t] = new double[curvePointCount];
    }

    for (int h = 0; h < curvePointCount; h++)
    {
        halflives[h] = h / 5.0 + 0.2;
        double[] predicted = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(
            poolParams.Kst,
            poolParams.Kbt,
            poolParams.Kao,
            ln2 / halflives[h],
            timepoints);
        for (int t = 0; t < timepoints.Length; t++)
        {
            fitCurves[t][h] = predicted[t];
        }
    }

    plot.Axis(0, 50, 0, 1);
    foreach (double[] curve in fitCurves)
    {
        plot.PlotScatter(halflives, curve, Color.Black, markerSize: 0);
    }

    bool legendRequested = DisplayLegendCheckBox.IsChecked.Value;
    if (!legendRequested)
    {
        plot.Legend(false);
    }
    else
    {
        plot.Legend(location: ScottPlot.legendLocation.upperRight);
    }

    plot.YLabel("Lys0 / LysTotal");
    plot.XLabel("Half-life (Days)");
    plot.Axis(0, 50, 0, 1);

    // Rendered twice in a row.
    // NOTE(review): possibly a layout workaround -- confirm before collapsing to a single call.
    PrecisionPlot.Render();
    PrecisionPlot.Render();
}
/// <summary>
/// Background-worker entry point. First tries to restore every input file from its saved
/// "_ApplETurnoverSavedSession.tsv" file; if any file cannot be restored, the attempt is reset
/// and all files returned by <c>ResetFastLoadAttempt</c> are analyzed from scratch (read, fit,
/// summarize per protein/proteoform, save a quick-load file, run the statistical comparisons).
/// Progress and failures are reported through the worker's progress channel and message boxes.
/// </summary>
/// <param name="sender">The <see cref="BackgroundWorker"/> running this handler.</param>
/// <param name="e">Event data (unused).</param>
private void Worker_DoWork(object sender, DoWorkEventArgs e)
{
    BackgroundWorker worker = (BackgroundWorker)sender;

    Dispatcher.Invoke(() =>
    {
        FilesToDisplayObservableCollection.Clear();
        FilesToHideObservableCollection.Clear();
    });

    // Check if we can load old results. Every file that is restored is removed from the list of
    // files that still need a full analysis.
    List<string> quantifiedPeptideInputFiles = DataFilesObservableCollection.Select(x => x.FilePath).ToList();
    foreach (string inputFile in DataFilesObservableCollection.Select(x => x.FilePath))
    {
        string directory = Directory.GetParent(inputFile).FullName;
        string filename = Path.GetFileNameWithoutExtension(inputFile);
        string fastLoadFile = Path.Combine(directory, filename + "_ApplETurnoverSavedSession.tsv");

        bool restored = File.Exists(fastLoadFile);
        if (restored)
        {
            Dispatcher.Invoke(() =>
            {
                FilesToDisplayObservableCollection.Add(inputFile);
            });
            // Remove both names so we don't analyze the file again.
            quantifiedPeptideInputFiles.Remove(inputFile);
            quantifiedPeptideInputFiles.Remove(fastLoadFile);

            restored = DataPreparation.LoadExistingResults(inputFile, fastLoadFile, PoolParameterDictionary, AllPeptides, AnalyzedProteins, AnalyzedProteoforms);
        }

        if (!restored)
        {
            // Something went wrong (no saved session, or it failed to load).
            // Bail, reset, and run normally.
            quantifiedPeptideInputFiles = ResetFastLoadAttempt();
            break;
        }
    }

    if (quantifiedPeptideInputFiles.Count != 0 && DatabasesObservableCollection.Count != 0)
    {
        AllowFileDrop = false;
        Dispatcher.Invoke(() =>
        {
            ToggleButtons();
            FilesToDisplayObservableCollection.Clear();
            FilesToHideObservableCollection.Clear();
        });

        try
        {
            worker.ReportProgress(0, "Starting");
            Settings settings = GetUserSpecifiedSettings();
            string maxStatus = quantifiedPeptideInputFiles.Count.ToString();
            int status = 1;

            // Reading database
            worker.ReportProgress(0, "Reading Database");
            List<Protein> theoreticalProteins = DataPreparation
                .LoadProteins(DatabasesObservableCollection.Select(x => x.FilePath).ToList())
                .OrderBy(x => x.Accession)
                .ToList();

            Dispatcher.Invoke(() =>
            {
                PeptidesToDisplay.Clear();
                AllPeptides.Clear();
            });

            // Names under which results are stored (PoolParameterDictionary keys). These are
            // the names the statistical comparisons must use for their lookups.
            List<string> analyzedFiles = new List<string>(quantifiedPeptideInputFiles.Count);

            foreach (string originalFile in quantifiedPeptideInputFiles)
            {
                // Strip the saved-session suffix in case a failed fast load left a session
                // file name in the list during a multi-file analysis.
                string file = originalFile.Replace("_ApplETurnoverSavedSession", "");

                // Load data, filter, process, parsimony
                worker.ReportProgress(0, "Reading File " + status.ToString() + "/" + maxStatus + "...");
                List<PeptideTurnoverObject> peptides = DataPreparation.ReadData(file, settings, theoreticalProteins);
                if (peptides.Count == 0)
                {
                    throw new InvalidOperationException("No peptides were found for file: " + file + "; did you select the correct search engine?");
                }

                // Fit data to model, get half lives, confidence intervals
                worker.ReportProgress(0, "Analyzing File " + status.ToString() + "/" + maxStatus + "...");
                peptides = peptides.OrderBy(x => x.FullSequence).ToList();
                PoolParameters poolParams = NonLinearRegression.RegressionAnalysis(peptides, file, settings);

                // Protein-level summary
                var proteinGroups = peptides.GroupBy(x => x.Protein).ToList();
                List<PeptideTurnoverObject> proteins = NonLinearRegression.GetProteinInfo(peptides, file, proteinGroups, "Protein");

                // Proteoform-level summary
                var proteoformGroups = peptides.GroupBy(x => x.Proteoform).ToList();
                List<PeptideTurnoverObject> proteoforms = NonLinearRegression.GetProteinInfo(peptides, file, proteoformGroups, "Proteoform");

                AnalyzedProteins.AddRange(proteins);
                AnalyzedProteoforms.AddRange(proteoforms);
                PoolParameterDictionary.Add(file, poolParams);
                analyzedFiles.Add(file);
                PlotFit(poolParams, Path.GetFileNameWithoutExtension(file) + " Free Amino Acids");

                // Save results to allow for quick loading in the future
                string directory = Directory.GetParent(file).FullName;
                string filename = Path.GetFileNameWithoutExtension(file);
                string resultFile = Path.Combine(directory, filename + "_ApplETurnoverSavedSession.tsv");
                DataPreparation.WriteQuickLoadFile(resultFile, poolParams, peptides, proteins, proteoforms);

                // Add the peptides/proteins to the collections for viewing in the GUI
                Dispatcher.Invoke(() =>
                {
                    foreach (PeptideTurnoverObject peptide in peptides)
                    {
                        AllPeptides.Add(peptide);
                        PeptidesToDisplay.Add(peptide);
                    }
                    DisplayPeptidesDataGrid.Items.Refresh();
                    FilesToDisplayObservableCollection.Add(file);
                });

                status++;
            }

            worker.ReportProgress(0, "Running Statistics");
            TTest.CompareProteinsAcrossFiles(analyzedFiles, AnalyzedProteins, PoolParameterDictionary);
            TTest.CompareProteoformsWithinFiles(analyzedFiles, AnalyzedProteoforms, PoolParameterDictionary);
            worker.ReportProgress(0, "Finished!");
        }
        catch (Exception ex)
        {
            // Any failure aborts the run but must still fall through to re-enable the UI below.
            MessageBox.Show("Task failed: " + ex.Message);
            worker.ReportProgress(0, "Task failed");
        }

        AllowFileDrop = true;
        Dispatcher.Invoke(() =>
        {
            ToggleButtons();
        });
    }
    else if (DataFilesObservableCollection.Count() != 0 && AllPeptides.Count != 0)
    {
        // We had files to analyze but we were able to fast load all of them.
        Dispatcher.Invoke(() =>
        {
            foreach (PeptideTurnoverObject peptide in AllPeptides)
            {
                PeptidesToDisplay.Add(peptide);
            }
        });
        worker.ReportProgress(0, "Finished!");
    }
    else
    {
        MessageBox.Show("Input files are missing. The run has been stopped.");
        worker.ReportProgress(0, "Task failed: Input missing files.");
    }
}
/// <summary>
/// For every pair of files, compares the proteins present in both and writes one
/// "Comparison_{A}vs{B}.tsv" per pair into a "StatisticalComparisons" folder next to the first
/// file. Each row holds the protein, the log2 fold change of the half-lives (file two vs. file
/// one), the negative log10 p-value of a Student t-test on the normalized ratios, and both
/// half-lives. Does nothing when fewer than two files are given.
/// </summary>
/// <param name="filenames">Files to compare; each must be a key of <paramref name="poolParameterDictionary"/>.</param>
/// <param name="allProteins">Protein-level results for all files; matched to a file through <c>FileName</c>.</param>
/// <param name="poolParameterDictionary">Pool parameters (Kst, Kbt, Kao) per file.</param>
public static void CompareProteinsAcrossFiles(List<string> filenames, List<PeptideTurnoverObject> allProteins, Dictionary<string, PoolParameters> poolParameterDictionary)
{
    if (filenames.Count < 2)
    {
        return;
    }

    string outputDirectory = Path.GetDirectoryName(filenames.First());
    Directory.CreateDirectory(Path.Combine(outputDirectory, "StatisticalComparisons"));

    // Moves every measured ratio of one protein to a single common timepoint: the residual
    // against the shared-kbi fit at the measured timepoint is added to the shared-kbi fit at
    // the common timepoint (the half-life implied by the shared kbi).
    double[] NormalizeRatios(PeptideTurnoverObject protein, PoolParameters pool, double sharedKbi)
    {
        double commonTimepoint = Math.Log(2) / sharedKbi;
        double[] commonTimepoints = new double[protein.Timepoints.Length];
        for (int index = 0; index < commonTimepoints.Length; index++)
        {
            commonTimepoints[index] = commonTimepoint;
        }

        double[] expectedAtMeasured = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(pool.Kst, pool.Kbt, pool.Kao, sharedKbi, protein.Timepoints);
        double[] expectedAtCommon = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(pool.Kst, pool.Kbt, pool.Kao, sharedKbi, commonTimepoints);

        double[] normalized = new double[expectedAtMeasured.Length];
        for (int index = 0; index < protein.RelativeFractions.Length; index++)
        {
            normalized[index] = protein.RelativeFractions[index] - expectedAtMeasured[index] + expectedAtCommon[index];
        }
        return normalized;
    }

    // Filter and sort each file's proteins once instead of once per file pair.
    List<List<PeptideTurnoverObject>> proteinsByFile = filenames
        .Select(name => allProteins.Where(x => name.Equals(x.FileName)).OrderBy(x => x.Protein).ToList())
        .ToList();

    for (int i = 0; i < filenames.Count; i++)
    {
        string fileOne = filenames[i];
        PoolParameters paramsOne = poolParameterDictionary[fileOne];
        List<PeptideTurnoverObject> proteinsForFileOne = proteinsByFile[i];

        for (int j = i + 1; j < filenames.Count; j++)
        {
            string fileTwo = filenames[j];
            PoolParameters paramsTwo = poolParameterDictionary[fileTwo];
            List<PeptideTurnoverObject> proteinsForFileTwo = proteinsByFile[j];

            List<string> linesToWrite = new List<string>
            {
                "Protein\tFold Change\tNeg. log(p-Value)\tHalf-life " + fileOne + "\tHalf-life " + fileTwo
            };

            // Both lists are sorted by protein, so walk them in lock-step to find the overlap.
            int a = 0;
            int b = 0;
            while (a < proteinsForFileOne.Count && b < proteinsForFileTwo.Count)
            {
                PeptideTurnoverObject proteinOne = proteinsForFileOne[a];
                PeptideTurnoverObject proteinTwo = proteinsForFileTwo[b];
                int comparison = proteinOne.Protein.CompareTo(proteinTwo.Protein);

                if (comparison < 0)
                {
                    a++;
                    continue;
                }
                if (comparison > 0)
                {
                    b++;
                    continue;
                }

                // Same protein in both files: t-test of the normalized ratios for all
                // timepoints, normalized with the average of the two fitted kbi values.
                double averageKbi = (proteinOne.Kbi + proteinTwo.Kbi) / 2;
                double[] normalizedRatiosOne = NormalizeRatios(proteinOne, paramsOne, averageKbi);
                double[] normalizedRatiosTwo = NormalizeRatios(proteinTwo, paramsTwo, averageKbi);

                double halfLifeOne = Math.Log(2) / proteinOne.Kbi;
                double halfLifeTwo = Math.Log(2) / proteinTwo.Kbi;
                double foldChange = Math.Log2(halfLifeTwo) - Math.Log2(halfLifeOne);

                // The test can fail for degenerate samples (e.g. zero standard deviation).
                // Report "NA" for that protein instead of aborting every remaining comparison.
                // The statistics library's exception types are not visible here, hence the
                // broad catch.
                string negLogPValue;
                try
                {
                    Sample sampleOne = new Sample(normalizedRatiosOne);
                    Sample sampleTwo = new Sample(normalizedRatiosTwo);
                    TestResult result = Sample.StudentTTest(sampleOne, sampleTwo);
                    negLogPValue = (-1 * Math.Log10(result.Probability)).ToString();
                }
                catch (Exception)
                {
                    negLogPValue = "NA";
                }

                linesToWrite.Add(string.Join("\t",
                    proteinOne.Protein,
                    foldChange.ToString(),
                    negLogPValue,
                    halfLifeOne.ToString(),
                    halfLifeTwo.ToString()));

                a++;
                b++;
            }

            string outputName = "Comparison_" + Path.GetFileNameWithoutExtension(fileOne) + "vs" + Path.GetFileNameWithoutExtension(fileTwo) + ".tsv";
            File.WriteAllLines(Path.Combine(outputDirectory, "StatisticalComparisons", outputName), linesToWrite);
        }
    }
}
/// <summary>
/// Within each file, compares every pair of proteoforms that belong to the same family (same
/// text before the first '_') and share the same localized site (same text after '@'), and
/// writes the results to "{file}_Results/{file}_ProteoformAnalysis.tsv". Each row holds both
/// proteoforms, both half-lives, the log2 fold change of the half-lives (B vs. A), and the
/// negative log10 p-value of a Student t-test on the normalized ratios ("NA" when the test
/// cannot be evaluated).
/// </summary>
/// <param name="filenames">Files to process; each must be a key of <paramref name="poolParameterDictionary"/>.</param>
/// <param name="allProteins">Proteoform-level results for all files; matched to a file through <c>FileName</c>.</param>
/// <param name="poolParameterDictionary">Pool parameters (Kst, Kbt, Kao) per file.</param>
public static void CompareProteoformsWithinFiles(List<string> filenames, List<PeptideTurnoverObject> allProteins, Dictionary<string, PoolParameters> poolParameterDictionary)
{
    // Moves every measured ratio of one proteoform to a single common timepoint: the residual
    // against the shared-kbi fit at the measured timepoint is added to the shared-kbi fit at
    // the common timepoint (the half-life implied by the shared kbi).
    double[] NormalizeRatios(PeptideTurnoverObject proteoform, PoolParameters pool, double sharedKbi)
    {
        double commonTimepoint = Math.Log(2) / sharedKbi;
        double[] commonTimepoints = new double[proteoform.Timepoints.Length];
        for (int index = 0; index < commonTimepoints.Length; index++)
        {
            commonTimepoints[index] = commonTimepoint;
        }

        double[] expectedAtMeasured = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(pool.Kst, pool.Kbt, pool.Kao, sharedKbi, proteoform.Timepoints);
        double[] expectedAtCommon = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(pool.Kst, pool.Kbt, pool.Kao, sharedKbi, commonTimepoints);

        double[] normalized = new double[expectedAtMeasured.Length];
        for (int index = 0; index < proteoform.RelativeFractions.Length; index++)
        {
            normalized[index] = proteoform.RelativeFractions[index] - expectedAtMeasured[index] + expectedAtCommon[index];
        }
        return normalized;
    }

    foreach (string filename in filenames)
    {
        PoolParameters fileParams = poolParameterDictionary[filename];
        List<PeptideTurnoverObject> proteoformsForThisFile = allProteins
            .Where(x => filename.Equals(x.FileName))
            .OrderBy(x => x.Proteoform)
            .ToList();

        List<string> linesToWrite = new List<string>
        {
            "Proteoform A\tProteoform B\tHalf-life A\tHalf-life B\tLog2(Fold Change)\tNeg. log(p-Value)"
        };

        // The list is sorted by proteoform, so each family is processed as one contiguous run
        // [familyStart, familyEnd).
        int familyStart = 0;
        while (familyStart < proteoformsForThisFile.Count)
        {
            string family = proteoformsForThisFile[familyStart].Proteoform.Split('_')[0];
            int familyEnd = familyStart + 1;
            while (familyEnd < proteoformsForThisFile.Count
                && family.Equals(proteoformsForThisFile[familyEnd].Proteoform.Split('_')[0]))
            {
                familyEnd++;
            }

            for (int i = familyStart; i < familyEnd; i++)
            {
                PeptideTurnoverObject proteinOne = proteoformsForThisFile[i];

                // Only proteoforms with exactly one '@' (a localized mod or localized
                // unmodified site) take part in comparisons.
                string[] proteoformOne = proteinOne.Proteoform.Split('@');
                if (proteoformOne.Length != 2)
                {
                    continue;
                }

                for (int j = i + 1; j < familyEnd; j++)
                {
                    PeptideTurnoverObject proteinTwo = proteoformsForThisFile[j];
                    string[] proteoformTwo = proteinTwo.Proteoform.Split('@');

                    // Compare only pairs that refer to the same modification site.
                    if (proteoformTwo.Length != 2 || !proteoformOne[1].Equals(proteoformTwo[1]))
                    {
                        continue;
                    }

                    // t-test of the normalized ratios for all timepoints, normalized with the
                    // average of the two fitted kbi values.
                    double averageKbi = (proteinOne.Kbi + proteinTwo.Kbi) / 2;
                    double[] normalizedRatiosOne = NormalizeRatios(proteinOne, fileParams, averageKbi);
                    double[] normalizedRatiosTwo = NormalizeRatios(proteinTwo, fileParams, averageKbi);

                    double halfLifeOne = Math.Log(2) / proteinOne.Kbi;
                    double halfLifeTwo = Math.Log(2) / proteinTwo.Kbi;
                    double foldChange = Math.Log2(halfLifeTwo) - Math.Log2(halfLifeOne);

                    // The test sometimes fails when the standard deviation is zero; report
                    // "NA" for the p-value in that case. The fold-change column always holds
                    // the half-life fold change so the column has one meaning in every row.
                    // The statistics library's exception types are not visible here, hence
                    // the broad catch.
                    string negLogPValue;
                    try
                    {
                        Sample sampleOne = new Sample(normalizedRatiosOne);
                        Sample sampleTwo = new Sample(normalizedRatiosTwo);
                        TestResult result = Sample.StudentTTest(sampleOne, sampleTwo);
                        // log10, matching the protein-level comparison output that uses the
                        // same column header.
                        negLogPValue = (-1 * Math.Log10(result.Probability)).ToString();
                    }
                    catch (Exception)
                    {
                        negLogPValue = "NA";
                    }

                    linesToWrite.Add(string.Join("\t",
                        proteinOne.Proteoform,
                        proteinTwo.Proteoform,
                        halfLifeOne.ToString(),
                        halfLifeTwo.ToString(),
                        foldChange.ToString(),
                        negLogPValue));
                }
            }

            familyStart = familyEnd;
        }

        // Make sure the results folder exists before writing into it (no-op if it already does).
        string baseName = Path.GetFileNameWithoutExtension(filename);
        string resultsDirectory = Path.Combine(Path.GetDirectoryName(filename), baseName + "_Results");
        Directory.CreateDirectory(resultsDirectory);
        File.WriteAllLines(Path.Combine(resultsDirectory, baseName + "_ProteoformAnalysis.tsv"), linesToWrite);
    }
}