/// <summary>
/// Redraws the peptide plot from the current selection in <c>DisplayPeptidesDataGrid</c>.
/// With no selection, the previously plotted peptides are redrawn, provided there are any
/// and all of them are still present in <c>PeptidesToDisplay</c>.
/// </summary>
private void UpdatePeptidePlots()
{
    var selection = DisplayPeptidesDataGrid.SelectedItem;

    // Nothing selected: fall back to the previous selection, or leave the plot untouched.
    if (selection == null)
    {
        bool previousSelectionStillShown = PeptidesPreviouslyPlotted.Count != 0
            && PeptidesPreviouslyPlotted.All(x => PeptidesToDisplay.Contains(x));

        if (previousSelectionStillShown)
        {
            PlotPeptideData(PeptidesPreviouslyPlotted);
        }

        return;
    }

    PeptideTurnoverObject selected = (PeptideTurnoverObject)selection;

    // Same full sequence, and either the same protein (only when the protein-specific table
    // is displayed) or the same proteoform as the selected row.
    List<PeptideTurnoverObject> matches = PeptidesToDisplay
        .Where(x => x.FullSequence.Equals(selected.FullSequence)
            && ((DisplayProteinInSpecificTable && x.Protein.Equals(selected.Protein))
                || x.Proteoform.Equals(selected.Proteoform)))
        .ToList();

    // Drop entries that come from files the user has hidden.
    matches.RemoveAll(x => FilesToHideObservableCollection.Contains(x.FileName));

    PlotPeptideData(matches);
    PeptidesPreviouslyPlotted = matches;
}
/// <summary>
/// Builds one aggregate <see cref="PeptideTurnoverObject"/> per group by pooling the timepoints,
/// relative fractions, filenames, intensities and Monte Carlo kbis of the group's peptides, then
/// writes a tab-separated summary to
/// "&lt;dir&gt;/&lt;name&gt;_Results/&lt;name&gt;_&lt;analysisType&gt;TurnoverResults.tsv".
/// </summary>
/// <param name="peptides">All peptides; only the first one's FileName is used, as the file name of every aggregate.</param>
/// <param name="filePath">Input file path; its parent directory and extension-less name determine the output location.</param>
/// <param name="grouping">Peptides grouped by the key that becomes each aggregate's protein name.</param>
/// <param name="analysisType">Label inserted into the output file name.</param>
/// <returns>The aggregate objects, in the order of <paramref name="grouping"/>.</returns>
public static List<PeptideTurnoverObject> GetProteinInfo(List<PeptideTurnoverObject> peptides, string filePath, List<IGrouping<string, PeptideTurnoverObject>> grouping, string analysisType)
{
    // ln(2): converts between a rate constant (kbi) and a half-life.
    double ln2 = Math.Log(2, Math.E);

    List<PeptideTurnoverObject> proteinsToReturn = new List<PeptideTurnoverObject>();

    foreach (var group in grouping)
    {
        string proteinName = group.Key;
        var peptidesForThisProtein = group.OrderBy(x => x.BaseSequence).ThenBy(x => x.FullSequence).ToList();

        List<double> kbis = new List<double>();
        List<double> timepoints = new List<double>();
        List<double> rfs = new List<double>();
        List<string> filenames = new List<string>();
        List<double> intensities = new List<double>();
        double intensity = 0;

        foreach (var peptide in peptidesForThisProtein)
        {
            kbis.AddRange(peptide.MonteCarloKbis);
            timepoints.AddRange(peptide.Timepoints);
            rfs.AddRange(peptide.RelativeFractions);
            filenames.AddRange(peptide.Filenames);
            intensities.AddRange(peptide.Intensities);
            intensity += peptide.TotalIntensity;
        }

        // Every sequence is followed by ';' (including the last). Built in one pass rather
        // than by repeated string concatenation.
        string allPeptideSequences = string.Concat(peptidesForThisProtein.Select(x => x.FullSequence + ";"));

        //keep excel compatible
        if (allPeptideSequences.Length > 10000)
        {
            allPeptideSequences = "Too Many Sequences";
        }

        kbis.Sort();

        PeptideTurnoverObject protein = new PeptideTurnoverObject(allPeptideSequences, timepoints.ToArray(), rfs.ToArray(),
            filenames.ToArray(), intensities.ToArray(), intensity, peptides.First().FileName, proteinName);

        // Median is taken on the half-life scale (not kbi) and transformed back to kbi.
        // For the usual even count (200 simulations per peptide) these are the two central
        // values, exactly as before; for an odd count both indices point at the single middle
        // value, so a lone kbi no longer indexes position -1.
        int lowerMiddle = (kbis.Count - 1) / 2;
        int upperMiddle = kbis.Count / 2;
        double medianHalfLifeLow = ln2 / kbis[lowerMiddle];
        double medianHalfLifeHigh = ln2 / kbis[upperMiddle];
        double medianHalfLife = (medianHalfLifeHigh + medianHalfLifeLow) / 2;

        protein.Kbi = ln2 / medianHalfLife;
        protein.LowKbi = kbis[(int)Math.Round((kbis.Count - 1) * 2.5 / 100)]; //2.5 percentile
        protein.HighKbi = kbis[(int)Math.Round((kbis.Count - 1) * 97.5 / 100)]; //97.5 percentile
        protein.MonteCarloKbis = kbis.ToArray();
        protein.Error = peptidesForThisProtein.Count; //not the actual error; reused to carry the peptide count
        proteinsToReturn.Add(protein);
    }

    string directory = Directory.GetParent(filePath).FullName;
    string filename = Path.GetFileNameWithoutExtension(filePath);

    List<string> linesToWrite = new List<string>();
    linesToWrite.Add("Protein\tHalf Life\tLowerConfidenceInterval\tUpperConfidenceInterval\tSummed Intensity\tNumber of Ratios\tNumber of Peptides\tPeptideSequences");
    foreach (PeptideTurnoverObject protein in proteinsToReturn)
    {
        // A high kbi is a short half-life, so HighKbi yields the lower half-life bound and vice versa.
        linesToWrite.Add(protein.Protein + '\t'
            + (ln2 / protein.Kbi).ToString() + '\t'
            + (ln2 / protein.HighKbi).ToString() + '\t'
            + (ln2 / protein.LowKbi).ToString() + '\t'
            + protein.TotalIntensity.ToString() + '\t'
            + protein.Timepoints.Length.ToString() + '\t'
            + protein.Error.ToString() + '\t'
            + protein.FullSequence);
    }

    // One row per aggregate: half-life, its interval, summed intensity, counts and sequences.
    // The results folder is created if it is missing (no-op when it already exists).
    string resultsDirectory = Path.Combine(directory, filename + "_Results");
    Directory.CreateDirectory(resultsDirectory);
    File.WriteAllLines(Path.Combine(resultsDirectory, filename + "_" + analysisType + "TurnoverResults.tsv"), linesToWrite);

    return proteinsToReturn;
}
/// <summary>
/// Estimates an interval for a peptide's kbi by refitting kbi on 200 simulated data sets.
/// Each simulated measurement is an observed relative fraction drawn with replacement from the
/// same timepoint group, plus noise from NormInv(rng.NextDouble(), 0, sigma), where sigma is
/// derived from that group's squared residuals against the current fit. Values are clamped to [0, 1].
/// On return the peptide's Kbi, RelativeFractions, Timepoints and Error hold their original values,
/// while LowKbi, HighKbi and MonteCarloKbis hold the simulation results.
/// </summary>
/// <param name="kst">Model parameter passed through to the prediction and fitting routines.</param>
/// <param name="kbt">Model parameter passed through to the prediction and fitting routines.</param>
/// <param name="kao">Model parameter passed through to the prediction and fitting routines.</param>
/// <param name="peptide">Peptide whose interval is computed; mutated temporarily during the refits.</param>
/// <param name="ITERATIVE_SHIFT">Step size for the first refit pass; the second and third passes use 1/10 and 1/100 of it.</param>
public static void UpdateKbiConfidenceInterval(double kst, double kbt, double kao, PeptideTurnoverObject peptide, double ITERATIVE_SHIFT)
{
    const int simulationCount = 200;

    // Snapshot the state that the refits below overwrite.
    double[] timepoints = peptide.Timepoints;
    double[] observedFractions = peptide.RelativeFractions;
    double fittedKbi = peptide.Kbi;

    // Timepoint shared by the group currently being collected (fails here if there are no measurements).
    double groupTimepoint = timepoints[0];

    double[] predicted = PredictRelativeFractionUsingThreeCompartmentModel(kst, kbt, kao, fittedKbi, timepoints);

    // One entry per measurement: the sigma of its timepoint group and the observed values of that group.
    // Consecutive equal timepoints form a group; every member of a group shares the same list instance.
    List<double> sigmaPerMeasurement = new List<double>();
    List<List<double>> poolPerMeasurement = new List<List<double>>();

    List<double> groupSquaredResiduals = new List<double>();
    List<double> groupObservedValues = new List<double>();
    int groupStart = 0;

    for (int i = 0; i <= timepoints.Length; i++)
    {
        bool groupFinished = i == timepoints.Length || !timepoints[i].Equals(groupTimepoint);
        if (groupFinished)
        {
            // The divisor is floored at 1 so a single-point group does not divide by zero.
            double groupSigma = Math.Sqrt(groupSquaredResiduals.Sum() / Math.Max(groupSquaredResiduals.Count - 1, 1));
            for (; groupStart < i; groupStart++)
            {
                sigmaPerMeasurement.Add(groupSigma);
                poolPerMeasurement.Add(groupObservedValues);
            }

            if (i == timepoints.Length)
            {
                break;
            }

            groupSquaredResiduals = new List<double>();
            groupObservedValues = new List<double>();
            groupTimepoint = timepoints[i];
        }

        groupSquaredResiduals.Add(Math.Pow(observedFractions[i] - predicted[i], 2));
        groupObservedValues.Add(observedFractions[i]);
    }

    double fittedError = peptide.Error;

    int lowIndex = (int)Math.Round((simulationCount - 1) * 2.5 / 100); //2.5 percentile
    int highIndex = (int)Math.Round((simulationCount - 1) * 97.5 / 100); //97.5 percentile

    // Fixed seed: repeated runs on the same data give the same interval.
    Random rng = new Random(1);
    double[] simulatedKbis = new double[simulationCount];

    for (int simulation = 0; simulation < simulationCount; simulation++)
    {
        double[] simulatedFractions = new double[timepoints.Length];
        for (int i = 0; i < simulatedFractions.Length; i++)
        {
            // Draw an observed value from this measurement's timepoint group (with replacement) ...
            List<double> pool = poolPerMeasurement[i];
            double drawn = pool[rng.Next(pool.Count)];

            // ... then perturb it according to the group's sigma.
            double noise = NormInv(rng.NextDouble(), 0, sigmaPerMeasurement[i]);
            double value = drawn + noise;

            // A relative fraction outside [0, 1] is not a real value; clamp it.
            simulatedFractions[i] = value > 1 ? 1 : (value < 0 ? 0 : value);
        }

        peptide.RelativeFractions = simulatedFractions;

        // Refit on the simulated data in three passes of decreasing step size.
        // Each simulation starts from the kbi left behind by the previous one.
        foreach (int refinement in new[] { 1, 10, 100 })
        {
            UpdateKbi(kst, kbt, kao, peptide, ITERATIVE_SHIFT / refinement);
        }

        simulatedKbis[simulation] = peptide.Kbi;
    }

    // Put the peptide back the way it was before the simulations.
    peptide.Kbi = fittedKbi;
    peptide.RelativeFractions = observedFractions;
    peptide.Timepoints = timepoints;
    peptide.Error = fittedError;

    Array.Sort(simulatedKbis);
    peptide.LowKbi = simulatedKbis[lowIndex];
    peptide.HighKbi = simulatedKbis[highIndex];
    peptide.MonteCarloKbis = simulatedKbis;
}
/// <summary>
/// Refines <c>peptide.Kbi</c> by a one-dimensional descent on the fit error returned by
/// <c>CalculateErrorForThreeCompartmentModelFit</c>. Starting from the current kbi, it probes one
/// shift up (then one shift down if up did not help) and keeps stepping in the improving direction
/// until the error stops decreasing or kbi leaves [10 * ITERATIVE_SHIFT, 2.0]. When the error drop
/// is large, a proportionally larger step is tried first.
/// Side effects: sets <c>peptide.Error</c> and <c>peptide.TemporaryError</c>, sets <c>peptide.Kbi</c>
/// when an improvement was found, and always finishes with <c>peptide.UpdateError()</c>.
/// </summary>
/// <param name="kst">Model parameter passed through to the error function.</param>
/// <param name="kbt">Model parameter passed through to the error function.</param>
/// <param name="kao">Model parameter passed through to the error function.</param>
/// <param name="peptide">Peptide whose kbi is optimized against its Timepoints and RelativeFractions.</param>
/// <param name="ITERATIVE_SHIFT">Basic step size; also determines the lower kbi bound and the gradient scaling.</param>
public static void UpdateKbi(double kst, double kbt, double kao, PeptideTurnoverObject peptide, double ITERATIVE_SHIFT = ITERATIVE_SHIFT)
{
    const double maxKbi = 2.0;
    double minKbi = ITERATIVE_SHIFT * 10;
    double gradientFactor = 50 * ITERATIVE_SHIFT;

    double[] timepoints = peptide.Timepoints;
    double[] relativeFractions = peptide.RelativeFractions;
    double kbi = peptide.Kbi;

    // Fit error of the model at a candidate kbi.
    double ErrorAt(double candidateKbi)
    {
        return CalculateErrorForThreeCompartmentModelFit(kst, kbt, kao, candidateKbi, timepoints, relativeFractions);
    }

    // Probes one shift above 'centre'; if that is not strictly better than 'centreError',
    // probes one shift below instead. Returns the probed error and the direction it belongs to.
    double ProbeNeighbours(double centre, double centreError, out bool upward)
    {
        double probedError = ErrorAt(centre + ITERATIVE_SHIFT);
        upward = true;
        if (!(probedError < centreError))
        {
            probedError = ErrorAt(centre - ITERATIVE_SHIFT);
            upward = false;
        }
        return probedError;
    }

    // Decides how far to move from 'from'. A large error drop suggests a steep slope, so a step
    // proportional to the drop (rounded to whole shifts and kept one shift inside the bounds) is
    // tried; it is kept only if it beats the single-shift probe, in which case 'probedError' is
    // replaced by the error at the larger step. Otherwise the step is a single shift.
    double ChooseStep(double from, bool upward, double previousError, ref double probedError)
    {
        double step = (previousError - probedError) / gradientFactor;
        if (!(step > ITERATIVE_SHIFT * 3))
        {
            return ITERATIVE_SHIFT;
        }

        step = Math.Round(step / ITERATIVE_SHIFT) * ITERATIVE_SHIFT;

        double target;
        if (upward)
        {
            if (from + step > maxKbi)
            {
                // Land one shift below the maximum. The previous code subtracted the step from
                // the maximum instead of the current kbi, which did not keep the jump in bounds.
                step = maxKbi - from - ITERATIVE_SHIFT;
            }
            target = from + step;
        }
        else
        {
            if (from - step < minKbi)
            {
                // Land one shift above the minimum.
                step = from - minKbi - ITERATIVE_SHIFT;
            }
            target = from - step;
        }

        double jumpError = ErrorAt(target);
        if (!(jumpError < probedError))
        {
            return ITERATIVE_SHIFT;
        }

        probedError = jumpError;
        return step;
    }

    // The error is always recomputed for the current data before optimizing.
    peptide.Error = ErrorAt(kbi);
    peptide.TemporaryError = peptide.Error;

    double bestError = peptide.Error;
    double candidateError = ProbeNeighbours(kbi, bestError, out bool increaseKbi);

    if (bestError > candidateError)
    {
        double step = ChooseStep(kbi, increaseKbi, bestError, ref candidateError);
        kbi += increaseKbi ? step : -step;

        // Invariant: kbi has already been moved to the position whose error is candidateError.
        while (bestError > candidateError)
        {
            if (kbi > maxKbi || kbi < minKbi) //max and min allowed
            {
                break;
            }

            bestError = candidateError;
            candidateError = ProbeNeighbours(kbi, bestError, out increaseKbi);
            step = ChooseStep(kbi, increaseKbi, bestError, ref candidateError);
            kbi += increaseKbi ? step : -step;
        }

        peptide.TemporaryError = bestError;

        // The last move either did not improve the error or left the allowed range; undo it.
        kbi += increaseKbi ? -step : step;
        peptide.Kbi = kbi;
    }

    peptide.UpdateError();
}
/// <summary>
/// Loads previously saved results from <paramref name="fileToLoad"/>.
/// Expected layout, as read by this method: the first line holds three tab-separated pool
/// parameters; then peptide rows up to a line with a single field; then protein rows up to a
/// line with a single field; then proteoform rows until the end of the file. Within a row,
/// fields 1, 2 and 4 are ';'-separated numbers, field 3 is a ';'-separated string list, and
/// field 6 is ignored (<paramref name="inputFile"/> is used as the file name instead).
/// The output collections and the dictionary are only modified when the whole file parses;
/// a failure part-way through leaves them untouched.
/// </summary>
/// <param name="inputFile">Key for the pool parameters and file name assigned to every loaded object.</param>
/// <param name="fileToLoad">Path of the saved results file.</param>
/// <param name="poolParameterDictionary">Receives the pool parameters under <paramref name="inputFile"/>.</param>
/// <param name="peptides">Receives the loaded peptides.</param>
/// <param name="proteins">Receives the loaded proteins.</param>
/// <param name="proteoforms">Receives the loaded proteoforms.</param>
/// <returns>True when everything was loaded; false when the file could not be read or parsed.</returns>
public static bool LoadExistingResults(string inputFile, string fileToLoad, Dictionary<string, PoolParameters> poolParameterDictionary, ObservableCollection<PeptideTurnoverObject> peptides, List<PeptideTurnoverObject> proteins, List<PeptideTurnoverObject> proteoforms)
{
    // Parses a separator-delimited list of numbers.
    double[] ParseDoubles(string field, char separator)
    {
        return field.Split(separator).Select(x => Convert.ToDouble(x)).ToArray();
    }

    // Parses a peptide row: sequence, data arrays, total intensity, protein, proteoform, then kbi, error, low/high kbi.
    PeptideTurnoverObject ParsePeptideRow(string[] fields)
    {
        PeptideTurnoverObject peptide = new PeptideTurnoverObject(
            fields[0],
            ParseDoubles(fields[1], ';'),
            ParseDoubles(fields[2], ';'),
            fields[3].Split(';'),
            ParseDoubles(fields[4], ';'),
            Convert.ToDouble(fields[5]),
            inputFile, //file
            fields[7],
            fields[8]);
        peptide.Kbi = Convert.ToDouble(fields[9]);
        peptide.Error = Convert.ToDouble(fields[10]);
        peptide.LowKbi = Convert.ToDouble(fields[11]);
        peptide.HighKbi = Convert.ToDouble(fields[12]);
        return peptide;
    }

    // Parses a protein or proteoform row: same leading fields as a peptide row, then name, kbi, low/high kbi.
    PeptideTurnoverObject ParseGroupRow(string[] fields)
    {
        PeptideTurnoverObject entry = new PeptideTurnoverObject(
            fields[0],
            ParseDoubles(fields[1], ';'),
            ParseDoubles(fields[2], ';'),
            fields[3].Split(';'),
            ParseDoubles(fields[4], ';'),
            Convert.ToDouble(fields[5]),
            inputFile, //file
            fields[7]);
        entry.Kbi = Convert.ToDouble(fields[8]);
        entry.LowKbi = Convert.ToDouble(fields[9]);
        entry.HighKbi = Convert.ToDouble(fields[10]);
        return entry;
    }

    try
    {
        string[] lines = File.ReadAllLines(fileToLoad);

        //-pool parameters
        double[] poolParams = ParseDoubles(lines[0], '\t');
        PoolParameters loadedPoolParameters = new PoolParameters(poolParams[0], poolParams[1], poolParams[2]);

        // Everything is staged locally first so that a malformed file cannot leave the
        // caller's collections half-filled while this method reports failure.
        List<PeptideTurnoverObject> loadedPeptides = new List<PeptideTurnoverObject>();
        List<PeptideTurnoverObject> loadedProteins = new List<PeptideTurnoverObject>();
        List<PeptideTurnoverObject> loadedProteoforms = new List<PeptideTurnoverObject>();

        int i = 1;

        //-peptides (a single-field line ends the section)
        for (; i < lines.Length; i++)
        {
            string[] fields = lines[i].Split('\t');
            if (fields.Length == 1)
            {
                i++;
                break;
            }
            loadedPeptides.Add(ParsePeptideRow(fields));
        }

        //-Proteins (a single-field line ends the section)
        for (; i < lines.Length; i++)
        {
            string[] fields = lines[i].Split('\t');
            if (fields.Length == 1)
            {
                i++;
                break;
            }
            loadedProteins.Add(ParseGroupRow(fields));
        }

        //-Proteoforms (run to the end of the file)
        for (; i < lines.Length; i++)
        {
            loadedProteoforms.Add(ParseGroupRow(lines[i].Split('\t')));
        }

        // Parsing succeeded: publish the results in the same order as before.
        poolParameterDictionary[inputFile] = loadedPoolParameters;
        foreach (PeptideTurnoverObject peptide in loadedPeptides)
        {
            peptides.Add(peptide);
        }
        proteins.AddRange(loadedProteins);
        proteoforms.AddRange(loadedProteoforms);

        return true;
    }
    catch
    {
        // Deliberate best-effort: any read or parse problem is reported as "could not load".
        return false;
    }
}
public static List <PeptideTurnoverObject> ReadData(string file, Settings settings, List <Protein> theoreticalProteins) { List <string> columnIndexToSampleName = new List <string>(); List <double> columnIndexToTimepoint = new List <double>(); Dictionary <string, double> sampleToTimepoint = new Dictionary <string, double>(); Dictionary <double, List <string> > timepointToSamples = new Dictionary <double, List <string> >(); List <PeptideTurnoverObject> peptides = new List <PeptideTurnoverObject>(); if (settings.UpstreamProgram == Settings.SearchEngine.MetaMorpheus) { int firstIntensityIndex = -1; string[] lines = File.ReadAllLines(file); string[] header = lines[0].Split('\t'); //Get the header information for which columns are which samples and which timepoints for (int i = 0; i < header.Length; i += 2) { if (!header[i].Contains("Intensity_")) { i++; while (i < header.Length && !header[i].Contains("Intensity_")) { i++; } if (firstIntensityIndex == -1) { firstIntensityIndex = i; } i -= 2; } else { string columnHeader = header[i].Replace('-', '_'); string[] columnHeaderSplit = columnHeader.Split('_'); string sampleName = columnHeader.Substring(0, columnHeader.Length - columnHeaderSplit[columnHeaderSplit.Length - 1].Length - 1); columnIndexToSampleName.Add(sampleName); for (int j = 0; j < columnHeaderSplit.Length; j++) { string timepointString = columnHeaderSplit[j]; double time = -1; if (timepointString[0] == 'd' || timepointString[0] == 'D') { try { time = Convert.ToDouble(timepointString.Substring(1)); } catch { }; } else if (timepointString[timepointString.Length - 1] == 'd' || timepointString[timepointString.Length - 1] == 'D') { try { time = Convert.ToDouble(timepointString.Substring(0, timepointString.Length - 1)); } catch { }; } if (time != -1) { if (timepointToSamples.ContainsKey(time)) { timepointToSamples[time].Add(sampleName); } else { timepointToSamples[time] = new List <string> { sampleName }; } sampleToTimepoint[sampleName] = time; 
columnIndexToTimepoint.Add(time); break; } } } } //Diagnostics //int numOriginalPeps = 0; //int numSurvivingPeps = 0; ////List<string> originalProteins = new List<string>(); ////List<string> survivingProteins = new List<string>(); //int[] originalDistribution = new int[25]; //int[] postBadRemovalDistribution = new int[25]; //int[] postRequirementPerTimepoint = new int[25]; //HashSet<string> originalProteins = new HashSet<string>(); //HashSet<string> survivingProteins = new HashSet<string>(); //10 investigate //int[] num3 = new int[6]; //int[] num7 = new int[6]; //int[] num14 = new int[6]; //int[] num30 = new int[6]; //int[] num60 = new int[6]; //List<(double, string)> peptidesWith10Values = new List<(double, string)>(); //read in the intensities bool peptideInput = header[0].Equals("Sequence"); for (int i = 1; i < lines.Length; i++) { string[] line = lines[i].Split('\t'); string sequence = line[0]; string protein = peptideInput ? line[2] : line[0]; List <double> timepointsForThisPeptide = new List <double>(); List <double> rfValuesForThisPeptide = new List <double>(); List <string> filenamesForThisPeptide = new List <string>(); List <double> intensitiesForThisPeptide = new List <double>(); //Diagnostics //numOriginalPeps++; //originalProteins.Add(protein); int numOriginal = 0; int numPostBadRemoval = 0; int numPostRequirementPerTimepoint = 0; double highestIntensity = 0; double averageIntensity = 0; for (int column = firstIntensityIndex; column < firstIntensityIndex + columnIndexToSampleName.Count * 2; column += 2) { double originalIntensity = line[column].Length == 0 ? 0 : Convert.ToDouble(line[column]); double newlySynthesizedIntensity = line[column + 1].Length == 0 ? 
0 : Convert.ToDouble(line[column + 1]); bool atLeastOneIntensity = originalIntensity != 0 || newlySynthesizedIntensity != 0; if (atLeastOneIntensity) { numOriginal++; averageIntensity += originalIntensity + newlySynthesizedIntensity; if (originalIntensity + newlySynthesizedIntensity > highestIntensity) { highestIntensity = originalIntensity + newlySynthesizedIntensity; } } if ((settings.UseBadRatios && atLeastOneIntensity) || (originalIntensity != 0 && newlySynthesizedIntensity != 0)) { numPostBadRemoval++; int indexLookup = (column - firstIntensityIndex) / 2; timepointsForThisPeptide.Add(columnIndexToTimepoint[indexLookup]); rfValuesForThisPeptide.Add(newlySynthesizedIntensity / (originalIntensity + newlySynthesizedIntensity)); filenamesForThisPeptide.Add(columnIndexToSampleName[indexLookup]); intensitiesForThisPeptide.Add(originalIntensity + newlySynthesizedIntensity); } } //if (numOriginal == 10) //{ // num3[timepointsForThisPeptide.Count(x => x > 2 && x < 4)]++; // num7[timepointsForThisPeptide.Count(x => x > 6 && x < 8)]++; // num14[timepointsForThisPeptide.Count(x => x > 13 && x < 15)]++; // num30[timepointsForThisPeptide.Count(x => x > 29 && x < 31)]++; // num60[timepointsForThisPeptide.Count(x => x > 59 && x < 61)]++; // peptidesWith10Values.Add((intensitiesForThisPeptide.Median(), sequence)); //} //remove timepoints with too little data foreach (double timepoint in timepointToSamples.Keys) { List <int> indicesForThisTimepoint = new List <int>(); for (int index = 0; index < timepointsForThisPeptide.Count; index++) { if (timepointsForThisPeptide[index].Equals(timepoint)) { numPostRequirementPerTimepoint++; indicesForThisTimepoint.Add(index); } } if (indicesForThisTimepoint.Count < settings.MinValidValuesPerTimepoint) { for (int index = indicesForThisTimepoint.Count - 1; index >= 0; index--) { numPostRequirementPerTimepoint--; int actualIndex = indicesForThisTimepoint[index]; timepointsForThisPeptide.RemoveAt(actualIndex); 
rfValuesForThisPeptide.RemoveAt(actualIndex); filenamesForThisPeptide.RemoveAt(actualIndex); intensitiesForThisPeptide.RemoveAt(actualIndex); } } } if (timepointsForThisPeptide.Count >= settings.MinValidValuesTotal) { // numSurvivingPeps++; // survivingProteins.Add(protein); peptides.Add(new PeptideTurnoverObject(sequence, timepointsForThisPeptide.ToArray(), rfValuesForThisPeptide.ToArray(), filenamesForThisPeptide.ToArray(), intensitiesForThisPeptide.ToArray(), intensitiesForThisPeptide.Sum(), file, protein)); } //originalDistribution[numOriginal]++; //postBadRemovalDistribution[numPostBadRemoval]++; //postRequirementPerTimepoint[numPostRequirementPerTimepoint]++; } string pathToWrite = file.Substring(0, file.Length - 4) + "_Results"; Directory.CreateDirectory(pathToWrite); //DIAGNOSTICS //List<string> linesToWrite = new List<string>(); //linesToWrite.Add("Original Peptides:\t" + numOriginalPeps.ToString()); //linesToWrite.Add("Surviving Peptides:\t" + numSurvivingPeps.ToString()); //linesToWrite.Add("Original Proteins:\t" + originalProteins.Count.ToString()); //linesToWrite.Add("Surviving Proteins:\t" + survivingProteins.Count.ToString()); //linesToWrite.Add(""); //linesToWrite.Add("NumPeptides\tOriginal\tPostBadRatios\tPostTimepointMin"); //for (int i = 0; i < 25; i++) //{ // linesToWrite.Add(i.ToString() + '\t' + originalDistribution[i].ToString() + '\t' + postBadRemovalDistribution[i].ToString() + '\t' + postRequirementPerTimepoint[i].ToString()); //} //string filename = Path.GetFileNameWithoutExtension(file); //pathToWrite = Path.Combine(pathToWrite, filename); //File.WriteAllLines(pathToWrite + "_Diagnostics.tsv", linesToWrite); //linesToWrite.Clear(); //linesToWrite.Add("\t3\t7\t14\t30\t60"); //for(int i=0; i<6; i++) //{ // linesToWrite.Add(i.ToString() + '\t' + num3[i].ToString() + '\t' + num7[i].ToString() + '\t' + num14[i].ToString() + '\t' + num30[i].ToString() + '\t' + num60[i].ToString()); //} //peptidesWith10Values = 
peptidesWith10Values.OrderByDescending(x => x.Item1).ToList(); //linesToWrite.Add(""); //linesToWrite.Add(peptidesWith10Values[0].Item2); //linesToWrite.Add(peptidesWith10Values[1].Item2); //linesToWrite.Add(peptidesWith10Values[2].Item2); //linesToWrite.Add(peptidesWith10Values[3].Item2); //linesToWrite.Add(peptidesWith10Values[4].Item2); //File.WriteAllLines(pathToWrite + "_DiagnosticsFor10ValidFiles.tsv", linesToWrite); //int a = 0; } else //if maxquant (not really maxquant, but the same format used in Alevra, M.; Mandad, S.; Ischebeck, T.; Urlaub, H.; Rizzoli, S. O.; Fornasiero, E. F. A Mass Spectrometry Workflow for Measuring Protein Turnover Rates in Vivo. Nature Protocols 2019. https://doi.org/10.1038/s41596-019-0222-y.) { int firstRatioIndex = -1; int firstIntensityIndex = -1; string[] lines = File.ReadAllLines(file); string[] header = lines[0].Split('\t'); bool peptideInput = header[0].Equals("Sequence"); //Get the header information for which columns are which samples and which timepoints if (peptideInput) { for (int i = 0; i < header.Length; i += 6) //6 is the spacing for the actual, normalized, variablitiy, count, iso, type { if (!header[i].Contains("Ratio H/L")) { i++; while (i < header.Length && !header[i].Contains("Ratio H/L")) { if (header[i].Contains("Intensity")) //intensities are after the ratios. 
Reason unknown, they do not match the ratios { firstIntensityIndex = i + 3; i = header.Length; //end } i++; } if (firstRatioIndex == -1) { firstRatioIndex = i + 6; } //skip the first one because it's the aggregate } else { string sampleName = header[i].Replace('-', '_').Replace(' ', '_').Substring("Ratio H/L ".Length); string[] columnHeaderSplit = sampleName.Split('_'); columnIndexToSampleName.Add(sampleName); double time = Convert.ToDouble(columnHeaderSplit[0]); if (timepointToSamples.ContainsKey(time)) { timepointToSamples[time].Add(sampleName); } else { timepointToSamples[time] = new List <string> { sampleName }; } sampleToTimepoint[sampleName] = time; columnIndexToTimepoint.Add(time); } } //read in the intensities for (int i = 1; i < lines.Length; i++) { string[] line = lines[i].Split('\t'); string sequence = line[0]; string protein = line[34]; List <double> timepointsForThisPeptide = new List <double>(); List <double> rfValuesForThisPeptide = new List <double>(); List <string> filenamesForThisPeptide = new List <string>(); List <double> intensitiesForThisPeptide = new List <double>(); for (int column = firstRatioIndex; column < firstRatioIndex + columnIndexToSampleName.Count * 6; column += 6) { if (!line[column].Equals("NaN")) { int indexLookup = (column - firstRatioIndex) / 6; timepointsForThisPeptide.Add(columnIndexToTimepoint[indexLookup]); double ratio = Convert.ToDouble(line[column]); rfValuesForThisPeptide.Add(1 - ratio / (ratio + 1)); //convert H/L to L/Total //TODO remove the "1-" for normal experiments before release filenamesForThisPeptide.Add(columnIndexToSampleName[indexLookup]); intensitiesForThisPeptide.Add(Convert.ToDouble(line[(column - firstRatioIndex) / 6 + firstIntensityIndex])); } } //remove timepoints with too little data foreach (double timepoint in timepointToSamples.Keys) { List <int> indicesForThisTimepoint = new List <int>(); for (int index = 0; index < timepointsForThisPeptide.Count; index++) { if 
(timepointsForThisPeptide[index].Equals(timepoint)) { indicesForThisTimepoint.Add(index); } } if (indicesForThisTimepoint.Count < settings.MinValidValuesPerTimepoint) { for (int index = indicesForThisTimepoint.Count - 1; index >= 0; index--) { int actualIndex = indicesForThisTimepoint[index]; timepointsForThisPeptide.RemoveAt(actualIndex); rfValuesForThisPeptide.RemoveAt(actualIndex); filenamesForThisPeptide.RemoveAt(actualIndex); intensitiesForThisPeptide.RemoveAt(actualIndex); } } } if (timepointsForThisPeptide.Count >= settings.MinValidValuesTotal) { peptides.Add(new PeptideTurnoverObject(sequence, timepointsForThisPeptide.ToArray(), rfValuesForThisPeptide.ToArray(), filenamesForThisPeptide.ToArray(), intensitiesForThisPeptide.ToArray(), intensitiesForThisPeptide.Sum(), file, protein)); } } } else //if protein input { for (int i = 0; i < header.Length; i++) //no spacing here { if (!header[i].Contains("Ratio H/L")) { i++; while (i < header.Length && !header[i].Contains("Ratio H/L")) { if (header[i].Contains("Intensity")) //intensities are after the ratios. Reason unknown, they do not match the ratios { firstIntensityIndex = i + 3; i = header.Length; //end } i++; } if (firstRatioIndex == -1) { firstRatioIndex = i; } //skip the first one because it's the aggregate } else { if (header[i].Contains("normalized")) { while (i < header.Length) { if (header[i].Contains("Intensity")) //intensities are after the ratios. 
Reason unknown, they do not match the ratios { firstIntensityIndex = i + 3; i = header.Length; //end } i++; } break; } string sampleName = header[i].Replace('-', '_').Replace(' ', '_').Substring("Ratio H/L ".Length); string[] columnHeaderSplit = sampleName.Split('_'); columnIndexToSampleName.Add(sampleName); double time = Convert.ToDouble(columnHeaderSplit[0]); if (timepointToSamples.ContainsKey(time)) { timepointToSamples[time].Add(sampleName); } else { timepointToSamples[time] = new List <string> { sampleName }; } sampleToTimepoint[sampleName] = time; columnIndexToTimepoint.Add(time); } } //read in the intensities for (int i = 1; i < lines.Length; i++) { string[] line = lines[i].Split('\t'); string sequence = line[0]; string protein = line[0]; List <double> timepointsForThisPeptide = new List <double>(); List <double> rfValuesForThisPeptide = new List <double>(); List <string> filenamesForThisPeptide = new List <string>(); List <double> intensitiesForThisPeptide = new List <double>(); for (int column = firstRatioIndex; column < firstRatioIndex + columnIndexToSampleName.Count; column++) { if (!line[column].Equals("NaN")) { int indexLookup = (column - firstRatioIndex); timepointsForThisPeptide.Add(columnIndexToTimepoint[indexLookup]); double ratio = Convert.ToDouble(line[column]); rfValuesForThisPeptide.Add(1 - ratio / (ratio + 1)); //convert H/L to L/Total //TODO remove the "1-" for normal experiments before release filenamesForThisPeptide.Add(columnIndexToSampleName[indexLookup]); intensitiesForThisPeptide.Add(Convert.ToDouble(line[(column - firstRatioIndex) + firstIntensityIndex])); } } //remove timepoints with too little data foreach (double timepoint in timepointToSamples.Keys) { List <int> indicesForThisTimepoint = new List <int>(); for (int index = 0; index < timepointsForThisPeptide.Count; index++) { if (timepointsForThisPeptide[index].Equals(timepoint)) { indicesForThisTimepoint.Add(index); } } if (indicesForThisTimepoint.Count < 
settings.MinValidValuesPerTimepoint) { for (int index = indicesForThisTimepoint.Count - 1; index >= 0; index--) { int actualIndex = indicesForThisTimepoint[index]; timepointsForThisPeptide.RemoveAt(actualIndex); rfValuesForThisPeptide.RemoveAt(actualIndex); filenamesForThisPeptide.RemoveAt(actualIndex); intensitiesForThisPeptide.RemoveAt(actualIndex); } } } if (timepointsForThisPeptide.Count >= settings.MinValidValuesTotal) { peptides.Add(new PeptideTurnoverObject(sequence, timepointsForThisPeptide.ToArray(), rfValuesForThisPeptide.ToArray(), filenamesForThisPeptide.ToArray(), intensitiesForThisPeptide.ToArray(), intensitiesForThisPeptide.Sum(), file, protein)); } } } } //create a hash table for quick lookups //the idea is to break up the proteins into k-mers (must be less than shortest peptide length) and look up the starts of those Dictionary <Protein, Dictionary <string, List <int> > > theoreticalProteinLookupTable = new Dictionary <Protein, Dictionary <string, List <int> > >(); const int kMerLength = 6; foreach (Protein p in theoreticalProteins) { string baseSequence = p.BaseSequence; Dictionary <string, List <int> > lookupTable = new Dictionary <string, List <int> >(); for (int i = 0; i < baseSequence.Length - kMerLength + 1; i++) { string kmer = baseSequence.Substring(i, kMerLength); if (lookupTable.TryGetValue(kmer, out var value)) { value.Add(i); } else { lookupTable.Add(kmer, new List <int> { i }); } } if (!theoreticalProteinLookupTable.Keys.Contains(p)) { theoreticalProteinLookupTable.Add(p, lookupTable); } } //lookup the sequence in the database //sort peptides = peptides.Where(x => x.RelativeFractions.Length != 0).OrderBy(x => x.BaseSequence).ToList(); //do reverse parsimony int[] threads = Enumerable.Range(0, Environment.ProcessorCount).ToArray(); //int[] threads = Enumerable.Range(0, 1).ToArray(); Parallel.ForEach(threads, (thread) => { string mostRecentBaseSequence = ""; int mostRecentIndex = -1; int max = (thread + 1) * peptides.Count / 
threads.Length; for (int i = thread * peptides.Count / threads.Length; i < max; i++) { PeptideTurnoverObject currentPeptide = peptides[i]; //if same base seq, just reuse the old info if (!currentPeptide.BaseSequence.Equals(mostRecentBaseSequence)) { mostRecentBaseSequence = currentPeptide.BaseSequence; //find proteins containing this sequence //List<Protein> proteinsContainingThisSeq = theoreticalProteins.Where(x => x.BaseSequence.Contains(mostRecentBaseSequence)).OrderBy(x => x.Accession).ToList(); List <Protein> proteinsContainingThisSeq = new List <Protein>(); string kMer = mostRecentBaseSequence.Substring(0, kMerLength); foreach (Protein p in theoreticalProteins) { if (theoreticalProteinLookupTable[p].TryGetValue(kMer, out var indices)) { if (NativeStringSearch(p.BaseSequence, mostRecentBaseSequence, indices, kMer)) { proteinsContainingThisSeq.Add(p); } } } if (proteinsContainingThisSeq.Count == 0) { throw new Exception("Database was missing the protein: " + currentPeptide.Protein + " or the given protein did not contain the sequence: " + mostRecentBaseSequence); } string protein = proteinsContainingThisSeq[0].Accession; for (int index = 1; index < proteinsContainingThisSeq.Count; index++) { protein += ";" + proteinsContainingThisSeq[index].Accession; } if (!protein.Equals(currentPeptide.Protein)) { //why aren't they the same? Parsimony currentPeptide.UpdateProteinFromParsimony(protein); } if (proteinsContainingThisSeq.Count > 1) { } //TODO: The handling of this is sloppy, but complicated to deal with the right way. 
//It's assuming that proteoforms will only appear by being on the same peptide (modified/unmodified) and not through overlapping peptides //find index of base sequence mostRecentIndex = proteinsContainingThisSeq.First().BaseSequence.IndexOf(mostRecentBaseSequence); } currentPeptide.StartResidue = mostRecentIndex; currentPeptide.EndResidue = mostRecentIndex + mostRecentBaseSequence.Length; //if there are mods if (mostRecentBaseSequence.Length != currentPeptide.FullSequence.Length) { string fullSequence = currentPeptide.FullSequence; int currentIndex = 0; for (int index = 0; index < fullSequence.Length; index++) { //if there's a mod if (fullSequence[index] == '[') { int bracketCount = 1; string mod = ""; index++; while (bracketCount != 0) { if (fullSequence[index] == '[') { bracketCount++; } else if (fullSequence[index] == ']') { bracketCount--; index--; } if (bracketCount != 0) { mod += fullSequence[index]; } index++; } if (currentIndex == 1 && currentPeptide.ModDictionary.ContainsKey(0)) { currentPeptide.ModDictionary[0] = currentPeptide.ModDictionary[0] + "+" + mod; } else { currentPeptide.ModDictionary.Add(currentIndex == 0 ? 
0 : currentIndex - 1, mod); //N-terminal mods are counted as being on the first residue } } else { currentIndex++; } } } } }); //have all peptides, now convert into proteins List <PeptideTurnoverObject> possibleProteoformGroups = new List <PeptideTurnoverObject>(); var peptidesGroupedByProtein = peptides.GroupBy(x => x.Protein, x => x).ToList(); //TODO parallelize this, break out into separate method, unit tests //foreach protein group for (int i = 0; i < peptidesGroupedByProtein.Count; i++) { var group = peptidesGroupedByProtein[i]; string protein = group.Key; //get the peptides List <PeptideTurnoverObject> peptidesForThisProtein = group.OrderBy(x => x.StartResidue).ToList(); Dictionary <int, List <string> > mods = new Dictionary <int, List <string> >(); foreach (PeptideTurnoverObject peptide in peptidesForThisProtein) { for (int residue = peptide.StartResidue; residue < peptide.EndResidue; residue++) { //is residue start/end correct? string value = UnmodifiedString; if (peptide.ModDictionary.ContainsKey(residue - peptide.StartResidue)) //the mod dictionary is zero-based. { value = peptide.ModDictionary[residue - peptide.StartResidue]; } if (mods.ContainsKey(residue)) { if (!mods[residue].Contains(value)) { mods[residue].Add(value); } } else { mods.Add(residue, new List <string> { value }); } } } //find which residues have multiple forms var residueDifferences = mods.Where(x => x.Value.Count > 1).OrderBy(x => x.Key).ToList(); bool[] uniquePeptides = new bool[peptidesForThisProtein.Count]; //have we added this peptide already? At the start, none have been added. 
if (residueDifferences.Count != 0) { //foreach residue with a different form foreach (var residueDifference in residueDifferences) { int residueIndex = residueDifference.Key; //get the residue //find the relevant peptides for this residue bool foundResidue = false; for (int index = 0; index < peptidesForThisProtein.Count; index++) { PeptideTurnoverObject peptide = peptidesForThisProtein[index]; //if this peptide is relevant as belonging to a proteoform group if (peptide.StartResidue <= residueIndex && peptide.EndResidue >= residueIndex) {//is this right? uniquePeptides[index] = true; foundResidue = true; string modForThisPeptide = UnmodifiedString; if (peptide.ModDictionary.ContainsKey(residueIndex - peptide.StartResidue)) { modForThisPeptide = peptide.ModDictionary[residueIndex - peptide.StartResidue]; } for (int indexForThisMod = 0; indexForThisMod < residueDifference.Value.Count; indexForThisMod++) { if (residueDifference.Value[indexForThisMod].Equals(modForThisPeptide)) { possibleProteoformGroups.Add(peptide.Copy(peptide.Protein + "_" + modForThisPeptide + "@" + residueDifference.Key.ToString())); break; } } } //if we're no longer looking at relevant peptides else if (foundResidue) { break; } } } } //add peptides that aren't part of proteoform groups for (int index = 0; index < peptidesForThisProtein.Count; index++) { if (!uniquePeptides[index]) { possibleProteoformGroups.Add(peptidesForThisProtein[index]); } } } return(possibleProteoformGroups); //.Where(x => x.Timepoints.Length >= settings.MinValidValuesTotal).OrderByDescending(x => x.Timepoints.Length).ThenByDescending(x => x.TotalIntensity).ToList(); }
/// <summary>
/// Clears the ratio plot and the half-life comparison plot and redraws them for the given peptides.
/// For each peptide this draws its observed ratios, the error/half-life scatter of every displayed
/// peptide sharing its protein (or proteoform) and file, and, depending on the check boxes,
/// the best fit and the confidence intervals. The half-life plot is rendered at the end.
/// </summary>
/// <param name="peptidesToPlot">The peptides to draw.</param>
private void PlotPeptideData(List<PeptideTurnoverObject> peptidesToPlot)
{
    // half-life that corresponds to a rate constant: ln(2) / k
    double HalfLifeOf(double rateConstant) => Math.Log(2, Math.E) / rateConstant;

    // wipe whatever was drawn before
    RatioComparisonPlot.plt.Clear();
    RatioComparisonPlot.plt.Legend(false);
    HalfLifeComparisonPlot.plt.GetPlottables().Clear();

    // running axis limits of the half-life plot; every plotted peptide can only widen them
    double axisMinError = double.PositiveInfinity;
    double axisMaxError = double.NegativeInfinity;
    double axisMinHalfLife = double.PositiveInfinity;
    double axisMaxHalfLife = double.NegativeInfinity;

    foreach (PeptideTurnoverObject selected in peptidesToPlot)
    {
        //title of the ratio plot: the font shrinks for long sequences but stays within 12-24
        int roundedRootOfLength = (int)Math.Round(Math.Sqrt(selected.DisplayPeptideSequence.Length));
        int titleFontSize = Math.Max(Math.Min(24, 100 / roundedRootOfLength), 12);
        RatioComparisonPlot.plt.Title(selected.DisplayPeptideSequence, fontSize: titleFontSize);

        string groupName = selected.DisplayProteinOrProteoform;
        string sourcePath = selected.FileName;
        string sourceName = Path.GetFileNameWithoutExtension(sourcePath);

        //the observed data points
        RatioComparisonPlot.plt.PlotScatter(
            selected.Timepoints,
            selected.RelativeFractions,
            markerSize: 4,
            lineWidth: 0,
            label: sourceName + " Observed Ratios");

        //every displayed peptide that shares both the protein/proteoform and the file
        List<PeptideTurnoverObject> siblings = PeptidesToDisplay
            .Where(candidate => candidate.DisplayProteinOrProteoform.Equals(groupName) && candidate.FileName.Equals(sourcePath))
            .ToList();
        double[] errors = siblings.Select(candidate => candidate.Error).ToArray();
        double[] halfLives = siblings.Select(candidate => HalfLifeOf(candidate.Kbi)).ToArray();
        double[] negativeErrors = siblings.Select(candidate => HalfLifeOf(candidate.Kbi) - HalfLifeOf(candidate.HighKbi)).ToArray();
        double[] positiveErrors = siblings.Select(candidate => HalfLifeOf(candidate.LowKbi) - HalfLifeOf(candidate.Kbi)).ToArray();

        //cosmetics of the half-life plot
        HalfLifeComparisonPlot.plt.Title(groupName, fontSize: 24);
        HalfLifeComparisonPlot.plt.Layout(titleHeight: 20, xLabelHeight: 40, y2LabelWidth: 20);
        HalfLifeComparisonPlot.plt.YLabel("Half-life (Days)", fontSize: 20);
        HalfLifeComparisonPlot.plt.XLabel("Error (MSE)", fontSize: 20);
        HalfLifeComparisonPlot.plt.Ticks(fontSize: 18);

        //spread of the data, used for padding; a zero spread is replaced by a small constant
        double largestError = errors.Max();
        double smallestError = errors.Min();
        double errorSpread = largestError - smallestError;
        if (errorSpread == 0)
        {
            errorSpread = 0.01;
        }

        double longestHalfLife = halfLives.Max();
        double shortestHalfLife = halfLives.Min();
        double halfLifeSpread = longestHalfLife - shortestHalfLife;
        if (halfLifeSpread == 0)
        {
            halfLifeSpread = 0.01;
        }

        var siblingScatter = HalfLifeComparisonPlot.plt.PlotScatter(errors, halfLives, lineWidth: 0, label: sourceName + " peptides", color: Color.SteelBlue);

        //the selected peptide is drawn again on top of the scatter so that it can be told apart
        double selectedHalfLife = HalfLifeOf(selected.Kbi);
        var selectedPoint = HalfLifeComparisonPlot.plt.PlotPoint(selected.Error, selectedHalfLife, color: Color.Black);

        //error bars for the scatter, then for the selected peptide
        HalfLifeComparisonPlot.plt.PlotErrorBars(errors, halfLives, null, null, positiveErrors, negativeErrors, siblingScatter.color);
        HalfLifeComparisonPlot.plt.PlotErrorBars(
            new double[] { selected.Error },
            new double[] { selectedHalfLife },
            null,
            null,
            new double[] { HalfLifeOf(selected.LowKbi) - selectedHalfLife },
            new double[] { selectedHalfLife - HalfLifeOf(selected.HighKbi) },
            color: selectedPoint.color);

        //widen the axis limits with 20% padding
        axisMinError = Math.Min(axisMinError, smallestError - errorSpread * 0.2);
        axisMaxError = Math.Max(axisMaxError, largestError + errorSpread * 0.2);
        axisMinHalfLife = Math.Min(axisMinHalfLife, shortestHalfLife - negativeErrors.Max() - halfLifeSpread * 0.2);
        axisMaxHalfLife = Math.Max(axisMaxHalfLife, longestHalfLife + positiveErrors.Max() + halfLifeSpread * 0.2);
        double verticalPadding = (axisMaxHalfLife - axisMinHalfLife) * 0.2;
        HalfLifeComparisonPlot.plt.Axis(axisMinError, axisMaxError, axisMinHalfLife - verticalPadding, axisMaxHalfLife + verticalPadding);
        HalfLifeComparisonPlot.plt.Axis();

        //look up the analyzed protein (or proteoform) this peptide belongs to
        PeptideTurnoverObject owner;
        if (DisplayProteinInSpecificTable)
        {
            owner = AnalyzedProteins.FirstOrDefault(candidate => candidate.Protein.Equals(groupName) && candidate.FileName.Equals(sourcePath));
        }
        else
        {
            owner = AnalyzedProteoforms.FirstOrDefault(candidate => candidate.Proteoform.Equals(groupName) && candidate.FileName.Equals(sourcePath));
        }

        if (owner == null)
        {
            //nothing more is drawn (and nothing is rendered) once the lookup fails
            MessageBox.Show("Unable to find the protein for this peptide. There may be an issue with the loaded file.");
            return;
        }

        //the best fit
        if (PlotBestFitCheckBox.IsChecked.Value)
        {
            //peptide level
            PlotFit(PoolParameterDictionary[sourcePath], sourceName + " Fit (" + selectedHalfLife.ToString("F1") + " d)", selected.Kbi);

            //protein level
            double ownerHalfLife = HalfLifeOf(owner.Kbi);
            HalfLifeComparisonPlot.plt.PlotHLine(ownerHalfLife, label: sourceName + " Half-life (" + ownerHalfLife.ToString("F1") + ")", color: Color.OrangeRed);
        }

        //the confidence intervals; the low rate constant gives the upper half-life bound and vice versa
        if (PlotCICheckBox.IsChecked.Value)
        {
            //peptide level
            var poolParameters = PoolParameterDictionary[sourcePath];
            PlotFit(poolParameters, sourceName + " Upper CI (" + HalfLifeOf(selected.LowKbi).ToString("F1") + " d)", selected.LowKbi);
            PlotFit(poolParameters, sourceName + " Lower CI (" + HalfLifeOf(selected.HighKbi).ToString("F1") + " d)", selected.HighKbi);

            //protein level
            double upperBound = HalfLifeOf(owner.LowKbi);
            double lowerBound = HalfLifeOf(owner.HighKbi);
            HalfLifeComparisonPlot.plt.PlotHLine(upperBound, label: sourceName + " Upper CI (" + upperBound.ToString("F1") + ")", color: Color.Green);
            HalfLifeComparisonPlot.plt.PlotHLine(lowerBound, label: sourceName + " Lower CI (" + lowerBound.ToString("F1") + ")", color: Color.Red);
        }
    }

    //optionally add the free amino acid curve of every displayed file
    if (PlotAminoAcidPoolCheckBox.IsChecked.Value)
    {
        foreach (string displayedFile in FilesToDisplayObservableCollection)
        {
            PlotFit(PoolParameterDictionary[displayedFile], Path.GetFileNameWithoutExtension(displayedFile) + " Free Amino Acids");
        }
    }

    //legend of the half-life plot follows the check box
    if (DisplayLegendCheckBox.IsChecked.Value)
    {
        HalfLifeComparisonPlot.plt.Legend();
    }
    else
    {
        HalfLifeComparisonPlot.plt.Legend(false);
    }

    HalfLifeComparisonPlot.Render();
}
/// <summary>
/// Compares, for every pair of files, the proteins that are present in both files and writes one
/// tab-separated report per pair into a "StatisticalComparisons" folder next to the first file.
/// Each row holds the protein, the log2 ratio of the two half-lives, the negative log10 p-value of a
/// Student's t-test on the normalized relative fractions, and the two half-lives.
/// </summary>
/// <param name="filenames">The files to compare; nothing is done when fewer than two are given.</param>
/// <param name="allProteins">Proteins of all files; they are matched to a file through <c>FileName</c>.</param>
/// <param name="poolParameterDictionary">Pool parameters of each file, keyed by the file name.</param>
public static void CompareProteinsAcrossFiles(List<string> filenames, List<PeptideTurnoverObject> allProteins, Dictionary<string, PoolParameters> poolParameterDictionary)
{
    if (filenames.Count < 2)
    {
        return;
    }

    //Shifts the observed relative fractions of a protein to one common timepoint:
    //observed ratio - ratio predicted at the measured timepoint + ratio predicted at the common timepoint,
    //where both predictions use the given (averaged) kbi.
    double[] NormalizeRatios(PeptideTurnoverObject protein, PoolParameters poolParams, double kbi, double commonTimepoint)
    {
        //a timepoint array in which every entry is the common timepoint
        double[] commonTimepoints = Enumerable.Repeat(commonTimepoint, protein.Timepoints.Length).ToArray();

        double[] expectedOriginalRatios = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(poolParams.Kst, poolParams.Kbt, poolParams.Kao, kbi, protein.Timepoints);
        double[] expectedUpdatedRatios = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(poolParams.Kst, poolParams.Kbt, poolParams.Kao, kbi, commonTimepoints);

        double[] normalizedRatios = new double[expectedOriginalRatios.Length];
        for (int index = 0; index < protein.RelativeFractions.Length; index++)
        {
            normalizedRatios[index] = protein.RelativeFractions[index] - expectedOriginalRatios[index] + expectedUpdatedRatios[index];
        }
        return normalizedRatios;
    }

    string outputDirectory = Path.GetDirectoryName(filenames.First());
    Directory.CreateDirectory(Path.Combine(outputDirectory, "StatisticalComparisons"));

    //filter and sort the proteins of every file once instead of once per file pair
    List<List<PeptideTurnoverObject>> sortedProteinsPerFile = filenames
        .Select(file => allProteins.Where(x => file.Equals(x.FileName)).OrderBy(x => x.Protein).ToList())
        .ToList();

    for (int i = 0; i < filenames.Count; i++)
    {
        string fileOne = filenames[i];
        PoolParameters paramsOne = poolParameterDictionary[fileOne];
        List<PeptideTurnoverObject> proteinsForFileOne = sortedProteinsPerFile[i];

        for (int j = i + 1; j < filenames.Count; j++)
        {
            string fileTwo = filenames[j];
            PoolParameters paramsTwo = poolParameterDictionary[fileTwo];
            List<PeptideTurnoverObject> proteinsForFileTwo = sortedProteinsPerFile[j];

            //header (the "Fold Change" column holds the log2 ratio of the half-lives)
            List<string> linesToWrite = new List<string>
            {
                "Protein\tFold Change\tNeg. log(p-Value)\tHalf-life " + fileOne + "\tHalf-life " + fileTwo
            };

            //both lists are sorted by protein, so walk them in parallel to find the overlap
            int a = 0;
            int b = 0;
            while (a < proteinsForFileOne.Count && b < proteinsForFileTwo.Count)
            {
                PeptideTurnoverObject proteinOne = proteinsForFileOne[a];
                PeptideTurnoverObject proteinTwo = proteinsForFileTwo[b];
                int comparison = proteinOne.Protein.CompareTo(proteinTwo.Protein);

                if (comparison < 0)
                {
                    a++;
                    continue;
                }
                if (comparison > 0)
                {
                    b++;
                    continue;
                }

                //do the comparison (t-test of normalized ratios for all timepoints)
                double averageKbi = (proteinOne.Kbi + proteinTwo.Kbi) / 2;
                double normalizedHalfLife = Math.Log(2) / averageKbi; //this is the day we're going to normalize all of the relative fractions to

                Sample sampleOne = new Sample(NormalizeRatios(proteinOne, paramsOne, averageKbi, normalizedHalfLife));
                Sample sampleTwo = new Sample(NormalizeRatios(proteinTwo, paramsTwo, averageKbi, normalizedHalfLife));

                double halfLifeOne = Math.Log(2) / proteinOne.Kbi;
                double halfLifeTwo = Math.Log(2) / proteinTwo.Kbi;
                double log2FoldChange = Math.Log2(halfLifeTwo) - Math.Log2(halfLifeOne);

                //The same test in CompareProteoformsWithinFiles is documented to sometimes crash if the
                //stdev is zero. Report "NA" for such a protein instead of aborting the whole export.
                string negLogPValue;
                try
                {
                    TestResult result = Sample.StudentTTest(sampleOne, sampleTwo);
                    negLogPValue = (-1 * Math.Log10(result.Probability)).ToString();
                }
                catch (Exception)
                {
                    negLogPValue = "NA";
                }

                linesToWrite.Add(string.Join("\t",
                    proteinOne.Protein,
                    log2FoldChange.ToString(),
                    negLogPValue,
                    halfLifeOne.ToString(),
                    halfLifeTwo.ToString()));

                a++;
                b++;
            }

            string reportName = "Comparison_" + Path.GetFileNameWithoutExtension(fileOne) + "vs" + Path.GetFileNameWithoutExtension(fileTwo) + ".tsv";
            File.WriteAllLines(Path.Combine(outputDirectory, "StatisticalComparisons", reportName), linesToWrite);
        }
    }
}
/// <summary>
/// For every file, compares the proteoforms of the same protein that differ at the same localized site
/// and writes the result to "&lt;file&gt;_Results/&lt;file&gt;_ProteoformAnalysis.tsv" next to the file.
/// Each row holds the two proteoforms, their half-lives, the log2 ratio of the half-lives and the
/// negative log p-value of a Student's t-test on the normalized relative fractions ("NA" if the test fails).
/// </summary>
/// <param name="filenames">The files to analyze.</param>
/// <param name="allProteins">Proteoforms of all files; they are matched to a file through <c>FileName</c>.</param>
/// <param name="poolParameterDictionary">Pool parameters of each file, keyed by the file name.</param>
public static void CompareProteoformsWithinFiles(List<string> filenames, List<PeptideTurnoverObject> allProteins, Dictionary<string, PoolParameters> poolParameterDictionary)
{
    //Shifts the observed relative fractions of a proteoform to one common timepoint:
    //observed ratio - ratio predicted at the measured timepoint + ratio predicted at the common timepoint,
    //where both predictions use the given (averaged) kbi.
    double[] NormalizeRatios(PeptideTurnoverObject proteoform, PoolParameters poolParams, double kbi, double commonTimepoint)
    {
        //a timepoint array in which every entry is the common timepoint
        double[] commonTimepoints = Enumerable.Repeat(commonTimepoint, proteoform.Timepoints.Length).ToArray();

        double[] expectedOriginalRatios = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(poolParams.Kst, poolParams.Kbt, poolParams.Kao, kbi, proteoform.Timepoints);
        double[] expectedUpdatedRatios = NonLinearRegression.PredictRelativeFractionUsingThreeCompartmentModel(poolParams.Kst, poolParams.Kbt, poolParams.Kao, kbi, commonTimepoints);

        double[] normalizedRatios = new double[expectedOriginalRatios.Length];
        for (int index = 0; index < proteoform.RelativeFractions.Length; index++)
        {
            normalizedRatios[index] = proteoform.RelativeFractions[index] - expectedOriginalRatios[index] + expectedUpdatedRatios[index];
        }
        return normalizedRatios;
    }

    for (int fileIndex = 0; fileIndex < filenames.Count; fileIndex++)
    {
        string filename = filenames[fileIndex];
        PoolParameters poolParams = poolParameterDictionary[filename];
        List<PeptideTurnoverObject> proteinsForThisFile = allProteins.Where(x => filename.Equals(x.FileName)).OrderBy(x => x.Proteoform).ToList();

        List<string> linesToWrite = new List<string>
        {
            "Proteoform A\tProteoform B\tHalf-life A\tHalf-life B\tLog2(Fold Change)\tNeg. log(p-Value)"
        };

        //walk the sorted list one proteoform family at a time;
        //a family is a run of consecutive entries that share the text before the first '_'
        int familyStart = 0;
        while (familyStart < proteinsForThisFile.Count)
        {
            string currentProtein = proteinsForThisFile[familyStart].Proteoform.Split('_')[0];

            //find the first index that no longer belongs to this family
            int familyEnd = familyStart + 1;
            while (familyEnd < proteinsForThisFile.Count
                && currentProtein.Equals(proteinsForThisFile[familyEnd].Proteoform.Split('_')[0]))
            {
                familyEnd++;
            }

            for (int i = familyStart; i < familyEnd; i++)
            {
                PeptideTurnoverObject proteinOne = proteinsForThisFile[i];

                //see if it has a localized mod (or localized unmodified site), otherwise skip
                string[] proteoformOne = proteinOne.Proteoform.Split('@');
                if (proteoformOne.Length != 2)
                {
                    continue;
                }

                for (int j = i + 1; j < familyEnd; j++)
                {
                    PeptideTurnoverObject proteinTwo = proteinsForThisFile[j];
                    string[] proteoformTwo = proteinTwo.Proteoform.Split('@');

                    //only compare pairs that refer to the same modification site
                    if (proteoformTwo.Length != 2 || !proteoformOne[1].Equals(proteoformTwo[1]))
                    {
                        continue;
                    }

                    //do the comparison (t-test of normalized ratios for all timepoints)
                    double averageKbi = (proteinOne.Kbi + proteinTwo.Kbi) / 2;
                    double normalizedHalfLife = Math.Log(2) / averageKbi; //this is the day we're going to normalize all of the relative fractions to

                    Sample sampleOne = new Sample(NormalizeRatios(proteinOne, poolParams, averageKbi, normalizedHalfLife));
                    Sample sampleTwo = new Sample(NormalizeRatios(proteinTwo, poolParams, averageKbi, normalizedHalfLife));

                    //The fold change is always derived from the half-lives, so the column means the same
                    //thing in regular rows and in "NA" rows.
                    double halfLifeOne = Math.Log(2) / proteinOne.Kbi;
                    double halfLifeTwo = Math.Log(2) / proteinTwo.Kbi;
                    double log2FoldChange = Math.Log2(halfLifeTwo) - Math.Log2(halfLifeOne);

                    //sometimes crashes if stdev is zero; guard both the test and the p-value and report "NA"
                    //NOTE(review): this uses the natural log while CompareProteinsAcrossFiles reports -log10(p)
                    //under the same column header - confirm which one is intended before changing it.
                    string negLogPValue;
                    try
                    {
                        TestResult result = Sample.StudentTTest(sampleOne, sampleTwo);
                        negLogPValue = (-1 * Math.Log(result.Probability)).ToString();
                    }
                    catch (Exception)
                    {
                        negLogPValue = "NA";
                    }

                    linesToWrite.Add(string.Join("\t",
                        proteinOne.Proteoform,
                        proteinTwo.Proteoform,
                        halfLifeOne.ToString(),
                        halfLifeTwo.ToString(),
                        log2FoldChange.ToString(),
                        negLogPValue));
                }
            }

            familyStart = familyEnd;
        }

        //make sure the results folder exists before writing into it (no effect if it already does)
        string fileLabel = Path.GetFileNameWithoutExtension(filename);
        string resultsDirectory = Path.Combine(Path.GetDirectoryName(filename), fileLabel + "_Results");
        Directory.CreateDirectory(resultsDirectory);
        File.WriteAllLines(Path.Combine(resultsDirectory, fileLabel + "_ProteoformAnalysis.tsv"), linesToWrite);
    }
}