// Checks that StatisticalTests.GetNValueUsingTTest yields the expected statistics for two
// four-replicate conditions. The returned list is read as:
//   [0] N value, [1] p-value, [2] log fold change.
public void TestGetNValueUsingTTest()
{
    // Arrange: intensity values of one protein under each condition.
    List<double> firstConditionIntensities = new List<double>() { 25.535, 25.482, 25.308, 25.373 };
    List<double> secondConditionIntensities = new List<double>() { 25.370, 25.368, 25.359, 25.251 };
    double s0 = 0.3;
    double tolerance = 0.001;

    // Act
    // NOTE(review): the meaning of the trailing `false` flag is not visible here — confirm against StatisticalTests.
    StatisticalTests tester = new StatisticalTests();
    List<double> statistics = tester.GetNValueUsingTTest(firstConditionIntensities, secondConditionIntensities, s0, false);

    // Assert
    Assert.AreEqual(-9.276, statistics[0], tolerance);
    Assert.AreEqual(0.137, statistics[1], tolerance);
    Assert.AreEqual(-0.087, statistics[2], tolerance);
}
// Checks that StatisticalTests.calculateNvaluethreshold returns 74 when the observed N values
// are 100..199, the permuted N values are 0..99 and the third argument (FDR) is 0.25.
public void TestCalculateNvaluethreshold()
{
    // Arrange: both lists are filled in one pass; each list still receives its values in ascending order.
    List<double> observed = new List<double>();
    List<double> permuted = new List<double>();
    for (int value = 0; value < 100; value++)
    {
        observed.Add(value + 100);
        permuted.Add(value);
    }

    double targetFdr = 0.25;
    double expectedThreshold = 74;

    // Act
    StatisticalTests tester = new StatisticalTests();
    double actualThreshold = tester.calculateNvaluethreshold(observed, permuted, targetFdr);

    // Assert
    Assert.AreEqual(expectedThreshold, actualThreshold, 0.001);
}
// Checks that StatisticalTests.CalculateProteinIntensityValuesStandardDeviation returns 2.269
// (within 0.001) for four intensity values and their supplied mean of 25.05.
public void TestCalculateProteinIntensityValuesStandardDeviation()
{
    // Arrange
    List<double> intensities = new List<double>() { 24.75, 25.15, 28.35, 21.95 };
    double mean = 25.05;
    double expected = 2.269;

    // Act
    StatisticalTests tester = new StatisticalTests();
    double actual = tester.CalculateProteinIntensityValuesStandardDeviation(intensities, mean);

    // Assert
    Assert.AreEqual(expected, actual, 0.001);
}
// Checks that StatisticalTests.GenerateAllCombinationsOfTwoIndices, given a four-element list,
// returns the six index pairs (0,1), (0,2), (0,3), (1,2), (1,3), (2,3) in exactly that order.
public void TestStatisticalTestsGenerateAllCombinationsOfTwoConditions()
{
    // Arrange
    List<double> input = new List<double>() { 1, 2, 3, 4 };
    List<List<int>> expectedPairs = new List<List<int>>()
    {
        new List<int>() { 0, 1 },
        new List<int>() { 0, 2 },
        new List<int>() { 0, 3 },
        new List<int>() { 1, 2 },
        new List<int>() { 1, 3 },
        new List<int>() { 2, 3 },
    };

    // Act
    StatisticalTests tester = new StatisticalTests();
    List<List<int>> actualPairs = tester.GenerateAllCombinationsOfTwoIndices(input);

    // Assert
    CollectionAssert.AreEqual(expectedPairs, actualPairs);
}
// Checks that StatisticalTests.GetNValueUsingPermutationtests produces the twelve expected
// permuted N values (element-wise, within 0.001) for two four-replicate conditions.
public void TestGetNValueUsingPermutationTests()
{
    // Arrange
    List<double> expectedPermutedNValues = new List<double>()
    {
        -13.021, -12.269, -8.298, -12.119, -8.270, -8.161,
        -17.649, -19.749, -24.623, -20.303, -23.756, -20.594,
    };
    List<double> proteinFirstConditionIntensityValues = new List<double>() { 25.535, 25.482, 25.308, 25.373 };
    List<double> proteinSecondConditionIntensityValues = new List<double>() { 25.370, 25.368, 25.359, 25.251 };
    double sOValue = 0.3;

    // Act
    StatisticalTests statisticalTests = new StatisticalTests();
    List<double> outputPermutedNValues = statisticalTests.GetNValueUsingPermutationtests(
        proteinFirstConditionIntensityValues, proteinSecondConditionIntensityValues, sOValue);

    // Assert: the value under test goes first and the expectation inside the constraint, so a
    // failure message labels "Expected" and "But was" correctly. The tolerance comparison is
    // symmetric, so the pass/fail outcome is the same as before.
    Assert.That(outputPermutedNValues, Is.EqualTo(expectedPermutedNValues).Within(0.001));
}
// Entry point. For every pair of experimental conditions it sweeps three parameters
// (meanFraction, sOValue and the maximum number of invalid intensity values k), and for each
// combination:
//   1. parses the quantified-proteins file,
//   2. imputes missing intensity values,
//   3. computes observed and permuted N values for every protein (in parallel),
//   4. derives an N-value threshold with 0.05 passed as the FDR argument, and
//   5. writes the significant proteins to a CSV whenever the count of proteins at or above
//      the threshold exceeds the best count seen so far for that condition pair.
//
// args is not used; all input and output paths are hard-coded below.
public static void Main(string[] args)
{
    RunProteinSignificanceClassifier proteinBasedSignificance = new RunProteinSignificanceClassifier();
    int numberSamples = 0;

    // Parse the ExperimentalDesign file to get info of samples and the conditions they belong to.
    // numberSamples is filled in by the parser through the ref argument.
    Dictionary<string, List<string>> samplefileConditionRelation = proteinBasedSignificance.ExpermientalDesignParser(
        "C:/Users/Anay/Desktop/UW Madison/Smith Lab/Spectra Data/ExperimentalDesign.tsv", ref numberSamples);

    // Get all conditions and pair them up for significance classification.
    List<string> allConditions = new List<string>(samplefileConditionRelation.Keys);
    List<List<string>> allTwoConditionCombinations = proteinBasedSignificance.GenerateAllCombinationsOfTwoConditions(allConditions);

    foreach (List<string> conditionPair in allTwoConditionCombinations)
    {
        string firstCondition = conditionPair[0];
        string secondCondition = conditionPair[1];

        // The sample lists of the two conditions do not change during the sweep, so they are
        // looked up once per pair instead of once per protein inside the parallel loop.
        List<string> firstConditionAssociatedSamples = samplefileConditionRelation.GetValueOrDefault(firstCondition);
        List<string> secondConditionAssociatedSamples = samplefileConditionRelation.GetValueOrDefault(secondCondition);

        double sOValue = 0.1;
        double meanFraction = 0.1;
        int maxSignificantCount = 0;

        // NOTE(review): both sweep variables advance by repeated floating-point addition, so the
        // final pass of each loop may run at a value just below 1 (e.g. 0.9999999999999999)
        // rather than stopping at 0.9 / 0.7. Left unchanged here — confirm whether that extra
        // pass is intended before switching to integer-driven steps.
        while (meanFraction < 1)
        {
            while (sOValue < 1)
            {
                for (int k = 0; k < numberSamples; k++)
                {
                    proteinBasedSignificance = new RunProteinSignificanceClassifier();

                    // Variables populated by parsing the QuantifiedProteins file.
                    List<ProteinRowInfo> allProteinInfo = new List<ProteinRowInfo>();
                    List<string> samplesFileNames = new List<string>();
                    int maxInvalidIntensityValues = k;
                    proteinBasedSignificance.ProteinDataParser(allProteinInfo, maxInvalidIntensityValues, samplesFileNames,
                        "C:/Users/Anay/Desktop/UW Madison/Smith Lab/Spectra Data/FlashLFQ_2020-04-26-17-39-35/QuantifiedProteins.tsv");

                    // Impute missing intensity values for each protein.
                    ImputationProcess imputationProcess = new ImputationProcess();
                    imputationProcess.RunImputationProcess(allProteinInfo, samplesFileNames, meanFraction);

                    // Results of the T-tests and permutation tests.
                    List<double> observedNValues = new List<double>(); // observed N values
                    List<double> permutedNValues = new List<double>(); // permuted N values
                    StatisticalTests statisticalTests = new StatisticalTests();

                    // Maps each protein ID to its observed N value, P value and log fold change (in that order).
                    Dictionary<string, List<double>> allProteinObservedStatistics = new Dictionary<string, List<double>>();

                    // Process every protein exactly once, in parallel. Parallel.For partitions the
                    // index range itself; the previous code striped the work over
                    // ThreadPool.GetMaxThreads() slots, which schedules one work item per
                    // *possible* pool thread and leaves most of them with nothing to do.
                    // NOTE(review): statisticalTests is shared by all iterations, as before —
                    // this assumes its methods are safe to call concurrently.
                    Parallel.For(0, allProteinInfo.Count, i =>
                    {
                        ProteinRowInfo proteinRowInfo = allProteinInfo[i];
                        Dictionary<string, double> samplesintensityData = proteinRowInfo.SamplesIntensityData;

                        List<double> proteinFirstConditionIntensityValues = new List<double>();
                        List<double> proteinSecondConditionIntensityValues = new List<double>();

                        // Collect the protein's intensity values for the chosen pair of conditions.
                        foreach (string sampleFileName in samplesFileNames)
                        {
                            if (firstConditionAssociatedSamples.Contains(sampleFileName))
                            {
                                proteinFirstConditionIntensityValues.Add(samplesintensityData[sampleFileName]);
                            }

                            if (secondConditionAssociatedSamples.Contains(sampleFileName))
                            {
                                proteinSecondConditionIntensityValues.Add(samplesintensityData[sampleFileName]);
                            }
                        }

                        // Observed statistics for the chosen pair of conditions (T-test).
                        List<double> proteinStatistics = statisticalTests.GetNValueUsingTTest(
                            proteinFirstConditionIntensityValues, proteinSecondConditionIntensityValues, sOValue, false);

                        // Permuted N values for the chosen pair of conditions.
                        List<double> proteinPermutedNValues = statisticalTests.GetNValueUsingPermutationtests(
                            proteinFirstConditionIntensityValues, proteinSecondConditionIntensityValues, sOValue);

                        // The result collections are not thread-safe, so all three are updated under one lock.
                        lock (allProteinObservedStatistics)
                        {
                            allProteinObservedStatistics.Add(proteinRowInfo.ProteinID,
                                new List<double>() { proteinStatistics[0], proteinStatistics[1], proteinStatistics[2] });
                            observedNValues.Add(proteinStatistics[0]);
                            permutedNValues.AddRange(proteinPermutedNValues);
                        }
                    });

                    // Make the permuted N values list the same size as the observed N values list.
                    proteinBasedSignificance.ResizePermutedArray(permutedNValues, permutedNValues.Count - observedNValues.Count);

                    // Threshold at which the significant proteins are filtered out.
                    double nValueThreshold = statisticalTests.calculateNvaluethreshold(observedNValues, permutedNValues, 0.05);

                    // Number of significant proteins detected with this parameter combination.
                    int newSignificantCount = observedNValues.Count(x => x >= nValueThreshold);

                    if (newSignificantCount > maxSignificantCount)
                    {
                        maxSignificantCount = newSignificantCount;
                        proteinBasedSignificance.PrintSignificantProtein(allProteinInfo, nValueThreshold, allProteinObservedStatistics,
                            "C:/Users/Anay/Desktop/UW Madison/Smith Lab/Project 1/ConsoleApp1/ProteinBasedSignificanceModified.csv");
                        Console.WriteLine("Sig Count - " + maxSignificantCount + "meanFraction - " + meanFraction + "sOValue - " + sOValue + "k - " + k);
                    }
                }

                sOValue = sOValue + 0.1;
            }

            // Restart the sOValue sweep for the next meanFraction.
            sOValue = 0.1;
            meanFraction = meanFraction + 0.3;
        }
    }
}
// Entry point (sequential variant). For every pair of experimental conditions it sweeps
// meanFraction, sOValue and the maximum number of invalid intensity values k (1..8); for each
// combination it parses the proteins file, imputes missing intensities, computes actual and
// permuted N values per protein, derives an N-value threshold with 0.05 passed as the FDR
// argument, and writes the significant proteins to a CSV whenever the count of proteins at or
// above the threshold exceeds the best count seen so far for that pair.
//
// args is not used; all input and output paths are hard-coded below.
public static void Main(string[] args)
{
    Program proteinBasedSignificance = new Program();

    // Sample-file / condition relation read from the experimental design file.
    Dictionary<string, List<string>> samplefileConditionRelation = proteinBasedSignificance.ExpermientalDesignParser(
        "C:/Users/Anay/Desktop/UW Madison/Smith Lab/Spectra Data/ExperimentalDesign.tsv");

    // Every unordered pair of conditions is classified separately.
    List<string> conditionNames = new List<string>(samplefileConditionRelation.Keys);
    List<List<string>> conditionPairs = proteinBasedSignificance.GenerateAllCombinationsOfTwoConditions(conditionNames);

    foreach (List<string> pair in conditionPairs)
    {
        string firstCondition = pair[0];
        string secondCondition = pair[1];

        // Sample lists for the two conditions; the same list instances are used for every protein.
        List<string> firstConditionSamples = samplefileConditionRelation.GetValueOrDefault(firstCondition);
        List<string> secondConditionSamples = samplefileConditionRelation.GetValueOrDefault(secondCondition);

        int maxSignificantCount = 0;

        for (double meanFraction = 0.1; meanFraction < 1; meanFraction = meanFraction + 0.3)
        {
            // sOValue restarts at 0.1 for every meanFraction.
            for (double sOValue = 0.1; sOValue < 1; sOValue = sOValue + 0.1)
            {
                for (int k = 1; k < 9; k++)
                {
                    proteinBasedSignificance = new Program();

                    // Filled in by the parser; k is the maximum number of invalid intensity values allowed.
                    List<ProteinRowInfo> proteins = new List<ProteinRowInfo>();
                    List<string> sampleFileNames = new List<string>();
                    proteinBasedSignificance.ProteinDataParser(proteins, k, sampleFileNames,
                        "C:/Users/Anay/Desktop/UW Madison/Smith Lab/Spectra Data/FlashLFQ_2020-04-26-17-39-35/QuantifiedProteins.tsv");

                    // Impute missing intensity values for each protein.
                    ImputationProcess imputation = new ImputationProcess();
                    imputation.RunImputationProcess(proteins, sampleFileNames, meanFraction);

                    // Output lists appended to by the T-test and permutation-test calls below.
                    List<double> actualNValues = new List<double>();
                    List<double> actualPValues = new List<double>();
                    List<double> actualLogFoldChange = new List<double>();
                    List<double> permutedNValues = new List<double>();
                    StatisticalTests statisticalTests = new StatisticalTests();

                    foreach (ProteinRowInfo protein in proteins)
                    {
                        Dictionary<string, double> intensityBySample = protein.SamplesIntensityData;
                        List<double> firstConditionIntensities = new List<double>();
                        List<double> secondConditionIntensities = new List<double>();

                        // Split the protein's intensities by the condition each sample belongs to.
                        foreach (string sampleFileName in sampleFileNames)
                        {
                            if (firstConditionSamples.Contains(sampleFileName))
                            {
                                firstConditionIntensities.Add(intensityBySample[sampleFileName]);
                            }

                            if (secondConditionSamples.Contains(sampleFileName))
                            {
                                secondConditionIntensities.Add(intensityBySample[sampleFileName]);
                            }
                        }

                        // Actual (real) statistics for this protein.
                        statisticalTests.GetNValueUsingTTest(firstConditionIntensities, secondConditionIntensities,
                            actualNValues, actualPValues, actualLogFoldChange, sOValue);

                        // Permuted (fake) N values for this protein.
                        statisticalTests.GetNValueUsingPermutationtests(firstConditionIntensities, secondConditionIntensities,
                            permutedNValues, sOValue);
                    }

                    // Make the permuted list the same size as the actual list.
                    proteinBasedSignificance.ResizePermutedArray(permutedNValues, permutedNValues.Count - actualNValues.Count);

                    // The threshold calculation receives a copy so that actualNValues itself is not handed over.
                    List<double> actualNValuesCopy = new List<double>(actualNValues);
                    double nValueThreshold = statisticalTests.calculateNvaluethreshold(actualNValuesCopy, permutedNValues, 0.05);

                    // Keep the output of the best parameter combination seen so far for this pair.
                    int significantCount = actualNValues.Count(x => x >= nValueThreshold);
                    if (significantCount > maxSignificantCount)
                    {
                        maxSignificantCount = significantCount;
                        proteinBasedSignificance.PrintSignificantProtein(proteins, actualNValues, nValueThreshold, actualPValues, actualLogFoldChange,
                            "C:/Users/Anay/Desktop/UW Madison/Smith Lab/Project 1/ConsoleApp1/ProteinBaseedSignificance.csv");
                    }
                }
            }
        }
    }
}