/// <summary>
/// Reads comma-separated rows from <paramref name="textReader"/>, groups them by protein, performs the
/// group comparison linear fit for each protein and, when expected values are supplied, checks the
/// first fit result against them.
/// </summary>
/// <param name="textReader">Source of the comma-separated input rows. Not disposed by this method.</param>
/// <param name="includeInteraction">Passed through to <c>DesignMatrix.GetDesignMatrix</c>.</param>
/// <param name="expectedResults">
/// Expected fit results keyed by protein, or null to only verify that the fit can be performed.
/// When non-null, every protein in the input must have an entry; an entry whose value is null
/// skips the value comparison for that protein.
/// </param>
private void TestGroupComparison(TextReader textReader, bool includeInteraction,
    IDictionary<string, LinearFitResult> expectedResults)
{
    var csvReader = new DsvFileReader(textReader, ',');
    var dataRowsByProtein = ToDataRows(ReadCsvFile(csvReader));
    Assert.AreNotEqual(0, dataRowsByProtein.Count);
    var cache = new QrFactorizationCache();
    foreach (var entry in dataRowsByProtein)
    {
        // Every assertion below carries the protein key so that a failure inside this loop
        // identifies which protein produced the mismatch.
        string message = entry.Key;
        FoldChangeDataSet dataSet = FoldChangeCalculator.MakeDataSet(entry.Value);
        var designMatrix = DesignMatrix.GetDesignMatrix(dataSet, includeInteraction);
        var foldChange = designMatrix.PerformLinearFit(cache).First();
        LinearFitResult expectedResult = null;
        if (null != expectedResults)
        {
            Assert.IsTrue(expectedResults.TryGetValue(entry.Key, out expectedResult), message);
        }
        if (null != expectedResult)
        {
            Assert.AreEqual(expectedResult.EstimatedValue, foldChange.EstimatedValue, 1E-6, message);
            Assert.AreEqual(expectedResult.DegreesOfFreedom, foldChange.DegreesOfFreedom, message);
            Assert.AreEqual(expectedResult.StandardError, foldChange.StandardError, 1E-6, message);
            Assert.AreEqual(expectedResult.TValue, foldChange.TValue, 1E-6, message);
            Assert.AreEqual(expectedResult.PValue, foldChange.PValue, 1E-6, message);
        }
    }
}
/// <summary>
/// Computes the group comparison result for <paramref name="selector"/> with two linear fits:
/// the first fit (run quantification design matrix) produces one estimate per run, and the second
/// fit (group comparison design matrix without interaction) is performed on those per-run estimates.
/// </summary>
/// <param name="selector">Identifies which data rows to collect via <c>GetDataRows</c>.</param>
/// <param name="runAbundances">
/// Optional list that receives one <c>RunAbundance</c> per quantified run. A new list is used when
/// null is passed. Entries are appended before the final distinct-controls check, so the list
/// may have been modified even when this method returns null.
/// </param>
/// <returns>
/// The comparison result, or null when no data rows were found or when the quantified data set
/// contains fewer than two distinct <c>SubjectControls</c> values.
/// </returns>
private GroupComparisonResult CalculateFoldChangeUsingRegression(
    GroupComparisonSelector selector, List<RunAbundance> runAbundances)
{
    var details = new List<DataRowDetails>();
    GetDataRows(selector, details);
    if (details.Count == 0)
    {
        return null;
    }
    if (runAbundances == null)
    {
        runAbundances = new List<RunAbundance>();
    }

    // Rows whose log2 abundance is NaN or infinite are excluded from the fit.
    var fitRows = details
        .Where(detail => !(double.IsNaN(detail.GetLog2Abundance())
                           || double.IsInfinity(detail.GetLog2Abundance())))
        .Select(detail => new FoldChangeCalculator.DataRow
        {
            Abundance = detail.GetLog2Abundance(),
            Control = detail.Control,
            Feature = detail.IdentityPath,
            Run = detail.ReplicateIndex,
            Subject = detail.BioReplicate,
        })
        .ToArray();

    // First pass: estimate one abundance per run.
    FoldChangeDataSet perRunDataSet = FoldChangeCalculator.MakeDataSet(fitRows);
    var replicateIndexByRun = FoldChangeCalculator.GetUniqueList(fitRows.Select(fitRow => fitRow.Run));
    var perRunMatrix = DesignMatrix.GetRunQuantificationDesignMatrix(perRunDataSet);
    var runFits = perRunMatrix.PerformLinearFit(_qrFactorizationCache);

    var subjectByRun = new List<int>();
    for (int runNumber = 0; runNumber < runFits.Count; runNumber++)
    {
        // The subject of a run is taken from the first data set row that belongs to that run.
        int rowOfRun = perRunDataSet.Runs.IndexOf(runNumber);
        subjectByRun.Add(perRunDataSet.Subjects[rowOfRun]);

        var replicateIndex = replicateIndexByRun[runNumber];
        var replicateInfo = _replicateIndexes.First(pair => pair.Key == replicateIndex).Value;
        runAbundances.Add(new RunAbundance
        {
            ReplicateIndex = replicateIndex,
            Control = replicateInfo.IsControl,
            BioReplicate = replicateInfo.BioReplicate,
            Log2Abundance = runFits[runNumber].EstimatedValue
        });
    }

    // Second pass: each run estimate becomes a single row with a constant second argument (0)
    // and its own run number as the third argument.
    var perRunEstimates = runFits.Select(fit => fit.EstimatedValue).ToArray();
    var quantified = new FoldChangeDataSet(
        perRunEstimates,
        Enumerable.Repeat(0, runFits.Count).ToArray(),
        Enumerable.Range(0, runFits.Count).ToArray(),
        subjectByRun,
        perRunDataSet.SubjectControls);
    int distinctControlValues = quantified.SubjectControls.Distinct().Count();
    if (distinctControlValues < 2)
    {
        return null;
    }

    var comparisonMatrix = DesignMatrix.GetDesignMatrix(quantified, false);
    var foldChangeFit = comparisonMatrix.PerformLinearFit(_qrFactorizationCache).First();
    return new GroupComparisonResult(selector, runFits.Count, foldChangeFit, runAbundances);
}
/// <summary>
/// For every protein in "quant.csv": quantifies each run with the run quantification design matrix,
/// performs the group comparison fit on the per-run estimates, and compares the resulting fold change
/// with the expected values read from "result_newtesting_v2.csv".
/// </summary>
[Timeout(36000000)] // These can take a long time in code coverage mode
public void TestGroupComparisonWithRunQuantification()
{
    // Both readers stay open for the whole test body and are disposed when it finishes,
    // whether it passes or an assertion throws.
    using (var quantReader = GetTextReader("quant.csv"))
    using (var expectedReader = GetTextReader("result_newtesting_v2.csv"))
    {
        var csvReader = new DsvFileReader(quantReader, ',');
        var dataRowsByProtein = ToDataRows(ReadCsvFile(csvReader));
        var expectedResultsByProtein = ReadCsvFile(new DsvFileReader(expectedReader, ','))
            .ToDictionary(row => row["Protein"]);
        var cache = new QrFactorizationCache();
        foreach (var entry in dataRowsByProtein)
        {
            FoldChangeDataSet dataSet = FoldChangeCalculator.MakeDataSet(entry.Value);
            var quantifiedRuns = DesignMatrix.GetRunQuantificationDesignMatrix(dataSet).PerformLinearFit(cache);

            // The subject of each run is taken from the first data set row that belongs to that run.
            var subjects = new List<int>();
            for (int run = 0; run < quantifiedRuns.Count; run++)
            {
                int iRow = dataSet.Runs.IndexOf(run);
                subjects.Add(dataSet.Subjects[iRow]);
            }

            var abundances = quantifiedRuns.Select(result => result.EstimatedValue).ToArray();
            var quantifiedDataSet = new FoldChangeDataSet(
                abundances,
                Enumerable.Repeat(0, quantifiedRuns.Count).ToArray(),
                Enumerable.Range(0, quantifiedRuns.Count).ToArray(),
                subjects,
                dataSet.SubjectControls);
            var foldChangeResult = DesignMatrix.GetDesignMatrix(quantifiedDataSet, false).PerformLinearFit(cache).First();

            string message = entry.Key;
            // Fail with the protein name instead of an anonymous KeyNotFoundException when the
            // expected-results file has no row for this protein.
            Assert.IsTrue(expectedResultsByProtein.ContainsKey(entry.Key),
                "No expected result for protein " + message);
            var expectedResult = expectedResultsByProtein[entry.Key];

            Assert.AreEqual(double.Parse(expectedResult["logFC"], CultureInfo.InvariantCulture),
                foldChangeResult.EstimatedValue, 1E-6, message);
            Assert.AreEqual(double.Parse(expectedResult["SE"], CultureInfo.InvariantCulture),
                foldChangeResult.StandardError, 1E-6, message);
            Assert.AreEqual(int.Parse(expectedResult["DF"], CultureInfo.InvariantCulture),
                foldChangeResult.DegreesOfFreedom, message);

            // The p-value and t-value are only compared when the estimated fold change is not
            // essentially zero (presumably they are not meaningful in that case -- TODO confirm).
            if (Math.Abs(foldChangeResult.EstimatedValue) > 1E-8)
            {
                Assert.AreEqual(double.Parse(expectedResult["pvalue"], CultureInfo.InvariantCulture),
                    foldChangeResult.PValue, 1E-6, message);
                Assert.AreEqual(double.Parse(expectedResult["Tvalue"], CultureInfo.InvariantCulture),
                    foldChangeResult.TValue, 1E-6, message);
            }
        }
    }
}
/// <summary>
/// Computes the group comparison result for <paramref name="selector"/> by first collapsing the
/// detail rows into per-replicate abundances with <paramref name="summarizationFunction"/> and then
/// performing a linear fit on those abundances (averaged per bio replicate when any row has one).
/// </summary>
/// <param name="selector">Identifies which data rows to collect via <c>GetDataRows</c>.</param>
/// <param name="runAbundances">
/// Optional list; when non-null the summarized per-replicate rows are appended to it. This happens
/// before the distinct-controls check, so the list may be modified even when null is returned.
/// </param>
/// <param name="summarizationFunction">Turns the detail rows into one abundance per replicate.</param>
/// <returns>
/// The comparison result, or null when there are no detail rows, no summarized rows, or fewer than
/// two distinct control values among the rows used for the fit.
/// </returns>
private GroupComparisonResult CalculateFoldChangeWithSummarization(GroupComparisonSelector selector,
    List<RunAbundance> runAbundances,
    Func<IList<DataRowDetails>, IList<RunAbundance>> summarizationFunction)
{
    var details = new List<DataRowDetails>();
    GetDataRows(selector, details);
    if (details.Count == 0)
    {
        return null;
    }

    IList<RunAbundance> perReplicate = summarizationFunction(details);
    if (perReplicate.Count == 0)
    {
        return null;
    }
    if (runAbundances != null)
    {
        runAbundances.AddRange(perReplicate);
    }

    IList<RunAbundance> rowsForFit = perReplicate;
    bool anyBioReplicate = perReplicate.Any(replicate => replicate.BioReplicate != null);
    if (anyBioReplicate)
    {
        // Collapse rows sharing the same (BioReplicate, Control) pair into a single row holding
        // the average log2 abundance; such rows no longer correspond to one replicate (-1).
        var averaged = new List<RunAbundance>();
        var byBioReplicate = perReplicate.ToLookup(
            replicate => new KeyValuePair<string, bool>(replicate.BioReplicate, replicate.Control));
        foreach (var group in byBioReplicate)
        {
            averaged.Add(new RunAbundance
            {
                BioReplicate = group.Key.Key,
                Control = group.Key.Value,
                ReplicateIndex = -1,
                Log2Abundance = group.Average(replicate => replicate.Log2Abundance),
            });
        }
        rowsForFit = averaged;
    }

    // Every row used for the fit gets a constant 0 as the second argument and its own position
    // in the list as both the third and the fourth argument.
    var quantified = new FoldChangeDataSet(
        rowsForFit.Select(fitRow => fitRow.Log2Abundance).ToArray(),
        Enumerable.Repeat(0, rowsForFit.Count).ToArray(),
        Enumerable.Range(0, rowsForFit.Count).ToArray(),
        Enumerable.Range(0, rowsForFit.Count).ToArray(),
        rowsForFit.Select(fitRow => fitRow.Control).ToArray());
    int distinctControlValues = quantified.SubjectControls.Distinct().Count();
    if (distinctControlValues < 2)
    {
        return null;
    }

    // Note that because the design matrix has only two columns, this fit is equivalent to a simple
    // linear regression of the log2 abundances against an x value of 0 for control rows and 1 for
    // all other rows.
    var fit = DesignMatrix.GetDesignMatrix(quantified, false)
        .PerformLinearFit(_qrFactorizationCache)
        .First();
    return new GroupComparisonResult(selector, perReplicate.Count, fit);
}