Beispiel #1
0
        /// <summary>
        /// Reads data rows from comma-separated text, performs the group comparison linear fit
        /// for every entry, and, when expected results are supplied, compares the first fit
        /// result of each entry against its expected value.
        /// </summary>
        /// <param name="textReader">Source of the comma-separated input. It is not disposed here; the caller owns it.</param>
        /// <param name="includeInteraction">Passed through to <c>DesignMatrix.GetDesignMatrix</c>.</param>
        /// <param name="expectedResults">
        /// Expected fit results keyed by the same key as the parsed data rows, or null to
        /// only run the fit without comparing values. When non-null, every entry must have a key in it.
        /// </param>
        private void TestGroupComparison(TextReader textReader, bool includeInteraction, IDictionary<string, LinearFitResult> expectedResults)
        {
            var csvReader = new DsvFileReader(textReader, ',');
            var dataRowsByProtein = ToDataRows(ReadCsvFile(csvReader));

            Assert.AreNotEqual(0, dataRowsByProtein.Count, "No data rows were read from the input");
            var cache = new QrFactorizationCache();

            foreach (var entry in dataRowsByProtein)
            {
                // Every assertion below carries the entry key so that a failure inside this
                // loop identifies which entry broke.
                string message = entry.Key;

                FoldChangeDataSet dataSet = FoldChangeCalculator.MakeDataSet(entry.Value);
                var designMatrix = DesignMatrix.GetDesignMatrix(dataSet, includeInteraction);
                // Only the first fit result is compared against the expected values.
                var foldChange = designMatrix.PerformLinearFit(cache).First();

                LinearFitResult expectedResult = null;
                if (null != expectedResults)
                {
                    Assert.IsTrue(expectedResults.TryGetValue(entry.Key, out expectedResult),
                        "Missing expected result for " + message);
                }
                if (null == expectedResult)
                {
                    // Nothing to compare against; the fit above was still exercised.
                    continue;
                }

                Assert.AreEqual(expectedResult.EstimatedValue, foldChange.EstimatedValue, 1E-6, message);
                Assert.AreEqual(expectedResult.DegreesOfFreedom, foldChange.DegreesOfFreedom, message);
                Assert.AreEqual(expectedResult.StandardError, foldChange.StandardError, 1E-6, message);
                Assert.AreEqual(expectedResult.TValue, foldChange.TValue, 1E-6, message);
                Assert.AreEqual(expectedResult.PValue, foldChange.PValue, 1E-6, message);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Calculates a fold change in two steps: first a linear fit over the individual data
        /// rows yields one abundance per run, then a second linear fit over those per-run
        /// abundances yields the group comparison result.
        /// </summary>
        /// <param name="selector">Selects the data rows to use; also stored in the returned result.</param>
        /// <param name="runAbundances">
        /// Optional list that receives one <c>RunAbundance</c> per quantified run. When null, a
        /// private list is used instead. Entries may already have been appended when this method returns null.
        /// </param>
        /// <returns>
        /// The comparison result, or null when there are no usable data rows or when the data
        /// set has fewer than two distinct <c>SubjectControls</c> values.
        /// </returns>
        private GroupComparisonResult CalculateFoldChangeUsingRegression(
            GroupComparisonSelector selector, List<RunAbundance> runAbundances)
        {
            var detailRows = new List<DataRowDetails>();

            GetDataRows(selector, detailRows);
            if (detailRows.Count == 0)
            {
                return null;
            }
            runAbundances = runAbundances ?? new List<RunAbundance>();

            // Evaluate GetLog2Abundance() once per row and reuse the value for both the
            // NaN/infinity filter and the resulting data row.
            var usableRows = new List<FoldChangeCalculator.DataRow>(detailRows.Count);
            foreach (var row in detailRows)
            {
                var log2Abundance = row.GetLog2Abundance();
                if (double.IsNaN(log2Abundance) || double.IsInfinity(log2Abundance))
                {
                    continue;
                }
                usableRows.Add(new FoldChangeCalculator.DataRow
                {
                    Abundance = log2Abundance,
                    Control   = row.Control,
                    Feature   = row.IdentityPath,
                    Run       = row.ReplicateIndex,
                    Subject   = row.BioReplicate,
                });
            }
            var foldChangeDataRows = usableRows.ToArray();
            if (foldChangeDataRows.Length == 0)
            {
                // Nothing is left to fit once the unusable abundances have been dropped.
                return null;
            }

            FoldChangeDataSet runQuantificationDataSet = FoldChangeCalculator.MakeDataSet(foldChangeDataRows);
            // Indexed by run number below to recover the replicate index of each run.
            var runNumberToReplicateIndex     = FoldChangeCalculator.GetUniqueList(foldChangeDataRows.Select(row => row.Run));
            var runQuantificationDesignMatrix = DesignMatrix.GetRunQuantificationDesignMatrix(runQuantificationDataSet);
            var quantifiedRuns = runQuantificationDesignMatrix.PerformLinearFit(_qrFactorizationCache);
            var subjects       = new List<int>();

            for (int run = 0; run < quantifiedRuns.Count; run++)
            {
                // Use a data row that belongs to this run to look up the run's subject.
                int iRow = runQuantificationDataSet.Runs.IndexOf(run);
                subjects.Add(runQuantificationDataSet.Subjects[iRow]);
                var replicateIndex   = runNumberToReplicateIndex[run];
                // First() throws if the replicate index is not present in _replicateIndexes.
                var replicateDetails = _replicateIndexes.First(kvp => kvp.Key == replicateIndex).Value;

                runAbundances.Add(new RunAbundance
                {
                    ReplicateIndex = replicateIndex,
                    Control        = replicateDetails.IsControl,
                    BioReplicate   = replicateDetails.BioReplicate,
                    Log2Abundance  = quantifiedRuns[run].EstimatedValue
                });
            }

            // Second-stage data set: one row per quantified run, a constant 0 in the second
            // argument, and the row position as the third argument.
            var abundances        = quantifiedRuns.Select(result => result.EstimatedValue).ToArray();
            var quantifiedDataSet = new FoldChangeDataSet(
                abundances,
                Enumerable.Repeat(0, quantifiedRuns.Count).ToArray(),
                Enumerable.Range(0, quantifiedRuns.Count).ToArray(),
                subjects,
                runQuantificationDataSet.SubjectControls);

            // The comparison is skipped unless both SubjectControls values are present.
            if (quantifiedDataSet.SubjectControls.Distinct().Count() < 2)
            {
                return null;
            }

            var foldChangeResult = DesignMatrix.GetDesignMatrix(quantifiedDataSet, false).PerformLinearFit(_qrFactorizationCache).First();

            return new GroupComparisonResult(selector, quantifiedRuns.Count, foldChangeResult, runAbundances);
        }
Beispiel #3
0
        /// <summary>
        /// For every entry read from "quant.csv", quantifies each run with a linear fit, runs the
        /// group comparison on the per-run abundances, and compares the first fit result with
        /// the matching row of "result_newtesting_v2.csv".
        /// </summary>
        [Timeout(36000000)]  // These can take a long time in code coverage mode
        public void TestGroupComparisonWithRunQuantification()
        {
            // Both readers stay open for the whole test body and are disposed deterministically,
            // even when an assertion fails part-way through.
            using (var quantReader = GetTextReader("quant.csv"))
            using (var expectedReader = GetTextReader("result_newtesting_v2.csv"))
            {
                var dataRowsByProtein        = ToDataRows(ReadCsvFile(new DsvFileReader(quantReader, ',')));
                var expectedResultsByProtein = ReadCsvFile(new DsvFileReader(expectedReader, ','))
                                               .ToDictionary(row => row["Protein"]);
                var cache = new QrFactorizationCache();

                foreach (var entry in dataRowsByProtein)
                {
                    string message = entry.Key;

                    FoldChangeDataSet dataSet = FoldChangeCalculator.MakeDataSet(entry.Value);
                    var quantifiedRuns        = DesignMatrix.GetRunQuantificationDesignMatrix(dataSet).PerformLinearFit(cache);
                    var subjects = new List<int>();

                    for (int run = 0; run < quantifiedRuns.Count; run++)
                    {
                        // Use a data row that belongs to this run to look up the run's subject.
                        int iRow = dataSet.Runs.IndexOf(run);
                        subjects.Add(dataSet.Subjects[iRow]);
                    }
                    var abundances        = quantifiedRuns.Select(result => result.EstimatedValue).ToArray();
                    var quantifiedDataSet = new FoldChangeDataSet(
                        abundances,
                        Enumerable.Repeat(0, quantifiedRuns.Count).ToArray(),
                        Enumerable.Range(0, quantifiedRuns.Count).ToArray(),
                        subjects,
                        dataSet.SubjectControls);
                    var foldChangeResult = DesignMatrix.GetDesignMatrix(quantifiedDataSet, false).PerformLinearFit(cache).First();

                    // Fail with a readable message instead of a bare KeyNotFoundException when
                    // the expected-results file has no row for this entry.
                    Assert.IsTrue(expectedResultsByProtein.ContainsKey(entry.Key),
                        "No expected result for protein " + message);
                    var expectedResult = expectedResultsByProtein[entry.Key];

                    Assert.AreEqual(double.Parse(expectedResult["logFC"], CultureInfo.InvariantCulture), foldChangeResult.EstimatedValue, 1E-6, message);
                    Assert.AreEqual(double.Parse(expectedResult["SE"], CultureInfo.InvariantCulture), foldChangeResult.StandardError, 1E-6, message);
                    Assert.AreEqual(int.Parse(expectedResult["DF"], CultureInfo.InvariantCulture), foldChangeResult.DegreesOfFreedom, message);
                    // The p-value and t-value are only compared when the estimate is not
                    // (numerically) zero.
                    if (Math.Abs(foldChangeResult.EstimatedValue) > 1E-8)
                    {
                        Assert.AreEqual(double.Parse(expectedResult["pvalue"], CultureInfo.InvariantCulture), foldChangeResult.PValue, 1E-6, message);
                        Assert.AreEqual(double.Parse(expectedResult["Tvalue"], CultureInfo.InvariantCulture), foldChangeResult.TValue, 1E-6, message);
                    }
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Calculates a fold change from abundances that <paramref name="summarizationFunction"/>
        /// produces from the selected data rows. When any summarized row carries a
        /// <c>BioReplicate</c>, rows sharing the same (BioReplicate, Control) pair are averaged
        /// into a single row before the linear fit.
        /// </summary>
        /// <param name="selector">Selects the data rows to use; also stored in the returned result.</param>
        /// <param name="runAbundances">
        /// Optional list that receives the rows returned by <paramref name="summarizationFunction"/>;
        /// ignored when null. Rows are appended before the later checks that can still return null.
        /// </param>
        /// <param name="summarizationFunction">Turns the detail rows into abundance rows.</param>
        /// <returns>
        /// The comparison result, or null when there are no detail rows, no summarized rows,
        /// or fewer than two distinct <c>SubjectControls</c> values in the fitted data set.
        /// </returns>
        private GroupComparisonResult CalculateFoldChangeWithSummarization(GroupComparisonSelector selector,
                                                                           List<RunAbundance> runAbundances, Func<IList<DataRowDetails>, IList<RunAbundance>> summarizationFunction)
        {
            var details = new List<DataRowDetails>();
            GetDataRows(selector, details);
            if (details.Count == 0)
            {
                return null;
            }

            IList<RunAbundance> perReplicate = summarizationFunction(details);
            if (perReplicate.Count == 0)
            {
                return null;
            }
            if (runAbundances != null)
            {
                runAbundances.AddRange(perReplicate);
            }

            // Fit the summarized rows as they are unless bio replicates are present, in which
            // case each (BioReplicate, Control) group collapses to the mean of its abundances.
            IList<RunAbundance> rowsToFit = perReplicate;
            bool anyBioReplicate = perReplicate.Any(r => r.BioReplicate != null);
            if (anyBioReplicate)
            {
                rowsToFit = perReplicate
                    .GroupBy(r => new KeyValuePair<string, bool>(r.BioReplicate, r.Control))
                    .Select(g => new RunAbundance
                    {
                        BioReplicate = g.Key.Key,
                        Control = g.Key.Value,
                        ReplicateIndex = -1,
                        Log2Abundance = g.Average(r => r.Log2Abundance),
                    })
                    .ToList();
            }

            // One data set row per fitted row: a constant 0 column, then two independent
            // 0..n-1 index columns, then the control flags.
            int rowCount = rowsToFit.Count;
            var abundances = rowsToFit.Select(r => r.Log2Abundance).ToArray();
            var zeroColumn = Enumerable.Repeat(0, rowCount).ToArray();
            var rowIndexes = Enumerable.Range(0, rowCount).ToArray();
            var subjectIndexes = Enumerable.Range(0, rowCount).ToArray();
            var controlFlags = rowsToFit.Select(r => r.Control).ToArray();
            var quantifiedDataSet = new FoldChangeDataSet(abundances, zeroColumn, rowIndexes, subjectIndexes, controlFlags);

            // The comparison is skipped unless both SubjectControls values are present.
            if (quantifiedDataSet.SubjectControls.Distinct().Count() < 2)
            {
                return null;
            }

            // Note that because the design matrix has only two columns, this fit is equivalent
            // to a simple linear regression of Log2Abundance on an x value of 0 for control
            // rows and 1 for all other rows.
            var fitResults = DesignMatrix.GetDesignMatrix(quantifiedDataSet, false).PerformLinearFit(_qrFactorizationCache);
            var foldChangeResult = fitResults.First();

            // The reported count is the number of summarized rows before any bio replicate averaging.
            return new GroupComparisonResult(selector, perReplicate.Count, foldChangeResult);
        }