/// <summary>
/// Parses protein data rows from comma-separated text, performs the group comparison linear
/// fit for every protein and, when expected values are supplied, checks the first fit result
/// against them.
/// </summary>
/// <param name="textReader">Source of the comma-separated input.</param>
/// <param name="includeInteraction">Passed through to <c>DesignMatrix.GetDesignMatrix</c>.</param>
/// <param name="expectedResults">
/// Expected results keyed by protein, or null to run the fits without comparing values.
/// When non-null it must contain every protein; an entry whose value is null skips the
/// value comparison for that protein.
/// </param>
private void TestGroupComparison(TextReader textReader, bool includeInteraction,
    IDictionary<string, LinearFitResult> expectedResults)
{
    var csvReader = new DsvFileReader(textReader, ',');
    var dataRowsByProtein = ToDataRows(ReadCsvFile(csvReader));
    Assert.AreNotEqual(0, dataRowsByProtein.Count);
    var cache = new QrFactorizationCache();
    foreach (var entry in dataRowsByProtein)
    {
        // Attached to every assertion so that a failure identifies the protein being checked.
        string message = entry.Key;

        FoldChangeDataSet dataSet = FoldChangeCalculator.MakeDataSet(entry.Value);
        var designMatrix = DesignMatrix.GetDesignMatrix(dataSet, includeInteraction);
        // The fit is performed even when there is nothing to compare it against.
        var foldChange = designMatrix.PerformLinearFit(cache).First();

        LinearFitResult expectedResult = null;
        if (null != expectedResults)
        {
            Assert.IsTrue(expectedResults.TryGetValue(entry.Key, out expectedResult), message);
        }
        if (null != expectedResult)
        {
            Assert.AreEqual(expectedResult.EstimatedValue, foldChange.EstimatedValue, 1E-6, message);
            Assert.AreEqual(expectedResult.DegreesOfFreedom, foldChange.DegreesOfFreedom, message);
            Assert.AreEqual(expectedResult.StandardError, foldChange.StandardError, 1E-6, message);
            Assert.AreEqual(expectedResult.TValue, foldChange.TValue, 1E-6, message);
            Assert.AreEqual(expectedResult.PValue, foldChange.PValue, 1E-6, message);
        }
    }
}
/// <summary>
/// Fits the run quantification design matrix for every protein in "quant.csv" and compares
/// each run's estimate and the feature counts with the rows of "runquantdata.csv".
/// </summary>
[Timeout(36000000)] // These can take a long time in code coverage mode
public void TestRunQuantification()
{
    var cache = new QrFactorizationCache();
    var csvReader = new DsvFileReader(GetTextReader("quant.csv"), ',');
    var dataRowsByProtein = ToDataRows(ReadCsvFile(csvReader));
    var expectedResultsByProtein = ReadCsvFile(new DsvFileReader(GetTextReader("runquantdata.csv"), ','))
        .ToLookup(row => row["Protein"]);
    foreach (var entry in dataRowsByProtein)
    {
        // Expected rows for this protein, indexed by the value of their "RUN" column.
        var expectedResultsByRun = expectedResultsByProtein[entry.Key].ToLookup(row => row["RUN"]);
        FoldChangeDataSet dataSet = FoldChangeCalculator.MakeDataSet(entry.Value);
        var designMatrix = DesignMatrix.GetRunQuantificationDesignMatrix(dataSet);
        var runNames = FoldChangeCalculator.GetUniqueList(entry.Value.Select(row => row.Run));
        var results = designMatrix.PerformLinearFit(cache);
        for (int i = 0; i < dataSet.RunCount; i++)
        {
            string message = string.Format("Protein:{0} Run:{1}", entry.Key, runNames[i]);
            var expectedRow = expectedResultsByRun[runNames[i]].FirstOrDefault();
            // Pass the message here as well: a missing expected row is otherwise reported
            // without any hint about which protein and run it belongs to.
            Assert.IsNotNull(expectedRow, message);
            Assert.AreEqual(double.Parse(expectedRow["LogIntensities"], CultureInfo.InvariantCulture),
                results[i].EstimatedValue, .000001, message);
            Assert.AreEqual(int.Parse(expectedRow["NumFeature"], CultureInfo.InvariantCulture),
                dataSet.FeatureCount, message);
            Assert.AreEqual(int.Parse(expectedRow["NumPeaks"], CultureInfo.InvariantCulture),
                dataSet.GetFeatureCountForRun(i), message);
        }
    }
}
/// <summary>
/// Prints a 3x2 design matrix A, the results of <c>DesignMatrix.VarianceCovariance</c> (B) and
/// <c>DesignMatrix.InverseVarianceCovarianceMatrix</c> (C) for it, and the product of B and C.
/// </summary>
/// <remarks>
/// There are no assertions; the output is written to the console for inspection.
/// NOTE(review): the product is presumably expected to be close to the identity matrix - confirm.
/// The locals keep their single-letter names because <c>nameof</c> puts them into the output.
/// </remarks>
public void TestInverseVarianceCovarianceMatrix()
{
    // The header now names the method under test; it used to repeat the header of
    // TestVarianceCovariance word for word.
    Console.WriteLine("計画行列の分散・共分散行列の逆行列を返すメソッドのテストです。");

    double[,] A =
    {
        { -1, 8 },
        { 0, 4 },
        { 1, -4 },
    };
    TestMatrix.WriteLineMatrix(A, "計画行列" + nameof(A));

    double[,] B = DesignMatrix.VarianceCovariance(A);
    TestMatrix.WriteLineMatrix(B, nameof(A) + "の分散・共分散行列" + nameof(B));

    double[,] C = DesignMatrix.InverseVarianceCovarianceMatrix(A);
    TestMatrix.WriteLineMatrix(C, nameof(A) + "の分散・共分散行列の逆行列" + nameof(C));

    double[,] D = Matrix.Multiply(B, C);
    TestMatrix.WriteLineMatrix(D, nameof(B) + nameof(C));
}
/// <summary>
/// Calculates a fold change in two steps: the run quantification design matrix is fitted to
/// obtain one estimated abundance per run, and the group comparison design matrix is then
/// fitted to those per-run estimates.
/// </summary>
/// <param name="selector">Passed to <c>GetDataRows</c> to collect the detail rows and stored in the result.</param>
/// <param name="runAbundances">
/// Optional list that receives one entry per quantified run. Entries are added before the
/// final check on the number of distinct controls, so the list may be extended even when
/// null is returned. When null, a fresh list is used and handed to the result.
/// </param>
/// <returns>
/// The comparison result, or null when there are no detail rows, when no row has a finite
/// log2 abundance, or when the data contains fewer than two distinct SubjectControls values.
/// </returns>
private GroupComparisonResult CalculateFoldChangeUsingRegression(
    GroupComparisonSelector selector, List<RunAbundance> runAbundances)
{
    var detailRows = new List<DataRowDetails>();
    GetDataRows(selector, detailRows);
    if (detailRows.Count == 0)
    {
        return null;
    }
    runAbundances = runAbundances ?? new List<RunAbundance>();

    // Rows whose log2 abundance is NaN or infinite cannot take part in the fit.
    var foldChangeDataRows = detailRows
        .Where(row => !double.IsNaN(row.GetLog2Abundance()) && !double.IsInfinity(row.GetLog2Abundance()))
        .Select(row => new FoldChangeCalculator.DataRow
        {
            Abundance = row.GetLog2Abundance(),
            Control = row.Control,
            Feature = row.IdentityPath,
            Run = row.ReplicateIndex,
            Subject = row.BioReplicate,
        }).ToArray();
    if (foldChangeDataRows.Length == 0)
    {
        // Every row was filtered out. Without this guard an empty data set would be handed
        // to the linear fit; the outcome is the same null that the SubjectControls check
        // below yields for a data set without any subjects.
        return null;
    }

    FoldChangeDataSet runQuantificationDataSet = FoldChangeCalculator.MakeDataSet(foldChangeDataRows);
    var runNumberToReplicateIndex = FoldChangeCalculator.GetUniqueList(foldChangeDataRows.Select(row => row.Run));
    var runQuantificationDesignMatrix = DesignMatrix.GetRunQuantificationDesignMatrix(runQuantificationDataSet);
    var quantifiedRuns = runQuantificationDesignMatrix.PerformLinearFit(_qrFactorizationCache);

    var subjects = new List<int>();
    for (int run = 0; run < quantifiedRuns.Count; run++)
    {
        // The subject of a run is read from the row that IndexOf returns for that run number.
        int iRow = runQuantificationDataSet.Runs.IndexOf(run);
        subjects.Add(runQuantificationDataSet.Subjects[iRow]);

        var replicateIndex = runNumberToReplicateIndex[run];
        var replicateDetails = _replicateIndexes.First(kvp => kvp.Key == replicateIndex).Value;
        runAbundances.Add(new RunAbundance
        {
            ReplicateIndex = replicateIndex,
            Control = replicateDetails.IsControl,
            BioReplicate = replicateDetails.BioReplicate,
            Log2Abundance = quantifiedRuns[run].EstimatedValue
        });
    }

    // Second stage: one abundance per run, with a constant 0 as the second argument and the
    // run number itself as the third.
    var abundances = quantifiedRuns.Select(result => result.EstimatedValue).ToArray();
    var quantifiedDataSet = new FoldChangeDataSet(
        abundances,
        Enumerable.Repeat(0, quantifiedRuns.Count).ToArray(),
        Enumerable.Range(0, quantifiedRuns.Count).ToArray(),
        subjects,
        runQuantificationDataSet.SubjectControls);
    if (quantifiedDataSet.SubjectControls.Distinct().Count() < 2)
    {
        return null;
    }

    var foldChangeResult = DesignMatrix.GetDesignMatrix(quantifiedDataSet, false)
        .PerformLinearFit(_qrFactorizationCache).First();
    return new GroupComparisonResult(selector, quantifiedRuns.Count, foldChangeResult, runAbundances);
}
/// <summary>
/// Prints a 3x2 design matrix A followed by the results of the <c>DesignMatrix</c> L2-norm
/// methods for it: <c>L2Norms</c>, <c>L2NormMaximum</c>, <c>L2NormMaximumIndex</c>,
/// <c>L2NormMinimum</c> and <c>L2NormMinimumIndex</c>.
/// </summary>
/// <remarks>
/// There are no assertions; the output is written to the console for inspection.
/// The matrix keeps the name A because <c>nameof</c> puts it into the output.
/// </remarks>
public void TestL2Norm()
{
    Console.WriteLine("計画行列のL2ノルムに関するメソッドのテストです。");

    double[,] A =
    {
        { 0, 2 },
        { 1, 4 },
        { 2, 6 },
    };
    TestMatrix.WriteLineMatrix(A, "計画行列" + nameof(A));

    // One norm per entry, each printed on its own line.
    double[] rowNorms = DesignMatrix.L2Norms(A);
    Console.WriteLine("計画行列" + nameof(A) + "の各行ベクトルのL2ノルムを返す");
    for (int i = 0; i < rowNorms.Length; i++)
    {
        Console.WriteLine(rowNorms[i]);
    }
    Console.WriteLine("");

    // Largest norm and the index reported for it.
    double largestNorm = DesignMatrix.L2NormMaximum(A);
    Console.WriteLine("計画行列" + nameof(A) + "の各行ベクトルのL2ノルムの最大値");
    Console.WriteLine(largestNorm);
    Console.WriteLine("");

    int largestNormIndex = DesignMatrix.L2NormMaximumIndex(A);
    Console.WriteLine("計画行列" + nameof(A) + "の各行ベクトルのL2ノルムの最大値の要素番号");
    Console.WriteLine(largestNormIndex);
    Console.WriteLine("");

    // Smallest norm and the index reported for it.
    double smallestNorm = DesignMatrix.L2NormMinimum(A);
    Console.WriteLine("計画行列" + nameof(A) + "の各行ベクトルのL2ノルムの最小値");
    Console.WriteLine(smallestNorm);
    Console.WriteLine("");

    int smallestNormIndex = DesignMatrix.L2NormMinimumIndex(A);
    Console.WriteLine("計画行列" + nameof(A) + "の各行ベクトルのL2ノルムの最小値の要素番号");
    Console.WriteLine(smallestNormIndex);
    Console.WriteLine("");
}
/// <summary>
/// Prints a 3x2 design matrix A and the result of <c>DesignMatrix.Average</c> for it, which
/// the output labels as the mean vector of A.
/// </summary>
/// <remarks>
/// There are no assertions; the output is written to the console for inspection.
/// The locals keep their single-letter names because <c>nameof</c> puts them into the output.
/// </remarks>
public void TestAverage()
{
    Console.WriteLine("計画行列の平均ベクトル(行ベクトル)を返すメソッドのテストです。");

    double[,] A =
    {
        { 0, 2 },
        { 1, 4 },
        { 2, 6 },
    };
    string inputCaption = "計画行列" + nameof(A);
    TestMatrix.WriteLineMatrix(A, inputCaption);

    double[,] B = DesignMatrix.Average(A);
    string resultCaption = nameof(A) + "の平均ベクトル" + nameof(B);
    TestMatrix.WriteLineMatrix(B, resultCaption);
}
/// <summary>
/// Prints a 3x2 design matrix A and the result of <c>DesignMatrix.RowVector(A, 2)</c>, which
/// the output labels as the row vector of A at element number 2.
/// </summary>
/// <remarks>
/// There are no assertions; the output is written to the console for inspection.
/// The locals keep their single-letter names because <c>nameof</c> puts them into the output.
/// </remarks>
public void TestRowVector()
{
    Console.WriteLine("計画行列から行ベクトルを返すメソッドのテストです。");

    double[,] A =
    {
        { -1, 8 },
        { 0, 4 },
        { 1, -4 },
    };
    string inputCaption = "計画行列" + nameof(A);
    TestMatrix.WriteLineMatrix(A, inputCaption);

    double[,] B = DesignMatrix.RowVector(A, 2);
    string resultCaption = nameof(A) + "の要素番号2の行ベクトル" + nameof(B);
    TestMatrix.WriteLineMatrix(B, resultCaption);
}
/// <summary>
/// Prints a 3x2 design matrix A and the result of <c>DesignMatrix.Corelation</c> for it, which
/// the output labels as the correlation coefficient matrix of A.
/// </summary>
/// <remarks>
/// There are no assertions; the output is written to the console for inspection.
/// The locals keep their single-letter names because <c>nameof</c> puts them into the output.
/// </remarks>
public void TestCorelation()
{
    Console.WriteLine("計画行列の相関係数行列を返すメソッドのテストです。");

    double[,] A =
    {
        { -1, 8 },
        { 0, 4 },
        { 1, -4 },
    };
    string inputCaption = "計画行列" + nameof(A);
    TestMatrix.WriteLineMatrix(A, inputCaption);

    double[,] B = DesignMatrix.Corelation(A);
    string resultCaption = nameof(A) + "の相関係数行列" + nameof(B);
    TestMatrix.WriteLineMatrix(B, resultCaption);
}
/// <summary>
/// Prints a 3x2 design matrix A and the result of <c>DesignMatrix.VarianceCovariance</c> for
/// it, which the output labels as the variance-covariance matrix of A.
/// </summary>
/// <remarks>
/// There are no assertions; the output is written to the console for inspection.
/// The locals keep their single-letter names because <c>nameof</c> puts them into the output.
/// </remarks>
public void TestVarianceCovariance()
{
    Console.WriteLine("計画行列の分散・共分行列を返すメソッドのテストです。");

    double[,] A =
    {
        { -1, 8 },
        { 0, 4 },
        { 1, -4 },
    };
    string inputCaption = "計画行列" + nameof(A);
    TestMatrix.WriteLineMatrix(A, inputCaption);

    double[,] B = DesignMatrix.VarianceCovariance(A);
    string resultCaption = nameof(A) + "の分散・共分散行列" + nameof(B);
    TestMatrix.WriteLineMatrix(B, resultCaption);
}
/// <summary>
/// Classifies every row of a design matrix.
/// </summary>
/// <remarks>
/// The value stored for a row x is the sum over j of
/// labelY[j] * CoefficientA[j] * K(x, X[j]), where X[j] is row j of
/// <paramref name="design_Matrix_without_Constant"/> and K is <paramref name="iKernel"/>.
/// The sign of that sum is the predicted label.
/// As a side effect the kernel's variance-covariance matrix is set from
/// <paramref name="design_Matrix_without_Constant"/>.
/// </remarks>
/// <param name="labelY">
/// Teacher labels. They are multiplied element-wise with <paramref name="CoefficientA"/> and
/// with an N x 1 kernel column, so this is presumably an N x 1 matrix - confirm against
/// <c>Matrix.Hadamard_product</c>.
/// </param>
/// <param name="design_Matrix_without_Constant">Design matrix whose N rows are the kernel reference points.</param>
/// <param name="iKernel">Kernel used to compare a row to classify with each reference row.</param>
/// <param name="CoefficientA">Coefficients paired element-wise with <paramref name="labelY"/>.</param>
/// <param name="design_Matrix_for_Classification">Design matrix whose rows are classified one by one.</param>
/// <returns>
/// A matrix with one row per row of <paramref name="design_Matrix_for_Classification"/> and a
/// single column holding the decision values.
/// </returns>
public static double[,] Classify_Design_Matrix(double[,] labelY, double[,] design_Matrix_without_Constant, IKernel iKernel, double[,] CoefficientA, double[,] design_Matrix_for_Classification)
{
    int referenceCount = design_Matrix_without_Constant.GetLength(0);
    double[,] classified = new double[design_Matrix_for_Classification.GetLength(0), 1];

    // Configure the kernel.
    iKernel.Set_Variance_Covariance_Matrix(DesignMatrix.Variance_Covariance_Matrix(design_Matrix_without_Constant));

    // label[j] * coefficient[j] does not depend on the row being classified, so it is computed
    // once instead of once per row. This relies on Hadamard_product returning a new matrix
    // without modifying its arguments, which the per-row reuse of labelY already required.
    double[,] labelTimesCoefficient = Matrix.Hadamard_product(labelY, CoefficientA);

    // Kernel values of the current row against every reference row; overwritten for each row.
    double[,] kernelMatrix = new double[referenceCount, 1];

    // Process the matrix to classify one row at a time.
    for (int n = 0; n < classified.GetLength(0); n++)
    {
        double[,] rowVector = Matrix.Pick_Up_Row_Vector(design_Matrix_for_Classification, n);

        // Evaluate the kernel against every reference row.
        for (int j = 0; j < referenceCount; j++)
        {
            double[,] r_j = Matrix.Pick_Up_Row_Vector(design_Matrix_without_Constant, j);
            kernelMatrix[j, 0] = iKernel.Calculate(rowVector, r_j);
        }

        // Decision value = sum over j of label[j] * coefficient[j] * K(x, X[j]).
        double[,] hadamard = Matrix.Hadamard_product(labelTimesCoefficient, kernelMatrix);
        double sum = 0;
        foreach (double h in hadamard)
        {
            sum += h;
        }
        classified[n, 0] = sum;
    }
    return classified;
}
/// <summary>
/// For every protein in "quant.csv": quantifies the runs with the run quantification design
/// matrix, fits the group comparison design matrix to the per-run estimates and compares the
/// first fit result with the matching row of "result_newtesting_v2.csv".
/// </summary>
[Timeout(36000000)] // These can take a long time in code coverage mode
public void TestGroupComparisonWithRunQuantification()
{
    var inputReader = new DsvFileReader(GetTextReader("quant.csv"), ',');
    var dataRowsByProtein = ToDataRows(ReadCsvFile(inputReader));
    var expectedReader = new DsvFileReader(GetTextReader("result_newtesting_v2.csv"), ',');
    var expectedResultsByProtein = ReadCsvFile(expectedReader).ToDictionary(row => row["Protein"]);
    var cache = new QrFactorizationCache();

    foreach (var entry in dataRowsByProtein)
    {
        // First stage: one estimated abundance per run.
        FoldChangeDataSet dataSet = FoldChangeCalculator.MakeDataSet(entry.Value);
        var runQuantificationMatrix = DesignMatrix.GetRunQuantificationDesignMatrix(dataSet);
        var quantifiedRuns = runQuantificationMatrix.PerformLinearFit(cache);
        int runCount = quantifiedRuns.Count;

        // The subject of a run is read from the row that IndexOf returns for that run number.
        var subjects = new List<int>();
        for (int runNumber = 0; runNumber < runCount; runNumber++)
        {
            int rowIndex = dataSet.Runs.IndexOf(runNumber);
            subjects.Add(dataSet.Subjects[rowIndex]);
        }

        // Second stage: fit the group comparison to the per-run estimates.
        var abundances = quantifiedRuns.Select(result => result.EstimatedValue).ToArray();
        var quantifiedDataSet = new FoldChangeDataSet(
            abundances,
            Enumerable.Repeat(0, runCount).ToArray(),
            Enumerable.Range(0, runCount).ToArray(),
            subjects,
            dataSet.SubjectControls);
        var groupComparisonMatrix = DesignMatrix.GetDesignMatrix(quantifiedDataSet, false);
        var foldChangeResult = groupComparisonMatrix.PerformLinearFit(cache).First();

        var expectedResult = expectedResultsByProtein[entry.Key];
        string message = entry.Key;
        // Each expected value is parsed right before its assertion, in the original order.
        Func<string, double> expectedNumber =
            column => double.Parse(expectedResult[column], CultureInfo.InvariantCulture);

        Assert.AreEqual(expectedNumber("logFC"), foldChangeResult.EstimatedValue, 1E-6, message);
        Assert.AreEqual(expectedNumber("SE"), foldChangeResult.StandardError, 1E-6, message);
        Assert.AreEqual(int.Parse(expectedResult["DF"], CultureInfo.InvariantCulture),
            foldChangeResult.DegreesOfFreedom, message);

        // The p-value and t-value are only compared when the estimate is not (nearly) zero.
        if (Math.Abs(foldChangeResult.EstimatedValue) > 1E-8)
        {
            Assert.AreEqual(expectedNumber("pvalue"), foldChangeResult.PValue, 1E-6, message);
            Assert.AreEqual(expectedNumber("Tvalue"), foldChangeResult.TValue, 1E-6, message);
        }
    }
}
/// <summary>
/// Classifies a single row vector.
/// </summary>
/// <remarks>
/// The returned value is the sum over j of labelY[j] * CoefficientA[j] * K(x, X[j]), where x is
/// <paramref name="rowVector"/>, X[j] is row j of
/// <paramref name="design_Matrix_without_Constant"/> and K is <paramref name="iKernel"/>.
/// The sign of that sum is the predicted label.
/// As a side effect the kernel's variance-covariance matrix is set from
/// <paramref name="design_Matrix_without_Constant"/>.
/// </remarks>
/// <param name="labelY">
/// Teacher labels. They are multiplied element-wise with <paramref name="CoefficientA"/> and
/// with an N x 1 kernel column, so this is presumably an N x 1 matrix - confirm against
/// <c>Matrix.Hadamard_product</c>.
/// </param>
/// <param name="design_Matrix_without_Constant">Design matrix whose N rows are the kernel reference points.</param>
/// <param name="iKernel">Kernel used to compare the row vector with each reference row.</param>
/// <param name="CoefficientA">Coefficients paired element-wise with <paramref name="labelY"/>.</param>
/// <param name="rowVector">The vector to classify; it must have exactly one row.</param>
/// <returns>The decision value for <paramref name="rowVector"/>.</returns>
/// <exception cref="FormatException"><paramref name="rowVector"/> does not have exactly one row.</exception>
public static double Classify(double[,] labelY, double[,] design_Matrix_without_Constant, IKernel iKernel, double[,] CoefficientA, double[,] rowVector)
{
    // The message promises exactly one row, so a matrix without rows is rejected as well
    // (the check used to let a zero-row matrix through).
    if (rowVector.GetLength(0) != 1)
    {
        throw new FormatException(nameof(rowVector) + "(" + rowVector.GetLength(0) + ")" + " must be 1 .");
    }

    // Configure the kernel.
    iKernel.Set_Variance_Covariance_Matrix(DesignMatrix.Variance_Covariance_Matrix(design_Matrix_without_Constant));

    // Kernel values of the row vector against every reference row.
    int referenceCount = design_Matrix_without_Constant.GetLength(0);
    double[,] kernelMatrix = new double[referenceCount, 1];
    for (int j = 0; j < referenceCount; j++)
    {
        double[,] r_j = Matrix.Pick_Up_Row_Vector(design_Matrix_without_Constant, j);
        kernelMatrix[j, 0] = iKernel.Calculate(rowVector, r_j);
    }

    // Decision value = sum over j of label[j] * coefficient[j] * K(x, X[j]).
    double[,] hadamard = Matrix.Hadamard_product(labelY, CoefficientA);
    hadamard = Matrix.Hadamard_product(hadamard, kernelMatrix);
    double sum = 0;
    foreach (double h in hadamard)
    {
        sum += h;
    }
    return sum;
}
/// <summary>
/// Learns the coefficient vector A.
/// </summary>
/// <remarks>
/// The kernel matrix of all row pairs is computed once. Every pair of rows (j, k) with k
/// after j is then visited exactly once, in order, and the two coefficients of the pair are
/// updated together: a candidate for coefficient k is computed from the prediction errors of
/// the two rows, clipped to the interval allowed by the upper bound C = 1.0, and coefficient j
/// is adjusted by the opposite amount (scaled by the product of the two labels).
/// There is no convergence loop; a single sweep over the pairs is performed.
/// As a side effect the kernel's variance-covariance matrix is set from
/// <paramref name="design_Matrix_without_Constant"/>.
/// </remarks>
/// <param name="labelY">Teacher labels; element [i, 0] is read for every row i of the design matrix.</param>
/// <param name="design_Matrix_without_Constant">Design matrix with one training sample per row.</param>
/// <param name="iKernel">Kernel evaluated for every pair of rows.</param>
/// <returns>An N x 1 matrix of coefficients, where N is the number of rows of the design matrix.</returns>
public static double[,] Learn(double[,] labelY, double[,] design_Matrix_without_Constant, IKernel iKernel)
{
    // Configure the kernel.
    iKernel.Set_Variance_Covariance_Matrix(DesignMatrix.Variance_Covariance_Matrix(design_Matrix_without_Constant));

    // Upper bound of every coefficient.
    double Hyper_Parameter_C = 1.0;

    int sampleCount = design_Matrix_without_Constant.GetLength(0);

    // Pre-compute the kernel matrix; it is filled symmetrically from the upper triangle.
    double[,] kernel_Matrix = new double[sampleCount, sampleCount];
    for (int j = 0; j < sampleCount; j++)
    {
        double[,] r_j = Matrix.Pick_Up_Row_Vector(design_Matrix_without_Constant, j);
        for (int k = j; k < sampleCount; k++)
        {
            double[,] r_k = Matrix.Pick_Up_Row_Vector(design_Matrix_without_Constant, k);
            double k_jk = iKernel.Calculate(r_j, r_k);
            kernel_Matrix[j, k] = k_jk;
            kernel_Matrix[k, j] = k_jk;
        }
    }

    // All coefficients start at 0.0; a new array is already zero-initialised.
    double[,] coefficient_Matrix_A = new double[sampleCount, 1];

    // Learning: pick two samples at a time.
    for (int j = 0; j < sampleCount; j++)
    {
        for (int k = j + 1; k < sampleCount; k++)
        {
            // Denominator of the update step. When it is not positive (for example two
            // identical rows) the quotient below would be NaN or infinite, so the pair is
            // skipped and both coefficients keep their values.
            double eta = kernel_Matrix[j, j] + kernel_Matrix[k, k] - 2 * kernel_Matrix[j, k];
            if (!(eta > 0))
            {
                continue;
            }

            // Predictions for rows j and k with the current coefficients.
            double predict_j = 0;
            double predict_k = 0;
            for (int i = 0; i < sampleCount; i++)
            {
                double weight = coefficient_Matrix_A[i, 0] * labelY[i, 0];
                predict_j += weight * kernel_Matrix[j, i];
                predict_k += weight * kernel_Matrix[k, i];
            }

            double old_j = coefficient_Matrix_A[j, 0];
            double old_k = coefficient_Matrix_A[k, 0];

            // Unclipped candidate for coefficient k.
            double a_k = old_k
                + labelY[k, 0] * ((predict_j - labelY[j, 0]) - (predict_k - labelY[k, 0])) / eta;

            // Interval the candidate has to stay in; it depends on whether the labels agree.
            double lower;
            double upper;
            if (labelY[j, 0] * labelY[k, 0] > 0)
            {
                upper = Math.Min(Hyper_Parameter_C, old_j + old_k);
                lower = Math.Max(0, old_j + old_k - Hyper_Parameter_C);
            }
            else
            {
                upper = Math.Min(Hyper_Parameter_C, Hyper_Parameter_C - old_j + old_k);
                lower = Math.Max(0, -old_j + old_k);
            }

            // Clip the candidate. The same-sign case used to compare the upper bound with the
            // old coefficient instead of the candidate, which sent candidates above the upper
            // bound to the lower bound; a candidate exactly on the upper bound had the same fate.
            if (a_k > upper)
            {
                a_k = upper;
            }
            else if (!(a_k > lower))
            {
                // Written as a negation so that a NaN candidate still falls back to the lower bound.
                a_k = lower;
            }

            // Move coefficient j by the opposite amount, using the old value of coefficient k.
            double a_j = old_j + labelY[j, 0] * labelY[k, 0] * (old_k - a_k);

            // Store the updated pair.
            coefficient_Matrix_A[j, 0] = a_j;
            coefficient_Matrix_A[k, 0] = a_k;
        }
    }

    return coefficient_Matrix_A;
}
/// <summary>
/// Calculates a fold change from abundances produced by a caller-supplied summarization
/// function, averaging them per (BioReplicate, Control) pair when any row has a BioReplicate.
/// </summary>
/// <param name="selector">Passed to <c>GetDataRows</c> to collect the detail rows and stored in the result.</param>
/// <param name="runAbundances">Optional list that receives all rows returned by the summarization function.</param>
/// <param name="summarizationFunction">Turns the detail rows into one abundance row per replicate.</param>
/// <returns>
/// The comparison result, or null when there are no detail rows, no summarized rows, or fewer
/// than two distinct SubjectControls values.
/// </returns>
private GroupComparisonResult CalculateFoldChangeWithSummarization(GroupComparisonSelector selector,
    List<RunAbundance> runAbundances,
    Func<IList<DataRowDetails>, IList<RunAbundance>> summarizationFunction)
{
    var detailRows = new List<DataRowDetails>();
    GetDataRows(selector, detailRows);
    if (detailRows.Count == 0)
    {
        return null;
    }

    var replicateRows = summarizationFunction(detailRows);
    if (replicateRows.Count == 0)
    {
        return null;
    }
    if (runAbundances != null)
    {
        runAbundances.AddRange(replicateRows);
    }

    // Without any BioReplicate the replicate rows are used as they are; otherwise each
    // (BioReplicate, Control) group is collapsed into a single row holding the group's mean.
    var summarizedRows = replicateRows;
    bool anyBioReplicate = replicateRows.Any(row => row.BioReplicate != null);
    if (anyBioReplicate)
    {
        summarizedRows = replicateRows
            .ToLookup(row => new KeyValuePair<string, bool>(row.BioReplicate, row.Control))
            .Select(grouping => new RunAbundance()
            {
                BioReplicate = grouping.Key.Key,
                Control = grouping.Key.Value,
                ReplicateIndex = -1,
                Log2Abundance = grouping.Average(row => row.Log2Abundance),
            })
            .ToList();
    }

    var abundances = summarizedRows.Select(row => row.Log2Abundance).ToArray();
    var quantifiedDataSet = new FoldChangeDataSet(
        abundances,
        Enumerable.Repeat(0, summarizedRows.Count).ToArray(),
        Enumerable.Range(0, summarizedRows.Count).ToArray(),
        Enumerable.Range(0, summarizedRows.Count).ToArray(),
        summarizedRows.Select(row => row.Control).ToArray());
    if (quantifiedDataSet.SubjectControls.Distinct().Count() < 2)
    {
        return null;
    }

    // Because the design matrix has only two columns, this fit is equivalent to a simple
    // linear regression of the log2 abundances against a 0 (control) / 1 (other) indicator.
    var foldChangeResult = DesignMatrix.GetDesignMatrix(quantifiedDataSet, false)
        .PerformLinearFit(_qrFactorizationCache)
        .First();
    return new GroupComparisonResult(selector, replicateRows.Count, foldChangeResult);
}