/// <summary>
/// Fills missing entries (NaN or infinite) of the selected columns with random draws from one
/// Gaussian whose parameters are derived from all finite values of those columns taken together.
/// </summary>
/// <param name="width">Factor multiplied with the observed standard deviation to get the sampling standard deviation.</param>
/// <param name="shift">Number of observed standard deviations subtracted from the observed mean to get the sampling mean.</param>
/// <param name="data">Matrix that is read and modified in place.</param>
/// <param name="colInds">Column indices; an index at or above <c>data.ColumnCount</c> is written to
/// <c>data.NumericColumns[index - data.ColumnCount]</c> instead of the main matrix.</param>
/// <returns><c>null</c> on success; otherwise a message stating that mean and standard deviation could not be calculated.</returns>
public static string ReplaceMissingsByGaussianWholeMatrix(double width, double shift, IMatrixData data, int[] colInds) {
    // Pass 1: gather every finite value of the selected columns.
    List<float> finiteValues = new List<float>();
    for (int row = 0; row < data.RowCount; row++) {
        foreach (int col in colInds) {
            float value = GetValue(data, row, col);
            if (float.IsNaN(value) || float.IsInfinity(value)) {
                continue;
            }
            finiteValues.Add(value);
        }
    }
    double sd;
    double avg = ArrayUtils.MeanAndStddev(finiteValues.ToArray(), out sd);
    bool avgUsable = !double.IsNaN(avg) && !double.IsInfinity(avg);
    bool sdUsable = !double.IsNaN(sd) && !double.IsInfinity(sd);
    if (!avgUsable || !sdUsable) {
        return "Imputation failed since mean and standard deviation could not be calculated.";
    }
    double center = avg - shift * sd;
    double spread = sd * width;
    Random2 generator = new Random2();
    // Pass 2: overwrite each missing entry, visiting cells in the same row-major order as pass 1.
    for (int row = 0; row < data.RowCount; row++) {
        foreach (int col in colInds) {
            float value = GetValue(data, row, col);
            if (!float.IsNaN(value) && !float.IsInfinity(value)) {
                continue;
            }
            if (col >= data.ColumnCount) {
                // Indices past the main matrix address the numeric annotation columns.
                data.NumericColumns[col - data.ColumnCount][row] = generator.NextGaussian(center, spread);
                continue;
            }
            data.Values.Set(row, col, (float) generator.NextGaussian(center, spread));
            data.IsImputed[row, col] = true;
        }
    }
    return null;
}
/// <summary>
/// Adds zero-mean Gaussian noise to every row of the selected columns.
/// </summary>
/// <param name="mdata">Matrix whose values are modified in place.</param>
/// <param name="param">Supplies "Standard deviation" (noise standard deviation) and "Columns"
/// (selected column indices; an index at or above <c>mdata.ColumnCount</c> addresses
/// <c>mdata.NumericColumns[index - mdata.ColumnCount]</c>).</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    Random2 rand = new Random2();
    double std = param.GetParam<double>("Standard deviation").Value;
    int[] inds = param.GetParam<int[]>("Columns").Value;
    // Split the selection into main-matrix columns and numeric annotation columns.
    List<int> mainInds = new List<int>();
    List<int> numInds = new List<int>();
    foreach (int ind in inds) {
        if (ind < mdata.ColumnCount) {
            mainInds.Add(ind);
        } else {
            numInds.Add(ind - mdata.ColumnCount);
        }
    }
    foreach (int j in mainInds) {
        for (int i = 0; i < mdata.RowCount; i++) {
            mdata.Values.Set(i, j, mdata.Values.Get(i, j) + (float) rand.NextGaussian(0, std));
        }
    }
    foreach (int j in numInds) {
        for (int i = 0; i < mdata.RowCount; i++) {
            // The noise is added without narrowing it to float first; the earlier cast only
            // threw away precision before the sum was stored back into the column.
            // NOTE(review): assumes numeric columns store doubles - confirm against IMatrixData.
            mdata.NumericColumns[j][i] += rand.NextGaussian(0, std);
        }
    }
}
/// <summary>
/// Replaces NaN and infinite entries of the selected main-matrix columns with random draws from a
/// single Gaussian and flags the replaced cells in <c>data.IsImputed</c>.
/// The Gaussian has mean <c>mean - shift * stddev</c> and standard deviation <c>stddev * width</c>,
/// where mean and stddev are computed over all finite values of the selected columns together.
/// </summary>
/// <param name="width">Factor multiplied with the observed standard deviation.</param>
/// <param name="shift">Number of observed standard deviations subtracted from the observed mean.</param>
/// <param name="data">Matrix that is read and modified in place.</param>
/// <param name="colInds">Indices into <c>data.Values</c> of the columns to process.</param>
/// <remarks>
/// If the mean or standard deviation of the finite values is NaN or infinite, the method returns
/// without changing <paramref name="data"/>.
/// </remarks>
public static void ReplaceMissingsByGaussianWholeMatrix(double width, double shift, IMatrixData data, int[] colInds) {
    List<float> allValues = new List<float>();
    for (int i = 0; i < data.RowCount; i++) {
        foreach (int t in colInds) {
            float x = data.Values[i, t];
            if (!float.IsNaN(x) && !float.IsInfinity(x)) {
                allValues.Add(x);
            }
        }
    }
    double stddev;
    double mean = ArrayUtils.MeanAndStddev(allValues.ToArray(), out stddev);
    if (double.IsNaN(mean) || double.IsInfinity(mean) || double.IsNaN(stddev) || double.IsInfinity(stddev)) {
        // Without a usable distribution there is nothing meaningful to sample from; leave the
        // cells as they are instead of overwriting them and marking them as imputed.
        return;
    }
    double m = mean - shift * stddev;
    double s = stddev * width;
    Random2 r = new Random2();
    for (int i = 0; i < data.RowCount; i++) {
        foreach (int t in colInds) {
            float x = data.Values[i, t];
            if (float.IsNaN(x) || float.IsInfinity(x)) {
                data.Values[i, t] = (float) r.NextGaussian(m, s);
                data.IsImputed[i, t] = true;
            }
        }
    }
}
/// <summary>
/// Imputes the NaN and infinite entries of a single main-matrix column with Gaussian draws whose
/// parameters are derived from the finite entries of that same column.
/// </summary>
/// <param name="width">Factor multiplied with the column's standard deviation.</param>
/// <param name="shift">Number of standard deviations subtracted from the column's mean.</param>
/// <param name="data">Matrix that is read and modified in place.</param>
/// <param name="colInd">Index of the column inside <c>data.Values</c>.</param>
/// <param name="r">Random number generator used for the draws.</param>
/// <returns><c>false</c> if mean or standard deviation is NaN or infinite (nothing is changed); otherwise <c>true</c>.</returns>
private static bool ReplaceMissingsByGaussianForOneColumn(double width, double shift, IMatrixData data, int colInd, Random2 r) {
    List<float> finiteValues = new List<float>();
    for (int row = 0; row < data.RowCount; row++) {
        float value = data.Values[row, colInd];
        if (float.IsNaN(value) || float.IsInfinity(value)) {
            continue;
        }
        finiteValues.Add(value);
    }
    double sd;
    double avg = ArrayUtils.MeanAndStddev(finiteValues.ToArray(), out sd);
    bool statsUsable = !double.IsNaN(avg) && !double.IsInfinity(avg) && !double.IsNaN(sd) && !double.IsInfinity(sd);
    if (!statsUsable) {
        return false;
    }
    double center = avg - shift * sd;
    double spread = sd * width;
    for (int row = 0; row < data.RowCount; row++) {
        float value = data.Values[row, colInd];
        bool missing = float.IsNaN(value) || float.IsInfinity(value);
        if (!missing) {
            continue;
        }
        data.Values[row, colInd] = (float) r.NextGaussian(center, spread);
        data.IsImputed[row, colInd] = true;
    }
    return true;
}
/// <summary>
/// Imputes missing entries (NaN or infinite) of the selected columns from a single Gaussian that is
/// parameterised by the pooled finite values of those columns. The generator is created with the
/// fixed seed 7.
/// </summary>
/// <param name="width">Factor multiplied with the pooled standard deviation.</param>
/// <param name="shift">Number of pooled standard deviations subtracted from the pooled mean.</param>
/// <param name="data">Matrix that is read and modified in place.</param>
/// <param name="colInds">Column indices; an index at or above <c>data.ColumnCount</c> is written to
/// <c>data.NumericColumns[index - data.ColumnCount]</c>.</param>
/// <returns><c>null</c> on success; otherwise a message stating that mean and standard deviation could not be calculated.</returns>
public static string ReplaceMissingsByGaussianWholeMatrix(double width, double shift, IMatrixData data, int[] colInds) {
    List<float> pooled = new List<float>();
    for (int rowIdx = 0; rowIdx < data.RowCount; rowIdx++) {
        foreach (int colIdx in colInds) {
            float cell = GetValue(data, rowIdx, colIdx);
            bool finite = !float.IsNaN(cell) && !float.IsInfinity(cell);
            if (finite) {
                pooled.Add(cell);
            }
        }
    }
    double pooledSd;
    double pooledMean = ArrayUtils.MeanAndStddev(pooled.ToArray(), out pooledSd);
    if (double.IsNaN(pooledMean) || double.IsInfinity(pooledMean)) {
        return "Imputation failed since mean and standard deviation could not be calculated.";
    }
    if (double.IsNaN(pooledSd) || double.IsInfinity(pooledSd)) {
        return "Imputation failed since mean and standard deviation could not be calculated.";
    }
    double drawMean = pooledMean - shift * pooledSd;
    double drawSd = pooledSd * width;
    Random2 rng = new Random2(7);
    for (int rowIdx = 0; rowIdx < data.RowCount; rowIdx++) {
        foreach (int colIdx in colInds) {
            float cell = GetValue(data, rowIdx, colIdx);
            bool missing = float.IsNaN(cell) || float.IsInfinity(cell);
            if (!missing) {
                continue;
            }
            bool inMainMatrix = colIdx < data.ColumnCount;
            if (inMainMatrix) {
                data.Values.Set(rowIdx, colIdx, (float) rng.NextGaussian(drawMean, drawSd));
                data.IsImputed[rowIdx, colIdx] = true;
            } else {
                // Indices past the main matrix address the numeric annotation columns.
                data.NumericColumns[colIdx - data.ColumnCount][rowIdx] = rng.NextGaussian(drawMean, drawSd);
            }
        }
    }
    return null;
}
/// <summary>
/// Imputes the NaN and infinite entries of one column with Gaussian draws parameterised by the
/// finite entries of that column.
/// </summary>
/// <param name="width">Factor multiplied with the column's standard deviation.</param>
/// <param name="shift">Number of standard deviations subtracted from the column's mean.</param>
/// <param name="data">Matrix that is read and modified in place.</param>
/// <param name="colInd">Column index; a value at or above <c>data.ColumnCount</c> targets
/// <c>data.NumericColumns[colInd - data.ColumnCount]</c>.</param>
/// <param name="r">Random number generator used for the draws.</param>
/// <returns><c>false</c> if mean or standard deviation is NaN or infinite (nothing is changed); otherwise <c>true</c>.</returns>
private static bool ReplaceMissingsByGaussianForOneColumn(double width, double shift, IMatrixData data, int colInd, Random2 r) {
    // A value counts as missing when it is NaN or infinite.
    bool IsMissing(double v) {
        return double.IsNaN(v) || double.IsInfinity(v);
    }

    List<double> finiteValues = new List<double>();
    for (int row = 0; row < data.RowCount; row++) {
        double value = GetValue(data, row, colInd);
        if (IsMissing(value)) {
            continue;
        }
        finiteValues.Add(value);
    }
    double avg = ArrayUtils.MeanAndStddev(finiteValues.ToArray(), out double sd);
    if (IsMissing(avg) || IsMissing(sd)) {
        return false;
    }
    double center = avg - shift * sd;
    double spread = sd * width;
    for (int row = 0; row < data.RowCount; row++) {
        double value = GetValue(data, row, colInd);
        if (!IsMissing(value)) {
            continue;
        }
        if (colInd >= data.ColumnCount) {
            data.NumericColumns[colInd - data.ColumnCount][row] = r.NextGaussian(center, spread);
            continue;
        }
        data.Values.Set(row, colInd, r.NextGaussian(center, spread));
        data.IsImputed[row, colInd] = true;
    }
    return true;
}
/// <summary>
/// Adds zero-mean Gaussian noise to every row of the selected columns, using a generator created
/// with the fixed seed 7.
/// </summary>
/// <param name="mdata">Matrix whose values are modified in place.</param>
/// <param name="param">Supplies "Standard deviation" (noise standard deviation) and "Columns"
/// (selected column indices; an index at or above <c>mdata.ColumnCount</c> addresses
/// <c>mdata.NumericColumns[index - mdata.ColumnCount]</c>).</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    Random2 noiseSource = new Random2(7);
    double noiseSd = param.GetParam<double>("Standard deviation").Value;
    int[] selected = param.GetParam<int[]>("Columns").Value;
    List<int> matrixColumns = new List<int>();
    List<int> annotationColumns = new List<int>();
    foreach (int index in selected) {
        if (index >= mdata.ColumnCount) {
            // Store the offset into the numeric annotation columns.
            annotationColumns.Add(index - mdata.ColumnCount);
            continue;
        }
        matrixColumns.Add(index);
    }
    // Main-matrix columns are processed first, then annotation columns; each column is walked top to bottom.
    foreach (int col in matrixColumns) {
        for (int row = 0; row < mdata.RowCount; row++) {
            mdata.Values.Set(row, col, mdata.Values.Get(row, col) + noiseSource.NextGaussian(0, noiseSd));
        }
    }
    foreach (int col in annotationColumns) {
        for (int row = 0; row < mdata.RowCount; row++) {
            mdata.NumericColumns[col][row] += noiseSource.NextGaussian(0, noiseSd);
        }
    }
}
/// <summary>
/// Imputes the NaN and infinite entries of one column with Gaussian draws parameterised by the
/// finite entries of that column.
/// </summary>
/// <param name="width">Factor multiplied with the column's standard deviation.</param>
/// <param name="shift">Number of standard deviations subtracted from the column's mean.</param>
/// <param name="data">Matrix that is read and modified in place.</param>
/// <param name="colInd">Column index; a value at or above <c>data.ColumnCount</c> targets
/// <c>data.NumericColumns[colInd - data.ColumnCount]</c>.</param>
/// <param name="r">Random number generator used for the draws.</param>
/// <returns><c>false</c> if mean or standard deviation is NaN or infinite (nothing is changed); otherwise <c>true</c>.</returns>
private static bool ReplaceMissingsByGaussianForOneColumn(double width, double shift, IMatrixData data, int colInd, Random2 r) {
    List<float> observed = new List<float>();
    for (int row = 0; row < data.RowCount; row++) {
        float cell = GetValue(data, row, colInd);
        bool finite = !float.IsNaN(cell) && !float.IsInfinity(cell);
        if (finite) {
            observed.Add(cell);
        }
    }
    double sd;
    double avg = ArrayUtils.MeanAndStddev(observed.ToArray(), out sd);
    if (double.IsNaN(avg) || double.IsInfinity(avg)) {
        return false;
    }
    if (double.IsNaN(sd) || double.IsInfinity(sd)) {
        return false;
    }
    double drawMean = avg - shift * sd;
    double drawSd = sd * width;
    for (int row = 0; row < data.RowCount; row++) {
        float cell = GetValue(data, row, colInd);
        bool missing = float.IsNaN(cell) || float.IsInfinity(cell);
        if (!missing) {
            continue;
        }
        if (colInd >= data.ColumnCount) {
            // Indices past the main matrix address the numeric annotation columns.
            data.NumericColumns[colInd - data.ColumnCount][row] = r.NextGaussian(drawMean, drawSd);
        } else {
            data.Values.Set(row, colInd, (float) r.NextGaussian(drawMean, drawSd));
            data.IsImputed[row, colInd] = true;
        }
    }
    return true;
}
/// <summary>
/// Fills <paramref name="mdata"/> with a randomly generated matrix named "Random matrix".
/// </summary>
/// <remarks>
/// Each cell is set to NaN when <c>NextDouble() * 100</c> is below "Percentage of missing values";
/// otherwise it receives a Gaussian draw. The "Mode" parameter selects the layout:
/// <list type="bullet">
/// <item>0: plain Gaussian draws, no row grouping.</item>
/// <item>1: each row is assigned to "Group1" or "Group2" by comparing a uniform draw with 0.5;
/// "Group1" rows get "Distance" added to their first column.</item>
/// <item>2: "How many" centers are drawn with each coordinate uniform in [0, "Box size"); each row
/// picks one center at random, adds it to its Gaussian draws and is labelled "Group" + (index + 1).</item>
/// </list>
/// Afterwards column names, an all-default quality matrix and imputation mask, a "Name" string
/// column and a "Grouping" category row (columns split into consecutive groups) are set.
/// </remarks>
/// <param name="mdata">Matrix object that receives the generated data.</param>
/// <param name="param">Supplies "Number of rows", "Number of columns", "Percentage of missing values",
/// "Number of groups", "Set seed" (with sub-parameter "Seed") and "Mode" (with mode-specific sub-parameters).</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void LoadData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int nrows = param.GetParam<int>("Number of rows").Value;
    int ncols = param.GetParam<int>("Number of columns").Value;
    int missingPerc = param.GetParam<int>("Percentage of missing values").Value;
    int ngroups = param.GetParam<int>("Number of groups").Value;
    ParameterWithSubParams<bool> seedParam = param.GetParamWithSubParams<bool>("Set seed");
    Random2 rng;
    if (seedParam.Value) {
        rng = new Random2(seedParam.GetSubParameters().GetParam<int>("Seed").Value);
    } else {
        rng = new Random2();
    }
    // There cannot be more column groups than columns.
    ngroups = Math.Min(ngroups, ncols);
    float[,] matrix = new float[nrows, ncols];
    int rowTotal = matrix.GetLength(0);
    int colTotal = matrix.GetLength(1);
    ParameterWithSubParams<int> modeParam = param.GetParamWithSubParams<int>("Mode");
    Parameters modeSettings = modeParam.GetSubParameters();
    List<string> catColNames = new List<string>();
    List<string[][]> catCols = new List<string[][]>();
    int mode = modeParam.Value;
    if (mode == 0) {
        for (int row = 0; row < rowTotal; row++) {
            for (int col = 0; col < colTotal; col++) {
                // The missing-value draw always precedes the (optional) Gaussian draw.
                matrix[row, col] = rng.NextDouble() * 100 < missingPerc ? float.NaN : (float) rng.NextGaussian();
            }
        }
    } else if (mode == 1) {
        float distance = (float) modeSettings.GetParam<double>("Distance").Value;
        string[][] rowGroups = new string[rowTotal][];
        for (int row = 0; row < rowTotal; row++) {
            // Group membership is drawn before the row's cell values.
            bool inFirstGroup = rng.NextDouble() < 0.5;
            for (int col = 0; col < colTotal; col++) {
                matrix[row, col] = rng.NextDouble() * 100 < missingPerc ? float.NaN : (float) rng.NextGaussian();
            }
            if (inFirstGroup) {
                // Only the first column is shifted for rows of the first group.
                matrix[row, 0] += distance;
                rowGroups[row] = new[] { "Group1" };
            } else {
                rowGroups[row] = new[] { "Group2" };
            }
        }
        catColNames.Add("Grouping");
        catCols.Add(rowGroups);
    } else if (mode == 2) {
        double boxLen = modeSettings.GetParam<double>("Box size").Value;
        int centerCount = modeSettings.GetParam<int>("How many").Value;
        string[][] rowGroups = new string[rowTotal][];
        float[,] centers = new float[centerCount, colTotal];
        for (int c = 0; c < centers.GetLength(0); c++) {
            for (int col = 0; col < centers.GetLength(1); col++) {
                centers[c, col] = (float) (rng.NextDouble() * boxLen);
            }
        }
        for (int row = 0; row < rowTotal; row++) {
            // The center is chosen before the row's cell values are drawn.
            int chosen = (int) (rng.NextDouble() * centerCount);
            for (int col = 0; col < colTotal; col++) {
                if (rng.NextDouble() * 100 < missingPerc) {
                    matrix[row, col] = float.NaN;
                    continue;
                }
                matrix[row, col] = (float) rng.NextGaussian() + centers[chosen, col];
            }
            rowGroups[row] = new[] { "Group" + (chosen + 1) };
        }
        catColNames.Add("Grouping");
        catCols.Add(rowGroups);
    }
    List<string> exprColumnNames = new List<string>();
    for (int col = 0; col < ncols; col++) {
        exprColumnNames.Add("Column " + (col + 1));
    }
    mdata.Name = "Random matrix";
    mdata.ColumnNames = exprColumnNames;
    mdata.Values.Set(matrix);
    mdata.Quality.Set(new float[rowTotal, colTotal]);
    mdata.IsImputed.Set(new bool[rowTotal, colTotal]);
    mdata.SetAnnotationColumns(new List<string>(), new List<string[]>(), catColNames, catCols,
        new List<string>(), new List<double[]>(), new List<string>(), new List<double[][]>());
    mdata.Origin = "Random matrix";
    string[] rowNames = new string[mdata.RowCount];
    for (int row = 0; row < rowNames.Length; row++) {
        rowNames[row] = "Row " + (row + 1);
    }
    mdata.AddStringColumn("Name", "Name", rowNames);
    string[][] columnGroups = new string[ncols][];
    for (int col = 0; col < ncols; col++) {
        // Integer division splits the columns into ngroups consecutive blocks, numbered from 1.
        int groupNumber = (col * ngroups) / ncols + 1;
        columnGroups[col] = new[] { "Group" + groupNumber };
    }
    mdata.AddCategoryRow("Grouping", "Grouping", columnGroups);
}