/// <summary>
/// Overwrites missing entries in the expression columns with random draws whose
/// parameters are derived from the usable values of the whole matrix.
/// </summary>
/// <remarks>
/// An entry counts as missing when it is NaN or infinite. All other entries of the
/// expression columns are pooled and passed to <c>ArrayUtils.MeanAndStddev</c>.
/// Each missing entry is then replaced by
/// <c>NextGaussian(mean - shift*stddev, stddev*width)</c> and flagged in
/// <c>data.IsImputed</c>. The matrix is modified in place.
/// </remarks>
/// <param name="width">Factor the pooled standard deviation is multiplied by to obtain the second NextGaussian argument.</param>
/// <param name="shift">Multiple of the pooled standard deviation subtracted from the pooled mean to obtain the first NextGaussian argument.</param>
/// <param name="data">Matrix whose expression columns are scanned and updated.</param>
public static void ReplaceMissingsByGaussianWholeMatrix(double width, double shift, IMatrixData data) {
    // Column indices to visit; presumably 0..ExpressionColumnCount-1 (ConsecutiveInts is defined elsewhere).
    int[] columns = ArrayUtils.ConsecutiveInts(data.ExpressionColumnCount);

    // Pass 1: pool every value that is neither NaN nor infinite.
    List<float> usable = new List<float>();
    for (int row = 0; row < data.RowCount; row++) {
        for (int k = 0; k < columns.Length; k++) {
            float value = data[row, columns[k]];
            if (float.IsNaN(value) || float.IsInfinity(value)) {
                continue;
            }
            usable.Add(value);
        }
    }

    double sigma;
    double mu = ArrayUtils.MeanAndStddev(usable.ToArray(), out sigma);
    double center = mu - shift*sigma;
    double spread = sigma*width;

    // Pass 2: replace each missing entry and record that it was imputed.
    Random2 rng = new Random2();
    for (int row = 0; row < data.RowCount; row++) {
        for (int k = 0; k < columns.Length; k++) {
            int col = columns[k];
            float current = data[row, col];
            if (!float.IsNaN(current) && !float.IsInfinity(current)) {
                continue;
            }
            data[row, col] = (float) rng.NextGaussian(center, spread);
            data.IsImputed[row, col] = true;
        }
    }
}
/// <summary>
/// Overwrites missing entries of a single column with random draws whose
/// parameters are derived from that column's usable values.
/// </summary>
/// <remarks>
/// An entry counts as missing when it is NaN or infinite. The remaining entries of
/// the column are passed to <c>ArrayUtils.MeanAndStddev</c>. Each missing entry is
/// then replaced by <c>NextGaussian(mean - shift*stddev, stddev*width)</c> and
/// flagged in <c>data.IsImputed</c>. The matrix is modified in place.
/// </remarks>
/// <param name="width">Factor the column standard deviation is multiplied by to obtain the second NextGaussian argument.</param>
/// <param name="shift">Multiple of the column standard deviation subtracted from the column mean to obtain the first NextGaussian argument.</param>
/// <param name="data">Matrix holding the column to process.</param>
/// <param name="colInd">Index of the column to scan and update.</param>
private static void ReplaceMissingsByGaussianForOneColumn(double width, double shift, IMatrixData data, int colInd) {
    // Pass 1: gather the column's values that are neither NaN nor infinite.
    List<float> usable = new List<float>();
    for (int row = 0; row < data.RowCount; row++) {
        float value = data[row, colInd];
        if (float.IsNaN(value) || float.IsInfinity(value)) {
            continue;
        }
        usable.Add(value);
    }

    double sigma;
    double mu = ArrayUtils.MeanAndStddev(usable.ToArray(), out sigma);
    double center = mu - shift*sigma;
    double spread = sigma*width;

    // Pass 2: replace each missing entry and record that it was imputed.
    Random2 rng = new Random2();
    for (int row = 0; row < data.RowCount; row++) {
        float current = data[row, colInd];
        if (!float.IsNaN(current) && !float.IsInfinity(current)) {
            continue;
        }
        data[row, colInd] = (float) rng.NextGaussian(center, spread);
        data.IsImputed[row, colInd] = true;
    }
}
/// <summary>
/// Adds an independent random draw to every expression value of the matrix.
/// </summary>
/// <remarks>
/// Each cell receives <c>NextGaussian(0, std)</c> cast to <c>float</c>, where
/// <c>std</c> is read from the "Standard deviation" parameter. The matrix is
/// modified in place; <paramref name="supplTables"/>, <paramref name="documents"/>
/// and <paramref name="processInfo"/> are not touched by this method.
/// </remarks>
/// <param name="mdata">Matrix whose expression columns are perturbed.</param>
/// <param name="param">Parameter set supplying the "Standard deviation" value.</param>
/// <param name="supplTables">Unused here.</param>
/// <param name="documents">Unused here.</param>
/// <param name="processInfo">Unused here.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    Random2 generator = new Random2();
    double sigma = param.GetDoubleParam("Standard deviation").Value;
    for (int row = 0; row < mdata.RowCount; row++) {
        for (int col = 0; col < mdata.ExpressionColumnCount; col++) {
            float original = mdata[row, col];
            float noise = (float) generator.NextGaussian(0, sigma);
            mdata[row, col] = original + noise;
        }
    }
}