/// <summary>
/// Replaces every NaN or infinite entry of the selected columns with a random draw whose parameters are
/// derived from all finite values of those columns pooled together.
/// </summary>
/// <param name="width">Factor multiplied with the pooled standard deviation to obtain the second generator argument.</param>
/// <param name="shift">Number of pooled standard deviations subtracted from the pooled mean to obtain the first generator argument.</param>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="colInds">Column indices. When writing, indices below <c>data.ColumnCount</c> address the main
/// matrix; larger indices address <c>data.NumericColumns</c> after subtracting <c>data.ColumnCount</c>.</param>
/// <returns><c>null</c> on success; an error message when mean or standard deviation is NaN or infinite.</returns>
public static string ReplaceMissingsByGaussianWholeMatrix(double width, double shift, IMatrixData data, int[] colInds)
{
    // Pool the finite values row by row so that the order handed to MeanAndStddev stays the same.
    List<float> finite = new List<float>();
    for (int row = 0; row < data.RowCount; row++)
    {
        foreach (int col in colInds)
        {
            float value = GetValue(data, row, col);
            if (float.IsNaN(value) || float.IsInfinity(value))
            {
                continue;
            }
            finite.Add(value);
        }
    }

    double sd;
    double avg = ArrayUtils.MeanAndStddev(finite.ToArray(), out sd);
    bool statsUsable = !(double.IsNaN(avg) || double.IsInfinity(avg) || double.IsNaN(sd) || double.IsInfinity(sd));
    if (!statsUsable)
    {
        return "Imputation failed since mean and standard deviation could not be calculated.";
    }

    double center = avg - shift * sd;
    double spread = sd * width;
    Random2 rng = new Random2();
    for (int row = 0; row < data.RowCount; row++)
    {
        foreach (int col in colInds)
        {
            float value = GetValue(data, row, col);
            bool missing = float.IsNaN(value) || float.IsInfinity(value);
            if (!missing)
            {
                continue;
            }
            if (col >= data.ColumnCount)
            {
                // Numeric annotation column: stored as-is, no imputation flag is kept for it.
                data.NumericColumns[col - data.ColumnCount][row] = rng.NextGaussian(center, spread);
                continue;
            }
            data.Values.Set(row, col, (float) rng.NextGaussian(center, spread));
            data.IsImputed[row, col] = true;
        }
    }
    return null;
}
Example #2
0
        /// <summary>
        /// Adds a random offset, drawn via <c>NextGaussian(0, std)</c> with <c>std</c> taken from the
        /// "Standard deviation" parameter, to every row of the columns listed in the "Columns" parameter.
        /// </summary>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="param">Supplies the "Standard deviation" and "Columns" parameters.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
			ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            Random2 noise = new Random2();
            double sigma = param.GetParam<double>("Standard deviation").Value;
            int[] selected = param.GetParam<int[]>("Columns").Value;

            // First pass: indices below ColumnCount address the main matrix. All of these are handled
            // before any numeric column so that the sequence of random draws is column-grouped.
            foreach (int col in selected)
            {
                if (col >= mdata.ColumnCount)
                {
                    continue;
                }
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    var current = mdata.Values.Get(row, col);
                    mdata.Values.Set(row, col, current + (float) noise.NextGaussian(0, sigma));
                }
            }

            // Second pass: the remaining indices address NumericColumns, offset by ColumnCount.
            foreach (int col in selected)
            {
                if (col < mdata.ColumnCount)
                {
                    continue;
                }
                int numericIndex = col - mdata.ColumnCount;
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    mdata.NumericColumns[numericIndex][row] += (float) noise.NextGaussian(0, sigma);
                }
            }
        }
Example #3
0
        /// <summary>
        /// Replaces every NaN or infinite entry of the selected main-matrix columns with a random draw whose
        /// parameters are derived from all finite values of those columns pooled together, and flags the
        /// replaced cells in <c>data.IsImputed</c>.
        /// </summary>
        /// <param name="width">Factor multiplied with the pooled standard deviation to obtain the second generator argument.</param>
        /// <param name="shift">Number of pooled standard deviations subtracted from the pooled mean to obtain the first generator argument.</param>
        /// <param name="data">Matrix that is modified in place.</param>
        /// <param name="colInds">Indices into <c>data.Values</c> of the columns to process.</param>
        /// <remarks>
        /// When the pooled mean or standard deviation is NaN or infinite the method returns without touching
        /// <paramref name="data"/>, so that no cell is flagged as imputed without a usable replacement value.
        /// </remarks>
        public static void ReplaceMissingsByGaussianWholeMatrix(double width, double shift, IMatrixData data, int[] colInds)
        {
            List<float> allValues = new List<float>();

            for (int i = 0; i < data.RowCount; i++)
            {
                foreach (int t in colInds)
                {
                    float x = data.Values[i, t];
                    if (!float.IsNaN(x) && !float.IsInfinity(x))
                    {
                        allValues.Add(x);
                    }
                }
            }
            double stddev;
            double mean = ArrayUtils.MeanAndStddev(allValues.ToArray(), out stddev);

            // Without finite statistics the generator would be parameterised with non-finite values and
            // would presumably write non-finite numbers back while marking them as imputed; bail out instead.
            if (double.IsNaN(mean) || double.IsInfinity(mean) || double.IsNaN(stddev) || double.IsInfinity(stddev))
            {
                return;
            }
            double m = mean - shift * stddev;
            double s = stddev * width;
            Random2 r = new Random2();

            for (int i = 0; i < data.RowCount; i++)
            {
                foreach (int t in colInds)
                {
                    // Read the cell once; the same value decides both the NaN and the infinity test.
                    float x = data.Values[i, t];
                    if (float.IsNaN(x) || float.IsInfinity(x))
                    {
                        data.Values[i, t] = (float)r.NextGaussian(m, s);
                        data.IsImputed[i, t] = true;
                    }
                }
            }
        }
Example #4
0
        /// <summary>
        /// Replaces every NaN or infinite entry of one main-matrix column with a random draw whose parameters
        /// are derived from the finite values of that same column, flagging replaced cells in <c>data.IsImputed</c>.
        /// </summary>
        /// <param name="width">Factor multiplied with the column's standard deviation to obtain the second generator argument.</param>
        /// <param name="shift">Number of standard deviations subtracted from the column mean to obtain the first generator argument.</param>
        /// <param name="data">Matrix that is modified in place.</param>
        /// <param name="colInd">Index into <c>data.Values</c> of the column to process.</param>
        /// <param name="r">Random generator supplying the replacement values.</param>
        /// <returns><c>false</c> when mean or standard deviation is NaN or infinite (nothing is changed); otherwise <c>true</c>.</returns>
        private static bool ReplaceMissingsByGaussianForOneColumn(double width, double shift, IMatrixData data, int colInd,
                                                                  Random2 r)
        {
            List<float> finite = new List<float>();
            for (int row = 0; row < data.RowCount; row++)
            {
                float value = data.Values[row, colInd];
                if (float.IsNaN(value) || float.IsInfinity(value))
                {
                    continue;
                }
                finite.Add(value);
            }

            double sd;
            double avg = ArrayUtils.MeanAndStddev(finite.ToArray(), out sd);
            bool statsUsable = !(double.IsNaN(avg) || double.IsInfinity(avg) || double.IsNaN(sd) || double.IsInfinity(sd));
            if (!statsUsable)
            {
                return false;
            }

            double center = avg - shift * sd;
            double spread = sd * width;
            for (int row = 0; row < data.RowCount; row++)
            {
                float current = data.Values[row, colInd];
                bool missing = float.IsNaN(current) || float.IsInfinity(current);
                if (!missing)
                {
                    continue;
                }
                data.Values[row, colInd] = (float)r.NextGaussian(center, spread);
                data.IsImputed[row, colInd] = true;
            }
            return true;
        }
        /// <summary>
        /// Replaces every NaN or infinite entry of the selected columns with a random draw whose parameters are
        /// derived from all finite values of those columns pooled together. The generator is created with the
        /// fixed seed 7.
        /// </summary>
        /// <param name="width">Factor multiplied with the pooled standard deviation to obtain the second generator argument.</param>
        /// <param name="shift">Number of pooled standard deviations subtracted from the pooled mean to obtain the first generator argument.</param>
        /// <param name="data">Matrix that is modified in place.</param>
        /// <param name="colInds">Column indices. When writing, indices below <c>data.ColumnCount</c> address the main
        /// matrix; larger indices address <c>data.NumericColumns</c> after subtracting <c>data.ColumnCount</c>.</param>
        /// <returns><c>null</c> on success; an error message when mean or standard deviation is NaN or infinite.</returns>
        public static string ReplaceMissingsByGaussianWholeMatrix(double width, double shift, IMatrixData data, int[] colInds)
        {
            // Pool the finite values row by row; the collection order is what MeanAndStddev receives.
            List<float> pooled = new List<float>();
            for (int row = 0; row < data.RowCount; row++)
            {
                foreach (int col in colInds)
                {
                    float candidate = GetValue(data, row, col);
                    bool isFinite = !float.IsNaN(candidate) && !float.IsInfinity(candidate);
                    if (isFinite)
                    {
                        pooled.Add(candidate);
                    }
                }
            }

            double sd;
            double avg = ArrayUtils.MeanAndStddev(pooled.ToArray(), out sd);
            bool avgBroken = double.IsNaN(avg) || double.IsInfinity(avg);
            if (avgBroken || double.IsNaN(sd) || double.IsInfinity(sd))
            {
                return "Imputation failed since mean and standard deviation could not be calculated.";
            }

            double center = avg - shift * sd;
            double spread = sd * width;
            Random2 rng = new Random2(7);
            for (int row = 0; row < data.RowCount; row++)
            {
                foreach (int col in colInds)
                {
                    float candidate = GetValue(data, row, col);
                    bool isFinite = !float.IsNaN(candidate) && !float.IsInfinity(candidate);
                    if (isFinite)
                    {
                        continue;
                    }
                    if (col >= data.ColumnCount)
                    {
                        // Numeric annotation column: no imputation flag is kept for it.
                        data.NumericColumns[col - data.ColumnCount][row] = rng.NextGaussian(center, spread);
                    }
                    else
                    {
                        data.Values.Set(row, col, (float)rng.NextGaussian(center, spread));
                        data.IsImputed[row, col] = true;
                    }
                }
            }
            return null;
        }
        /// <summary>
        /// Replaces every NaN or infinite entry of one column with a random draw whose parameters are derived
        /// from the finite values of that same column.
        /// </summary>
        /// <param name="width">Factor multiplied with the column's standard deviation to obtain the second generator argument.</param>
        /// <param name="shift">Number of standard deviations subtracted from the column mean to obtain the first generator argument.</param>
        /// <param name="data">Matrix that is modified in place.</param>
        /// <param name="colInd">Column index. When writing, an index below <c>data.ColumnCount</c> addresses the main
        /// matrix; a larger index addresses <c>data.NumericColumns</c> after subtracting <c>data.ColumnCount</c>.</param>
        /// <param name="r">Random generator supplying the replacement values.</param>
        /// <returns><c>false</c> when mean or standard deviation is NaN or infinite (nothing is changed); otherwise <c>true</c>.</returns>
        private static bool ReplaceMissingsByGaussianForOneColumn(double width, double shift, IMatrixData data, int colInd,
                                                                  Random2 r)
        {
            List<double> finite = new List<double>();
            for (int row = 0; row < data.RowCount; row++)
            {
                double value = GetValue(data, row, colInd);
                if (double.IsNaN(value) || double.IsInfinity(value))
                {
                    continue;
                }
                finite.Add(value);
            }

            double avg = ArrayUtils.MeanAndStddev(finite.ToArray(), out double sd);
            bool statsUsable = !(double.IsNaN(avg) || double.IsInfinity(avg) || double.IsNaN(sd) || double.IsInfinity(sd));
            if (!statsUsable)
            {
                return false;
            }

            double center = avg - shift * sd;
            double spread = sd * width;
            for (int row = 0; row < data.RowCount; row++)
            {
                double value = GetValue(data, row, colInd);
                bool missing = double.IsNaN(value) || double.IsInfinity(value);
                if (!missing)
                {
                    continue;
                }
                if (colInd >= data.ColumnCount)
                {
                    // Numeric annotation column: no imputation flag is kept for it.
                    data.NumericColumns[colInd - data.ColumnCount][row] = r.NextGaussian(center, spread);
                    continue;
                }
                data.Values.Set(row, colInd, r.NextGaussian(center, spread));
                data.IsImputed[row, colInd] = true;
            }
            return true;
        }
Example #7
0
        /// <summary>
        /// Adds a random offset, drawn via <c>NextGaussian(0, std)</c> with <c>std</c> taken from the
        /// "Standard deviation" parameter, to every row of the columns listed in the "Columns" parameter.
        /// The generator is created with the fixed seed 7.
        /// </summary>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="param">Supplies the "Standard deviation" and "Columns" parameters.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            Random2 noise = new Random2(7);
            double sigma = param.GetParam<double>("Standard deviation").Value;
            int[] selected = param.GetParam<int[]>("Columns").Value;

            // First pass: indices below ColumnCount address the main matrix. They are all handled before
            // any numeric column, which fixes the order in which random numbers are consumed.
            foreach (int col in selected)
            {
                if (col >= mdata.ColumnCount)
                {
                    continue;
                }
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    var current = mdata.Values.Get(row, col);
                    mdata.Values.Set(row, col, current + noise.NextGaussian(0, sigma));
                }
            }

            // Second pass: the remaining indices address NumericColumns, offset by ColumnCount.
            foreach (int col in selected)
            {
                if (col < mdata.ColumnCount)
                {
                    continue;
                }
                int numericIndex = col - mdata.ColumnCount;
                for (int row = 0; row < mdata.RowCount; row++)
                {
                    mdata.NumericColumns[numericIndex][row] += noise.NextGaussian(0, sigma);
                }
            }
        }
        /// <summary>
        /// Replaces every NaN or infinite entry of one column with a random draw whose parameters are derived
        /// from the finite values of that same column.
        /// </summary>
        /// <param name="width">Factor multiplied with the column's standard deviation to obtain the second generator argument.</param>
        /// <param name="shift">Number of standard deviations subtracted from the column mean to obtain the first generator argument.</param>
        /// <param name="data">Matrix that is modified in place.</param>
        /// <param name="colInd">Column index. When writing, an index below <c>data.ColumnCount</c> addresses the main
        /// matrix; a larger index addresses <c>data.NumericColumns</c> after subtracting <c>data.ColumnCount</c>.</param>
        /// <param name="r">Random generator supplying the replacement values.</param>
        /// <returns><c>false</c> when mean or standard deviation is NaN or infinite (nothing is changed); otherwise <c>true</c>.</returns>
        private static bool ReplaceMissingsByGaussianForOneColumn(double width, double shift, IMatrixData data, int colInd,
			Random2 r)
        {
            List<float> finite = new List<float>();
            for (int row = 0; row < data.RowCount; row++)
            {
                float value = GetValue(data, row, colInd);
                bool isFinite = !float.IsNaN(value) && !float.IsInfinity(value);
                if (isFinite)
                {
                    finite.Add(value);
                }
            }

            double sd;
            double avg = ArrayUtils.MeanAndStddev(finite.ToArray(), out sd);
            bool avgBroken = double.IsNaN(avg) || double.IsInfinity(avg);
            if (avgBroken || double.IsNaN(sd) || double.IsInfinity(sd))
            {
                return false;
            }

            double center = avg - shift * sd;
            double spread = sd * width;
            for (int row = 0; row < data.RowCount; row++)
            {
                float value = GetValue(data, row, colInd);
                bool isFinite = !float.IsNaN(value) && !float.IsInfinity(value);
                if (isFinite)
                {
                    continue;
                }
                if (colInd >= data.ColumnCount)
                {
                    // Numeric annotation column: no imputation flag is kept for it.
                    data.NumericColumns[colInd - data.ColumnCount][row] = r.NextGaussian(center, spread);
                }
                else
                {
                    data.Values.Set(row, colInd, (float) r.NextGaussian(center, spread));
                    data.IsImputed[row, colInd] = true;
                }
            }
            return true;
        }
Example #9
0
        /// <summary>
        /// Populates <paramref name="mdata"/> with a randomly generated matrix. Size, percentage of missing
        /// values, number of column groups, optional seed and generation mode are read from <paramref name="param"/>.
        /// </summary>
        /// <remarks>
        /// In every mode a cell becomes NaN when <c>NextDouble() * 100</c> is below the "Percentage of missing
        /// values" parameter, otherwise it receives a <c>NextGaussian()</c> draw.
        /// Mode 0 does nothing else. Mode 1 additionally labels each row "Group1" or "Group2" and adds the
        /// "Distance" parameter to the first column of "Group1" rows. Mode 2 first draws "How many" centers
        /// (each coordinate is <c>NextDouble()</c> scaled by "Box size"), assigns each row to one of them and
        /// adds that center to the row's non-missing cells. Any other mode value leaves the matrix zero-filled.
        /// </remarks>
        /// <param name="mdata">Matrix object that receives values, names and annotations.</param>
        /// <param name="param">Parameter set read by this method.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void LoadData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents,
                             ProcessInfo processInfo)
        {
            int rowCount = param.GetParam<int>("Number of rows").Value;
            int colCount = param.GetParam<int>("Number of columns").Value;
            int missingPercent = param.GetParam<int>("Percentage of missing values").Value;
            int groupCount = param.GetParam<int>("Number of groups").Value;
            ParameterWithSubParams<bool> seedParam = param.GetParamWithSubParams<bool>("Set seed");
            Random2 rng;
            if (seedParam.Value)
            {
                rng = new Random2(seedParam.GetSubParameters().GetParam<int>("Seed").Value);
            }
            else
            {
                rng = new Random2();
            }

            // There cannot be more column groups than columns.
            groupCount = Math.Min(groupCount, colCount);
            float[,] cells = new float[rowCount, colCount];
            ParameterWithSubParams<int> modeParam = param.GetParamWithSubParams<int>("Mode");
            Parameters modeSettings = modeParam.GetSubParameters();
            List<string> categoryNames = new List<string>();
            List<string[][]> categoryColumns = new List<string[][]>();

            int mode = modeParam.Value;
            if (mode == 0)
            {
                for (int r = 0; r < rowCount; r++)
                {
                    for (int c = 0; c < colCount; c++)
                    {
                        cells[r, c] = (rng.NextDouble() * 100 < missingPercent) ? float.NaN : (float)rng.NextGaussian();
                    }
                }
            }
            else if (mode == 1)
            {
                float distance = (float)modeSettings.GetParam<double>("Distance").Value;
                string[][] labels = new string[rowCount][];
                for (int r = 0; r < rowCount; r++)
                {
                    // The group decision is drawn before the row's cells; keep this order of draws.
                    bool inFirstGroup = rng.NextDouble() < 0.5;
                    for (int c = 0; c < colCount; c++)
                    {
                        cells[r, c] = (rng.NextDouble() * 100 < missingPercent) ? float.NaN : (float)rng.NextGaussian();
                    }
                    if (inFirstGroup)
                    {
                        cells[r, 0] += distance;
                    }
                    labels[r] = new[] { inFirstGroup ? "Group1" : "Group2" };
                }
                categoryNames.Add("Grouping");
                categoryColumns.Add(labels);
            }
            else if (mode == 2)
            {
                double boxSize = modeSettings.GetParam<double>("Box size").Value;
                int centerCount = modeSettings.GetParam<int>("How many").Value;
                string[][] labels = new string[rowCount][];
                float[,] centers = new float[centerCount, colCount];
                for (int k = 0; k < centerCount; k++)
                {
                    for (int c = 0; c < colCount; c++)
                    {
                        centers[k, c] = (float)(rng.NextDouble() * boxSize);
                    }
                }
                for (int r = 0; r < rowCount; r++)
                {
                    // The center index is drawn before the row's cells; keep this order of draws.
                    int chosen = (int)(rng.NextDouble() * centerCount);
                    for (int c = 0; c < colCount; c++)
                    {
                        cells[r, c] = (rng.NextDouble() * 100 < missingPercent)
                            ? float.NaN
                            : (float)rng.NextGaussian() + centers[chosen, c];
                    }
                    labels[r] = new[] { "Group" + (chosen + 1) };
                }
                categoryNames.Add("Grouping");
                categoryColumns.Add(labels);
            }

            List<string> columnNames = new List<string>();
            for (int c = 0; c < colCount; c++)
            {
                columnNames.Add("Column " + (c + 1));
            }
            mdata.Name = "Random matrix";
            mdata.ColumnNames = columnNames;
            mdata.Values.Set(cells);
            mdata.Quality.Set(new float[rowCount, colCount]);
            mdata.IsImputed.Set(new bool[rowCount, colCount]);
            mdata.SetAnnotationColumns(new List<string>(), new List<string[]>(), categoryNames, categoryColumns,
                                       new List<string>(), new List<double[]>(), new List<string>(),
                                       new List<double[][]>());
            mdata.Origin = "Random matrix";

            string[] rowNames = new string[mdata.RowCount];
            for (int r = 0; r < rowNames.Length; r++)
            {
                rowNames[r] = "Row " + (r + 1);
            }
            mdata.AddStringColumn("Name", "Name", rowNames);

            // Spread the columns over groupCount consecutive, 1-based groups.
            string[][] columnGroups = new string[colCount][];
            for (int c = 0; c < colCount; c++)
            {
                int groupNumber = (c * groupCount) / colCount + 1;
                columnGroups[c] = new[] { "Group" + groupNumber };
            }
            mdata.AddCategoryRow("Grouping", "Grouping", columnGroups);
        }