Beispiel #1
0
 /// <summary>
 /// Counts, for every row (or every column), how many matrix values pass <c>IsValid</c>, and returns a
 /// clone of <paramref name="mdata"/> on which <c>ExtractRows</c> / <c>ExtractColumns</c> has been called
 /// with the indices that did NOT pass the <c>Valid</c> check. This method only reads from
 /// <paramref name="mdata"/>; all extraction happens on the clone.
 /// </summary>
 /// <param name="rows">True to evaluate and extract rows, false to evaluate and extract columns.</param>
 /// <param name="minValids">Forwarded to <c>Valid</c> together with the per-line count.</param>
 /// <param name="percentage">Forwarded to <c>Valid</c>.</param>
 /// <param name="mdata">Source matrix; cloned before any counting is done.</param>
 /// <param name="param">Not used by this method.</param>
 /// <param name="threshold">Forwarded to <c>IsValid</c>.</param>
 /// <param name="threshold2">Forwarded to <c>IsValid</c>.</param>
 /// <param name="filterMode">Forwarded to <c>IsValid</c>.</param>
 /// <returns>The clone after the extraction call with the failing indices.</returns>
 internal static IMatrixData NonzeroFilter1Split(bool rows, int minValids, bool percentage, IMatrixData mdata,
                                                 Parameters param, double threshold, double threshold2, FilteringMode filterMode)
 {
     IMatrixData supplTab = (IMatrixData)mdata.Clone();
     // "outer" is the dimension being filtered, "inner" is the dimension that is counted over.
     int outerCount = rows ? mdata.RowCount : mdata.ColumnCount;
     int innerCount = rows ? mdata.ColumnCount : mdata.RowCount;
     List<int> notvalids = new List<int>();
     for (int outer = 0; outer < outerCount; outer++)
     {
         int count = 0;
         for (int inner = 0; inner < innerCount; inner++)
         {
             // Values.Get always takes (row, column), whichever dimension is being filtered.
             int i = rows ? outer : inner;
             int j = rows ? inner : outer;
             if (IsValid(mdata.Values.Get(i, j), threshold, threshold2, filterMode))
             {
                 count++;
             }
         }
         if (!Valid(count, minValids, percentage, innerCount))
         {
             notvalids.Add(outer);
         }
     }
     if (rows)
     {
         supplTab.ExtractRows(notvalids.ToArray());
     }
     else
     {
         supplTab.ExtractColumns(notvalids.ToArray());
     }
     return supplTab;
 }
Beispiel #2
0
        /// <summary>
        /// Clones <paramref name="mdata"/> and calls <c>ExtractColumns</c> on the clone with the given indices.
        /// </summary>
        /// <param name="mdata">Matrix to clone; extraction is performed on the clone only.</param>
        /// <param name="rows">Indices handed to <c>ExtractColumns</c> (despite the name, they are used as column indices).</param>
        /// <returns>The clone after the <c>ExtractColumns</c> call.</returns>
        public static IMatrixData CreateSupplTabSplitColumns(IMatrixData mdata, int[] rows)
        {
            IMatrixData columnSubset = (IMatrixData)mdata.Clone();
            columnSubset.ExtractColumns(rows);
            return columnSubset;
        }
Beispiel #3
0
 /// <summary>
 /// Adds each listed main column to <paramref name="mdata"/> as a numeric column (same name and
 /// description, values converted with <c>ArrayUtils.ToDoubles</c>) and then calls
 /// <c>ExtractColumns</c> with the complement of the listed indices.
 /// </summary>
 /// <param name="colInds">Indices of the main columns to convert.</param>
 /// <param name="mdata">Matrix that is modified in place.</param>
 private static void ExpressionToNumeric(IList <int> colInds, IMatrixData mdata)
 {
     // The complement is taken up front, before any numeric column is appended.
     int[] untouched = ArrayUtils.Complement(colInds, mdata.ColumnCount);
     for (int k = 0; k < colInds.Count; k++)
     {
         int source = colInds[k];
         double[] converted = ArrayUtils.ToDoubles(mdata.Values.GetColumn(source));
         string name = mdata.ColumnNames[source];
         string description = mdata.ColumnDescriptions[source];
         mdata.AddNumericColumn(name, description, converted);
     }
     mdata.ExtractColumns(untouched);
 }
        /// <summary>
        /// Calls <c>ExtractRows</c> and then <c>ExtractColumns</c> on <paramref name="mdata"/> with the
        /// leading index ranges 0..N-1, where N comes from the "Number of rows" and "Number of columns"
        /// parameters.
        /// </summary>
        /// <remarks>
        /// The requested counts are capped at the current row/column count so that a request larger than
        /// the matrix does not produce indices past the end. Negative requests are not altered and still
        /// make <c>Enumerable.Range</c> throw.
        /// </remarks>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="param">Supplies the "Number of rows" and "Number of columns" integer parameters.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int lines = param.GetParam <int>("Number of rows").Value;
            // Never ask for more rows than the matrix has.
            int rowsToKeep = System.Math.Min(lines, mdata.RowCount);

            int[] remains = Enumerable.Range(0, rowsToKeep).ToArray();
            mdata.ExtractRows(remains);

            int col = param.GetParam <int>("Number of columns").Value;
            // Same cap for the column dimension.
            int colsToKeep = System.Math.Min(col, mdata.ColumnCount);

            int[] remainCols = Enumerable.Range(0, colsToKeep).ToArray();
            mdata.ExtractColumns(remainCols);
        }
 /// <summary>
 /// Duplicates the selected main, numerical, multi-numerical, categorical and text columns of
 /// <paramref name="data"/>. Each copy is given a name obtained from
 /// <c>StringUtils.GetNextAvailableName</c> against the names already present for that column type.
 /// </summary>
 /// <param name="data">Matrix that is modified in place.</param>
 /// <param name="param">Supplies the five index-array parameters ("Main columns", "Numerical columns",
 /// "Multi-numerical columns", "Categorical columns", "Text columns").</param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="documents">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     int[] exColInds       = param.GetParam <int[]>("Main columns").Value;
     int[] numColInds      = param.GetParam <int[]>("Numerical columns").Value;
     int[] multiNumColInds = param.GetParam <int[]>("Multi-numerical columns").Value;
     int[] catColInds      = param.GetParam <int[]>("Categorical columns").Value;
     int[] textColInds     = param.GetParam <int[]>("Text columns").Value;
     if (exColInds.Length > 0)
     {
         int ncol = data.ColumnCount;
         // Index list = all existing columns followed by the selected ones again, so the
         // duplicates end up at positions ncol, ncol + 1, ...
         data.ExtractColumns(ArrayUtils.Concat(ArrayUtils.ConsecutiveInts(ncol), exColInds));
         HashSet <string> taken = new HashSet <string>(data.ColumnNames);
         for (int i = 0; i < exColInds.Length; i++)
         {
             string s = StringUtils.GetNextAvailableName(data.ColumnNames[ncol + i], taken);
             data.ColumnNames[ncol + i] = s;
             taken.Add(s);
         }
     }
     foreach (int ind in numColInds)
     {
         // The name set is rebuilt per iteration from the current name list.
         HashSet <string> taken = new HashSet <string>(data.NumericColumnNames);
         string           s     = StringUtils.GetNextAvailableName(data.NumericColumnNames[ind], taken);
         data.AddNumericColumn(s, data.NumericColumnDescriptions[ind], (double[])data.NumericColumns[ind].Clone());
     }
     foreach (int ind in multiNumColInds)
     {
         HashSet <string> taken = new HashSet <string>(data.MultiNumericColumnNames);
         string           s     = StringUtils.GetNextAvailableName(data.MultiNumericColumnNames[ind], taken);
         // NOTE(review): Clone() on the outer array is shallow, so the inner double[] arrays are
         // shared between the source column and its copy - confirm this is intended.
         data.AddMultiNumericColumn(s, data.MultiNumericColumnDescriptions[ind],
                                    (double[][])data.MultiNumericColumns[ind].Clone());
     }
     foreach (int ind in catColInds)
     {
         HashSet <string> taken = new HashSet <string>(data.CategoryColumnNames);
         string           s     = StringUtils.GetNextAvailableName(data.CategoryColumnNames[ind], taken);
         data.AddCategoryColumn(s, data.CategoryColumnDescriptions[ind], data.GetCategoryColumnAt(ind));
     }
     foreach (int ind in textColInds)
     {
         HashSet <string> taken = new HashSet <string>(data.StringColumnNames);
         string           s     = StringUtils.GetNextAvailableName(data.StringColumnNames[ind], taken);
         // The description must come from the string-column descriptions; ind indexes the string
         // columns, not the main columns.
         data.AddStringColumn(s, data.StringColumnDescriptions[ind], (string[])data.StringColumns[ind].Clone());
     }
 }
 /// <summary>
 /// Either reduces the matrix to the given columns (when <c>GetReduceMatrix</c> returns true) or adds a
 /// category row named "Filter" that marks every column as "Keep" or "Discard".
 /// </summary>
 /// <param name="mdata">Matrix that is modified in place.</param>
 /// <param name="parameters">Passed to <c>GetReduceMatrix</c> to choose between the two modes.</param>
 /// <param name="cols">Column indices to keep. In annotation mode this array is sorted in place.</param>
 public static void FilterColumns(IMatrixData mdata, Parameters parameters, int[] cols)
 {
     if (GetReduceMatrix(parameters)){
         mdata.ExtractColumns(cols);
         return;
     }
     // Sorting is required for the binary search below.
     Array.Sort(cols);
     string[][] marks = new string[mdata.ColumnCount][];
     for (int c = 0; c < marks.Length; c++){
         if (Array.BinarySearch(cols, c) >= 0){
             marks[c] = new[]{"Keep"};
         } else{
             marks[c] = new[]{"Discard"};
         }
     }
     mdata.AddCategoryRow("Filter", "", marks);
 }
Beispiel #7
0
        /// <summary>
        /// Applies a column filter according to the integer "Filter mode" parameter: values 0 and 2 call
        /// <c>ExtractColumns</c> with <paramref name="cols"/>, value 1 adds a "Filter" category row marking
        /// each column as "Keep" or "Discard". Any other value leaves the matrix untouched.
        /// </summary>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="parameters">Supplies the "Filter mode" parameter.</param>
        /// <param name="cols">Column indices to keep. For mode 1 this array is sorted in place.</param>
        public static void FilterColumnsNew(IMatrixData mdata, Parameters parameters, int[] cols)
        {
            // Evaluated up front; the result is not consulted by the dispatch below.
            bool reduceMatrix = UnpackFilterModeParam(parameters) == FilterMode.Reduce;

            int mode = parameters.GetParam <int>("Filter mode").Value;
            switch (mode)
            {
                case 0:
                case 2:
                    mdata.ExtractColumns(cols);
                    break;
                case 1:
                    // Sorting is required for the binary search below.
                    Array.Sort(cols);
                    string[][] marks = new string[mdata.ColumnCount][];
                    for (int c = 0; c < marks.Length; c++)
                    {
                        bool keep = Array.BinarySearch(cols, c) >= 0;
                        marks[c] = keep ? new[] { "Keep" } : new[] { "Discard" };
                    }
                    mdata.AddCategoryRow("Filter", "", marks);
                    break;
            }
        }
 /// <summary>
 /// Runs <c>ReplaceMissingsByGaussianForOneColumn</c> for every index in <paramref name="colInds"/> and
 /// afterwards removes those main columns (index below <c>data.ColumnCount</c>) for which the call
 /// reported failure.
 /// </summary>
 /// <param name="width">Forwarded to the per-column routine.</param>
 /// <param name="shift">Forwarded to the per-column routine.</param>
 /// <param name="data">Matrix that is modified in place.</param>
 /// <param name="colInds">Column indices to process; indices at or above <c>data.ColumnCount</c> are
 /// processed but never scheduled for removal.</param>
 public static void ReplaceMissingsByGaussianByColumn(double width, double shift, IMatrixData data, int[] colInds)
 {
     List<int> failedMain = new List<int>();
     // One generator instance is shared by all columns.
     Random2 rng = new Random2();
     for (int k = 0; k < colInds.Length; k++){
         int column = colInds[k];
         if (ReplaceMissingsByGaussianForOneColumn(width, shift, data, column, rng)){
             continue;
         }
         if (column < data.ColumnCount){
             failedMain.Add(column);
         }
     }
     if (failedMain.Count == 0){
         return;
     }
     data.ExtractColumns(ArrayUtils.Complement(failedMain, data.ColumnCount));
 }
Beispiel #9
0
        /// <summary>
        /// Runs <c>ReplaceMissingsByGaussianForOneColumn</c> for every index in <paramref name="colInds"/> and
        /// afterwards removes the main columns for which the call reported failure.
        /// </summary>
        /// <remarks>
        /// Only columns that were processed AND failed are removed. Columns that were not listed in
        /// <paramref name="colInds"/> are kept, and indices outside the main-column range are never passed
        /// to <c>ExtractColumns</c>.
        /// </remarks>
        /// <param name="width">Forwarded to the per-column routine.</param>
        /// <param name="shift">Forwarded to the per-column routine.</param>
        /// <param name="data">Matrix that is modified in place.</param>
        /// <param name="colInds">Column indices to process.</param>
        public static void ReplaceMissingsByGaussianByColumn(double width, double shift, IMatrixData data, int[] colInds)
        {
            // failed[c] is set when main column c was processed without success.
            bool[]  failed = new bool[data.ColumnCount];
            Random2 r      = new Random2();

            foreach (int colInd in colInds)
            {
                bool success = ReplaceMissingsByGaussianForOneColumn(width, shift, data, colInd, r);
                if (!success && colInd >= 0 && colInd < failed.Length)
                {
                    failed[colInd] = true;
                }
            }

            List <int> keep = new List <int>();
            for (int c = 0; c < failed.Length; c++)
            {
                if (!failed[c])
                {
                    keep.Add(c);
                }
            }
            // Only touch the matrix when at least one column has to go.
            if (keep.Count != failed.Length)
            {
                data.ExtractColumns(keep.ToArray());
            }
        }
Beispiel #10
0
        /// <summary>
        /// Reorders the columns of <paramref name="data"/> by the values of the numeric row selected with the
        /// "Numerical row" parameter. The order from <c>ArrayUtils.Order</c> is reverted when the "Order"
        /// parameter is greater than zero.
        /// </summary>
        /// <param name="data">Matrix that is modified in place.</param>
        /// <param name="param">Supplies the "Numerical row" and "Order" integer parameters.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error string when the matrix has no numeric rows.</param>
        public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            if (data.NumericRowCount == 0)
            {
                processInfo.ErrString = "Data contains no numerical rows";
                // Nothing to sort by; continuing would index into an empty NumericRows list.
                return;
            }
            int rowInd = param.GetParam <int>("Numerical row").Value;

            double[] vals       = data.NumericRows[rowInd];
            int[]    o          = ArrayUtils.Order(vals);
            bool     descending = param.GetParam <int>("Order").Value > 0;

            if (descending)
            {
                ArrayUtils.Revert(o);
            }
            data.ExtractColumns(o);
        }
        /// <summary>
        /// Filters columns in one of two ways depending on <c>GetReduceMatrix</c>: either the matrix is
        /// reduced to <paramref name="cols"/>, or a "Filter" category row is added that labels each column
        /// "Keep" or "Discard".
        /// </summary>
        /// <param name="mdata">Matrix that is modified in place.</param>
        /// <param name="parameters">Passed to <c>GetReduceMatrix</c>.</param>
        /// <param name="cols">Column indices to keep. Sorted in place when the category row is built.</param>
        public static void FilterColumns(IMatrixData mdata, Parameters parameters, int[] cols)
        {
            if (!GetReduceMatrix(parameters))
            {
                // Binary search below needs the indices in ascending order.
                Array.Sort(cols);
                int total = mdata.ColumnCount;
                string[][] labels = new string[total][];
                for (int column = 0; column < total; column++)
                {
                    string label = Array.BinarySearch(cols, column) < 0 ? "Discard" : "Keep";
                    labels[column] = new[] { label };
                }
                mdata.AddCategoryRow("Filter", "", labels);
                return;
            }

            mdata.ExtractColumns(cols);
        }
        /// <summary>
        /// Calls <c>ReplaceMissingsByGaussianForOneColumn</c> for each index in <paramref name="colInds"/>,
        /// sharing one <c>Random2</c> constructed with the argument 7, and then drops the main columns
        /// (index below <c>data.ColumnCount</c>) whose call returned false.
        /// </summary>
        /// <param name="width">Forwarded to the per-column routine.</param>
        /// <param name="shift">Forwarded to the per-column routine.</param>
        /// <param name="data">Matrix that is modified in place.</param>
        /// <param name="colInds">Column indices to process.</param>
        public static void ReplaceMissingsByGaussianByColumn(double width, double shift, IMatrixData data, int[] colInds)
        {
            List <int> toDrop    = new List <int>();
            Random2    generator = new Random2(7);

            foreach (int column in colInds)
            {
                bool replaced = ReplaceMissingsByGaussianForOneColumn(width, shift, data, column, generator);
                // Only failed columns inside the main-column range are scheduled for removal.
                if (!replaced && column < data.ColumnCount)
                {
                    toDrop.Add(column);
                }
            }

            if (toDrop.Count > 0)
            {
                int[] survivors = ArrayUtils.Complement(toDrop, data.ColumnCount);
                data.ExtractColumns(survivors);
            }
        }
Beispiel #13
0
 /// <summary>
 /// Restricts <paramref name="data"/> to the columns selected in the parameters: main columns via
 /// <c>ExtractColumns</c>, and the numerical, multi-numerical, categorical and text columns (with
 /// their names and descriptions) via <c>ArrayUtils.SubList</c>.
 /// </summary>
 /// <param name="data">Matrix that is modified in place.</param>
 /// <param name="param">Supplies the five index-array parameters.</param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="documents">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     int[] mainSel     = param.GetParam <int[]>("Main columns").Value;
     int[] numericSel  = param.GetParam <int[]>("Numerical columns").Value;
     int[] multiNumSel = param.GetParam <int[]>("Multi-numerical columns").Value;
     int[] categorySel = param.GetParam <int[]>("Categorical columns").Value;
     int[] textSel     = param.GetParam <int[]>("Text columns").Value;

     // Main columns.
     data.ExtractColumns(mainSel);

     // Numerical columns: values, names, descriptions.
     data.NumericColumns            = ArrayUtils.SubList(data.NumericColumns, numericSel);
     data.NumericColumnNames        = ArrayUtils.SubList(data.NumericColumnNames, numericSel);
     data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, numericSel);

     // Multi-numerical columns.
     data.MultiNumericColumns            = ArrayUtils.SubList(data.MultiNumericColumns, multiNumSel);
     data.MultiNumericColumnNames        = ArrayUtils.SubList(data.MultiNumericColumnNames, multiNumSel);
     data.MultiNumericColumnDescriptions = ArrayUtils.SubList(data.MultiNumericColumnDescriptions, multiNumSel);

     // Categorical columns are fetched through the plugin helper rather than SubList.
     data.CategoryColumns            = PerseusPluginUtils.GetCategoryColumns(data, categorySel);
     data.CategoryColumnNames        = ArrayUtils.SubList(data.CategoryColumnNames, categorySel);
     data.CategoryColumnDescriptions = ArrayUtils.SubList(data.CategoryColumnDescriptions, categorySel);

     // Text columns.
     data.StringColumns            = ArrayUtils.SubList(data.StringColumns, textSel);
     data.StringColumnNames        = ArrayUtils.SubList(data.StringColumnNames, textSel);
     data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, textSel);
 }
 /// <summary>
 /// Reduces each column type of <paramref name="data"/> to the indices returned by the corresponding
 /// <c>GetValid*Cols</c> helper. A column type is only touched when the helper returned fewer indices
 /// than there are columns of that type.
 /// </summary>
 /// <param name="data">Matrix that is modified in place.</param>
 /// <param name="param">Not used by this method.</param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="documents">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     int[] exColInds       = GetValidExCols(data);
     int[] numColInds      = GetValidNumCols(data);
     int[] multiNumColInds = GetValidMultiNumCols(data);
     int[] catColInds      = GetValidCatCols(data);
     int[] textColInds     = GetValidTextCols(data);
     if (exColInds.Length < data.ColumnCount)
     {
         data.ExtractColumns(exColInds);
     }
     if (numColInds.Length < data.NumericColumnCount)
     {
         data.NumericColumns            = ArrayUtils.SubList(data.NumericColumns, numColInds);
         data.NumericColumnNames        = ArrayUtils.SubList(data.NumericColumnNames, numColInds);
         data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, numColInds);
     }
     if (multiNumColInds.Length < data.MultiNumericColumnCount)
     {
         data.MultiNumericColumns            = ArrayUtils.SubList(data.MultiNumericColumns, multiNumColInds);
         data.MultiNumericColumnNames        = ArrayUtils.SubList(data.MultiNumericColumnNames, multiNumColInds);
         data.MultiNumericColumnDescriptions = ArrayUtils.SubList(data.MultiNumericColumnDescriptions, multiNumColInds);
     }
     if (catColInds.Length < data.CategoryColumnCount)
     {
         data.CategoryColumns            = PerseusPluginUtils.GetCategoryColumns(data, catColInds);
         data.CategoryColumnNames        = ArrayUtils.SubList(data.CategoryColumnNames, catColInds);
         data.CategoryColumnDescriptions = ArrayUtils.SubList(data.CategoryColumnDescriptions, catColInds);
     }
     if (textColInds.Length < data.StringColumnCount)
     {
         data.StringColumns     = ArrayUtils.SubList(data.StringColumns, textColInds);
         data.StringColumnNames = ArrayUtils.SubList(data.StringColumnNames, textColInds);
         // The filtered text descriptions belong to the string columns, not to the main-column
         // descriptions.
         data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, textColInds);
     }
 }
        /// <summary>
        /// Keeps only the columns chosen in the parameters. Main columns are reduced with
        /// <c>ExtractColumns</c>; every other column type has its values, names and descriptions
        /// replaced by the selected sub-list.
        /// </summary>
        /// <param name="data">Matrix that is modified in place.</param>
        /// <param name="param">Supplies the "Main columns", "Numerical columns", "Multi-numerical columns",
        /// "Categorical columns" and "Text columns" index arrays.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
            ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] keepMain = param.GetParam<int[]>("Main columns").Value;
            int[] keepNumeric = param.GetParam<int[]>("Numerical columns").Value;
            int[] keepMultiNumeric = param.GetParam<int[]>("Multi-numerical columns").Value;
            int[] keepCategory = param.GetParam<int[]>("Categorical columns").Value;
            int[] keepText = param.GetParam<int[]>("Text columns").Value;

            data.ExtractColumns(keepMain);

            data.NumericColumns = ArrayUtils.SubList(data.NumericColumns, keepNumeric);
            data.NumericColumnNames = ArrayUtils.SubList(data.NumericColumnNames, keepNumeric);
            data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, keepNumeric);

            data.MultiNumericColumns = ArrayUtils.SubList(data.MultiNumericColumns, keepMultiNumeric);
            data.MultiNumericColumnNames = ArrayUtils.SubList(data.MultiNumericColumnNames, keepMultiNumeric);
            data.MultiNumericColumnDescriptions = ArrayUtils.SubList(data.MultiNumericColumnDescriptions, keepMultiNumeric);

            // Category values go through the plugin helper; names and descriptions use SubList.
            data.CategoryColumns = PerseusPluginUtils.GetCategoryColumns(data, keepCategory);
            data.CategoryColumnNames = ArrayUtils.SubList(data.CategoryColumnNames, keepCategory);
            data.CategoryColumnDescriptions = ArrayUtils.SubList(data.CategoryColumnDescriptions, keepCategory);

            data.StringColumns = ArrayUtils.SubList(data.StringColumns, keepText);
            data.StringColumnNames = ArrayUtils.SubList(data.StringColumnNames, keepText);
            data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, keepText);
        }
Beispiel #16
0
        /// <summary>
        /// Reshapes <paramref name="data"/> in place: keeps the string columns listed in
        /// <paramref name="nameInd"/>, turns the selected main columns into numeric columns, keeps only the
        /// selected numeric columns, optionally replaces each value x by <c>baseVal^x</c>, converts the
        /// numeric columns to strings via <c>NumericToString</c>, rewrites the string "NaN" as "NA", and
        /// clears multi-numeric and category columns and rows.
        /// </summary>
        /// <param name="data">Matrix that is modified in place.</param>
        /// <param name="columnIndx">Selected columns. An index below <c>data.ColumnCount</c> refers to a main
        /// column; any other index is treated as numeric column <c>index - data.ColumnCount</c>.</param>
        /// <param name="nameInd">Indices of the string columns to keep.</param>
        /// <param name="baseVal">When greater than zero, every selected numeric value x is replaced by
        /// <c>Math.Pow(baseVal, x)</c>; otherwise values are left unchanged.</param>
        public static void SetupMDataForInput(IMatrixData data, int[] columnIndx, int[] nameInd, double baseVal)
        {
            data.StringColumns            = ArrayUtils.SubList(data.StringColumns, nameInd);
            data.StringColumnNames        = ArrayUtils.SubList(data.StringColumnNames, nameInd);
            data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, nameInd);

            List <int> toConvert  = new List <int>();
            List <int> numList    = new List <int>();
            int        expressInd = 0;

            foreach (int i in columnIndx)
            {
                if (i < data.ColumnCount)
                {
                    // Main column: it is converted below and lands behind the existing numeric columns.
                    toConvert.Add(i);
                    numList.Add(data.NumericColumnCount + expressInd);
                    expressInd += 1;
                }
                else
                {
                    // Already a numeric column: translate to its index among the numeric columns.
                    numList.Add(i - data.ColumnCount);
                }
            }

            int[] numArr = numList.ToArray();
            //convert expression to numeric
            data.ExtractColumns(toConvert.ToArray());
            ExpressionToNumeric(Enumerable.Range(0, data.ColumnCount).ToArray(), data);

            data.NumericColumns = ArrayUtils.SubList(data.NumericColumns, numArr);


            //change data form depending whether needed
            if (baseVal > 0)
            {
                // NumericColumns now holds only the selected columns, so they are addressed by their
                // position 0..numArr.Length-1 and not by the pre-selection indices stored in numArr.
                for (int col = 0; col < numArr.Length; col++)
                {
                    for (int i = 0; i < data.RowCount; i++)
                    {
                        data.NumericColumns[col][i] = Math.Pow(baseVal, data.NumericColumns[col][i]);
                    }
                }
            }

            data.NumericColumnNames        = ArrayUtils.SubList(data.NumericColumnNames, numArr);
            data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, numArr);

            NumericToString(Enumerable.Range(0, numArr.Length).ToArray(), data);


            for (int j = 0; j < data.StringColumnCount; j++)
            {
                for (int i = 0; i < data.RowCount; i++)
                {
                    data.StringColumns[j][i] = string.Equals(data.StringColumns[j][i], "NaN") ? "NA" : data.StringColumns[j][i];
                }
            }

            //clearing irrelevant info
            data.ClearMultiNumericColumns();
            data.ClearMultiNumericRows();
            data.ClearCategoryColumns();
            data.ClearCategoryRows();
        }