/// <summary>
/// Builds a supplementary table holding the rows (or columns) of <paramref name="mdata"/> that do
/// NOT reach the required number of valid entries. <paramref name="mdata"/> itself is not modified.
/// </summary>
/// <param name="rows">True to test and extract rows, false to test and extract columns.</param>
/// <param name="minValids">Forwarded to <c>Valid</c> together with the per-line count.</param>
/// <param name="percentage">Forwarded to <c>Valid</c>; presumably switches <paramref name="minValids"/> to a percentage - confirm in <c>Valid</c>.</param>
/// <param name="mdata">Matrix whose main values are inspected.</param>
/// <param name="param">Not used by this method; kept for interface compatibility.</param>
/// <param name="threshold">Forwarded to <c>IsValid</c>.</param>
/// <param name="threshold2">Forwarded to <c>IsValid</c>.</param>
/// <param name="filterMode">Forwarded to <c>IsValid</c>.</param>
/// <returns>A clone of <paramref name="mdata"/> reduced to the rejected rows or columns.</returns>
internal static IMatrixData NonzeroFilter1Split(bool rows, int minValids, bool percentage, IMatrixData mdata,
    Parameters param, double threshold, double threshold2, FilteringMode filterMode)
{
    IMatrixData supplTab = (IMatrixData)mdata.Clone();

    // "Outer" is the dimension being filtered, "inner" the dimension that is counted over.
    int outerCount = rows ? mdata.RowCount : mdata.ColumnCount;
    int innerCount = rows ? mdata.ColumnCount : mdata.RowCount;

    List<int> rejected = new List<int>();
    for (int outer = 0; outer < outerCount; outer++)
    {
        int count = 0;
        for (int inner = 0; inner < innerCount; inner++)
        {
            // Values.Get always takes (row, column), so swap the indices when filtering columns.
            if (IsValid(rows ? mdata.Values.Get(outer, inner) : mdata.Values.Get(inner, outer),
                    threshold, threshold2, filterMode))
            {
                count++;
            }
        }
        if (!Valid(count, minValids, percentage, innerCount))
        {
            rejected.Add(outer);
        }
    }

    if (rows)
    {
        supplTab.ExtractRows(rejected.ToArray());
    }
    else
    {
        supplTab.ExtractColumns(rejected.ToArray());
    }
    return supplTab;
}
/// <summary>
/// Returns a clone of <paramref name="mdata"/> reduced to the given main columns.
/// The input matrix is left untouched.
/// </summary>
/// <param name="mdata">Matrix to clone.</param>
/// <param name="rows">
/// Indices handed to <c>ExtractColumns</c>; despite the parameter name they are used as column indices.
/// </param>
/// <returns>The reduced clone.</returns>
public static IMatrixData CreateSupplTabSplitColumns(IMatrixData mdata, int[] rows)
{
    IMatrixData clone = (IMatrixData)mdata.Clone();
    clone.ExtractColumns(rows);
    return clone;
}
/// <summary>
/// Copies the selected main columns into new numeric columns (same name and description) and then
/// reduces the main matrix to the columns that were not selected.
/// </summary>
/// <param name="colInds">Indices of the main columns to convert.</param>
/// <param name="mdata">Matrix that is modified in place.</param>
private static void ExpressionToNumeric(IList<int> colInds, IMatrixData mdata)
{
    // Determine the surviving main columns up front, before anything is added or removed.
    int[] keptMainColumns = ArrayUtils.Complement(colInds, mdata.ColumnCount);

    for (int k = 0; k < colInds.Count; k++)
    {
        int source = colInds[k];
        double[] values = ArrayUtils.ToDoubles(mdata.Values.GetColumn(source));
        mdata.AddNumericColumn(mdata.ColumnNames[source], mdata.ColumnDescriptions[source], values);
    }

    mdata.ExtractColumns(keptMainColumns);
}
/// <summary>
/// Truncates the matrix to its first "Number of rows" rows and first "Number of columns" main columns.
/// </summary>
/// <remarks>
/// Requested counts are clamped to the range [0, available]. Without the clamp, a request larger
/// than the matrix produced out-of-range indices, and a negative request made
/// <see cref="Enumerable.Range"/> throw.
/// </remarks>
/// <param name="mdata">Matrix that is reduced in place.</param>
/// <param name="param">Supplies the integer parameters "Number of rows" and "Number of columns".</param>
/// <param name="supplTables">Not used.</param>
/// <param name="documents">Not used.</param>
/// <param name="processInfo">Not used.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo)
{
    int requestedRows = param.GetParam<int>("Number of rows").Value;
    int rowsToKeep = Math.Max(0, Math.Min(requestedRows, mdata.RowCount));
    mdata.ExtractRows(Enumerable.Range(0, rowsToKeep).ToArray());

    int requestedColumns = param.GetParam<int>("Number of columns").Value;
    int columnsToKeep = Math.Max(0, Math.Min(requestedColumns, mdata.ColumnCount));
    mdata.ExtractColumns(Enumerable.Range(0, columnsToKeep).ToArray());
}
/// <summary>
/// Duplicates the selected main, numeric, multi-numeric, categorical and text columns, appending
/// each copy under a name obtained from <c>StringUtils.GetNextAvailableName</c>.
/// </summary>
/// <remarks>
/// Fix: a duplicated text column now takes its description from
/// <c>StringColumnDescriptions</c>. It previously read <c>ColumnDescriptions</c> (the main-column
/// descriptions) with a text-column index, which yields the wrong description or an
/// out-of-range access.
/// </remarks>
/// <param name="data">Matrix that is extended in place.</param>
/// <param name="param">
/// Supplies the index arrays "Main columns", "Numerical columns", "Multi-numerical columns",
/// "Categorical columns" and "Text columns".
/// </param>
/// <param name="supplTables">Not used.</param>
/// <param name="documents">Not used.</param>
/// <param name="processInfo">Not used.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo)
{
    int[] exColInds = param.GetParam<int[]>("Main columns").Value;
    int[] numColInds = param.GetParam<int[]>("Numerical columns").Value;
    int[] multiNumColInds = param.GetParam<int[]>("Multi-numerical columns").Value;
    int[] catColInds = param.GetParam<int[]>("Categorical columns").Value;
    int[] textColInds = param.GetParam<int[]>("Text columns").Value;

    if (exColInds.Length > 0)
    {
        // Re-extract "all columns followed by the selected ones" so the copies land at the end,
        // then rename the appended copies starting at the old column count.
        int ncol = data.ColumnCount;
        data.ExtractColumns(ArrayUtils.Concat(ArrayUtils.ConsecutiveInts(data.ColumnCount), exColInds));
        HashSet<string> taken = new HashSet<string>(data.ColumnNames);
        for (int i = 0; i < exColInds.Length; i++)
        {
            string s = StringUtils.GetNextAvailableName(data.ColumnNames[ncol + i], taken);
            data.ColumnNames[ncol + i] = s;
            taken.Add(s);
        }
    }

    // In the loops below the set of taken names is rebuilt from the current name list on every
    // iteration, so names added by earlier iterations are taken into account.
    foreach (int ind in numColInds)
    {
        HashSet<string> taken = new HashSet<string>(data.NumericColumnNames);
        string s = StringUtils.GetNextAvailableName(data.NumericColumnNames[ind], taken);
        data.AddNumericColumn(s, data.NumericColumnDescriptions[ind], (double[])data.NumericColumns[ind].Clone());
    }

    foreach (int ind in multiNumColInds)
    {
        HashSet<string> taken = new HashSet<string>(data.MultiNumericColumnNames);
        string s = StringUtils.GetNextAvailableName(data.MultiNumericColumnNames[ind], taken);
        // NOTE(review): Clone() on a jagged array is shallow, so the copy shares its inner arrays
        // with the source column - confirm that this is acceptable.
        data.AddMultiNumericColumn(s, data.MultiNumericColumnDescriptions[ind],
            (double[][])data.MultiNumericColumns[ind].Clone());
    }

    foreach (int ind in catColInds)
    {
        HashSet<string> taken = new HashSet<string>(data.CategoryColumnNames);
        string s = StringUtils.GetNextAvailableName(data.CategoryColumnNames[ind], taken);
        data.AddCategoryColumn(s, data.CategoryColumnDescriptions[ind], data.GetCategoryColumnAt(ind));
    }

    foreach (int ind in textColInds)
    {
        HashSet<string> taken = new HashSet<string>(data.StringColumnNames);
        string s = StringUtils.GetNextAvailableName(data.StringColumnNames[ind], taken);
        data.AddStringColumn(s, data.StringColumnDescriptions[ind], (string[])data.StringColumns[ind].Clone());
    }
}
/// <summary>
/// Applies a column filter: either reduces the matrix to <paramref name="cols"/>, or adds a
/// category row named "Filter" that marks every column as "Keep" or "Discard".
/// </summary>
/// <remarks>
/// The caller's <paramref name="cols"/> array is no longer sorted in place; a private sorted copy
/// is used for the membership test.
/// </remarks>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="parameters">Passed to <c>GetReduceMatrix</c> to choose between the two behaviours.</param>
/// <param name="cols">Indices of the columns to keep.</param>
public static void FilterColumns(IMatrixData mdata, Parameters parameters, int[] cols)
{
    if (GetReduceMatrix(parameters))
    {
        mdata.ExtractColumns(cols);
        return;
    }

    // Binary search needs sorted input; sort a copy so the caller's array keeps its order.
    int[] sortedCols = (int[])cols.Clone();
    Array.Sort(sortedCols);

    string[][] row = new string[mdata.ColumnCount][];
    for (int i = 0; i < row.Length; i++)
    {
        bool keep = Array.BinarySearch(sortedCols, i) >= 0;
        row[i] = keep ? new[] { "Keep" } : new[] { "Discard" };
    }
    mdata.AddCategoryRow("Filter", "", row);
}
/// <summary>
/// Applies a column filter according to the integer "Filter mode" parameter: values 0 and 2 reduce
/// the matrix to <paramref name="cols"/>, value 1 adds a "Filter" category row marking each column
/// as "Keep" or "Discard". Any other value leaves the matrix unchanged.
/// </summary>
/// <remarks>
/// The parameter is now read once, the unused <c>reduceMatrix</c> local is gone, and the caller's
/// <paramref name="cols"/> array is no longer sorted in place.
/// </remarks>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="parameters">Supplies the integer parameter "Filter mode".</param>
/// <param name="cols">Indices of the columns to keep.</param>
public static void FilterColumnsNew(IMatrixData mdata, Parameters parameters, int[] cols)
{
    int filterMode = parameters.GetParam<int>("Filter mode").Value;

    if (filterMode == 0 || filterMode == 2)
    {
        mdata.ExtractColumns(cols);
        return;
    }
    if (filterMode != 1)
    {
        return;
    }

    // Binary search needs sorted input; sort a copy so the caller's array keeps its order.
    int[] sortedCols = (int[])cols.Clone();
    Array.Sort(sortedCols);

    string[][] row = new string[mdata.ColumnCount][];
    for (int i = 0; i < row.Length; i++)
    {
        bool keep = Array.BinarySearch(sortedCols, i) >= 0;
        row[i] = keep ? new[] { "Keep" } : new[] { "Discard" };
    }
    mdata.AddCategoryRow("Filter", "", row);
}
/// <summary>
/// Runs <c>ReplaceMissingsByGaussianForOneColumn</c> on every index in <paramref name="colInds"/>
/// and afterwards removes the main columns for which that call reported failure.
/// </summary>
/// <param name="width">Forwarded to the per-column routine.</param>
/// <param name="shift">Forwarded to the per-column routine.</param>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="colInds">
/// Column indices to process; failed indices at or above <c>data.ColumnCount</c> are not removed.
/// </param>
public static void ReplaceMissingsByGaussianByColumn(double width, double shift, IMatrixData data, int[] colInds)
{
    List<int> failedMainColumns = new List<int>();
    Random2 rng = new Random2();

    for (int k = 0; k < colInds.Length; k++)
    {
        int column = colInds[k];
        if (ReplaceMissingsByGaussianForOneColumn(width, shift, data, column, rng))
        {
            continue;
        }
        if (column < data.ColumnCount)
        {
            failedMainColumns.Add(column);
        }
    }

    if (failedMainColumns.Count == 0)
    {
        return;
    }
    data.ExtractColumns(ArrayUtils.Complement(failedMainColumns, data.ColumnCount));
}
/// <summary>
/// Runs <c>ReplaceMissingsByGaussianForOneColumn</c> on every index in <paramref name="colInds"/>
/// and afterwards removes the main columns for which that call reported failure.
/// </summary>
/// <remarks>
/// The previous implementation extracted only the successfully processed columns. That dropped
/// every main column missing from <paramref name="colInds"/>, reordered the matrix to follow
/// <paramref name="colInds"/>, and could pass indices at or above <c>data.ColumnCount</c> to
/// <c>ExtractColumns</c>. Now only the failed main columns are removed and all others keep their
/// original order.
/// </remarks>
/// <param name="width">Forwarded to the per-column routine.</param>
/// <param name="shift">Forwarded to the per-column routine.</param>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="colInds">Column indices to process.</param>
public static void ReplaceMissingsByGaussianByColumn(double width, double shift, IMatrixData data, int[] colInds)
{
    List<int> invalidMain = new List<int>();
    Random2 r = new Random2();

    foreach (int colInd in colInds)
    {
        bool success = ReplaceMissingsByGaussianForOneColumn(width, shift, data, colInd, r);
        // Only indices inside the main matrix can be removed via ExtractColumns.
        if (!success && colInd < data.ColumnCount)
        {
            invalidMain.Add(colInd);
        }
    }

    if (invalidMain.Count > 0)
    {
        data.ExtractColumns(ArrayUtils.Complement(invalidMain, data.ColumnCount));
    }
}
/// <summary>
/// Reorders the main columns by the values of the numeric row selected through the
/// "Numerical row" parameter; an "Order" parameter greater than zero reverses the order.
/// </summary>
/// <remarks>
/// When the matrix has no numeric rows, the error is reported through
/// <paramref name="processInfo"/> and the method now returns immediately. It previously fell
/// through and indexed <c>NumericRows</c> anyway. The message also now says "rows", matching the
/// condition that is checked.
/// </remarks>
/// <param name="data">Matrix whose columns are reordered in place.</param>
/// <param name="param">Supplies the integer parameters "Numerical row" and "Order".</param>
/// <param name="supplTables">Not used.</param>
/// <param name="documents">Not used.</param>
/// <param name="processInfo">Receives the error string when there are no numeric rows.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo)
{
    if (data.NumericRowCount == 0)
    {
        processInfo.ErrString = "Data contains no numerical rows";
        return;
    }

    int rowInd = param.GetParam<int>("Numerical row").Value;
    double[] vals = data.NumericRows[rowInd];

    // ArrayUtils.Order presumably returns the index permutation that sorts vals ascending - confirm.
    int[] order = ArrayUtils.Order(vals);
    bool descending = param.GetParam<int>("Order").Value > 0;
    if (descending)
    {
        ArrayUtils.Revert(order);
    }
    data.ExtractColumns(order);
}
/// <summary>
/// Filters main columns. When <c>GetReduceMatrix</c> returns true the matrix is reduced to
/// <paramref name="cols"/>; otherwise a category row "Filter" is added whose entry is "Keep" for
/// the listed columns and "Discard" for all others.
/// </summary>
/// <remarks>
/// Sorting is done on a copy, so the array passed by the caller is no longer reordered as a side effect.
/// </remarks>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="parameters">Passed to <c>GetReduceMatrix</c>.</param>
/// <param name="cols">Indices of the columns to keep.</param>
public static void FilterColumns(IMatrixData mdata, Parameters parameters, int[] cols)
{
    bool reduceMatrix = GetReduceMatrix(parameters);
    if (reduceMatrix)
    {
        mdata.ExtractColumns(cols);
    }
    else
    {
        // Array.BinarySearch requires sorted data; work on a copy to leave the argument untouched.
        int[] lookup = new int[cols.Length];
        Array.Copy(cols, lookup, cols.Length);
        Array.Sort(lookup);

        string[][] marks = new string[mdata.ColumnCount][];
        for (int col = 0; col < marks.Length; col++)
        {
            string label = Array.BinarySearch(lookup, col) >= 0 ? "Keep" : "Discard";
            marks[col] = new[] { label };
        }
        mdata.AddCategoryRow("Filter", "", marks);
    }
}
/// <summary>
/// Processes each listed column with <c>ReplaceMissingsByGaussianForOneColumn</c>, sharing one
/// <c>Random2</c> constructed with the argument 7, and then drops the main columns for which the
/// per-column call returned false.
/// </summary>
/// <param name="width">Forwarded to the per-column routine.</param>
/// <param name="shift">Forwarded to the per-column routine.</param>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="colInds">
/// Column indices to process; failures at indices at or above <c>data.ColumnCount</c> are ignored.
/// </param>
public static void ReplaceMissingsByGaussianByColumn(double width, double shift, IMatrixData data, int[] colInds)
{
    List<int> toRemove = new List<int>();
    Random2 generator = new Random2(7);

    foreach (int index in colInds)
    {
        bool replaced = ReplaceMissingsByGaussianForOneColumn(width, shift, data, index, generator);
        bool isFailedMainColumn = !replaced && index < data.ColumnCount;
        if (isFailedMainColumn)
        {
            toRemove.Add(index);
        }
    }

    if (toRemove.Count > 0)
    {
        int[] survivors = ArrayUtils.Complement(toRemove, data.ColumnCount);
        data.ExtractColumns(survivors);
    }
}
/// <summary>
/// Reduces the matrix to the columns selected in the parameters, separately for main, numeric,
/// multi-numeric, categorical and text columns. Names and descriptions are sub-listed with the
/// same indices as the column data.
/// </summary>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="param">
/// Supplies the index arrays "Main columns", "Numerical columns", "Multi-numerical columns",
/// "Categorical columns" and "Text columns".
/// </param>
/// <param name="supplTables">Not used.</param>
/// <param name="documents">Not used.</param>
/// <param name="processInfo">Not used.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo)
{
    int[] mainSel = param.GetParam<int[]>("Main columns").Value;
    int[] numericSel = param.GetParam<int[]>("Numerical columns").Value;
    int[] multiNumericSel = param.GetParam<int[]>("Multi-numerical columns").Value;
    int[] categorySel = param.GetParam<int[]>("Categorical columns").Value;
    int[] textSel = param.GetParam<int[]>("Text columns").Value;

    // Main matrix columns.
    data.ExtractColumns(mainSel);

    // Numeric columns.
    data.NumericColumns = ArrayUtils.SubList(data.NumericColumns, numericSel);
    data.NumericColumnNames = ArrayUtils.SubList(data.NumericColumnNames, numericSel);
    data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, numericSel);

    // Multi-numeric columns.
    data.MultiNumericColumns = ArrayUtils.SubList(data.MultiNumericColumns, multiNumericSel);
    data.MultiNumericColumnNames = ArrayUtils.SubList(data.MultiNumericColumnNames, multiNumericSel);
    data.MultiNumericColumnDescriptions =
        ArrayUtils.SubList(data.MultiNumericColumnDescriptions, multiNumericSel);

    // Categorical columns (the data goes through the plugin helper rather than SubList).
    data.CategoryColumns = PerseusPluginUtils.GetCategoryColumns(data, categorySel);
    data.CategoryColumnNames = ArrayUtils.SubList(data.CategoryColumnNames, categorySel);
    data.CategoryColumnDescriptions = ArrayUtils.SubList(data.CategoryColumnDescriptions, categorySel);

    // Text columns.
    data.StringColumns = ArrayUtils.SubList(data.StringColumns, textSel);
    data.StringColumnNames = ArrayUtils.SubList(data.StringColumnNames, textSel);
    data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, textSel);
}
/// <summary>
/// Keeps only the columns reported by the <c>GetValid*Cols</c> helpers, separately for main,
/// numeric, multi-numeric, categorical and text columns. A column kind is left alone when the
/// helper returned at least as many indices as there are columns of that kind.
/// </summary>
/// <remarks>
/// Fix: the sub-listed text-column descriptions are now stored in
/// <c>StringColumnDescriptions</c>. They were previously written to <c>ColumnDescriptions</c>,
/// which overwrote the main-column descriptions and left the text descriptions out of sync with
/// the reduced text columns.
/// </remarks>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="param">Not used.</param>
/// <param name="supplTables">Not used.</param>
/// <param name="documents">Not used.</param>
/// <param name="processInfo">Not used.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo)
{
    // All index sets are computed up front, before any column is removed.
    int[] exColInds = GetValidExCols(data);
    int[] numColInds = GetValidNumCols(data);
    int[] multiNumColInds = GetValidMultiNumCols(data);
    int[] catColInds = GetValidCatCols(data);
    int[] textColInds = GetValidTextCols(data);

    if (exColInds.Length < data.ColumnCount)
    {
        data.ExtractColumns(exColInds);
    }

    if (numColInds.Length < data.NumericColumnCount)
    {
        data.NumericColumns = ArrayUtils.SubList(data.NumericColumns, numColInds);
        data.NumericColumnNames = ArrayUtils.SubList(data.NumericColumnNames, numColInds);
        data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, numColInds);
    }

    if (multiNumColInds.Length < data.MultiNumericColumnCount)
    {
        data.MultiNumericColumns = ArrayUtils.SubList(data.MultiNumericColumns, multiNumColInds);
        data.MultiNumericColumnNames = ArrayUtils.SubList(data.MultiNumericColumnNames, multiNumColInds);
        data.MultiNumericColumnDescriptions =
            ArrayUtils.SubList(data.MultiNumericColumnDescriptions, multiNumColInds);
    }

    if (catColInds.Length < data.CategoryColumnCount)
    {
        data.CategoryColumns = PerseusPluginUtils.GetCategoryColumns(data, catColInds);
        data.CategoryColumnNames = ArrayUtils.SubList(data.CategoryColumnNames, catColInds);
        data.CategoryColumnDescriptions = ArrayUtils.SubList(data.CategoryColumnDescriptions, catColInds);
    }

    if (textColInds.Length < data.StringColumnCount)
    {
        data.StringColumns = ArrayUtils.SubList(data.StringColumns, textColInds);
        data.StringColumnNames = ArrayUtils.SubList(data.StringColumnNames, textColInds);
        data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, textColInds);
    }
}
/// <summary>
/// Restricts every column kind of <paramref name="data"/> (main, numeric, multi-numeric,
/// categorical, text) to the indices chosen in <paramref name="param"/>, keeping the name and
/// description lists aligned with the column data.
/// </summary>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="param">
/// Supplies the index arrays "Main columns", "Numerical columns", "Multi-numerical columns",
/// "Categorical columns" and "Text columns".
/// </param>
/// <param name="supplTables">Not used.</param>
/// <param name="documents">Not used.</param>
/// <param name="processInfo">Not used.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo)
{
    // Read every selection first; the matrix is only touched afterwards.
    int[] keepMain = param.GetParam<int[]>("Main columns").Value;
    int[] keepNumeric = param.GetParam<int[]>("Numerical columns").Value;
    int[] keepMultiNumeric = param.GetParam<int[]>("Multi-numerical columns").Value;
    int[] keepCategory = param.GetParam<int[]>("Categorical columns").Value;
    int[] keepText = param.GetParam<int[]>("Text columns").Value;

    data.ExtractColumns(keepMain);

    data.NumericColumns = ArrayUtils.SubList(data.NumericColumns, keepNumeric);
    data.NumericColumnNames = ArrayUtils.SubList(data.NumericColumnNames, keepNumeric);
    data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, keepNumeric);

    data.MultiNumericColumns = ArrayUtils.SubList(data.MultiNumericColumns, keepMultiNumeric);
    data.MultiNumericColumnNames = ArrayUtils.SubList(data.MultiNumericColumnNames, keepMultiNumeric);
    data.MultiNumericColumnDescriptions =
        ArrayUtils.SubList(data.MultiNumericColumnDescriptions, keepMultiNumeric);

    data.CategoryColumns = PerseusPluginUtils.GetCategoryColumns(data, keepCategory);
    data.CategoryColumnNames = ArrayUtils.SubList(data.CategoryColumnNames, keepCategory);
    data.CategoryColumnDescriptions = ArrayUtils.SubList(data.CategoryColumnDescriptions, keepCategory);

    data.StringColumns = ArrayUtils.SubList(data.StringColumns, keepText);
    data.StringColumnNames = ArrayUtils.SubList(data.StringColumnNames, keepText);
    data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, keepText);
}
/// <summary>
/// Reshapes <paramref name="data"/> so that it holds only string columns: the text columns
/// selected by <paramref name="nameInd"/>, plus the columns selected by
/// <paramref name="columnIndx"/>, which are converted to numeric columns, optionally
/// exponentiated, and then handed to <c>NumericToString</c>. Multi-numeric and category
/// columns/rows are cleared at the end.
/// </summary>
/// <remarks>
/// Fix: the exponentiation loop now runs over positions <c>0..numArr.Length-1</c> of the already
/// sub-listed <c>NumericColumns</c>. It previously indexed with the pre-sub-list values stored in
/// <c>numArr</c>, which addresses the wrong column or runs out of range unless <c>numArr</c> is a
/// permutation of <c>0..n-1</c>.
/// </remarks>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="columnIndx">
/// Combined column indices: values below <c>data.ColumnCount</c> address main columns, larger
/// values address numeric column <c>i - data.ColumnCount</c>.
/// </param>
/// <param name="nameInd">Indices of the string columns to keep.</param>
/// <param name="baseVal">
/// When greater than zero, each selected value <c>v</c> is replaced by
/// <c>Math.Pow(baseVal, v)</c>; otherwise values are left unchanged.
/// </param>
public static void SetupMDataForInput(IMatrixData data, int[] columnIndx, int[] nameInd, double baseVal)
{
    data.StringColumns = ArrayUtils.SubList(data.StringColumns, nameInd);
    data.StringColumnNames = ArrayUtils.SubList(data.StringColumnNames, nameInd);
    data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, nameInd);

    // Translate the combined indices into (a) main columns to convert and (b) positions in the
    // numeric column list as it will look after the conversion has appended the new columns.
    List<int> toConvert = new List<int>();
    List<int> numList = new List<int>();
    int expressInd = 0;
    foreach (int i in columnIndx)
    {
        if (i < data.ColumnCount)
        {
            toConvert.Add(i);
            numList.Add(data.NumericColumnCount + expressInd);
            expressInd += 1;
        }
        else
        {
            numList.Add(i - data.ColumnCount);
        }
    }
    int[] numArr = numList.ToArray();

    // Convert expression (main) columns to numeric columns.
    data.ExtractColumns(toConvert.ToArray());
    ExpressionToNumeric(Enumerable.Range(0, data.ColumnCount).ToArray(), data);
    data.NumericColumns = ArrayUtils.SubList(data.NumericColumns, numArr);

    // Change the data form if requested. NumericColumns has just been reduced to numArr.Length
    // entries, so the columns are addressed by their new positions.
    if (baseVal > 0)
    {
        for (int col = 0; col < numArr.Length; col++)
        {
            for (int i = 0; i < data.RowCount; i++)
            {
                data.NumericColumns[col][i] = Math.Pow(baseVal, data.NumericColumns[col][i]);
            }
        }
    }
    data.NumericColumnNames = ArrayUtils.SubList(data.NumericColumnNames, numArr);
    data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, numArr);

    NumericToString(Enumerable.Range(0, numArr.Length).ToArray(), data);

    // Rewrite the literal "NaN" as "NA" in every string column.
    for (int j = 0; j < data.StringColumnCount; j++)
    {
        for (int i = 0; i < data.RowCount; i++)
        {
            data.StringColumns[j][i] = string.Equals(data.StringColumns[j][i], "NaN") ? "NA" : data.StringColumns[j][i];
        }
    }

    // Clear irrelevant info.
    data.ClearMultiNumericColumns();
    data.ClearMultiNumericRows();
    data.ClearCategoryColumns();
    data.ClearCategoryRows();
}