/// <summary>
/// Turns the categorical columns at <paramref name="colInds"/> into text columns and drops
/// them from the categorical columns of <paramref name="mdata"/>.
/// </summary>
/// <param name="colInds">Indices of the categorical columns to convert.</param>
/// <param name="mdata">Matrix that is modified in place.</param>
private static void CategoricalToString(IList<int> colInds, IMatrixData mdata) {
    int[] remaining = ArrayUtils.Complement(colInds, mdata.CategoryColumnCount);
    string[] convertedNames = ArrayUtils.SubArray(mdata.CategoryColumnNames, colInds);
    string[] convertedDescriptions = ArrayUtils.SubArray(mdata.CategoryColumnDescriptions, colInds);
    string[][][] source = PerseusPluginUtils.GetCategoryColumns(mdata, colInds).ToArray();
    string[][] converted = new string[source.Length][];
    for (int c = 0; c < source.Length; c++) {
        string[][] column = source[c];
        string[] text = new string[column.Length];
        for (int r = 0; r < text.Length; r++) {
            string[] terms = column[r];
            // Entries without any term become the empty string; otherwise terms are joined by ';'.
            bool empty = terms == null || terms.Length == 0;
            text[r] = empty ? "" : StringUtils.Concat(";", terms);
        }
        converted[c] = text;
    }
    // Append the converted columns to the text columns first ...
    mdata.StringColumnNames.AddRange(convertedNames);
    mdata.StringColumnDescriptions.AddRange(convertedDescriptions);
    mdata.StringColumns.AddRange(converted);
    // ... then keep only the categorical columns that were not converted.
    mdata.CategoryColumns = PerseusPluginUtils.GetCategoryColumns(mdata, remaining);
    mdata.CategoryColumnNames = ArrayUtils.SubList(mdata.CategoryColumnNames, remaining);
    mdata.CategoryColumnDescriptions = ArrayUtils.SubList(mdata.CategoryColumnDescriptions, remaining);
}
/// <summary>
/// Creates one multi-choice parameter per column kind (main, numeric, text, category,
/// multi-numeric), each initialized with all column indices of that kind, plus a filter-mode parameter.
/// </summary>
/// <param name="mdata">Matrix whose column names populate the choices.</param>
/// <param name="errorString">Not written by this method.</param>
/// <returns>The parameter set for this activity.</returns>
public Parameters GetParameters(IMatrixData mdata, ref string errorString) {
    var main = new MultiChoiceParam("Main", Enumerable.Range(0, mdata.ColumnCount).ToArray()) {
        Values = mdata.ColumnNames
    };
    var numeric = new MultiChoiceParam("Numeric", Enumerable.Range(0, mdata.NumericColumnCount).ToArray()) {
        Values = mdata.NumericColumnNames
    };
    var text = new MultiChoiceParam("Text", Enumerable.Range(0, mdata.StringColumnCount).ToArray()) {
        Values = mdata.StringColumnNames
    };
    var category = new MultiChoiceParam("Category", Enumerable.Range(0, mdata.CategoryColumnCount).ToArray()) {
        Values = mdata.CategoryColumnNames
    };
    var multiNumeric = new MultiChoiceParam("MultiNumeric",
        Enumerable.Range(0, mdata.MultiNumericColumnCount).ToArray()) {
        Values = mdata.MultiNumericColumnNames
    };
    var filterMode = PerseusPluginUtils.CreateFilterModeParam(true);
    return new Parameters(main, numeric, text, category, multiNumeric, filterMode);
}
/// <summary>
/// Builds the parameters for filtering columns by the values of a categorical row: the row to
/// use (with its selectable values as sub-parameters), the remove/keep mode and a filter-mode parameter.
/// </summary>
/// <param name="mdata">Matrix whose categorical rows populate the choices.</param>
/// <param name="errorString">Not written by this method.</param>
/// <returns>The parameter set for this activity.</returns>
public Parameters GetParameters(IMatrixData mdata, ref string errorString) {
    Parameters[] subParams = new Parameters[mdata.CategoryRowCount];
    for (int i = 0; i < mdata.CategoryRowCount; i++) {
        string[] values = mdata.GetCategoryRowValuesAt(i);
        // Preselect the value when the row offers exactly one; otherwise start with nothing selected.
        int[] sel = values.Length == 1 ? new[] { 0 } : new int[0];
        subParams[i] = new Parameters(new Parameter[] {
            new MultiChoiceParam("Values", sel) {
                Values = values,
                // The help text now says "column": this activity keeps/discards columns, as the mode names state.
                Help = "The value that should be present to discard/keep the corresponding column."
            }
        });
    }
    return (new Parameters(
        new SingleChoiceWithSubParams("Row") {
            Values = mdata.CategoryRowNames,
            SubParams = subParams,
            Help = "The categorical row that the filtering should be based on.",
            ParamNameWidth = 50,
            TotalWidth = 731
        },
        new SingleChoiceParam("Mode") {
            Values = new[] { "Remove matching columns", "Keep matching columns" },
            Help = "If 'Remove matching columns' is selected, columns having the values specified above will be removed while " +
                   "all other columns will be kept. If 'Keep matching columns' is selected, the opposite will happen."
        },
        PerseusPluginUtils.CreateFilterModeParam(false)));
}
/// <summary>
/// Builds the numeric-filter parameters over the numeric column names followed by the main
/// column names, and appends a filter-mode parameter.
/// </summary>
/// <param name="mdata">Matrix whose column names are offered for selection.</param>
/// <param name="errorString">Not written by this method.</param>
/// <returns>The parameter set for this activity.</returns>
public Parameters GetParameters(IMatrixData mdata, ref string errorString) {
    // Numeric columns come first, main columns second.
    string[] selectable = ArrayUtils.Concat(mdata.NumericColumnNames, mdata.ColumnNames);
    var numericFilterParams = PerseusUtils.GetNumFilterParams(selectable);
    var filterModeParam = PerseusPluginUtils.CreateFilterModeParam(true);
    var allParams = ArrayUtils.Concat(numericFilterParams, filterModeParam);
    return new Parameters(allParams);
}
/// <summary>
/// Filters rows by their number of valid values, either over all columns ("Mode" index 0) or
/// per group of a categorical row ("Mode" index 1 or 2).
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Parameters of the activity.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Receives an error string when a grouping is required but none exists.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    const bool filterRows = true;
    int minValids = PerseusPluginUtils.GetMinValids(param, out bool percentage);
    ParameterWithSubParams<int> modeParam = param.GetParamWithSubParams<int>("Mode");
    int mode = modeParam.Value;
    bool groupWise = mode != 0;
    if (groupWise && mdata.CategoryRowNames.Count == 0) {
        processInfo.ErrString = "No grouping is defined.";
        return;
    }
    PerseusPluginUtils.ReadValuesShouldBeParams(param, out FilteringMode filterMode, out double threshold,
        out double threshold2);
    if (!groupWise) {
        PerseusPluginUtils.NonzeroFilter1(filterRows, minValids, percentage, mdata, param, threshold, threshold2,
            filterMode);
        return;
    }
    int groupingIndex = modeParam.GetSubParameters().GetParam<int>("Grouping").Value;
    string[][] grouping = mdata.GetCategoryRowAt(groupingIndex);
    // Mode index 2 is passed on as the "oneGroup" flag of NonzeroFilterGroup.
    bool oneGroup = mode == 2;
    NonzeroFilterGroup(minValids, percentage, mdata, param, oneGroup, threshold, threshold2, filterMode, grouping);
}
/// <summary>
/// Keeps the first occurrence of every distinct row, where rows are compared on the selected
/// main, numeric, text, category and multi-numeric columns.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Parameters holding the column selections.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Not written by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    var mainIndices = param.GetParam<int[]>("Main").Value;
    var mainColumns = mainIndices.Select(mdata.Values.GetColumn).ToArray();
    var numericIndices = param.GetParam<int[]>("Numeric").Value;
    var numericColumns = ArrayUtils.SubList(mdata.NumericColumns, numericIndices);
    var textIndices = param.GetParam<int[]>("Text").Value;
    var textColumns = ArrayUtils.SubList(mdata.StringColumns, textIndices);
    var categoryIndices = param.GetParam<int[]>("Category").Value;
    var categoryColumns = categoryIndices.Select(mdata.GetCategoryColumnAt).ToArray();
    var multiNumericIndices = param.GetParam<int[]>("MultiNumeric").Value;
    var multiNumericColumns = ArrayUtils.SubList(mdata.MultiNumericColumns, multiNumericIndices);

    // Serializes the selected cells of one row into a tab-separated key; multi-valued cells use ';'.
    string BuildKey(int r) {
        var cells = mainColumns.Select(col => $"{col[r]}")
            .Concat(numericColumns.Select(col => $"{col[r]}"))
            .Concat(textColumns.Select(col => $"{col[r]}"))
            .Concat(categoryColumns.Select(col => string.Join(";", col[r])))
            .Concat(multiNumericColumns.Select(col => string.Join(";", col[r].Select(d => $"{d}"))));
        return string.Join("\t", cells);
    }

    var firstIndexByKey = new Dictionary<string, int>();
    for (int r = 0; r < mdata.RowCount; r++) {
        string key = BuildKey(r);
        if (firstIndexByKey.ContainsKey(key)) {
            // A row with the same key was seen earlier; only the first one is kept.
            continue;
        }
        firstIndexByKey.Add(key, r);
    }
    PerseusPluginUtils.FilterRows(mdata, param, firstIndexByKey.Values.ToArray());
}
/// <summary>
/// Filters columns by their number of valid values. Only the "In total" mode (index 0) is
/// supported; any group-wise mode is rejected with an error string.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Parameters of the activity.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Receives the error string when a group-wise mode is selected.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    const bool rows = false;
    int minValids = PerseusPluginUtils.GetMinValids(param, out bool percentage);
    ParameterWithSubParams<int> modeParam = param.GetParamWithSubParams<int>("Mode");
    int modeInd = modeParam.Value;
    if (modeInd != 0) {
        // Group-wise modes never reach the filter: report the more specific problem first.
        processInfo.ErrString = mdata.CategoryRowNames.Count == 0
            ? "No grouping is defined."
            : "Group-wise filtering can only be applied to rows.";
        return;
    }
    PerseusPluginUtils.ReadValuesShouldBeParams(param, out FilteringMode filterMode, out double threshold,
        out double threshold2);
    PerseusPluginUtils.NonzeroFilter1(rows, minValids, percentage, mdata, param, threshold, threshold2, filterMode);
}
/// <summary>
/// Builds the parameters for the valid-value filter: the minimum number of values, the mode
/// ("In total", "In each group", "In at least one group"), the value criterion and the filter mode.
/// </summary>
/// <param name="mdata">Matrix whose categorical row names are offered as groupings.</param>
/// <param name="errorString">Not written by this method.</param>
/// <returns>The parameter set for this activity.</returns>
public Parameters GetParameters(IMatrixData mdata, ref string errorString) {
    var minValuesParam = PerseusPluginUtils.GetMinValuesParam(true);
    var modeParam = new SingleChoiceWithSubParams("Mode") {
        Values = new[] { "In total", "In each group", "In at least one group" },
        SubParams = new[] {
            // "In total" needs no further input.
            new Parameters(new Parameter[0]),
            // Both group-wise modes ask for the categorical row that defines the groups.
            new Parameters(new Parameter[] {
                new SingleChoiceParam("Grouping") { Values = mdata.CategoryRowNames }
            }),
            new Parameters(new Parameter[] {
                new SingleChoiceParam("Grouping") { Values = mdata.CategoryRowNames }
            })
        },
        ParamNameWidth = 50,
        TotalWidth = 731
    };
    var valuesShouldBeParam = PerseusPluginUtils.GetValuesShouldBeParam();
    var filterModeParam = PerseusPluginUtils.GetFilterModeParam(false);
    return new Parameters(new[] { minValuesParam, modeParam, valuesShouldBeParam, filterModeParam });
}
/// <summary>
/// Checks the Benjamini-Hochberg FDR values for a p-value list that contains NaN entries
/// against fixed reference values (per the test name, obtained from R).
/// </summary>
public void TestBenjaminiHochbergFdrCorrectionAgainstRWithNaNs() {
    double[] pValues = {
        double.NaN, 0.55418364, 0.33169014, 0.61117003, 0.79263279, 0.74714936, 0.93567141, 0.41151512,
        0.99690655, 0.57863046, 0.35048756, double.NaN, 0.17302064, 0.58728787, 0.45285588, 0.67122903,
        0.99010006, 0.32346151, 0.02248119, 0.5575581, 0.54179022, 0.30518608
    };
    double[] expectedFdrs = {
        double.NaN, 0.87310004, 0.87310004, 0.87310004, 0.93250916, 0.93250916, 0.99690655, 0.87310004,
        0.99690655, 0.87310004, 0.87310004, double.NaN, 0.87310004, 0.87310004, 0.87310004, 0.89497204,
        0.99690655, 0.87310004, 0.4496238, 0.87310004, 0.87310004, 0.87310004
    };
    PerseusPluginUtils.CalcBenjaminiHochbergFdr(pValues, 0.05, out var fdrs);
    for (int index = 0; index < expectedFdrs.Length; index++) {
        double want = expectedFdrs[index];
        var got = fdrs[index];
        // A NaN in both places counts as a match; everything else is compared with a tolerance.
        bool bothNaN = double.IsNaN(want) && double.IsNaN(got);
        if (!bothNaN) {
            Assert.AreEqual(want, got, 0.00001);
        }
    }
}
/// <summary>
/// Keeps or removes rows depending on whether the entry of the selected text column matches
/// the search string.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Parameters holding column, search string, match options and mode.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Receives an error string when the search string is empty and whole-word matching is off.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int column = param.GetParam<int>("Column").Value;
    string searchString = param.GetParam<string>("Search string").Value;
    // Mode index 0 removes the matching rows; any other index keeps them.
    bool removeMatches = param.GetParam<int>("Mode").Value == 0;
    bool matchCase = param.GetParam<bool>("Match case").Value;
    bool matchWholeWord = param.GetParam<bool>("Match whole word").Value;
    if (!matchWholeWord && string.IsNullOrEmpty(searchString)) {
        processInfo.ErrString = "Please provide a search string, or set 'Match whole word' to match empty entries.";
        return;
    }
    string[] entries = mdata.StringColumns[column];
    List<int> keep = new List<int>();
    for (int row = 0; row < entries.Length; row++) {
        bool isMatch = Matches(entries[row], searchString, matchCase, matchWholeWord);
        // Keep matches in "keep" mode and non-matches in "remove" mode.
        if (isMatch != removeMatches) {
            keep.Add(row);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, keep.ToArray());
}
/// <summary>
/// Filters rows by the numeric relations defined in <paramref name="param"/>. When the
/// "Filter mode" parameter has index 2, the rows failing the relations are additionally
/// handed to <c>PerseusPluginUtils.CreateSupplTabSplit</c> to form a supplementary table.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Parameters of the activity.</param>
/// <param name="supplTables">Set to a single supplementary table when "Filter mode" has index 2.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Receives the error string reported while reading the relations.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    Relation[] relations = PerseusUtils.GetRelationsNumFilter(param, out string errString, out int[] colInds,
        out bool and);
    if (errString != null) {
        processInfo.ErrString = errString;
        return;
    }
    // The failing rows are only needed for the supplementary table, so they are collected on demand.
    // NOTE(review): index 2 is presumably the "split" filter mode - confirm against the filter-mode parameter.
    if (param.GetParam<int>("Filter mode").Value == 2) {
        double[][] rows = GetRows(mdata, colInds);
        List<int> notvalids = new List<int>();
        for (int i = 0; i < rows.Length; i++) {
            if (!PerseusUtils.IsValidRowNumFilter(rows[i], relations, and)) {
                notvalids.Add(i);
            }
        }
        supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, notvalids.ToArray()) };
    }
    PerseusPluginUtils.FilterRowsNew(mdata, param, GetValids(mdata, colInds, relations, and));
}
/// <summary>
/// Builds the parameters for the text filter: column, search string, case and whole-word
/// options, remove/keep mode and filter mode.
/// </summary>
/// <param name="mdata">Matrix whose text column names are offered for selection.</param>
/// <param name="errorString">Not written by this method.</param>
/// <returns>The parameter set for this activity.</returns>
public Parameters GetParameters(IMatrixData mdata, ref string errorString) {
    var columnParam = new SingleChoiceParam("Column") {
        Values = mdata.StringColumnNames,
        Help = "The text column that the filtering should be based on."
    };
    var searchParam = new StringParam("Search string") {
        Help = "String that is searched in the specified column.",
        Value = ""
    };
    var matchCaseParam = new BoolParam("Match case");
    // Whole-word matching is switched on initially.
    var wholeWordParam = new BoolParam("Match whole word") { Value = true };
    var modeParam = new SingleChoiceParam("Mode") {
        Values = new[] { "Remove matching rows", "Keep matching rows" },
        Help = "If 'Remove matching rows' is selected, rows matching the criteria will be removed while " +
               "all other rows will be kept. If 'Keep matching rows' is selected, the opposite will happen.",
        Value = 0
    };
    var filterModeParam = PerseusPluginUtils.GetFilterModeParam(true);
    return new Parameters(columnParam, searchParam, matchCaseParam, wholeWordParam, modeParam, filterModeParam);
}
/// <summary>
/// Keeps or removes columns depending on whether their entry in the selected categorical row
/// contains one of the chosen values. When the "Filter mode" parameter has index 2, the
/// discarded columns are handed to <c>PerseusPluginUtils.CreateSupplTabSplitColumns</c>.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Parameters holding the row, the values, the mode and the filter mode.</param>
/// <param name="supplTables">Set to a single supplementary table when "Filter mode" has index 2.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Receives an error string when no row or no value is selected.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    ParameterWithSubParams<int> p = param.GetParamWithSubParams<int>("Row");
    int colInd = p.Value;
    if (colInd < 0) {
        processInfo.ErrString = "No categorical rows available.";
        return;
    }
    Parameter<int[]> mcp = p.GetSubParameters().GetParam<int[]>("Values");
    int[] inds = mcp.Value;
    if (inds.Length == 0) {
        processInfo.ErrString = "Please select at least one term for filtering.";
        return;
    }
    string[] values = new string[inds.Length];
    string[] v = mdata.GetCategoryRowValuesAt(colInd);
    for (int i = 0; i < values.Length; i++) {
        values[i] = v[inds[i]];
    }
    HashSet<string> value = new HashSet<string>(values);
    // Mode index 0 removes the matching columns; any other index keeps them.
    bool remove = param.GetParam<int>("Mode").Value == 0;
    string[][] cats = mdata.GetCategoryRowAt(colInd);
    List<int> valids = new List<int>();
    List<int> notvalids = new List<int>();
    for (int i = 0; i < cats.Length; i++) {
        // "valid" means: none of the selected values occurs in this column's entry.
        bool valid = true;
        foreach (string w in cats[i]) {
            if (value.Contains(w)) {
                valid = false;
                break;
            }
        }
        if (valid == remove) {
            valids.Add(i);
        } else {
            // Every column that is not kept is recorded as discarded, in both modes. Previously
            // the discarded (non-matching) columns were lost in "Keep matching columns" mode.
            notvalids.Add(i);
        }
    }
    if (param.GetParam<int>("Filter mode").Value == 2) {
        supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplitColumns(mdata, notvalids.ToArray()) };
    }
    PerseusPluginUtils.FilterColumnsNew(mdata, param, valids.ToArray());
}
/// <summary>
/// Selects rows from a permutation produced by a fixed-seed <c>Random2</c> and passes them to
/// <c>PerseusPluginUtils.FilterRows</c>; the requested number is capped at the row count.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Parameters holding "Number of rows".</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Not written by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int requested = param.GetParam<int>("Number of rows").Value;
    // Never ask for more rows than the matrix has.
    int count = Math.Min(requested, mdata.RowCount);
    // The generator is always constructed with the same seed (7).
    Random2 generator = new Random2(7);
    var permutation = generator.NextPermutation(mdata.RowCount);
    int[] selected = ArrayUtils.SubArray(permutation, count);
    PerseusPluginUtils.FilterRows(mdata, param, selected);
}
/// <summary>
/// Builds the parameters for the column-wise valid-value filter: the minimum number of values,
/// a mode that offers only "In total", the value criterion and the filter mode.
/// </summary>
/// <param name="mdata">Matrix passed on to the minimum-values parameter factory.</param>
/// <param name="errorString">Not written by this method.</param>
/// <returns>The parameter set for this activity.</returns>
public Parameters GetParameters(IMatrixData mdata, ref string errorString) {
    return (new Parameters(
        PerseusPluginUtils.GetMinValuesParam(mdata, false),
        new SingleChoiceWithSubParams("Mode") {
            Values = new[] { "In total" },
            // Assign the sub-parameter list rather than appending to whatever the constructor
            // created, so there is exactly one entry per value (same form as the row-wise filter).
            SubParams = new[] { new Parameters(new Parameter[0]) },
            ParamNameWidth = 50,
            TotalWidth = 731
        },
        PerseusPluginUtils.GetValuesShouldBeParam(),
        PerseusPluginUtils.CreateFilterModeParam(true)));
}
/// <summary>
/// Creates the control, shows <paramref name="mdata"/> in the table view and wires up the
/// remove/keep buttons.
/// </summary>
/// <param name="mdata">Matrix displayed in the table view.</param>
/// <param name="createNewMatrix">Callback stored for later use by the control.</param>
public SelectRowsManuallyControl(IMatrixData mdata, Action<IData> createNewMatrix) {
    InitializeComponent();
    this.mdata = mdata;
    this.createNewMatrix = createNewMatrix;
    tableView1.TableModel = new MatrixDataTable(mdata);

    // "Remove selected rows" button: handler and icon.
    removeSelectedRowsButton.Click += RemoveSelectedRowsButton_OnClick;
    removeSelectedRowsButton.Image = GraphUtils.ToBitmap(PerseusPluginUtils.GetImage("hand.png"));

    // "Keep selected rows" button: handler and icon (loaded separately from the same resource name).
    keepSelectedRowsButton.Click += KeepSelectedRowsButton_OnClick;
    keepSelectedRowsButton.Image = GraphUtils.ToBitmap(PerseusPluginUtils.GetImage("hand.png"));
}
/// <summary>
/// Filters rows by the numeric relations defined in <paramref name="param"/>.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Parameters of the activity.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Receives the error string reported while reading the relations.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    Relation[] relations = PerseusUtils.GetRelationsNumFilter(param, out string error, out int[] columns,
        out bool combineWithAnd);
    bool failed = error != null;
    if (failed) {
        // Reading the relations failed; report and leave the matrix untouched.
        processInfo.ErrString = error;
        return;
    }
    var keep = GetValids(mdata, columns, relations, combineWithAnd);
    PerseusPluginUtils.FilterRows(mdata, param, keep);
}
/// <summary>
/// For every selected main column, computes Significance A p-values and adds them as a numeric
/// column together with a categorical column produced by the chosen truncation method.
/// </summary>
/// <param name="mdata">Matrix that receives the new columns.</param>
/// <param name="param">Parameters holding columns, truncation, threshold and side.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Not written by this method.</param>
/// <exception cref="Exception">Thrown for a side index outside 0-2, or for a truncation other
/// than p-value or Benjamini-Hochberg once a column is processed.</exception>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] cols = param.GetParam<int[]>("Columns").Value;
    int truncIndex = param.GetParam<int>("Use for truncation").Value;
    TestTruncation truncation;
    if (truncIndex == 0) {
        truncation = TestTruncation.Pvalue;
    } else if (truncIndex == 1) {
        truncation = TestTruncation.BenjaminiHochberg;
    } else {
        truncation = TestTruncation.PermutationBased;
    }
    double threshold = param.GetParam<double>("Threshold value").Value;
    int sideInd = param.GetParam<int>("Side").Value;
    TestSide side;
    if (sideInd == 0) {
        side = TestSide.Both;
    } else if (sideInd == 1) {
        side = TestSide.Left;
    } else if (sideInd == 2) {
        side = TestSide.Right;
    } else {
        throw new Exception("Never get here.");
    }
    foreach (int col in cols) {
        BaseVector column = mdata.Values.GetColumn(col);
        double[] pvals = CalcSignificanceA(column, side);
        string[][] significant;
        if (truncation == TestTruncation.Pvalue) {
            significant = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
        } else if (truncation == TestTruncation.BenjaminiHochberg) {
            // The FDR values themselves are not used here, only the significance calls.
            significant = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold, pvals.Length,
                out double[] unusedFdrs);
        } else {
            // Permutation-based truncation is not handled by this activity.
            throw new Exception("Never get here.");
        }
        mdata.AddNumericColumn(mdata.ColumnNames[col] + " Significance A", "", pvals);
        mdata.AddCategoryColumn(mdata.ColumnNames[col] + " A significant", "", significant);
    }
}
/// <summary>
/// Replaces the main matrix by one column per group of the categorical row
/// <paramref name="groupColInd"/>, where each cell is <paramref name="func"/> applied to the
/// finite values of the group's columns. The grouping row is removed and the remaining
/// categorical and numeric rows are collapsed per group.
/// </summary>
/// <param name="groupColInd">Index of the categorical row defining the groups.</param>
/// <param name="validVals">Minimum number of finite values required; otherwise the cell is NaN.</param>
/// <param name="mdata">Matrix that is modified in place.</param>
/// <param name="func">Aggregation applied to the finite values of a group.</param>
private static void FillMatrixDontKeep(int groupColInd, int validVals, IMatrixData mdata,
    Func<IList<double>, double> func) {
    string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
    string[] groups = ArrayUtils.UniqueValuesPreserveOrder(grouping);
    int[][] members = PerseusPluginUtils.GetMainColIndices(grouping, groups);
    int rowCount = mdata.RowCount;
    int groupCount = groups.Length;
    double[,] values = new double[rowCount, groupCount];
    double[,] quality = new double[rowCount, groupCount];
    bool[,] imputed = new bool[rowCount, groupCount];
    for (int row = 0; row < rowCount; row++) {
        for (int g = 0; g < groupCount; g++) {
            List<double> finite = new List<double>();
            List<bool> imputedFlags = new List<bool>();
            foreach (int col in members[g]) {
                double cell = mdata.Values.Get(row, col);
                if (double.IsNaN(cell) || double.IsInfinity(cell)) {
                    continue;
                }
                finite.Add(cell);
                imputedFlags.Add(mdata.IsImputed[row, col]);
            }
            // With too few finite values the cell stays NaN and is flagged as not imputed.
            bool enough = finite.Count >= validVals;
            values[row, g] = enough ? func(finite) : double.NaN;
            quality[row, g] = double.NaN;
            imputed[row, g] = enough && ArrayUtils.Or(imputedFlags);
        }
    }
    mdata.ColumnNames = new List<string>(groups);
    mdata.ColumnDescriptions = GetEmpty(groups);
    mdata.Values.Set(values);
    mdata.Quality.Set(quality);
    mdata.IsImputed.Set(imputed);
    // The grouping row is dropped before the remaining annotation rows are collapsed.
    mdata.RemoveCategoryRowAt(groupColInd);
    for (int r = 0; r < mdata.CategoryRowCount; r++) {
        string[][] collapsed = AverageCategoryRow(mdata.GetCategoryRowAt(r), members);
        mdata.SetCategoryRowAt(collapsed, r);
    }
    for (int r = 0; r < mdata.NumericRows.Count; r++) {
        mdata.NumericRows[r] = AverageNumericRow(mdata.NumericRows[r], members);
    }
}
/// <summary>
/// Keeps the rows that have enough valid values per group: in every group, or in at least one
/// group when <paramref name="oneGroup"/> is set.
/// </summary>
/// <param name="minValids">Minimum number (or percentage) of valid values, passed to <c>PerseusPluginUtils.Valid</c>.</param>
/// <param name="percentage">Whether <paramref name="minValids"/> is a percentage, passed to <c>PerseusPluginUtils.Valid</c>.</param>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Parameters passed on to <c>PerseusPluginUtils.FilterRowsNew</c>.</param>
/// <param name="oneGroup">True if one passing group suffices; false if all groups must pass.</param>
/// <param name="threshold">First threshold passed to <c>PerseusPluginUtils.IsValid</c>.</param>
/// <param name="threshold2">Second threshold passed to <c>PerseusPluginUtils.IsValid</c>.</param>
/// <param name="filterMode">Validity criterion passed to <c>PerseusPluginUtils.IsValid</c>.</param>
/// <param name="groupCol">Group labels, one entry per main column.</param>
private static void NonzeroFilterGroup(int minValids, bool percentage, IMatrixData mdata, Parameters param,
    bool oneGroup, double threshold, double threshold2, FilteringMode filterMode, IList<string[]> groupCol) {
    string[] groupVals = ArrayUtils.UniqueValuesPreserveOrder(groupCol);
    Array.Sort(groupVals);
    int[][] groupInds = CalcGroupInds(groupVals, groupCol);

    // The number of columns per group does not depend on the row, so it is counted once.
    int[] totals = new int[groupVals.Length];
    foreach (int[] columnGroups in groupInds) {
        foreach (int g in columnGroups) {
            if (g >= 0) {
                totals[g]++;
            }
        }
    }

    List<int> valids = new List<int>();
    for (int i = 0; i < mdata.RowCount; i++) {
        // Count, per group, the columns whose value in this row passes the validity check.
        int[] counts = new int[groupVals.Length];
        for (int j = 0; j < groupInds.Length; j++) {
            if (!PerseusPluginUtils.IsValid(mdata.Values.Get(i, j), threshold, threshold2, filterMode)) {
                continue;
            }
            foreach (int g in groupInds[j]) {
                if (g >= 0) {
                    counts[g]++;
                }
            }
        }
        bool[] groupValid = new bool[counts.Length];
        for (int j = 0; j < groupValid.Length; j++) {
            groupValid[j] = PerseusPluginUtils.Valid(counts[j], minValids, percentage, totals[j]);
        }
        if (oneGroup ? ArrayUtils.Or(groupValid) : ArrayUtils.And(groupValid)) {
            valids.Add(i);
        }
    }
    PerseusPluginUtils.FilterRowsNew(mdata, param, valids.ToArray());
}
/// <summary>
/// Keeps or removes rows depending on whether their entry in the selected categorical column
/// contains one of the chosen values.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Parameters holding the column, the values and the mode.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Receives an error string when no column or no value is selected.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    ParameterWithSubParams<int> columnParam = param.GetParamWithSubParams<int>("Column");
    int column = columnParam.Value;
    if (column < 0) {
        processInfo.ErrString = "No categorical columns available.";
        return;
    }
    Parameter<int[]> valuesParam = columnParam.GetSubParameters().GetParam<int[]>("Values");
    int[] chosen = valuesParam.Value;
    if (chosen.Length == 0) {
        processInfo.ErrString = "Please select at least one term for filtering.";
        return;
    }
    // Translate the chosen indices into the terms they stand for.
    string[] allTerms = mdata.GetCategoryColumnValuesAt(column);
    HashSet<string> selectedTerms = new HashSet<string>();
    foreach (int index in chosen) {
        selectedTerms.Add(allTerms[index]);
    }
    // Mode index 0 removes the matching rows; any other index keeps them.
    bool removeMatches = param.GetParam<int>("Mode").Value == 0;
    List<int> keep = new List<int>();
    for (int row = 0; row < mdata.RowCount; row++) {
        bool hit = false;
        foreach (string term in mdata.GetCategoryColumnEntryAt(column, row)) {
            if (selectedTerms.Contains(term)) {
                hit = true;
                break;
            }
        }
        // Keep matches in "keep" mode and non-matches in "remove" mode.
        if (hit != removeMatches) {
            keep.Add(row);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, keep.ToArray());
}
/// <summary>
/// Checks the Benjamini-Hochberg FDR values for a short p-value list ending in NaN against
/// fixed reference values.
/// </summary>
public void TestBenjaminiHochberFdrCorrectionKinactExample() {
    var pValues = new[] { 0.5, 0.26996402, 0.17923912, 0.29354353, double.NaN };
    var expectedFdrs = new[] { 0.5, 0.39139137, 0.39139137, 0.39139137, double.NaN };
    PerseusPluginUtils.CalcBenjaminiHochbergFdr(pValues, 0.05, out var fdrs);
    for (int i = 0; i < expectedFdrs.Length; i++) {
        var expected = expectedFdrs[i];
        var actual = fdrs[i];
        // Handle the NaN entry explicitly (as the NaN test against R does) so the result does
        // not depend on how the assertion library compares NaN within a tolerance.
        if (double.IsNaN(expected) && double.IsNaN(actual)) {
            continue;
        }
        Assert.AreEqual(expected, actual, 0.00001);
    }
}
/// <summary>
/// Checks that a single p-value is returned unchanged as its FDR value.
/// </summary>
public void TestBenjaminiHochbergFdrCorrectionWithSinglePvalue() {
    double[] pValues = { 0.55418364 };
    double[] expectedFdrs = { 0.55418364 };
    PerseusPluginUtils.CalcBenjaminiHochbergFdr(pValues, 0.05, out var fdrs);
    int index = 0;
    while (index < expectedFdrs.Length) {
        Assert.AreEqual(expectedFdrs[index], fdrs[index], 0.00001);
        index++;
    }
}
/// <summary>
/// Adds one numeric column per group of the categorical row <paramref name="groupColInd"/>
/// holding, per matrix row, the standard deviation of the group's finite values
/// (<paramref name="varInd"/> == 0) or that value divided by the square root of their count.
/// </summary>
/// <param name="groupColInd">Index of the categorical row defining the groups.</param>
/// <param name="validVals">Minimum number of finite values required; otherwise the cell is NaN.</param>
/// <param name="mdata">Matrix that receives the new numeric columns.</param>
/// <param name="varInd">0 for the standard deviation; any other value divides it by sqrt(count).</param>
private static void AddStandardDeviation(int groupColInd, int validVals, IMatrixData mdata, int varInd) {
    string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
    string[] groups = ArrayUtils.UniqueValuesPreserveOrder(grouping);
    int[][] members = PerseusPluginUtils.GetMainColIndices(grouping, groups);
    double[][] result = new double[groups.Length][];
    for (int g = 0; g < result.Length; g++) {
        result[g] = new double[mdata.RowCount];
    }
    for (int row = 0; row < mdata.RowCount; row++) {
        for (int g = 0; g < groups.Length; g++) {
            List<double> finite = new List<double>();
            foreach (int col in members[g]) {
                double cell = mdata.Values.Get(row, col);
                if (double.IsNaN(cell) || double.IsInfinity(cell)) {
                    continue;
                }
                finite.Add(cell);
            }
            double spread = double.NaN;
            if (finite.Count >= validVals) {
                double sd = ArrayUtils.StandardDeviation(finite);
                spread = varInd == 0 ? sd : sd / Math.Sqrt(finite.Count);
            }
            result[g][row] = spread;
        }
    }
    for (int g = 0; g < groups.Length; g++) {
        // The same text serves as column name and description.
        string label = "stddev " + groups[g];
        mdata.AddNumericColumn(label, label, result[g]);
    }
}
/// <summary>
/// Restricts the numeric, multi-numeric, categorical and text annotation rows of
/// <paramref name="data"/> to the indices selected in <paramref name="param"/>.
/// </summary>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="param">Parameters holding the selected row indices per kind.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Not written by this method.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] numericRowInds = param.GetParam<int[]>("Numerical rows").Value;
    int[] multiNumericRowInds = param.GetParam<int[]>("Multi-numerical rows").Value;
    int[] categoryRowInds = param.GetParam<int[]>("Categorical rows").Value;
    int[] textRowInds = param.GetParam<int[]>("Text rows").Value;

    // Numeric rows.
    data.NumericRows = ArrayUtils.SubList(data.NumericRows, numericRowInds);
    data.NumericRowNames = ArrayUtils.SubList(data.NumericRowNames, numericRowInds);
    data.NumericRowDescriptions = ArrayUtils.SubList(data.NumericRowDescriptions, numericRowInds);

    // Multi-numeric rows.
    data.MultiNumericRows = ArrayUtils.SubList(data.MultiNumericRows, multiNumericRowInds);
    data.MultiNumericRowNames = ArrayUtils.SubList(data.MultiNumericRowNames, multiNumericRowInds);
    data.MultiNumericRowDescriptions = ArrayUtils.SubList(data.MultiNumericRowDescriptions, multiNumericRowInds);

    // Categorical rows: the data is fetched through the helper before the names are subset.
    data.CategoryRows = PerseusPluginUtils.GetCategoryRows(data, categoryRowInds);
    data.CategoryRowNames = ArrayUtils.SubList(data.CategoryRowNames, categoryRowInds);
    data.CategoryRowDescriptions = ArrayUtils.SubList(data.CategoryRowDescriptions, categoryRowInds);

    // Text rows.
    data.StringRows = ArrayUtils.SubList(data.StringRows, textRowInds);
    data.StringRowNames = ArrayUtils.SubList(data.StringRowNames, textRowInds);
    data.StringRowDescriptions = ArrayUtils.SubList(data.StringRowDescriptions, textRowInds);
}
/// <summary>
/// Turns the categorical columns at <paramref name="colInds"/> into numeric columns by parsing
/// the first term of every entry, and drops them from the categorical columns.
/// </summary>
/// <param name="colInds">Indices of the categorical columns to convert.</param>
/// <param name="mdata">Matrix that is modified in place.</param>
private static void CategoricalToNumeric(IList<int> colInds, IMatrixData mdata) {
    int[] remaining = ArrayUtils.Complement(colInds, mdata.CategoryColumnCount);
    string[] convertedNames = ArrayUtils.SubArray(mdata.CategoryColumnNames, colInds);
    string[] convertedDescriptions = ArrayUtils.SubArray(mdata.CategoryColumnDescriptions, colInds);
    string[][][] source = PerseusPluginUtils.GetCategoryColumns(mdata, colInds).ToArray();
    var converted = new double[source.Length][];
    for (int c = 0; c < source.Length; c++) {
        string[][] column = source[c];
        double[] numbers = new double[column.Length];
        for (int r = 0; r < numbers.Length; r++) {
            string[] terms = column[r];
            // Empty entries and entries whose first term is not a number become NaN.
            if (terms != null && terms.Length != 0 && double.TryParse(terms[0], out double parsed)) {
                numbers[r] = parsed;
            } else {
                numbers[r] = double.NaN;
            }
        }
        converted[c] = numbers;
    }
    // Append the converted columns to the numeric columns first ...
    mdata.NumericColumnNames.AddRange(convertedNames);
    mdata.NumericColumnDescriptions.AddRange(convertedDescriptions);
    mdata.NumericColumns.AddRange(converted);
    // ... then keep only the categorical columns that were not converted.
    mdata.CategoryColumns = PerseusPluginUtils.GetCategoryColumns(mdata, remaining);
    mdata.CategoryColumnNames = ArrayUtils.SubList(mdata.CategoryColumnNames, remaining);
    mdata.CategoryColumnDescriptions = ArrayUtils.SubList(mdata.CategoryColumnDescriptions, remaining);
}
/// <summary>
/// Restricts every column kind of <paramref name="data"/> to the indices returned by the
/// corresponding <c>GetValid…Cols</c> helper. A kind is only touched when at least one of its
/// columns is dropped.
/// </summary>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="param">Not read by this method.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Not written by this method.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] exColInds = GetValidExCols(data);
    int[] numColInds = GetValidNumCols(data);
    int[] multiNumColInds = GetValidMultiNumCols(data);
    int[] catColInds = GetValidCatCols(data);
    int[] textColInds = GetValidTextCols(data);
    if (exColInds.Length < data.ColumnCount) {
        data.ExtractColumns(exColInds);
    }
    if (numColInds.Length < data.NumericColumnCount) {
        data.NumericColumns = ArrayUtils.SubList(data.NumericColumns, numColInds);
        data.NumericColumnNames = ArrayUtils.SubList(data.NumericColumnNames, numColInds);
        data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, numColInds);
    }
    if (multiNumColInds.Length < data.MultiNumericColumnCount) {
        data.MultiNumericColumns = ArrayUtils.SubList(data.MultiNumericColumns, multiNumColInds);
        data.MultiNumericColumnNames = ArrayUtils.SubList(data.MultiNumericColumnNames, multiNumColInds);
        data.MultiNumericColumnDescriptions =
            ArrayUtils.SubList(data.MultiNumericColumnDescriptions, multiNumColInds);
    }
    if (catColInds.Length < data.CategoryColumnCount) {
        data.CategoryColumns = PerseusPluginUtils.GetCategoryColumns(data, catColInds);
        data.CategoryColumnNames = ArrayUtils.SubList(data.CategoryColumnNames, catColInds);
        data.CategoryColumnDescriptions = ArrayUtils.SubList(data.CategoryColumnDescriptions, catColInds);
    }
    if (textColInds.Length < data.StringColumnCount) {
        data.StringColumns = ArrayUtils.SubList(data.StringColumns, textColInds);
        data.StringColumnNames = ArrayUtils.SubList(data.StringColumnNames, textColInds);
        // The filtered text descriptions belong to the text columns; they used to overwrite the
        // descriptions of the main columns.
        data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, textColInds);
    }
}
/// <summary>
/// Restricts the main, numeric, multi-numeric, categorical and text columns of
/// <paramref name="data"/> to the indices selected in <paramref name="param"/>.
/// </summary>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="param">Parameters holding the selected column indices per kind.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Not written by this method.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int[] exColInds = param.GetParam<int[]>("Main columns").Value;
    int[] numColInds = param.GetParam<int[]>("Numerical columns").Value;
    int[] multiNumColInds = param.GetParam<int[]>("Multi-numerical columns").Value;
    int[] catColInds = param.GetParam<int[]>("Categorical columns").Value;
    int[] textColInds = param.GetParam<int[]>("Text columns").Value;
    // NOTE(review): ExtractColumns presumably also subsets the main column names and
    // descriptions, which are not touched separately here - confirm.
    data.ExtractColumns(exColInds);
    data.NumericColumns = ArrayUtils.SubList(data.NumericColumns, numColInds);
    data.NumericColumnNames = ArrayUtils.SubList(data.NumericColumnNames, numColInds);
    data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, numColInds);
    data.MultiNumericColumns = ArrayUtils.SubList(data.MultiNumericColumns, multiNumColInds);
    data.MultiNumericColumnNames = ArrayUtils.SubList(data.MultiNumericColumnNames, multiNumColInds);
    data.MultiNumericColumnDescriptions = ArrayUtils.SubList(data.MultiNumericColumnDescriptions, multiNumColInds);
    data.CategoryColumns = PerseusPluginUtils.GetCategoryColumns(data, catColInds);
    data.CategoryColumnNames = ArrayUtils.SubList(data.CategoryColumnNames, catColInds);
    data.CategoryColumnDescriptions = ArrayUtils.SubList(data.CategoryColumnDescriptions, catColInds);
    data.StringColumns = ArrayUtils.SubList(data.StringColumns, textColInds);
    data.StringColumnNames = ArrayUtils.SubList(data.StringColumnNames, textColInds);
    // Subset the text-column descriptions like every other column kind, so they stay aligned
    // with the text columns and their names.
    data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, textColInds);
}
/// <summary>
/// Adds one numeric column per group of the categorical row <paramref name="groupColInd"/>,
/// where each cell is <paramref name="func"/> applied to the finite values of the group's main
/// columns. The main matrix itself is left as it is.
/// </summary>
/// <param name="groupColInd">Index of the categorical row defining the groups.</param>
/// <param name="validVals">Minimum number of finite values required; otherwise the cell is NaN.</param>
/// <param name="mdata">Matrix that receives the new numeric columns.</param>
/// <param name="func">Aggregation applied to the finite values of a group.</param>
private static void FillMatrixKeep(int groupColInd, int validVals, IMatrixData mdata,
    Func<IList<double>, double> func) {
    string[][] grouping = mdata.GetCategoryRowAt(groupColInd);
    string[] groups = ArrayUtils.UniqueValuesPreserveOrder(grouping);
    int[][] members = PerseusPluginUtils.GetMainColIndices(grouping, groups);
    double[][] aggregated = new double[groups.Length][];
    for (int g = 0; g < aggregated.Length; g++) {
        aggregated[g] = new double[mdata.RowCount];
    }
    for (int row = 0; row < mdata.RowCount; row++) {
        for (int g = 0; g < groups.Length; g++) {
            List<double> finite = new List<double>();
            foreach (int col in members[g]) {
                double cell = mdata.Values.Get(row, col);
                if (double.IsNaN(cell) || double.IsInfinity(cell)) {
                    continue;
                }
                finite.Add(cell);
            }
            // Too few finite values leave the cell at NaN.
            aggregated[g][row] = finite.Count >= validVals ? func(finite) : double.NaN;
        }
    }
    for (int g = 0; g < groups.Length; g++) {
        // The group name serves as both column name and description.
        mdata.AddNumericColumn(groups[g], groups[g], aggregated[g]);
    }
}
/// <summary>
/// Computes the selected summary statistics from <c>procs</c> for every row over the chosen
/// main columns and adds each as a numeric column. When "Expression column selection" has
/// index 2, the statistics are additionally computed per group of the chosen categorical row.
/// </summary>
/// <param name="mdata">Matrix that receives the new numeric columns.</param>
/// <param name="param">Parameters holding the column selection and the statistics to calculate.</param>
/// <param name="supplTables">Not written by this method.</param>
/// <param name="documents">Not written by this method.</param>
/// <param name="processInfo">Not written by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    ParameterWithSubParams<int> selection = param.GetParamWithSubParams<int>("Expression column selection");
    bool perGroup = selection.Value == 2;
    string[] groupNames = null;
    int[][] groupColumns = null;
    if (perGroup) {
        int groupRow = selection.GetSubParameters().GetParam<int>("Group").Value;
        string[][] grouping = mdata.GetCategoryRowAt(groupRow);
        groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
        groupColumns = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
    }
    // Index 1 uses the explicitly selected columns; otherwise all main columns are used.
    int[] usedColumns;
    if (selection.Value == 1) {
        usedColumns = selection.GetSubParameters().GetParam<int[]>("Columns").Value;
    } else {
        usedColumns = ArrayUtils.ConsecutiveInts(mdata.ColumnCount);
    }
    HashSet<int> requested = ArrayUtils.ToHashSet(param.GetParam<int[]>("Calculate").Value);
    bool[] wanted = new bool[procs.Length];
    double[][] overall = new double[procs.Length][];
    double[][][] byGroup = null;
    if (perGroup) {
        byGroup = new double[procs.Length][][];
        for (int p = 0; p < byGroup.Length; p++) {
            byGroup[p] = new double[groupNames.Length][];
        }
    }
    // Allocate result columns only for the requested statistics.
    for (int p = 0; p < wanted.Length; p++) {
        wanted[p] = requested.Contains(p);
        if (!wanted[p]) {
            continue;
        }
        overall[p] = new double[mdata.RowCount];
        if (!perGroup) {
            continue;
        }
        for (int g = 0; g < groupNames.Length; g++) {
            byGroup[p][g] = new double[mdata.RowCount];
        }
    }
    for (int row = 0; row < mdata.RowCount; row++) {
        // Only finite values enter the statistics.
        List<double> finite = new List<double>();
        foreach (int col in usedColumns) {
            double cell = mdata.Values.Get(row, col);
            if (double.IsNaN(cell) || double.IsInfinity(cell)) {
                continue;
            }
            finite.Add(cell);
        }
        for (int p = 0; p < wanted.Length; p++) {
            if (wanted[p]) {
                overall[p][row] = procs[p].Item2(finite);
            }
        }
        if (!perGroup) {
            continue;
        }
        List<double>[] finiteByGroup = new List<double>[groupNames.Length];
        for (int g = 0; g < groupColumns.Length; g++) {
            finiteByGroup[g] = new List<double>();
            for (int k = 0; k < groupColumns[g].Length; k++) {
                double cell = mdata.Values.Get(row, groupColumns[g][k]);
                if (double.IsNaN(cell) || double.IsInfinity(cell)) {
                    continue;
                }
                finiteByGroup[g].Add(cell);
            }
        }
        for (int p = 0; p < wanted.Length; p++) {
            if (!wanted[p]) {
                continue;
            }
            for (int g = 0; g < groupNames.Length; g++) {
                byGroup[p][g][row] = procs[p].Item2(finiteByGroup[g]);
            }
        }
    }
    // Each statistic's overall column is followed directly by its per-group columns.
    for (int p = 0; p < wanted.Length; p++) {
        if (!wanted[p]) {
            continue;
        }
        mdata.AddNumericColumn(procs[p].Item1, procs[p].Item3, overall[p]);
        if (!perGroup) {
            continue;
        }
        for (int g = 0; g < groupNames.Length; g++) {
            mdata.AddNumericColumn(procs[p].Item1 + " " + groupNames[g], procs[p].Item3, byGroup[p][g]);
        }
    }
}