/// <summary>
/// Selects rows of <paramref name="mdata"/> by testing the entries of one string column
/// against a search string and hands the selected row indices to
/// <c>PerseusPluginUtils.FilterRows</c>.
/// </summary>
/// <param name="mdata">Matrix whose string column is inspected and which is then filtered.</param>
/// <param name="param">
/// Supplies "Column", "Search string", "Mode", "Match case" and "Match whole word".
/// A "Mode" value of 0 keeps the rows that do NOT match; any other value keeps the rows that match.
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error string when the search string is empty and
/// "Match whole word" is not set.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int columnIndex = param.GetParam<int>("Column").Value;
    string pattern = param.GetParam<string>("Search string").Value;
    bool dropMatches = param.GetParam<int>("Mode").Value == 0;
    bool caseOption = param.GetParam<bool>("Match case").Value;
    bool wholeWordOption = param.GetParam<bool>("Match whole word").Value;

    // An empty pattern is only accepted together with whole-word matching.
    if (string.IsNullOrEmpty(pattern) && !wholeWordOption) {
        processInfo.ErrString = "Please provide a search string, or set 'Match whole word' to match empty entries.";
        return;
    }

    string[] entries = mdata.StringColumns[columnIndex];
    List<int> keptRows = new List<int>();
    for (int row = 0; row < entries.Length; row++) {
        bool isMatch = Matches(entries[row], pattern, caseOption, wholeWordOption);
        // Keep matching rows in "keep" mode and non-matching rows in "remove" mode.
        if (isMatch != dropMatches) {
            keptRows.Add(row);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, keptRows.ToArray());
}
/// <summary>
/// Keeps only the first occurrence of every distinct row, where two rows count as equal when
/// their text renderings over the selected columns are identical.
/// </summary>
/// <param name="mdata">Matrix to de-duplicate.</param>
/// <param name="param">
/// Supplies the column index selections "Main", "Numeric", "Text", "Category" and "MultiNumeric"
/// that take part in the comparison.
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
/// <remarks>
/// The kept indices are collected in ascending row order. They are deliberately not read back
/// from a dictionary, because the enumeration order of dictionary values is unspecified.
/// </remarks>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    var mainSubset = param.GetParam<int[]>("Main").Value;
    var mainColumns = mainSubset.Select(mdata.Values.GetColumn).ToArray();
    var numericSubset = param.GetParam<int[]>("Numeric").Value;
    var numericColumns = ArrayUtils.SubList(mdata.NumericColumns, numericSubset);
    var stringSubset = param.GetParam<int[]>("Text").Value;
    var stringColumns = ArrayUtils.SubList(mdata.StringColumns, stringSubset);
    var categorySubset = param.GetParam<int[]>("Category").Value;
    var categoryColumns = categorySubset.Select(mdata.GetCategoryColumnAt).ToArray();
    var multiNumericSubset = param.GetParam<int[]>("MultiNumeric").Value;
    var multiNumericColumns = ArrayUtils.SubList(mdata.MultiNumericColumns, multiNumericSubset);

    var seenRows = new HashSet<string>();
    var firstOccurrences = new List<int>();
    for (int j = 0; j < mdata.RowCount; j++) {
        // Copy of the loop variable for use inside the lambdas below.
        int i = j;
        // Row key: selected cells joined by tabs; multi-valued cells are joined by ';'.
        string row = string.Join("\t", mainColumns.Select(col => $"{col[i]}")
            .Concat(numericColumns.Select(col => $"{col[i]}"))
            .Concat(stringColumns.Select(col => $"{col[i]}"))
            .Concat(categoryColumns.Select(col => string.Join(";", col[i])))
            .Concat(multiNumericColumns.Select(col => string.Join(";", col[i].Select(d => $"{d}")))));
        // HashSet.Add returns false for an already-known key, so only first occurrences are kept.
        if (seenRows.Add(row)) {
            firstOccurrences.Add(i);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, firstOccurrences.ToArray());
}
/// <summary>
/// Selects a pseudo-random subset of rows and hands their indices to
/// <c>PerseusPluginUtils.FilterRows</c>.
/// </summary>
/// <param name="mdata">Matrix to sample rows from.</param>
/// <param name="param">Supplies "Number of rows", the requested subset size. Values larger than
/// the row count are capped at the row count.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error string when the requested size is negative.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int requested = param.GetParam<int>("Number of rows").Value;
    // A negative size would otherwise be passed on as a negative sub-array length.
    if (requested < 0) {
        processInfo.ErrString = "Number of rows must not be negative.";
        return;
    }
    int nrows = Math.Min(requested, mdata.RowCount);
    // NOTE(review): the generator is always seeded with 7, presumably so that repeated runs
    // pick the same rows - confirm against Random2.
    Random2 rand = new Random2(7);
    int[] rows = ArrayUtils.SubArray(rand.NextPermutation(mdata.RowCount), nrows);
    PerseusPluginUtils.FilterRows(mdata, param, rows);
}
/// <summary>
/// Filters the rows of <paramref name="mdata"/> using the numeric relations described by
/// <paramref name="param"/>.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">Passed to <c>PerseusUtils.GetRelationsNumFilter</c> to obtain the relations,
/// the column indices and the and-flag, and afterwards to <c>PerseusPluginUtils.FilterRows</c>.</param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error string reported while reading the relations, if any.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    Relation[] parsedRelations = PerseusUtils.GetRelationsNumFilter(
        param, out string problem, out int[] columnIndices, out bool andFlag);
    if (problem == null) {
        var keptRows = GetValids(mdata, columnIndices, parsedRelations, andFlag);
        PerseusPluginUtils.FilterRows(mdata, param, keptRows);
    } else {
        // Report the problem and leave the matrix untouched.
        processInfo.ErrString = problem;
    }
}
/// <summary>
/// Filters rows by whether their entry in one category column contains any of the selected terms.
/// </summary>
/// <param name="mdata">Matrix to filter.</param>
/// <param name="param">
/// Supplies "Column" (with the sub-parameter "Values", indices into the column's value list)
/// and "Mode". A "Mode" value of 0 keeps the rows containing none of the selected terms;
/// any other value keeps the rows containing at least one of them.
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives an error string when the column index is negative or
/// no term is selected.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    ParameterWithSubParams<int> columnParam = param.GetParamWithSubParams<int>("Column");
    int columnIndex = columnParam.Value;
    if (columnIndex < 0) {
        processInfo.ErrString = "No categorical columns available.";
        return;
    }

    Parameter<int[]> termsParam = columnParam.GetSubParameters().GetParam<int[]>("Values");
    int[] termIndices = termsParam.Value;
    if (termIndices.Length == 0) {
        processInfo.ErrString = "Please select at least one term for filtering.";
        return;
    }

    // Translate the selected indices into the actual terms of the column.
    string[] allTerms = mdata.GetCategoryColumnValuesAt(columnIndex);
    HashSet<string> selectedTerms = new HashSet<string>();
    foreach (int termIndex in termIndices) {
        selectedTerms.Add(allTerms[termIndex]);
    }

    bool dropHits = param.GetParam<int>("Mode").Value == 0;
    List<int> keptRows = new List<int>();
    for (int row = 0; row < mdata.RowCount; row++) {
        bool hit = false;
        foreach (string term in mdata.GetCategoryColumnEntryAt(columnIndex, row)) {
            if (selectedTerms.Contains(term)) {
                hit = true;
                break;
            }
        }
        // Keep rows with a hit in "keep" mode and rows without a hit in "remove" mode.
        if (hit != dropHits) {
            keptRows.Add(row);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, keptRows.ToArray());
}
/// <summary>
/// Filters rows by the number of valid main-matrix values they have within each group of columns.
/// </summary>
/// <param name="minValids">Minimum passed to <c>PerseusPluginUtils.Valid</c> for every group.</param>
/// <param name="percentage">Passed to <c>PerseusPluginUtils.Valid</c>; presumably makes
/// <paramref name="minValids"/> relative to the group size - confirm against that helper.</param>
/// <param name="mdata">Matrix whose values are tested and which is then filtered.</param>
/// <param name="param">Forwarded to <c>PerseusPluginUtils.FilterRows</c>.</param>
/// <param name="oneGroup">If true a row is kept when at least one group passes; otherwise all
/// groups must pass.</param>
/// <param name="threshold">First threshold forwarded to <c>PerseusPluginUtils.IsValid</c>.</param>
/// <param name="threshold2">Second threshold forwarded to <c>PerseusPluginUtils.IsValid</c>.</param>
/// <param name="filterMode">Mode forwarded to <c>PerseusPluginUtils.IsValid</c>.</param>
/// <param name="groupCol">Group names per column, from which the groups are derived.</param>
private static void NonzeroFilterGroup(int minValids, bool percentage, IMatrixData mdata, Parameters param,
    bool oneGroup, double threshold, double threshold2, FilteringMode filterMode, IList<string[]> groupCol) {
    List<int> valids = new List<int>();
    string[] groupVals = ArrayUtils.UniqueValuesPreserveOrder(groupCol);
    Array.Sort(groupVals);
    // groupInds[j] holds, for column j, indices into groupVals; negative entries are ignored.
    int[][] groupInds = CalcGroupInds(groupVals, groupCol);

    // Group sizes depend only on the column-to-group assignment, so they are computed once
    // instead of once per row.
    int[] totals = new int[groupVals.Length];
    foreach (int[] memberships in groupInds) {
        foreach (int group in memberships) {
            if (group >= 0) {
                totals[group]++;
            }
        }
    }

    for (int i = 0; i < mdata.RowCount; i++) {
        // Number of valid values of row i per group.
        int[] counts = new int[groupVals.Length];
        for (int j = 0; j < groupInds.Length; j++) {
            if (!PerseusPluginUtils.IsValid(mdata.Values.Get(i, j), threshold, threshold2, filterMode)) {
                continue;
            }
            foreach (int group in groupInds[j]) {
                if (group >= 0) {
                    counts[group]++;
                }
            }
        }

        bool[] groupValid = new bool[counts.Length];
        for (int j = 0; j < groupValid.Length; j++) {
            groupValid[j] = PerseusPluginUtils.Valid(counts[j], minValids, percentage, totals[j]);
        }
        if (oneGroup ? ArrayUtils.Or(groupValid) : ArrayUtils.And(groupValid)) {
            valids.Add(i);
        }
    }
    PerseusPluginUtils.FilterRows(mdata, param, valids.ToArray());
}