/// <summary>
/// Filters the rows of <paramref name="mdata"/> with the numeric relations read from
/// <paramref name="param"/>.
/// </summary>
/// <remarks>
/// When the "Filter mode" parameter equals 2, the indices of the rows that fail
/// <c>PerseusUtils.IsValidRowNumFilter</c> are passed to
/// <c>PerseusPluginUtils.CreateSupplTabSplit</c> and the result is returned through
/// <paramref name="supplTables"/>.
/// </remarks>
/// <param name="mdata">Matrix whose rows are tested and filtered.</param>
/// <param name="param">Source of the relations and of the "Filter mode" setting.</param>
/// <param name="supplTables">Assigned a single-element array only when "Filter mode" is 2.</param>
/// <param name="documents">Not touched by this method.</param>
/// <param name="processInfo">Receives the error string if reading the relations reports one.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    Relation[] relations = PerseusUtils.GetRelationsNumFilter(param, out string errString,
        out int[] colInds, out bool and);
    if (errString != null) {
        processInfo.ErrString = errString;
        return;
    }
    // The failing rows are only needed for the supplementary table, so the scan is
    // skipped in every other filter mode.
    if (param.GetParam<int>("Filter mode").Value == 2) {
        double[][] rows = GetRows(mdata, colInds);
        List<int> notvalids = new List<int>();
        for (int i = 0; i < rows.Length; i++) {
            if (!PerseusUtils.IsValidRowNumFilter(rows[i], relations, and)) {
                notvalids.Add(i);
            }
        }
        supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, notvalids.ToArray()) };
    }
    // NOTE(review): GetValids is defined outside this block; presumably it applies the same
    // relations to the same columns - confirm.
    PerseusPluginUtils.FilterRowsNew(mdata, param, GetValids(mdata, colInds, relations, and));
}
/// <summary>
/// Filters the rows of <paramref name="mdata"/> by whether their entry in a chosen category
/// column contains any of the selected terms.
/// </summary>
/// <remarks>
/// "Mode" 0 keeps the rows without a selected term; any other "Mode" value keeps the rows with
/// one. Every row that is not kept is recorded as discarded, whichever mode is active, so the
/// table built for "Filter mode" 2 is the complement of the kept rows.
/// </remarks>
/// <param name="mdata">Matrix whose rows are tested and filtered.</param>
/// <param name="param">Source of the "Column" (with "Values" sub-parameter), "Mode" and
/// "Filter mode" settings.</param>
/// <param name="supplTables">Assigned a single-element array only when "Filter mode" is 2.</param>
/// <param name="documents">Not touched by this method.</param>
/// <param name="processInfo">Receives an error string when the column index is negative or no
/// term is selected.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    ParameterWithSubParams<int> columnParam = param.GetParamWithSubParams<int>("Column");
    int colInd = columnParam.Value;
    if (colInd < 0) {
        processInfo.ErrString = "No categorical columns available.";
        return;
    }
    int[] inds = columnParam.GetSubParameters().GetParam<int[]>("Values").Value;
    if (inds.Length == 0) {
        processInfo.ErrString = "Please select at least one term for filtering.";
        return;
    }
    // Translate the selected indices into the terms themselves for constant-time lookup.
    string[] allTerms = mdata.GetCategoryColumnValuesAt(colInd);
    HashSet<string> selectedTerms = new HashSet<string>();
    foreach (int ind in inds) {
        selectedTerms.Add(allTerms[ind]);
    }
    bool remove = param.GetParam<int>("Mode").Value == 0;
    List<int> valids = new List<int>();
    List<int> notvalids = new List<int>();
    for (int i = 0; i < mdata.RowCount; i++) {
        bool hasSelectedTerm = false;
        foreach (string term in mdata.GetCategoryColumnEntryAt(colInd, i)) {
            if (selectedTerms.Contains(term)) {
                hasSelectedTerm = true;
                break;
            }
        }
        // Keep the row when it matches in "keep" mode or does not match in "remove" mode;
        // everything else is discarded.
        if (hasSelectedTerm != remove) {
            valids.Add(i);
        } else {
            notvalids.Add(i);
        }
    }
    if (param.GetParam<int>("Filter mode").Value == 2) {
        supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, notvalids.ToArray()) };
    }
    PerseusPluginUtils.FilterRowsNew(mdata, param, valids.ToArray());
}
/// <summary>
/// Removes duplicate rows from <paramref name="mdata"/>, where two rows are duplicates when
/// their values in the selected main, numeric, text, category and multi-numeric columns
/// produce the same text key.
/// </summary>
/// <remarks>
/// The first row with a given key is kept; every later row with that key is discarded. When
/// "Filter mode" equals 2 the discarded row indices are passed to
/// <c>PerseusPluginUtils.CreateSupplTabSplit</c> and returned through
/// <paramref name="supplTables"/>.
/// </remarks>
/// <param name="mdata">Matrix whose rows are compared and filtered.</param>
/// <param name="param">Source of the "Main", "Numeric", "Text", "Category", "MultiNumeric"
/// column selections and of the "Filter mode" setting.</param>
/// <param name="supplTables">Assigned a single-element array only when "Filter mode" is 2.</param>
/// <param name="documents">Not touched by this method.</param>
/// <param name="processInfo">Not touched by this method.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    var mainSubset = param.GetParam<int[]>("Main").Value;
    var mainColumns = mainSubset.Select(mdata.Values.GetColumn).ToArray();
    var numericSubset = param.GetParam<int[]>("Numeric").Value;
    var numericColumns = ArrayUtils.SubList(mdata.NumericColumns, numericSubset);
    var stringSubset = param.GetParam<int[]>("Text").Value;
    var stringColumns = ArrayUtils.SubList(mdata.StringColumns, stringSubset);
    var categorySubset = param.GetParam<int[]>("Category").Value;
    var categoryColumns = categorySubset.Select(mdata.GetCategoryColumnAt).ToArray();
    var multiNumericSubset = param.GetParam<int[]>("MultiNumeric").Value;
    var multiNumericColumns = ArrayUtils.SubList(mdata.MultiNumericColumns, multiNumericSubset);

    // Key of one row: the selected cells rendered as text and joined by tabs; multi-valued
    // cells are joined by ';' first.
    string RowKey(int i) => string.Join("\t", mainColumns.Select(col => $"{col[i]}")
        .Concat(numericColumns.Select(col => $"{col[i]}"))
        .Concat(stringColumns.Select(col => $"{col[i]}"))
        .Concat(categoryColumns.Select(col => string.Join(";", col[i])))
        .Concat(multiNumericColumns.Select(col => string.Join(";", col[i].Select(d => $"{d}")))));

    var seenKeys = new HashSet<string>();
    var keptRows = new List<int>();
    var discardedRows = new List<int>();
    for (int i = 0; i < mdata.RowCount; i++) {
        // HashSet.Add returns false when the key is already present, i.e. the row is a duplicate.
        if (seenKeys.Add(RowKey(i))) {
            keptRows.Add(i);
        } else {
            discardedRows.Add(i);
        }
    }
    // The supplementary table is built first: the row indices refer to the unfiltered matrix,
    // and FilterRowsNew may change mdata.
    if (param.GetParam<int>("Filter mode").Value == 2) {
        supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, discardedRows.ToArray()) };
    }
    PerseusPluginUtils.FilterRowsNew(mdata, param, keptRows.ToArray());
}
/// <summary>
/// Filters the rows of <paramref name="mdata"/> by matching the entries of one text column
/// against a search string.
/// </summary>
/// <remarks>
/// "Mode" 0 keeps the rows that do not match; any other "Mode" value keeps the rows that match.
/// When "Filter mode" equals 2 the indices of the rows that are not kept are passed to
/// <c>PerseusPluginUtils.CreateSupplTabSplit</c> and returned through
/// <paramref name="supplTables"/>. Nothing is assigned to <paramref name="supplTables"/> when
/// the method returns with an error.
/// </remarks>
/// <param name="mdata">Matrix whose rows are tested and filtered.</param>
/// <param name="param">Source of the "Column", "Search string", "Mode", "Match case",
/// "Match whole word" and "Filter mode" settings.</param>
/// <param name="supplTables">Assigned a single-element array only when "Filter mode" is 2 and
/// no error occurred.</param>
/// <param name="documents">Not touched by this method.</param>
/// <param name="processInfo">Receives an error string when the column index is negative, or
/// when the search string is empty and whole-word matching is off.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    int colInd = param.GetParam<int>("Column").Value;
    // NOTE(review): a negative index presumably means no text column is available - confirm.
    // Without this guard the column lookup below would throw.
    if (colInd < 0) {
        processInfo.ErrString = "No text columns available.";
        return;
    }
    string searchString = param.GetParam<string>("Search string").Value;
    bool remove = param.GetParam<int>("Mode").Value == 0;
    bool matchCase = param.GetParam<bool>("Match case").Value;
    bool matchWholeWord = param.GetParam<bool>("Match whole word").Value;
    if (!matchWholeWord && string.IsNullOrEmpty(searchString)) {
        processInfo.ErrString =
            "Please provide a search string, or set 'Match whole word' to match empty entries.";
        return;
    }
    string[] vals = mdata.StringColumns[colInd];
    List<int> valids = new List<int>();
    List<int> notvalids = new List<int>();
    for (int i = 0; i < vals.Length; i++) {
        bool matches = Matches(vals[i], searchString, matchCase, matchWholeWord);
        // Keep matching rows in "keep" mode and non-matching rows in "remove" mode.
        if (matches != remove) {
            valids.Add(i);
        } else {
            notvalids.Add(i);
        }
    }
    if (param.GetParam<int>("Filter mode").Value == 2) {
        supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, notvalids.ToArray()) };
    }
    PerseusPluginUtils.FilterRowsNew(mdata, param, valids.ToArray());
}
/// <summary>
/// Filters the rows of <paramref name="mdata"/> by how many of their main-matrix values pass
/// <c>PerseusPluginUtils.IsValid</c>, either over the whole row ("Mode" 0) or per group of a
/// category row ("Mode" != 0).
/// </summary>
/// <remarks>
/// In grouped mode a row is kept when at least one group ("Mode" 2) or every group (other
/// non-zero modes) passes <c>PerseusPluginUtils.Valid</c>. When "Filter mode" equals 2, the
/// rows that are not kept are returned through <paramref name="supplTables"/>.
/// </remarks>
/// <param name="mdata">Matrix whose rows are tested and filtered.</param>
/// <param name="param">Source of the "Mode" (with "Grouping" sub-parameter), "Filter mode",
/// minimum-count and threshold settings.</param>
/// <param name="supplTables">Assigned a single-element array only when "Filter mode" is 2.</param>
/// <param name="documents">Not touched by this method.</param>
/// <param name="processInfo">Receives an error string when a grouped mode is requested but
/// the matrix has no category rows.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo) {
    const bool rows = true;
    int minValids = PerseusPluginUtils.GetMinValids(param, out bool percentage);
    ParameterWithSubParams<int> modeParam = param.GetParamWithSubParams<int>("Mode");
    int modeInd = modeParam.Value;
    if (modeInd != 0 && mdata.CategoryRowNames.Count == 0) {
        processInfo.ErrString = "No grouping is defined.";
        return;
    }
    PerseusPluginUtils.ReadValuesShouldBeParams(param, out FilteringMode filterMode,
        out double threshold, out double threshold2);
    bool split = param.GetParam<int>("Filter mode").Value == 2;

    if (modeInd == 0) {
        // Ungrouped: the utility methods do both the split table and the filtering.
        if (split) {
            supplTables = new[] {
                PerseusPluginUtils.NonzeroFilter1Split(rows, minValids, percentage, mdata, param,
                    threshold, threshold2, filterMode)
            };
        }
        PerseusPluginUtils.NonzeroFilter1(rows, minValids, percentage, mdata, param, threshold,
            threshold2, filterMode);
        return;
    }

    int gind = modeParam.GetSubParameters().GetParam<int>("Grouping").Value;
    string[][] groupCol = mdata.GetCategoryRowAt(gind);
    if (split) {
        string[] groupVals = ArrayUtils.UniqueValuesPreserveOrder(groupCol);
        Array.Sort(groupVals);
        // groupInds[j] lists the group indices of column j; negative entries are skipped.
        int[][] groupInds = CalcGroupInds(groupVals, groupCol);

        // Number of columns per group. This depends only on the grouping, so it is computed
        // once instead of once per row.
        int[] totals = new int[groupVals.Length];
        foreach (int[] columnGroups in groupInds) {
            foreach (int g in columnGroups) {
                if (g >= 0) {
                    totals[g]++;
                }
            }
        }

        List<int> notvalids = new List<int>();
        for (int i = 0; i < mdata.RowCount; i++) {
            // Per group, count the columns whose value in this row passes IsValid.
            int[] counts = new int[groupVals.Length];
            for (int j = 0; j < groupInds.Length; j++) {
                if (!PerseusPluginUtils.IsValid(mdata.Values.Get(i, j), threshold, threshold2, filterMode)) {
                    continue;
                }
                foreach (int g in groupInds[j]) {
                    if (g >= 0) {
                        counts[g]++;
                    }
                }
            }
            bool[] groupValid = new bool[counts.Length];
            for (int g = 0; g < groupValid.Length; g++) {
                groupValid[g] = PerseusPluginUtils.Valid(counts[g], minValids, percentage, totals[g]);
            }
            bool keep = modeInd == 2 ? ArrayUtils.Or(groupValid) : ArrayUtils.And(groupValid);
            if (!keep) {
                notvalids.Add(i);
            }
        }
        supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, notvalids.ToArray()) };
    }
    NonzeroFilterGroup(minValids, percentage, mdata, param, modeInd == 2, threshold, threshold2,
        filterMode, groupCol);
}