/// <summary>
/// Filters the rows of <paramref name="mdata"/> with the numeric relations configured in
/// <paramref name="param"/>.
/// </summary>
/// <param name="mdata">Matrix that is handed to <c>PerseusPluginUtils.FilterRowsNew</c>.</param>
/// <param name="param">Supplies the relations (via <c>PerseusUtils.GetRelationsNumFilter</c>) and "Filter mode".</param>
/// <param name="supplTables">
/// Replaced by a one-element array built with <c>PerseusPluginUtils.CreateSupplTabSplit</c> from the
/// rows that fail the filter when "Filter mode" equals 2; left untouched otherwise.
/// </param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error string when the relations cannot be obtained.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            Relation[] relations = PerseusUtils.GetRelationsNumFilter(param, out string errString, out int[] colInds, out bool and);
            if (errString != null)
            {
                processInfo.ErrString = errString;
                return;
            }

            // The per-row scan is only consumed by the supplementary table, so it is skipped
            // entirely for the other filter modes. The kept rows come from GetValids below.
            if (param.GetParam<int>("Filter mode").Value == 2)
            {
                double[][] rows = GetRows(mdata, colInds);
                List<int> notvalids = new List<int>();
                for (int i = 0; i < rows.Length; i++)
                {
                    if (!PerseusUtils.IsValidRowNumFilter(rows[i], relations, and))
                    {
                        notvalids.Add(i);
                    }
                }
                supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, notvalids.ToArray()) };
            }
            PerseusPluginUtils.FilterRowsNew(mdata, param, GetValids(mdata, colInds, relations, and));
        }
        /// <summary>
        /// Filters rows by the terms of one categorical column. A row "matches" when at least one of
        /// its entries in the chosen column is among the selected terms.
        /// </summary>
        /// <param name="mdata">Matrix whose categorical column is inspected and which is then filtered.</param>
        /// <param name="param">
        /// Supplies "Column" (with sub-parameter "Values"), "Mode" (0 removes matching rows, any other
        /// value keeps only matching rows) and "Filter mode".
        /// </param>
        /// <param name="supplTables">
        /// Replaced by a one-element array holding the discarded rows when "Filter mode" equals 2;
        /// left untouched otherwise.
        /// </param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error string when no column or no term is selected.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams<int> p = param.GetParamWithSubParams<int>("Column");
            int colInd = p.Value;
            if (colInd < 0)
            {
                processInfo.ErrString = "No categorical columns available.";
                return;
            }

            int[] inds = p.GetSubParameters().GetParam<int[]>("Values").Value;
            if (inds.Length == 0)
            {
                processInfo.ErrString = "Please select at least one term for filtering.";
                return;
            }

            // Translate the selected term indices into the actual term strings.
            string[] allTerms = mdata.GetCategoryColumnValuesAt(colInd);
            HashSet<string> selected = new HashSet<string>();
            foreach (int ind in inds)
            {
                selected.Add(allTerms[ind]);
            }

            bool remove = param.GetParam<int>("Mode").Value == 0;
            List<int> valids = new List<int>();
            List<int> notvalids = new List<int>();

            for (int i = 0; i < mdata.RowCount; i++)
            {
                bool matches = false;
                foreach (string w in mdata.GetCategoryColumnEntryAt(colInd, i))
                {
                    if (selected.Contains(w))
                    {
                        matches = true;
                        break;
                    }
                }

                // Remove mode keeps the non-matching rows, keep mode keeps the matching ones.
                // Every row that is not kept is discarded, in both modes, so the split table
                // is the exact complement of the kept rows.
                if (matches != remove)
                {
                    valids.Add(i);
                }
                else
                {
                    notvalids.Add(i);
                }
            }

            if (param.GetParam<int>("Filter mode").Value == 2)
            {
                supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, notvalids.ToArray()) };
            }
            PerseusPluginUtils.FilterRowsNew(mdata, param, valids.ToArray());
        }
        /// <summary>
        /// Keeps the rows that have enough valid values per group and passes them to
        /// <c>PerseusPluginUtils.FilterRowsNew</c>.
        /// </summary>
        /// <param name="minValids">Forwarded to <c>PerseusPluginUtils.Valid</c> together with the per-group counts.</param>
        /// <param name="percentage">Forwarded to <c>PerseusPluginUtils.Valid</c>.</param>
        /// <param name="mdata">Matrix whose values are tested and which is then filtered.</param>
        /// <param name="param">Forwarded to <c>PerseusPluginUtils.FilterRowsNew</c>.</param>
        /// <param name="oneGroup">
        /// When true the per-group results are combined with <c>ArrayUtils.Or</c>, otherwise with
        /// <c>ArrayUtils.And</c>.
        /// </param>
        /// <param name="threshold">Forwarded to <c>PerseusPluginUtils.IsValid</c>.</param>
        /// <param name="threshold2">Forwarded to <c>PerseusPluginUtils.IsValid</c>.</param>
        /// <param name="filterMode">Forwarded to <c>PerseusPluginUtils.IsValid</c>.</param>
        /// <param name="groupCol">Group names per column; indexed in parallel with the matrix columns.</param>
        private static void NonzeroFilterGroup(int minValids, bool percentage, IMatrixData mdata, Parameters param,
                                               bool oneGroup, double threshold, double threshold2, FilteringMode filterMode, IList <string[]> groupCol
                                               )
        {
            string[] groupVals = ArrayUtils.UniqueValuesPreserveOrder(groupCol);
            Array.Sort(groupVals);
            int[][] groupInds = CalcGroupInds(groupVals, groupCol);

            // The number of columns per group does not depend on the row, so count it once
            // instead of once per row.
            int[] totals = new int[groupVals.Length];
            foreach (int[] columnGroups in groupInds)
            {
                foreach (int g in columnGroups)
                {
                    if (g >= 0)
                    {
                        totals[g]++;
                    }
                }
            }

            List<int> valids = new List<int>();
            for (int i = 0; i < mdata.RowCount; i++)
            {
                // counts[g] = number of valid values of row i in the columns belonging to group g.
                int[] counts = new int[groupVals.Length];
                for (int j = 0; j < groupInds.Length; j++)
                {
                    if (!PerseusPluginUtils.IsValid(mdata.Values.Get(i, j), threshold, threshold2, filterMode))
                    {
                        continue;
                    }
                    foreach (int g in groupInds[j])
                    {
                        if (g >= 0)
                        {
                            counts[g]++;
                        }
                    }
                }

                bool[] groupValid = new bool[counts.Length];
                for (int g = 0; g < groupValid.Length; g++)
                {
                    groupValid[g] = PerseusPluginUtils.Valid(counts[g], minValids, percentage, totals[g]);
                }

                if (oneGroup ? ArrayUtils.Or(groupValid) : ArrayUtils.And(groupValid))
                {
                    valids.Add(i);
                }
            }
            PerseusPluginUtils.FilterRowsNew(mdata, param, valids.ToArray());
        }
        /// <summary>
        /// Removes duplicate rows. Two rows are duplicates when their values in all selected main,
        /// numeric, text, category and multi-numeric columns produce the same key string; the first
        /// occurrence of each key is kept.
        /// </summary>
        /// <param name="mdata">Matrix whose rows are compared and which is then filtered.</param>
        /// <param name="param">
        /// Supplies the column selections "Main", "Numeric", "Text", "Category", "MultiNumeric" and
        /// "Filter mode".
        /// </param>
        /// <param name="supplTables">
        /// Replaced by a one-element array holding the duplicate (discarded) rows when "Filter mode"
        /// equals 2; left untouched otherwise.
        /// </param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            var mainSubset          = param.GetParam<int[]>("Main").Value;
            var mainColumns         = mainSubset.Select(mdata.Values.GetColumn).ToArray();
            var numericSubset       = param.GetParam<int[]>("Numeric").Value;
            var numericColumns      = ArrayUtils.SubList(mdata.NumericColumns, numericSubset);
            var stringSubset        = param.GetParam<int[]>("Text").Value;
            var stringColumns       = ArrayUtils.SubList(mdata.StringColumns, stringSubset);
            var categorySubset      = param.GetParam<int[]>("Category").Value;
            var categoryColumns     = categorySubset.Select(mdata.GetCategoryColumnAt).ToArray();
            var multiNumericSubset  = param.GetParam<int[]>("MultiNumeric").Value;
            var multiNumericColumns = ArrayUtils.SubList(mdata.MultiNumericColumns, multiNumericSubset);

            var seen      = new HashSet<string>();
            var kept      = new List<int>();
            var discarded = new List<int>();

            // Single pass: the first row producing a key is kept, every later one is a duplicate.
            for (int j = 0; j < mdata.RowCount; j++)
            {
                int i   = j; // stable copy for the lambdas below
                var row = string.Join("\t", mainColumns.Select(col => $"{col[i]}")
                                      .Concat(numericColumns.Select(col => $"{col[i]}"))
                                      .Concat(stringColumns.Select(col => $"{col[i]}"))
                                      .Concat(categoryColumns.Select(col => string.Join(";", col[i])))
                                      .Concat(multiNumericColumns.Select(col => string.Join(";", col[i].Select(d => $"{d}")))));
                if (seen.Add(row))
                {
                    kept.Add(i);
                }
                else
                {
                    discarded.Add(i);
                }
            }

            // Build the supplementary table before filtering, so the discarded indices still
            // refer to the unfiltered matrix.
            if (param.GetParam<int>("Filter mode").Value == 2)
            {
                supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, discarded.ToArray()) };
            }
            PerseusPluginUtils.FilterRowsNew(mdata, param, kept.ToArray());
        }
        // Example #5
        // 0
        /// <summary>
        /// Keeps a pseudo-random selection of rows. The generator is created with the fixed seed 7,
        /// so the same row count always yields the same selection.
        /// </summary>
        /// <param name="mdata">Matrix that is filtered.</param>
        /// <param name="param">Supplies "Number of rows" and "Filter mode".</param>
        /// <param name="supplTables">
        /// Replaced by a one-element array built with <c>PerseusPluginUtils.CreateSupplTab</c> when
        /// "Filter mode" equals 2; left untouched otherwise.
        /// </param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives an error string when the requested row count is negative.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int nrows = param.GetParam<int>("Number of rows").Value;

            // Math.Min below only caps the upper end; a negative count would reach SubArray unchecked.
            if (nrows < 0)
            {
                processInfo.ErrString = "Number of rows must not be negative.";
                return;
            }

            // NOTE(review): the other filters store only the discarded rows (CreateSupplTabSplit);
            // this one passes the whole matrix to CreateSupplTab - confirm that this is intended.
            if (param.GetParam<int>("Filter mode").Value == 2)
            {
                supplTables = new[] { PerseusPluginUtils.CreateSupplTab(mdata) };
            }

            nrows = Math.Min(nrows, mdata.RowCount);
            Random2 rand = new Random2(7);
            int[] rows = ArrayUtils.SubArray(rand.NextPermutation(mdata.RowCount), nrows);
            PerseusPluginUtils.FilterRowsNew(mdata, param, rows);
        }
        // Example #6
        // 0
        /// <summary>
        /// Filters rows by searching one text column for a string, using the sibling <c>Matches</c>
        /// helper to test each entry.
        /// </summary>
        /// <param name="mdata">Matrix whose text column is searched and which is then filtered.</param>
        /// <param name="param">
        /// Supplies "Column", "Search string", "Mode" (0 removes matching rows, any other value keeps
        /// only matching rows), "Match case", "Match whole word" and "Filter mode".
        /// </param>
        /// <param name="supplTables">
        /// Replaced by a one-element array holding the discarded rows when "Filter mode" equals 2;
        /// left untouched otherwise.
        /// </param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">
        /// Receives an error string when the search string is empty without "Match whole word", or
        /// when no column is selected.
        /// </param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int    colInd         = param.GetParam<int>("Column").Value;
            string searchString   = param.GetParam<string>("Search string").Value;
            bool   remove         = param.GetParam<int>("Mode").Value == 0;
            bool   matchCase      = param.GetParam<bool>("Match case").Value;
            bool   matchWholeWord = param.GetParam<bool>("Match whole word").Value;

            if (!matchWholeWord && string.IsNullOrEmpty(searchString))
            {
                processInfo.ErrString = "Please provide a search string, or set 'Match whole word' to match empty entries.";
                return;
            }
            // A negative index would otherwise fail on the array access below; report it like the
            // categorical filter does.
            if (colInd < 0)
            {
                processInfo.ErrString = "No text columns available.";
                return;
            }

            string[]  vals      = mdata.StringColumns[colInd];
            List<int> valids    = new List<int>();
            List<int> notvalids = new List<int>();

            for (int i = 0; i < vals.Length; i++)
            {
                bool matches = Matches(vals[i], searchString, matchCase, matchWholeWord);

                // Remove mode keeps the non-matching rows, keep mode keeps the matching ones.
                if (matches != remove)
                {
                    valids.Add(i);
                }
                else
                {
                    notvalids.Add(i);
                }
            }

            if (param.GetParam<int>("Filter mode").Value == 2)
            {
                supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, notvalids.ToArray()) };
            }
            PerseusPluginUtils.FilterRowsNew(mdata, param, valids.ToArray());
        }