Example #1
0
 /// <summary>
 /// Turns the categorical columns listed in <paramref name="colInds"/> into string columns,
 /// appends them to the string columns of <paramref name="mdata"/> and re-assigns the categorical
 /// columns from the complement index set.
 /// </summary>
 /// <param name="colInds">Indices of the categorical columns to convert.</param>
 /// <param name="mdata">Matrix data that is modified in place.</param>
 private static void CategoricalToString(IList <int> colInds, IMatrixData mdata)
 {
     // Everything that depends on the current categorical layout is captured before it is changed.
     int[]        remaining         = ArrayUtils.Complement(colInds, mdata.CategoryColumnCount);
     string[]     movedNames        = ArrayUtils.SubArray(mdata.CategoryColumnNames, colInds);
     string[]     movedDescriptions = ArrayUtils.SubArray(mdata.CategoryColumnDescriptions, colInds);
     string[][][] selected          = PerseusPluginUtils.GetCategoryColumns(mdata, colInds).ToArray();
     string[][]   converted         = new string[selected.Length][];
     for (int c = 0; c < selected.Length; c++)
     {
         string[][] column = selected[c];
         string[]   text   = new string[column.Length];
         converted[c] = text;
         for (int r = 0; r < text.Length; r++)
         {
             string[] terms   = column[r];
             bool     isEmpty = terms == null || terms.Length == 0;
             // An entry without terms becomes the empty string; otherwise its terms are joined with ';'.
             text[r] = isEmpty ? "" : StringUtils.Concat(";", terms);
         }
     }
     mdata.StringColumnNames.AddRange(movedNames);
     mdata.StringColumnDescriptions.AddRange(movedDescriptions);
     mdata.StringColumns.AddRange(converted);
     mdata.CategoryColumns            = PerseusPluginUtils.GetCategoryColumns(mdata, remaining);
     mdata.CategoryColumnNames        = ArrayUtils.SubList(mdata.CategoryColumnNames, remaining);
     mdata.CategoryColumnDescriptions = ArrayUtils.SubList(mdata.CategoryColumnDescriptions, remaining);
 }
 /// <summary>
 /// Builds the parameter set: one multi-choice parameter per column type ("Main", "Numeric",
 /// "Text", "Category", "MultiNumeric") followed by the filter-mode parameter.
 /// </summary>
 /// <param name="mdata">Matrix data whose column names populate the choices.</param>
 /// <param name="errorString">Not written by this method.</param>
 /// <returns>The assembled parameters.</returns>
 public Parameters GetParameters(IMatrixData mdata, ref string errorString)
 {
     // Every multi-choice parameter is constructed with the full index range 0..count-1 of its column type.
     var main = new MultiChoiceParam("Main", Enumerable.Range(0, mdata.ColumnCount).ToArray());
     main.Values = mdata.ColumnNames;

     var numeric = new MultiChoiceParam("Numeric", Enumerable.Range(0, mdata.NumericColumnCount).ToArray());
     numeric.Values = mdata.NumericColumnNames;

     var text = new MultiChoiceParam("Text", Enumerable.Range(0, mdata.StringColumnCount).ToArray());
     text.Values = mdata.StringColumnNames;

     var category = new MultiChoiceParam("Category", Enumerable.Range(0, mdata.CategoryColumnCount).ToArray());
     category.Values = mdata.CategoryColumnNames;

     var multiNumeric = new MultiChoiceParam("MultiNumeric", Enumerable.Range(0, mdata.MultiNumericColumnCount).ToArray());
     multiNumeric.Values = mdata.MultiNumericColumnNames;

     var filterMode = PerseusPluginUtils.CreateFilterModeParam(true);
     return new Parameters(main, numeric, text, category, multiNumeric, filterMode);
 }
 /// <summary>
 /// Builds the parameters for filtering columns by the values of a categorical row: a "Row"
 /// choice with one "Values" multi-choice sub-parameter per categorical row, a "Mode" choice
 /// (remove or keep matching columns) and the filter-mode parameter.
 /// </summary>
 /// <param name="mdata">Matrix data providing the categorical rows and their values.</param>
 /// <param name="errorString">Not written by this method.</param>
 /// <returns>The assembled parameters.</returns>
 public Parameters GetParameters(IMatrixData mdata, ref string errorString)
 {
     Parameters[] subParams = new Parameters[mdata.CategoryRowCount];
     for (int i = 0; i < subParams.Length; i++)
     {
         string[] values = mdata.GetCategoryRowValuesAt(i);
         // When the row has exactly one value it is passed as the initial selection; otherwise nothing is.
         int[] sel = values.Length == 1 ? new[] { 0 } : new int[0];
         subParams[i] =
             new Parameters(new Parameter[] {
             new MultiChoiceParam("Values", sel)
             {
                 Values = values,
                 // The choices below are about columns, so the help text talks about columns too.
                 Help   = "The value that should be present to discard/keep the corresponding column."
             }
         });
     }
     return
         (new Parameters(new SingleChoiceWithSubParams("Row")
     {
         Values = mdata.CategoryRowNames,
         SubParams = subParams,
         Help = "The categorical row that the filtering should be based on.",
         ParamNameWidth = 50,
         TotalWidth = 731
     }, new SingleChoiceParam("Mode")
     {
         Values = new[] { "Remove matching columns", "Keep matching columns" },
         Help =
             "If 'Remove matching columns' is selected, columns having the values specified above will be removed while " +
             "all other columns will be kept. If 'Keep matching columns' is selected, the opposite will happen."
     }, PerseusPluginUtils.CreateFilterModeParam(false)));
 }
Example #4
0
 /// <summary>
 /// Builds the numeric-filter parameters followed by the filter-mode parameter.
 /// </summary>
 /// <param name="mdata">Matrix data providing the selectable column names.</param>
 /// <param name="errorString">Not written by this method.</param>
 /// <returns>The assembled parameters.</returns>
 public Parameters GetParameters(IMatrixData mdata, ref string errorString)
 {
     // Selectable columns: numeric column names first, main column names after them.
     string[] columnChoices   = ArrayUtils.Concat(mdata.NumericColumnNames, mdata.ColumnNames);
     var      numFilterParams = PerseusUtils.GetNumFilterParams(columnChoices);
     var      filterMode      = PerseusPluginUtils.CreateFilterModeParam(true);
     return new Parameters(ArrayUtils.Concat(numFilterParams, filterMode));
 }
Example #5
0
        /// <summary>
        /// Filters rows by their number of valid values, either in total ("Mode" 0) or per group
        /// of a categorical row ("Mode" 1 or 2).
        /// </summary>
        /// <param name="mdata">Matrix data to filter.</param>
        /// <param name="param">Parameters holding the "Mode" choice and the threshold settings.</param>
        /// <param name="supplTables">Not written by this method.</param>
        /// <param name="documents">Not written by this method.</param>
        /// <param name="processInfo">Receives an error string when a group mode is chosen without any categorical row.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            bool percentage;
            int  requiredValids = PerseusPluginUtils.GetMinValids(param, out percentage);
            ParameterWithSubParams <int> mode = param.GetParamWithSubParams <int>("Mode");
            int  selectedMode = mode.Value;
            bool groupWise    = selectedMode != 0;

            if (groupWise && mdata.CategoryRowNames.Count == 0)
            {
                processInfo.ErrString = "No grouping is defined.";
                return;
            }

            FilteringMode filterMode;
            double        threshold;
            double        threshold2;
            PerseusPluginUtils.ReadValuesShouldBeParams(param, out filterMode, out threshold, out threshold2);

            if (!groupWise)
            {
                // First argument 'true' selects row filtering.
                PerseusPluginUtils.NonzeroFilter1(true, requiredValids, percentage, mdata, param, threshold, threshold2, filterMode);
                return;
            }

            int        groupingInd = mode.GetSubParameters().GetParam <int>("Grouping").Value;
            string[][] grouping    = mdata.GetCategoryRowAt(groupingInd);
            // Mode index 2 is forwarded as the 'oneGroup' flag.
            NonzeroFilterGroup(requiredValids, percentage, mdata, param, selectedMode == 2, threshold, threshold2, filterMode, grouping);
        }
        /// <summary>
        /// Collects, for every distinct combination of values in the selected columns, the index of
        /// the first row showing that combination, and passes those indices to
        /// <c>PerseusPluginUtils.FilterRows</c>.
        /// </summary>
        /// <param name="mdata">Matrix data to filter.</param>
        /// <param name="param">Parameters with the "Main", "Numeric", "Text", "Category" and "MultiNumeric" column selections.</param>
        /// <param name="supplTables">Not written by this method.</param>
        /// <param name="documents">Not written by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            var mainSubset          = param.GetParam <int[]>("Main").Value;
            var mainColumns         = mainSubset.Select(mdata.Values.GetColumn).ToArray();
            var numericSubset       = param.GetParam <int[]>("Numeric").Value;
            var numericColumns      = ArrayUtils.SubList(mdata.NumericColumns, numericSubset);
            var stringSubset        = param.GetParam <int[]>("Text").Value;
            var stringColumns       = ArrayUtils.SubList(mdata.StringColumns, stringSubset);
            var categorySubset      = param.GetParam <int[]>("Category").Value;
            var categoryColumns     = categorySubset.Select(mdata.GetCategoryColumnAt).ToArray();
            var multiNumericSubset  = param.GetParam <int[]>("MultiNumeric").Value;
            var multiNumericColumns = ArrayUtils.SubList(mdata.MultiNumericColumns, multiNumericSubset);

            // Maps the textual fingerprint of a row to the index of the first row that produced it.
            var firstSeen = new Dictionary <string, int>();

            for (int rowInd = 0; rowInd < mdata.RowCount; rowInd++)
            {
                var cells = new List <string>();
                foreach (var col in mainColumns)
                {
                    cells.Add($"{col[rowInd]}");
                }
                foreach (var col in numericColumns)
                {
                    cells.Add($"{col[rowInd]}");
                }
                foreach (var col in stringColumns)
                {
                    cells.Add($"{col[rowInd]}");
                }
                foreach (var col in categoryColumns)
                {
                    cells.Add(string.Join(";", col[rowInd]));
                }
                foreach (var col in multiNumericColumns)
                {
                    cells.Add(string.Join(";", col[rowInd].Select(d => $"{d}")));
                }

                string fingerprint = string.Join("\t", cells);
                if (firstSeen.ContainsKey(fingerprint))
                {
                    continue;
                }
                firstSeen[fingerprint] = rowInd;
            }

            PerseusPluginUtils.FilterRows(mdata, param, firstSeen.Values.ToArray());
        }
        /// <summary>
        /// Filters columns by their number of valid values. Only "Mode" 0 is supported; any other
        /// mode is rejected with an error string.
        /// </summary>
        /// <param name="mdata">Matrix data to filter.</param>
        /// <param name="param">Parameters holding the "Mode" choice and the threshold settings.</param>
        /// <param name="supplTables">Not written by this method.</param>
        /// <param name="documents">Not written by this method.</param>
        /// <param name="processInfo">Receives the error string when a group-wise mode is selected.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            const bool rows = false;
            bool       percentage;
            int        minValids = PerseusPluginUtils.GetMinValids(param, out percentage);
            ParameterWithSubParams <int> modeParam = param.GetParamWithSubParams <int>("Mode");
            int modeInd = modeParam.Value;

            if (modeInd != 0)
            {
                // Group-wise modes are never executed here; report the more specific problem first.
                processInfo.ErrString = mdata.CategoryRowNames.Count == 0
                    ? "No grouping is defined."
                    : "Group-wise filtering can only be applied to rows.";
                return;
            }

            FilteringMode filterMode;
            double        threshold;
            double        threshold2;

            PerseusPluginUtils.ReadValuesShouldBeParams(param, out filterMode, out threshold, out threshold2);
            PerseusPluginUtils.NonzeroFilter1(rows, minValids, percentage, mdata, param, threshold, threshold2, filterMode);
        }
Example #8
0
 /// <summary>
 /// Builds the parameters: the minimum-values parameter, a "Mode" choice ("In total",
 /// "In each group", "In at least one group") whose two group modes carry a "Grouping"
 /// sub-parameter, the values-should-be parameter and the filter-mode parameter.
 /// </summary>
 /// <param name="mdata">Matrix data providing the categorical row names for "Grouping".</param>
 /// <param name="errorString">Not written by this method.</param>
 /// <returns>The assembled parameters.</returns>
 public Parameters GetParameters(IMatrixData mdata, ref string errorString)
 {
     // "In total" has no sub-parameters; each group mode gets its own "Grouping" parameter instance.
     var totalSubParams = new Parameters(new Parameter[0]);
     var eachGroupSubParams = new Parameters(new Parameter[]
     {
         new SingleChoiceParam("Grouping") { Values = mdata.CategoryRowNames }
     });
     var anyGroupSubParams = new Parameters(new Parameter[]
     {
         new SingleChoiceParam("Grouping") { Values = mdata.CategoryRowNames }
     });

     return new Parameters(new [] {
         PerseusPluginUtils.GetMinValuesParam(true),
         new SingleChoiceWithSubParams("Mode")
         {
             Values = new[] { "In total", "In each group", "In at least one group" },
             SubParams = new[] { totalSubParams, eachGroupSubParams, anyGroupSubParams },
             ParamNameWidth = 50,
             TotalWidth = 731
         },
         PerseusPluginUtils.GetValuesShouldBeParam(),
         PerseusPluginUtils.GetFilterModeParam(false)
     });
 }
        /// <summary>
        /// Checks <c>PerseusPluginUtils.CalcBenjaminiHochbergFdr</c> on 22 p-values, two of which are
        /// NaN, against fixed reference FDR values (per the test name, presumably produced with R).
        /// A NaN expectation is accepted when the computed value is NaN as well.
        /// </summary>
        public void TestBenjaminiHochbergFdrCorrectionAgainstRWithNaNs()
        {
            double[] pValues =
            {
                double.NaN,
                0.55418364, 0.33169014, 0.61117003, 0.79263279, 0.74714936,
                0.93567141, 0.41151512, 0.99690655, 0.57863046, 0.35048756, double.NaN,
                0.17302064, 0.58728787, 0.45285588, 0.67122903, 0.99010006,
                0.32346151, 0.02248119, 0.5575581, 0.54179022, 0.30518608
            };
            double[] expectedFdrs =
            {
                double.NaN,
                0.87310004, 0.87310004, 0.87310004, 0.93250916, 0.93250916,
                0.99690655, 0.87310004, 0.99690655, 0.87310004, 0.87310004, double.NaN,
                0.87310004, 0.87310004, 0.87310004, 0.89497204, 0.99690655,
                0.87310004, 0.4496238, 0.87310004, 0.87310004, 0.87310004
            };

            PerseusPluginUtils.CalcBenjaminiHochbergFdr(pValues, 0.05, out var fdrs);

            int index = 0;
            foreach (double expected in expectedFdrs)
            {
                var  actual  = fdrs[index++];
                bool bothNaN = double.IsNaN(expected) && double.IsNaN(actual);
                if (!bothNaN)
                {
                    Assert.AreEqual(expected, actual, 0.00001);
                }
            }
        }
        /// <summary>
        /// Filters rows by matching the entries of one text column against a search string and
        /// either removes ("Mode" 0) or keeps the matching rows.
        /// </summary>
        /// <param name="mdata">Matrix data to filter.</param>
        /// <param name="param">Parameters "Column", "Search string", "Mode", "Match case" and "Match whole word".</param>
        /// <param name="supplTables">Not written by this method.</param>
        /// <param name="documents">Not written by this method.</param>
        /// <param name="processInfo">Receives an error string when the search string is empty and whole-word matching is off.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int    colInd         = param.GetParam <int>("Column").Value;
            string searchString   = param.GetParam <string>("Search string").Value;
            bool   remove         = param.GetParam <int>("Mode").Value == 0;
            bool   matchCase      = param.GetParam <bool>("Match case").Value;
            bool   matchWholeWord = param.GetParam <bool>("Match whole word").Value;

            bool emptySearch = string.IsNullOrEmpty(searchString);
            if (emptySearch && !matchWholeWord)
            {
                processInfo.ErrString = "Please provide a search string, or set 'Match whole word' to match empty entries.";
                return;
            }

            string[]   column = mdata.StringColumns[colInd];
            List <int> kept   = new List <int>();
            int        rowInd = 0;

            foreach (string cell in column)
            {
                // A row survives when its match state differs from 'remove':
                // non-matching rows in remove mode, matching rows in keep mode.
                if (Matches(cell, searchString, matchCase, matchWholeWord) != remove)
                {
                    kept.Add(rowInd);
                }
                rowInd++;
            }
            PerseusPluginUtils.FilterRows(mdata, param, kept.ToArray());
        }
        /// <summary>
        /// Filters rows with the numeric relations configured in <paramref name="param"/>. When
        /// "Filter mode" is 2, the rows failing the relations are additionally handed to
        /// <c>PerseusPluginUtils.CreateSupplTabSplit</c> and returned as a supplementary table.
        /// </summary>
        /// <param name="mdata">Matrix data to filter.</param>
        /// <param name="param">Numeric filter parameters including "Filter mode".</param>
        /// <param name="supplTables">Set to a single table when "Filter mode" is 2; otherwise untouched.</param>
        /// <param name="documents">Not written by this method.</param>
        /// <param name="processInfo">Receives the error string reported while reading the relations.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            Relation[] relations = PerseusUtils.GetRelationsNumFilter(param, out string errString, out int[] colInds, out bool and);
            if (errString != null)
            {
                processInfo.ErrString = errString;
                return;
            }

            if (param.GetParam <int>("Filter mode").Value == 2)
            {
                // The rejected rows are only needed for the supplementary table, so they are
                // collected only when that table is requested.
                double[][] rows      = GetRows(mdata, colInds);
                List <int> notvalids = new List <int>();

                for (int i = 0; i < rows.Length; i++)
                {
                    if (!PerseusUtils.IsValidRowNumFilter(rows[i], relations, and))
                    {
                        notvalids.Add(i);
                    }
                }
                supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplit(mdata, notvalids.ToArray()) };
            }
            PerseusPluginUtils.FilterRowsNew(mdata, param, GetValids(mdata, colInds, relations, and));
        }
 /// <summary>
 /// Builds the text-filter parameters: "Column", "Search string", "Match case",
 /// "Match whole word" (set to true), "Mode" (remove or keep matching rows, set to 0)
 /// and the filter-mode parameter.
 /// </summary>
 /// <param name="mdata">Matrix data providing the string column names.</param>
 /// <param name="errorString">Not written by this method.</param>
 /// <returns>The assembled parameters.</returns>
 public Parameters GetParameters(IMatrixData mdata, ref string errorString)
 {
     var column = new SingleChoiceParam("Column")
     {
         Values = mdata.StringColumnNames,
         Help = "The text column that the filtering should be based on."
     };
     var search = new StringParam("Search string")
     {
         Help = "String that is searched in the specified column.",
         Value = ""
     };
     var matchCase      = new BoolParam("Match case");
     var matchWholeWord = new BoolParam("Match whole word") { Value = true };
     var mode = new SingleChoiceParam("Mode")
     {
         Values = new[] { "Remove matching rows", "Keep matching rows" },
         Help =
             "If 'Remove matching rows' is selected, rows matching the criteria will be removed while " +
             "all other rows will be kept. If 'Keep matching rows' is selected, the opposite will happen.",
         Value = 0
     };
     var filterMode = PerseusPluginUtils.GetFilterModeParam(true);

     return new Parameters(column, search, matchCase, matchWholeWord, mode, filterMode);
 }
Example #13
0
        /// <summary>
        /// Filters columns by the terms of a categorical row. Columns carrying at least one of the
        /// selected terms are removed ("Mode" 0) or kept (any other "Mode"). When "Filter mode" is 2,
        /// the discarded columns are passed to <c>PerseusPluginUtils.CreateSupplTabSplitColumns</c>
        /// and returned as a supplementary table.
        /// </summary>
        /// <param name="mdata">Matrix data to filter.</param>
        /// <param name="param">Parameters "Row" (with sub-parameter "Values"), "Mode" and "Filter mode".</param>
        /// <param name="supplTables">Set to a single table when "Filter mode" is 2; otherwise untouched.</param>
        /// <param name="documents">Not written by this method.</param>
        /// <param name="processInfo">Receives an error string when no row or no term is selected.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams <int> p = param.GetParamWithSubParams <int>("Row");
            int colInd = p.Value;

            if (colInd < 0)
            {
                processInfo.ErrString = "No categorical rows available.";
                return;
            }
            Parameter <int[]> mcp = p.GetSubParameters().GetParam <int[]>("Values");

            int[] inds = mcp.Value;
            if (inds.Length == 0)
            {
                processInfo.ErrString = "Please select at least one term for filtering.";
                return;
            }

            // Translate the selected indices into the actual terms of the chosen categorical row.
            string[]         v        = mdata.GetCategoryRowValuesAt(colInd);
            HashSet <string> selected = new HashSet <string>();
            foreach (int ind in inds)
            {
                selected.Add(v[ind]);
            }
            bool remove = param.GetParam <int>("Mode").Value == 0;

            string[][] cats      = mdata.GetCategoryRowAt(colInd);
            List <int> valids    = new List <int>();
            List <int> notvalids = new List <int>();

            for (int i = 0; i < cats.Length; i++)
            {
                bool matches = false;
                foreach (string w in cats[i])
                {
                    if (selected.Contains(w))
                    {
                        matches = true;
                        break;
                    }
                }
                // Remove mode keeps the non-matching columns, keep mode keeps the matching ones.
                // Every other column is discarded and must be recorded in BOTH modes, otherwise
                // the split table stays empty when matching columns are kept.
                if (matches != remove)
                {
                    valids.Add(i);
                }
                else
                {
                    notvalids.Add(i);
                }
            }
            if (param.GetParam <int>("Filter mode").Value == 2)
            {
                supplTables = new[] { PerseusPluginUtils.CreateSupplTabSplitColumns(mdata, notvalids.ToArray()) };
            }
            PerseusPluginUtils.FilterColumnsNew(mdata, param, valids.ToArray());
        }
 /// <summary>
 /// Selects "Number of rows" rows (capped at the row count) from a permutation of all row
 /// indices generated with the fixed seed 7, and passes them to
 /// <c>PerseusPluginUtils.FilterRows</c>.
 /// </summary>
 /// <param name="mdata">Matrix data to filter.</param>
 /// <param name="param">Parameters holding "Number of rows".</param>
 /// <param name="supplTables">Not written by this method.</param>
 /// <param name="documents">Not written by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
     ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     int requested = param.GetParam<int>("Number of rows").Value;
     int count     = Math.Min(requested, mdata.RowCount);
     // The generator is created with the constant seed 7 on every call.
     var generator   = new Random2(7);
     var permutation = generator.NextPermutation(mdata.RowCount);
     int[] selectedRows = ArrayUtils.SubArray(permutation, count);
     PerseusPluginUtils.FilterRows(mdata, param, selectedRows);
 }
Example #15
0
 /// <summary>
 /// Builds the parameters: the minimum-values parameter, a "Mode" choice offering only
 /// "In total", the values-should-be parameter and the filter-mode parameter.
 /// </summary>
 /// <param name="mdata">Matrix data forwarded to <c>PerseusPluginUtils.GetMinValuesParam</c>.</param>
 /// <param name="errorString">Not written by this method.</param>
 /// <returns>The assembled parameters.</returns>
 public Parameters GetParameters(IMatrixData mdata, ref string errorString)
 {
     var minValues = PerseusPluginUtils.GetMinValuesParam(mdata, false);
     var mode = new SingleChoiceWithSubParams("Mode")
     {
         Values = new[] { "In total" },
         // NOTE(review): this is a collection initializer, i.e. it adds to the existing
         // SubParams collection rather than replacing it - confirm that this is intended.
         SubParams = { new Parameters(new Parameter[0]) },
         ParamNameWidth = 50,
         TotalWidth = 731
     };
     var valuesShouldBe = PerseusPluginUtils.GetValuesShouldBeParam();
     var filterMode     = PerseusPluginUtils.CreateFilterModeParam(true);
     return new Parameters(minValues, mode, valuesShouldBe, filterMode);
 }
Example #16
0
 /// <summary>
 /// Creates the control: stores the matrix data and the callback, shows the matrix in the
 /// table view and wires up the "remove" and "keep" buttons (click handler and "hand.png" image).
 /// </summary>
 /// <param name="mdata">Matrix data displayed in the table view.</param>
 /// <param name="createNewMatrix">Callback stored in the <c>createNewMatrix</c> field.</param>
 public SelectRowsManuallyControl(IMatrixData mdata, Action <IData> createNewMatrix)
 {
     InitializeComponent();

     this.mdata            = mdata;
     this.createNewMatrix  = createNewMatrix;
     tableView1.TableModel = new MatrixDataTable(mdata);

     // "Remove selected rows" button.
     removeSelectedRowsButton.Click += RemoveSelectedRowsButton_OnClick;
     removeSelectedRowsButton.Image  = GraphUtils.ToBitmap(PerseusPluginUtils.GetImage("hand.png"));

     // "Keep selected rows" button; the image is loaded separately for each button.
     keepSelectedRowsButton.Click += KeepSelectedRowsButton_OnClick;
     keepSelectedRowsButton.Image  = GraphUtils.ToBitmap(PerseusPluginUtils.GetImage("hand.png"));
 }
Example #17
0
 /// <summary>
 /// Filters rows with the numeric relations configured in <paramref name="param"/>.
 /// </summary>
 /// <param name="mdata">Matrix data to filter.</param>
 /// <param name="param">Numeric filter parameters.</param>
 /// <param name="supplTables">Not written by this method.</param>
 /// <param name="documents">Not written by this method.</param>
 /// <param name="processInfo">Receives the error string reported while reading the relations.</param>
 public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     Relation[] relations = PerseusUtils.GetRelationsNumFilter(param, out string errString, out int[] colInds, out bool and);
     if (errString == null)
     {
         var valids = GetValids(mdata, colInds, relations, and);
         PerseusPluginUtils.FilterRows(mdata, param, valids);
     }
     else
     {
         // The relations could not be read; report the problem and leave the data untouched.
         processInfo.ErrString = errString;
     }
 }
Example #18
0
        /// <summary>
        /// For every selected main column, computes p-values with <c>CalcSignificanceA</c>, derives a
        /// significance category from them and appends both as a numeric column
        /// ("... Significance A") and a categorical column ("... A significant").
        /// </summary>
        /// <param name="mdata">Matrix data that receives the new columns.</param>
        /// <param name="param">Parameters "Columns", "Use for truncation", "Threshold value" and "Side".</param>
        /// <param name="supplTables">Not written by this method.</param>
        /// <param name="documents">Not written by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        /// <exception cref="Exception">
        /// Thrown for a "Side" index other than 0, 1 or 2, and - once a column is processed - for a
        /// truncation other than p-value or Benjamini-Hochberg.
        /// </exception>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            int[] cols       = param.GetParam <int[]>("Columns").Value;
            int   truncIndex = param.GetParam <int>("Use for truncation").Value;

            TestTruncation truncation;
            if (truncIndex == 0)
            {
                truncation = TestTruncation.Pvalue;
            }
            else if (truncIndex == 1)
            {
                truncation = TestTruncation.BenjaminiHochberg;
            }
            else
            {
                truncation = TestTruncation.PermutationBased;
            }

            double threshold = param.GetParam <double>("Threshold value").Value;
            int    sideInd   = param.GetParam <int>("Side").Value;

            TestSide side;
            if (sideInd == 0)
            {
                side = TestSide.Both;
            }
            else if (sideInd == 1)
            {
                side = TestSide.Left;
            }
            else if (sideInd == 2)
            {
                side = TestSide.Right;
            }
            else
            {
                throw new Exception("Never get here.");
            }

            foreach (int col in cols)
            {
                BaseVector column = mdata.Values.GetColumn(col);
                double[]   pvals  = CalcSignificanceA(column, side);
                string[][] significance;

                if (truncation == TestTruncation.Pvalue)
                {
                    significance = PerseusPluginUtils.CalcPvalueSignificance(pvals, threshold);
                }
                else if (truncation == TestTruncation.BenjaminiHochberg)
                {
                    // The corrected values returned through the out parameter are not used here.
                    double[] unusedFdrs;
                    significance = PerseusPluginUtils.CalcBenjaminiHochbergFdr(pvals, threshold, pvals.Length, out unusedFdrs);
                }
                else
                {
                    throw new Exception("Never get here.");
                }

                mdata.AddNumericColumn(mdata.ColumnNames[col] + " Significance A", "", pvals);
                mdata.AddCategoryColumn(mdata.ColumnNames[col] + " A significant", "", significance);
            }
        }
Example #19
0
 /// <summary>
 /// Replaces the main matrix by one column per group of the categorical row
 /// <paramref name="groupColInd"/>. Each cell is <paramref name="func"/> applied to the finite
 /// values of the group's columns in that row, or NaN when fewer than
 /// <paramref name="validVals"/> finite values exist. The grouping row is removed afterwards and
 /// the remaining categorical and numeric rows are collapsed with
 /// <c>AverageCategoryRow</c> / <c>AverageNumericRow</c>.
 /// </summary>
 /// <param name="groupColInd">Index of the categorical row that defines the groups.</param>
 /// <param name="validVals">Minimum number of finite values required to compute a cell.</param>
 /// <param name="mdata">Matrix data that is modified in place.</param>
 /// <param name="func">Aggregation applied to the finite values of a group.</param>
 private static void FillMatrixDontKeep(int groupColInd, int validVals, IMatrixData mdata,
                                        Func <IList <double>, double> func)
 {
     string[][] grouping   = mdata.GetCategoryRowAt(groupColInd);
     string[]   groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
     int[][]    members    = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
     int        rowCount   = mdata.RowCount;
     int        groupCount = groupNames.Length;
     double[,] aggregated = new double[rowCount, groupCount];
     double[,] quality    = new double[rowCount, groupCount];
     bool[,]   imputed    = new bool[rowCount, groupCount];
     for (int row = 0; row < rowCount; row++)
     {
         for (int g = 0; g < groupCount; g++)
         {
             List <double> finite = new List <double>();
             List <bool>   flags  = new List <bool>();
             foreach (int col in members[g])
             {
                 double v = mdata.Values.Get(row, col);
                 if (double.IsNaN(v) || double.IsInfinity(v))
                 {
                     continue;
                 }
                 finite.Add(v);
                 flags.Add(mdata.IsImputed[row, col]);
             }
             bool enough = finite.Count >= validVals;
             aggregated[row, g] = enough ? func(finite) : double.NaN;
             quality[row, g]    = double.NaN;
             // The cell counts as imputed when any contributing value was imputed.
             imputed[row, g] = enough && ArrayUtils.Or(flags);
         }
     }
     mdata.ColumnNames        = new List <string>(groupNames);
     mdata.ColumnDescriptions = GetEmpty(groupNames);
     mdata.Values.Set(aggregated);
     mdata.Quality.Set(quality);
     mdata.IsImputed.Set(imputed);
     // The grouping row goes first, so the loops below only see the remaining annotation rows.
     mdata.RemoveCategoryRowAt(groupColInd);
     for (int r = 0; r < mdata.CategoryRowCount; r++)
     {
         mdata.SetCategoryRowAt(AverageCategoryRow(mdata.GetCategoryRowAt(r), members), r);
     }
     for (int r = 0; r < mdata.NumericRows.Count; r++)
     {
         mdata.NumericRows[r] = AverageNumericRow(mdata.NumericRows[r], members);
     }
 }
        /// <summary>
        /// Filters rows by the number of valid values per group. A row is kept when the valid-value
        /// criterion holds in every group, or in at least one group when
        /// <paramref name="oneGroup"/> is true.
        /// </summary>
        /// <param name="minValids">Minimum number (or percentage) of valid values, forwarded to <c>PerseusPluginUtils.Valid</c>.</param>
        /// <param name="percentage">Forwarded to <c>PerseusPluginUtils.Valid</c> together with <paramref name="minValids"/>.</param>
        /// <param name="mdata">Matrix data to filter.</param>
        /// <param name="param">Parameters forwarded to <c>PerseusPluginUtils.FilterRowsNew</c>.</param>
        /// <param name="oneGroup">True: one passing group is enough. False: all groups must pass.</param>
        /// <param name="threshold">Forwarded to <c>PerseusPluginUtils.IsValid</c>.</param>
        /// <param name="threshold2">Forwarded to <c>PerseusPluginUtils.IsValid</c>.</param>
        /// <param name="filterMode">Forwarded to <c>PerseusPluginUtils.IsValid</c>.</param>
        /// <param name="groupCol">Group terms per main column.</param>
        private static void NonzeroFilterGroup(int minValids, bool percentage, IMatrixData mdata, Parameters param,
                                               bool oneGroup, double threshold, double threshold2, FilteringMode filterMode, IList <string[]> groupCol
                                               )
        {
            List <int> valids = new List <int>();

            string[] groupVals = ArrayUtils.UniqueValuesPreserveOrder(groupCol);
            Array.Sort(groupVals);
            int[][] groupInds = CalcGroupInds(groupVals, groupCol);

            // The group sizes depend only on the column-to-group assignment, so they are counted
            // once instead of once per row.
            int[] totals = new int[groupVals.Length];
            for (int j = 0; j < groupInds.Length; j++)
            {
                foreach (int g in groupInds[j])
                {
                    if (g >= 0)
                    {
                        totals[g]++;
                    }
                }
            }

            for (int i = 0; i < mdata.RowCount; i++)
            {
                int[] counts = new int[groupVals.Length];
                for (int j = 0; j < groupInds.Length; j++)
                {
                    if (!PerseusPluginUtils.IsValid(mdata.Values.Get(i, j), threshold, threshold2, filterMode))
                    {
                        continue;
                    }
                    // A valid value counts for every group the column belongs to.
                    foreach (int g in groupInds[j])
                    {
                        if (g >= 0)
                        {
                            counts[g]++;
                        }
                    }
                }
                bool[] groupValid = new bool[counts.Length];
                for (int j = 0; j < groupValid.Length; j++)
                {
                    groupValid[j] = PerseusPluginUtils.Valid(counts[j], minValids, percentage, totals[j]);
                }
                if (oneGroup ? ArrayUtils.Or(groupValid) : ArrayUtils.And(groupValid))
                {
                    valids.Add(i);
                }
            }
            PerseusPluginUtils.FilterRowsNew(mdata, param, valids.ToArray());
        }
        /// <summary>
        /// Filters rows by the terms of a categorical column. Rows carrying at least one of the
        /// selected terms are removed ("Mode" 0) or kept (any other "Mode").
        /// </summary>
        /// <param name="mdata">Matrix data to filter.</param>
        /// <param name="param">Parameters "Column" (with sub-parameter "Values") and "Mode".</param>
        /// <param name="supplTables">Not written by this method.</param>
        /// <param name="documents">Not written by this method.</param>
        /// <param name="processInfo">Receives an error string when no column or no term is selected.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams <int> columnParam = param.GetParamWithSubParams <int>("Column");
            int colInd = columnParam.Value;

            if (colInd < 0)
            {
                processInfo.ErrString = "No categorical columns available.";
                return;
            }
            Parameter <int[]> termParam = columnParam.GetSubParameters().GetParam <int[]>("Values");

            int[] inds = termParam.Value;
            if (inds.Length == 0)
            {
                processInfo.ErrString = "Please select at least one term for filtering.";
                return;
            }

            // Resolve the selected indices to the terms of the chosen column.
            string[] terms    = new string[inds.Length];
            string[] allTerms = mdata.GetCategoryColumnValuesAt(colInd);
            for (int k = 0; k < terms.Length; k++)
            {
                terms[k] = allTerms[inds[k]];
            }
            HashSet <string> selected = new HashSet <string>(terms);
            bool             remove   = param.GetParam <int>("Mode").Value == 0;
            List <int>       kept     = new List <int>();

            for (int row = 0; row < mdata.RowCount; row++)
            {
                bool hit = false;
                foreach (string term in mdata.GetCategoryColumnEntryAt(colInd, row))
                {
                    if (!selected.Contains(term))
                    {
                        continue;
                    }
                    hit = true;
                    break;
                }
                // Remove mode keeps rows without a hit, keep mode keeps rows with a hit.
                if (hit != remove)
                {
                    kept.Add(row);
                }
            }
            PerseusPluginUtils.FilterRows(mdata, param, kept.ToArray());
        }
        /// <summary>
        /// Checks <c>PerseusPluginUtils.CalcBenjaminiHochbergFdr</c> on five p-values, the last of
        /// which is NaN, against fixed expected FDR values. NaN is checked explicitly instead of
        /// being handed to the tolerance-based comparison.
        /// </summary>
        public void TestBenjaminiHochberFdrCorrectionKinactExample()
        {
            var pValues = new[]
            {
                0.5, 0.26996402, 0.17923912, 0.29354353, double.NaN
            };
            var expectedFdrs = new[]
            {
                0.5, 0.39139137, 0.39139137, 0.39139137, double.NaN
            };

            PerseusPluginUtils.CalcBenjaminiHochbergFdr(pValues, 0.05, out var fdrs);
            for (int i = 0; i < expectedFdrs.Length; i++)
            {
                var expected = expectedFdrs[i];
                var actual   = fdrs[i];
                if (double.IsNaN(expected))
                {
                    // How Assert.AreEqual with a delta treats NaN depends on the test framework,
                    // so a NaN expectation is verified directly.
                    Assert.IsTrue(double.IsNaN(actual));
                    continue;
                }
                Assert.IsFalse(double.IsNaN(actual));
                Assert.AreEqual(expected, actual, 0.00001);
            }
        }
        /// <summary>
        /// Checks that <c>PerseusPluginUtils.CalcBenjaminiHochbergFdr</c> returns the p-value itself
        /// (within 0.00001) when the input holds a single p-value.
        /// </summary>
        public void TestBenjaminiHochbergFdrCorrectionWithSinglePvalue()
        {
            double[] pValues      = { 0.55418364 };
            double[] expectedFdrs = { 0.55418364 };

            PerseusPluginUtils.CalcBenjaminiHochbergFdr(pValues, 0.05, out var fdrs);

            int index = 0;
            foreach (double expected in expectedFdrs)
            {
                Assert.AreEqual(expected, fdrs[index], 0.00001);
                index++;
            }
        }
Example #24
0
 /// <summary>
 /// Adds one numeric column per group of the categorical row <paramref name="groupColInd"/>,
 /// named "stddev " + group name. Each cell holds the standard deviation of the finite values
 /// of the group's main columns in that row when <paramref name="varInd"/> is 0, otherwise the
 /// standard deviation divided by the square root of the number of finite values. Cells with
 /// fewer than <paramref name="validVals"/> finite values are NaN.
 /// </summary>
 /// <param name="groupColInd">Index of the categorical row that defines the groups.</param>
 /// <param name="validVals">Minimum number of finite values required to compute a cell.</param>
 /// <param name="mdata">Matrix data that receives the new numeric columns.</param>
 /// <param name="varInd">0 for the plain standard deviation, anything else for the scaled variant.</param>
 private static void AddStandardDeviation(int groupColInd, int validVals, IMatrixData mdata, int varInd)
 {
     string[][] grouping   = mdata.GetCategoryRowAt(groupColInd);
     string[]   groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
     int[][]    members    = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
     double[][] spreads    = new double[groupNames.Length][];
     for (int g = 0; g < spreads.Length; g++)
     {
         spreads[g] = new double[mdata.RowCount];
     }
     for (int row = 0; row < mdata.RowCount; row++)
     {
         for (int g = 0; g < groupNames.Length; g++)
         {
             List <double> finite = new List <double>();
             foreach (int col in members[g])
             {
                 double v = mdata.Values.Get(row, col);
                 if (double.IsNaN(v) || double.IsInfinity(v))
                 {
                     continue;
                 }
                 finite.Add(v);
             }
             double spread = double.NaN;
             if (finite.Count >= validVals)
             {
                 double sd = ArrayUtils.StandardDeviation(finite);
                 spread = varInd == 0 ? sd : sd / Math.Sqrt(finite.Count);
             }
             spreads[g][row] = spread;
         }
     }
     for (int g = 0; g < groupNames.Length; g++)
     {
         // Name and description of the new column are identical.
         string label = "stddev " + groupNames[g];
         mdata.AddNumericColumn(label, label, spreads[g]);
     }
 }
Example #25
0
 /// <summary>
 /// Restricts the annotation rows of <paramref name="data"/> (numeric, multi-numeric,
 /// categorical and text rows, each with names and descriptions) to the indices selected in
 /// <paramref name="param"/>.
 /// </summary>
 /// <param name="data">Matrix data that is modified in place.</param>
 /// <param name="param">Parameters "Numerical rows", "Multi-numerical rows", "Categorical rows" and "Text rows".</param>
 /// <param name="supplTables">Not written by this method.</param>
 /// <param name="documents">Not written by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     // All selections are read before the first modification of 'data'.
     int[] numericKeep      = param.GetParam <int[]>("Numerical rows").Value;
     int[] multiNumericKeep = param.GetParam <int[]>("Multi-numerical rows").Value;
     int[] categoryKeep     = param.GetParam <int[]>("Categorical rows").Value;
     int[] textKeep         = param.GetParam <int[]>("Text rows").Value;

     // Numeric rows.
     data.NumericRows            = ArrayUtils.SubList(data.NumericRows, numericKeep);
     data.NumericRowNames        = ArrayUtils.SubList(data.NumericRowNames, numericKeep);
     data.NumericRowDescriptions = ArrayUtils.SubList(data.NumericRowDescriptions, numericKeep);

     // Multi-numeric rows.
     data.MultiNumericRows            = ArrayUtils.SubList(data.MultiNumericRows, multiNumericKeep);
     data.MultiNumericRowNames        = ArrayUtils.SubList(data.MultiNumericRowNames, multiNumericKeep);
     data.MultiNumericRowDescriptions = ArrayUtils.SubList(data.MultiNumericRowDescriptions, multiNumericKeep);

     // Categorical rows: the rows themselves are fetched through the plugin helper.
     data.CategoryRows            = PerseusPluginUtils.GetCategoryRows(data, categoryKeep);
     data.CategoryRowNames        = ArrayUtils.SubList(data.CategoryRowNames, categoryKeep);
     data.CategoryRowDescriptions = ArrayUtils.SubList(data.CategoryRowDescriptions, categoryKeep);

     // Text rows.
     data.StringRows            = ArrayUtils.SubList(data.StringRows, textKeep);
     data.StringRowNames        = ArrayUtils.SubList(data.StringRowNames, textKeep);
     data.StringRowDescriptions = ArrayUtils.SubList(data.StringRowDescriptions, textKeep);
 }
        /// <summary>
        /// Converts the categorical columns listed in <paramref name="colInds"/> into numeric
        /// columns and removes them from the categorical columns of <paramref name="mdata"/>.
        /// </summary>
        /// <remarks>
        /// Only the first entry of each cell is parsed. A cell that is null, empty, or whose
        /// first entry is not accepted by <c>double.TryParse</c> becomes <c>double.NaN</c>.
        /// </remarks>
        /// <param name="colInds">Indices of the categorical columns to convert.</param>
        /// <param name="mdata">Matrix that is modified in place.</param>
        private static void CategoricalToNumeric(IList <int> colInds, IMatrixData mdata)
        {
            // Indices of the categorical columns that stay categorical.
            int[]        remaining    = ArrayUtils.Complement(colInds, mdata.CategoryColumnCount);
            string[]     movedNames   = ArrayUtils.SubArray(mdata.CategoryColumnNames, colInds);
            string[]     movedDescr   = ArrayUtils.SubArray(mdata.CategoryColumnDescriptions, colInds);
            string[][][] source       = PerseusPluginUtils.GetCategoryColumns(mdata, colInds).ToArray();
            double[][]   converted    = new double[source.Length][];

            for (int col = 0; col < source.Length; col++)
            {
                string[][] cells  = source[col];
                double[]   target = new double[cells.Length];
                converted[col] = target;
                for (int row = 0; row < target.Length; row++)
                {
                    string[] terms = cells[row];
                    double   value = double.NaN;
                    double   parsed;
                    // NaN is kept unless the cell has a first entry that parses as a double.
                    if (terms != null && terms.Length != 0 && double.TryParse(terms[0], out parsed))
                    {
                        value = parsed;
                    }
                    target[row] = value;
                }
            }

            // Append the converted columns to the numeric columns ...
            mdata.NumericColumnNames.AddRange(movedNames);
            mdata.NumericColumnDescriptions.AddRange(movedDescr);
            mdata.NumericColumns.AddRange(converted);
            // ... and keep only the untouched categorical columns.
            mdata.CategoryColumns            = PerseusPluginUtils.GetCategoryColumns(mdata, remaining);
            mdata.CategoryColumnNames        = ArrayUtils.SubList(mdata.CategoryColumnNames, remaining);
            mdata.CategoryColumnDescriptions = ArrayUtils.SubList(mdata.CategoryColumnDescriptions, remaining);
        }
 /// <summary>
 /// Reduces <paramref name="data"/> to the columns reported by the <c>GetValid*Cols</c>
 /// helpers. Each column type (main, numeric, multi-numeric, categorical, text) is only
 /// rewritten when its helper returned fewer indices than there are columns of that type.
 /// </summary>
 /// <param name="data">Matrix that is filtered in place.</param>
 /// <param name="param">Not used by this method.</param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="documents">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     // All index sets are computed before the matrix is modified.
     int[] exColInds       = GetValidExCols(data);
     int[] numColInds      = GetValidNumCols(data);
     int[] multiNumColInds = GetValidMultiNumCols(data);
     int[] catColInds      = GetValidCatCols(data);
     int[] textColInds     = GetValidTextCols(data);
     if (exColInds.Length < data.ColumnCount)
     {
         data.ExtractColumns(exColInds);
     }
     if (numColInds.Length < data.NumericColumnCount)
     {
         data.NumericColumns            = ArrayUtils.SubList(data.NumericColumns, numColInds);
         data.NumericColumnNames        = ArrayUtils.SubList(data.NumericColumnNames, numColInds);
         data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, numColInds);
     }
     if (multiNumColInds.Length < data.MultiNumericColumnCount)
     {
         data.MultiNumericColumns            = ArrayUtils.SubList(data.MultiNumericColumns, multiNumColInds);
         data.MultiNumericColumnNames        = ArrayUtils.SubList(data.MultiNumericColumnNames, multiNumColInds);
         data.MultiNumericColumnDescriptions = ArrayUtils.SubList(data.MultiNumericColumnDescriptions, multiNumColInds);
     }
     if (catColInds.Length < data.CategoryColumnCount)
     {
         data.CategoryColumns            = PerseusPluginUtils.GetCategoryColumns(data, catColInds);
         data.CategoryColumnNames        = ArrayUtils.SubList(data.CategoryColumnNames, catColInds);
         data.CategoryColumnDescriptions = ArrayUtils.SubList(data.CategoryColumnDescriptions, catColInds);
     }
     if (textColInds.Length < data.StringColumnCount)
     {
         data.StringColumns     = ArrayUtils.SubList(data.StringColumns, textColInds);
         data.StringColumnNames = ArrayUtils.SubList(data.StringColumnNames, textColInds);
         // The filtered text-column descriptions belong to the text columns. Writing them to
         // ColumnDescriptions would overwrite the main-column descriptions instead and leave
         // StringColumnDescriptions out of step with StringColumns and StringColumnNames.
         data.StringColumnDescriptions = ArrayUtils.SubList(data.StringColumnDescriptions, textColInds);
     }
 }
Example #28
0
 /// <summary>
 /// Keeps only the selected columns of <paramref name="data"/>. Main columns are reduced via
 /// <c>ExtractColumns</c>; numeric, multi-numeric, categorical and text columns are replaced
 /// by the sub-lists picked out by the corresponding index parameter.
 /// </summary>
 /// <param name="data">Matrix that is filtered in place.</param>
 /// <param name="param">Supplies the five <c>int[]</c> index selections read below.</param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="documents">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     // All selections are read before the matrix is modified.
     int[] keepMain         = param.GetParam <int[]>("Main columns").Value;
     int[] keepNumeric      = param.GetParam <int[]>("Numerical columns").Value;
     int[] keepMultiNumeric = param.GetParam <int[]>("Multi-numerical columns").Value;
     int[] keepCategory     = param.GetParam <int[]>("Categorical columns").Value;
     int[] keepText         = param.GetParam <int[]>("Text columns").Value;

     // Main columns.
     data.ExtractColumns(keepMain);

     // Numeric columns.
     data.NumericColumns            = ArrayUtils.SubList(data.NumericColumns, keepNumeric);
     data.NumericColumnNames        = ArrayUtils.SubList(data.NumericColumnNames, keepNumeric);
     data.NumericColumnDescriptions = ArrayUtils.SubList(data.NumericColumnDescriptions, keepNumeric);

     // Multi-numeric columns.
     data.MultiNumericColumns            = ArrayUtils.SubList(data.MultiNumericColumns, keepMultiNumeric);
     data.MultiNumericColumnNames        = ArrayUtils.SubList(data.MultiNumericColumnNames, keepMultiNumeric);
     data.MultiNumericColumnDescriptions = ArrayUtils.SubList(data.MultiNumericColumnDescriptions, keepMultiNumeric);

     // Categorical columns: the values go through the plugin helper rather than SubList.
     data.CategoryColumns            = PerseusPluginUtils.GetCategoryColumns(data, keepCategory);
     data.CategoryColumnNames        = ArrayUtils.SubList(data.CategoryColumnNames, keepCategory);
     data.CategoryColumnDescriptions = ArrayUtils.SubList(data.CategoryColumnDescriptions, keepCategory);

     // Text columns.
     data.StringColumns     = ArrayUtils.SubList(data.StringColumns, keepText);
     data.StringColumnNames = ArrayUtils.SubList(data.StringColumnNames, keepText);
     // NOTE(review): StringColumnDescriptions is intentionally left untouched here, as in the
     // original (the filtering statement was disabled). Confirm whether the descriptions
     // should be reduced with keepText as well.
 }
Example #29
0
 /// <summary>
 /// Adds one numeric column per group to <paramref name="mdata"/>. The groups are the unique
 /// values of the category row at <paramref name="groupColInd"/>; each cell holds
 /// <paramref name="func"/> applied to the finite main-matrix values of that row within the group.
 /// </summary>
 /// <param name="groupColInd">Index of the category row that defines the groups.</param>
 /// <param name="validVals">Minimum number of finite values required; below it the cell is NaN.</param>
 /// <param name="mdata">Matrix that is read and to which the new columns are added.</param>
 /// <param name="func">Summary function applied to the finite values of one row and group.</param>
 private static void FillMatrixKeep(int groupColInd, int validVals, IMatrixData mdata, Func <IList <double>, double> func)
 {
     string[][] grouping   = mdata.GetCategoryRowAt(groupColInd);
     string[]   groupNames = ArrayUtils.UniqueValuesPreserveOrder(grouping);
     int[][]    members    = PerseusPluginUtils.GetMainColIndices(grouping, groupNames);
     int        groupCount = groupNames.Length;

     // One output column per group, one entry per matrix row.
     double[][] summaries = new double[groupCount][];
     for (int g = 0; g < groupCount; g++)
     {
         summaries[g] = new double[mdata.RowCount];
     }

     for (int row = 0; row < mdata.RowCount; row++)
     {
         for (int g = 0; g < groupCount; g++)
         {
             // Collect the values of this row that are neither NaN nor infinite.
             List <double> finite = new List <double>();
             foreach (int col in members[g])
             {
                 double value = mdata.Values.Get(row, col);
                 if (double.IsNaN(value) || double.IsInfinity(value))
                 {
                     continue;
                 }
                 finite.Add(value);
             }
             // func is only invoked when enough finite values are present.
             summaries[g][row] = finite.Count < validVals ? double.NaN : func(finite);
         }
     }

     // The group name serves as both arguments preceding the values.
     for (int g = 0; g < groupCount; g++)
     {
         string groupName = groupNames[g];
         mdata.AddNumericColumn(groupName, groupName, summaries[g]);
     }
 }
Example #30
0
        /// <summary>
        /// Computes row-wise summary values over the main matrix and appends them as numeric
        /// columns. Each entry of <c>procs</c> whose index is contained in the "Calculate"
        /// parameter is applied to the finite values of every row.
        /// </summary>
        /// <remarks>
        /// "Expression column selection" decides which columns are used: value 1 takes the
        /// "Columns" sub-parameter, any other value takes all main columns. With value 2 the
        /// summaries are additionally computed per group of the category row chosen by the
        /// "Group" sub-parameter and added as columns named "&lt;Item1&gt; &lt;group name&gt;".
        /// For each <c>procs</c> entry, Item1 is passed as the column name, Item2 is the function
        /// invoked on the value list, and Item3 is passed as the second AddNumericColumn argument.
        /// </remarks>
        /// <param name="mdata">Matrix that is read and to which the new columns are added.</param>
        /// <param name="param">Supplies the column selection and the set of summaries to calculate.</param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables,
                                ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            ParameterWithSubParams <int> selection = param.GetParamWithSubParams <int>("Expression column selection");
            bool perGroup = selection.Value == 2;

            // Group names and the main-column indices of each group; only set in per-group mode.
            string[] groupNames   = null;
            int[][]  groupColumns = null;
            if (perGroup)
            {
                int        groupRowInd = selection.GetSubParameters().GetParam <int>("Group").Value;
                string[][] groupRow    = mdata.GetCategoryRowAt(groupRowInd);
                groupNames   = ArrayUtils.UniqueValuesPreserveOrder(groupRow);
                groupColumns = PerseusPluginUtils.GetMainColIndices(groupRow, groupNames);
            }

            int[] useCols;
            if (selection.Value == 1)
            {
                useCols = selection.GetSubParameters().GetParam <int[]>("Columns").Value;
            }
            else
            {
                useCols = ArrayUtils.ConsecutiveInts(mdata.ColumnCount);
            }
            HashSet <int> requested = ArrayUtils.ToHashSet(param.GetParam <int[]>("Calculate").Value);

            // Output buffers: columns[p] over the selected columns, columnsG[p][g] per group.
            int          procCount = procs.Length;
            bool[]       include   = new bool[procCount];
            double[][]   columns   = new double[procCount][];
            double[][][] columnsG  = perGroup ? new double[procCount][][] : null;
            for (int p = 0; p < procCount; p++)
            {
                if (perGroup)
                {
                    columnsG[p] = new double[groupNames.Length][];
                }
                include[p] = requested.Contains(p);
                if (!include[p])
                {
                    continue;
                }
                columns[p] = new double[mdata.RowCount];
                if (perGroup)
                {
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        columnsG[p][g] = new double[mdata.RowCount];
                    }
                }
            }

            for (int row = 0; row < mdata.RowCount; row++)
            {
                // Finite values of this row over the selected columns.
                List <double> finite = new List <double>();
                foreach (int col in useCols)
                {
                    double value = mdata.Values.Get(row, col);
                    if (double.IsNaN(value) || double.IsInfinity(value))
                    {
                        continue;
                    }
                    finite.Add(value);
                }
                for (int p = 0; p < procCount; p++)
                {
                    if (include[p])
                    {
                        columns[p][row] = procs[p].Item2(finite);
                    }
                }
                if (!perGroup)
                {
                    continue;
                }

                // Finite values of this row, split by group.
                List <double>[] finiteByGroup = new List <double> [groupNames.Length];
                for (int g = 0; g < groupColumns.Length; g++)
                {
                    List <double> bucket = new List <double>();
                    finiteByGroup[g] = bucket;
                    foreach (int col in groupColumns[g])
                    {
                        double value = mdata.Values.Get(row, col);
                        if (double.IsNaN(value) || double.IsInfinity(value))
                        {
                            continue;
                        }
                        bucket.Add(value);
                    }
                }
                for (int p = 0; p < procCount; p++)
                {
                    if (!include[p])
                    {
                        continue;
                    }
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        columnsG[p][g][row] = procs[p].Item2(finiteByGroup[g]);
                    }
                }
            }

            // Append results: the overall column first, followed by its per-group columns.
            for (int p = 0; p < procCount; p++)
            {
                if (!include[p])
                {
                    continue;
                }
                mdata.AddNumericColumn(procs[p].Item1, procs[p].Item3, columns[p]);
                if (perGroup)
                {
                    for (int g = 0; g < groupNames.Length; g++)
                    {
                        mdata.AddNumericColumn(procs[p].Item1 + " " + groupNames[g], procs[p].Item3, columnsG[p][g]);
                    }
                }
            }
        }