Esempio n. 1
0
        /// <summary>
        /// Collapses a four-row matrix on its "id" string column via <c>UniqueRows</c> and checks the
        /// merged main values, string, category, numeric and multi-numeric annotation columns.
        /// </summary>
        public void SmallTest()
        {
            IMatrixData mdata = PerseusFactory.CreateMatrixData(new double[, ]
            {
                { 0, 4 },
                { 1, 5 },
                { 2, 6 },
                { 3, 7 }
            });

            // Row 0 is the only "a" row; rows 1-3 share the id "b".
            mdata.AddStringColumn("id", "", new [] { "a", "b", "b", "b" });
            mdata.AddStringColumn("str", "", new [] { "a;b", "b;c", "c;d", "d;e" });
            mdata.AddCategoryColumn("cat", "", new[] { new[] { "a", "b" }, new[] { "b", "c" }, new[] { "c", "d" }, new[] { "d", "e" } });
            // One value per matrix row (the matrix has four rows); the 3.0 literal keeps the array typed as double[].
            mdata.AddNumericColumn("num", "", new [] { 0, 1, 2, 3.0 });
            mdata.AddMultiNumericColumn("mnum", "", new [] { new [] { 0, 4d }, new [] { 1, 5d }, new [] { 2, 6d }, new [] { 3, 7d } });
            mdata.UniqueRows(mdata.StringColumns[0], ArrayUtils.Median, UniqueRows.Union, UniqueRows.CatUnion, UniqueRows.MultiNumUnion);

            // Two distinct ids, so two rows are expected to remain.
            Assert.AreEqual(2, mdata.RowCount);
            // With ArrayUtils.Median supplied, the "b" rows are expected to yield 2 (from 1, 2, 3) and 6 (from 5, 6, 7).
            CollectionAssert.AreEqual(new [] { 0, 2 }, mdata.Values.GetColumn(0));
            CollectionAssert.AreEqual(new [] { 4, 6 }, mdata.Values.GetColumn(1));
            // String and category entries of the "b" rows are expected as the union of their distinct items.
            CollectionAssert.AreEqual(new [] { "a;b", "b;c;d;e" }, mdata.GetStringColumn("str"));
            CollectionAssert.AreEqual(new [] { new [] { "a", "b" }, new [] { "b", "c", "d", "e" } }, mdata.GetCategoryColumnAt(0));
            CollectionAssert.AreEqual(new [] { 0, 2 }, mdata.NumericColumns[0]);
            // Multi-numeric entries of the "b" rows are expected to be concatenated in row order.
            CollectionAssert.AreEqual(new [] { new [] { 0d, 4 }, new [] { 1d, 5, 2, 6, 3, 7 } }, mdata.MultiNumericColumns[0]);
        }
 /// <summary>
 /// Appends copies of the selected main, numerical, multi-numerical, categorical and text columns
 /// to <paramref name="data"/>. Each copy is named via <c>StringUtils.GetNextAvailableName</c>
 /// against the names already present among columns of the same kind.
 /// </summary>
 /// <param name="data">Matrix that is modified in place.</param>
 /// <param name="param">
 /// Supplies the index arrays "Main columns", "Numerical columns", "Multi-numerical columns",
 /// "Categorical columns" and "Text columns".
 /// </param>
 /// <param name="supplTables">Not used by this method.</param>
 /// <param name="documents">Not used by this method.</param>
 /// <param name="processInfo">Not used by this method.</param>
 public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
                         ref IDocumentData[] documents, ProcessInfo processInfo)
 {
     int[] exColInds       = param.GetParam <int[]>("Main columns").Value;
     int[] numColInds      = param.GetParam <int[]>("Numerical columns").Value;
     int[] multiNumColInds = param.GetParam <int[]>("Multi-numerical columns").Value;
     int[] catColInds      = param.GetParam <int[]>("Categorical columns").Value;
     int[] textColInds     = param.GetParam <int[]>("Text columns").Value;
     if (exColInds.Length > 0)
     {
         int ncol = data.ColumnCount;
         // Request every existing column followed by the selected ones; the renaming loop below
         // relies on the copies being located at positions ncol .. ncol + exColInds.Length - 1.
         data.ExtractColumns(ArrayUtils.Concat(ArrayUtils.ConsecutiveInts(ncol), exColInds));
         // The set is shared across iterations because the names are rewritten in place.
         HashSet <string> taken = new HashSet <string>(data.ColumnNames);
         for (int i = 0; i < exColInds.Length; i++)
         {
             string s = StringUtils.GetNextAvailableName(data.ColumnNames[ncol + i], taken);
             data.ColumnNames[ncol + i] = s;
             taken.Add(s);
         }
     }
     // In the loops below the set of taken names is rebuilt on every iteration from the current
     // column names, which already include the copies added by earlier iterations.
     foreach (int ind in numColInds)
     {
         HashSet <string> taken = new HashSet <string>(data.NumericColumnNames);
         string           s     = StringUtils.GetNextAvailableName(data.NumericColumnNames[ind], taken);
         data.AddNumericColumn(s, data.NumericColumnDescriptions[ind], (double[])data.NumericColumns[ind].Clone());
     }
     foreach (int ind in multiNumColInds)
     {
         HashSet <string> taken = new HashSet <string>(data.MultiNumericColumnNames);
         string           s     = StringUtils.GetNextAvailableName(data.MultiNumericColumnNames[ind], taken);
         // Array.Clone on a jagged array is shallow; copy each row so that the duplicate
         // does not share its value arrays with the original column.
         double[][] source = data.MultiNumericColumns[ind];
         double[][] copy   = new double[source.Length][];
         for (int row = 0; row < source.Length; row++)
         {
             copy[row] = source[row] == null ? null : (double[])source[row].Clone();
         }
         data.AddMultiNumericColumn(s, data.MultiNumericColumnDescriptions[ind], copy);
     }
     foreach (int ind in catColInds)
     {
         HashSet <string> taken = new HashSet <string>(data.CategoryColumnNames);
         string           s     = StringUtils.GetNextAvailableName(data.CategoryColumnNames[ind], taken);
         data.AddCategoryColumn(s, data.CategoryColumnDescriptions[ind], data.GetCategoryColumnAt(ind));
     }
     foreach (int ind in textColInds)
     {
         HashSet <string> taken = new HashSet <string>(data.StringColumnNames);
         string           s     = StringUtils.GetNextAvailableName(data.StringColumnNames[ind], taken);
         // The description must come from the text columns, not from the main columns.
         data.AddStringColumn(s, data.StringColumnDescriptions[ind], (string[])data.StringColumns[ind].Clone());
     }
 }
Esempio n. 3
0
        /// <summary>
        /// Writes a small annotated matrix with <c>PerseusUtils.WriteMatrix</c>, reads the text back with
        /// <c>PerseusUtils.ReadMatrix</c> and checks that the row, column and annotation counts are preserved.
        /// </summary>
        public void WriteMatrixTest()
        {
            // Main data: two rows and three named columns.
            double[,] mainValues = { { 1, 2, 3 }, { 3, 4, 5 } };
            List <string> mainNames = new List <string> { "col1", "col2", "col3" };
            IMatrixData mdata = PerseusFactory.CreateMatrixData(mainValues, mainNames);

            // Annotation rows: one entry per main column.
            mdata.AddCategoryRow("catrow", "this is catrow", new[] { new[] { "cat1" }, new[] { "cat1", "cat2" }, new[] { "cat2" } });
            mdata.AddNumericRow("numrow", "this is numrow", new[] { -1.0, 1, 2 });

            // Annotation columns: one entry per row.
            mdata.AddStringColumn("strcol1", "this is stringcol1", new[] { "1", "2" });
            mdata.AddStringColumn("strcol2", "", new[] { "", "hallo" });
            mdata.AddNumericColumn("numcol", "", new[] { 1.0, 2.0 });
            mdata.AddMultiNumericColumn("multnumcol", "this is multnumcol", new[] { new[] { -2.0, 2.0 }, new double[] {} });
            mdata.AddCategoryColumn("catcol", "", new[] { new[] { "cat1", "cat1.1" }, new[] { "cat2", "cat1" } });

            // Serialize into an in-memory buffer and keep the text for the read-back step.
            string serialized;
            using (MemoryStream buffer = new MemoryStream())
            {
                using (StreamWriter output = new StreamWriter(buffer))
                {
                    PerseusUtils.WriteMatrix(mdata, output);
                    output.Flush();
                    serialized = Encoding.UTF8.GetString(buffer.ToArray());
                }
            }

            // Read the serialized text into a fresh matrix.
            IMatrixData reloaded = PerseusFactory.CreateMatrixData();
            ProcessInfo info = new ProcessInfo(new Settings(), status => { }, progress => { }, 1);
            PerseusUtils.ReadMatrix(reloaded, info,
                                    () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(serialized))),
                                    "matrix1", '\t');

            Assert.AreEqual(2, reloaded.RowCount);
            Assert.AreEqual(3, reloaded.ColumnCount);

            Assert.AreEqual(2, reloaded.StringColumnCount);
            Assert.AreEqual(1, reloaded.NumericColumnCount);
            Assert.AreEqual(1, reloaded.CategoryColumnCount);
            Assert.AreEqual(1, reloaded.MultiNumericColumnCount);

            // Look the column up by name before checking its second entry.
            int strcol2Index = reloaded.StringColumnNames.IndexOf("strcol2");
            Assert.AreEqual("hallo", reloaded.StringColumns[strcol2Index][1]);

            Assert.AreEqual(1, reloaded.CategoryRowCount);
            Assert.AreEqual(1, reloaded.NumericRowCount);
        }
Esempio n. 4
0
        /// <summary>
        /// Builds a combined matrix from <c>inputData[0]</c> and <c>inputData[1]</c>: the first matrix is cloned
        /// and then extended with columns taken from the second one according to the row index map
        /// returned by <c>GetIndexMap</c>.
        /// </summary>
        /// <param name="inputData">The two input matrices; index 0 is cloned, index 1 supplies the added data.</param>
        /// <param name="parameters">
        /// Passed to <c>ParseMatchingColumns</c> and <c>ParseCopyParameters</c>; also read directly for
        /// "Add indicator" and "Add original row numbers".
        /// </param>
        /// <param name="supplTables">Not used by this method.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Not used by this method.</param>
        /// <returns>The combined matrix, with <c>Origin</c> set to "Combination".</returns>
        public IMatrixData ProcessData(IMatrixData[] inputData, Parameters parameters, ref IMatrixData[] supplTables,
                                       ref IDocumentData[] documents, ProcessInfo processInfo)
        {
            IMatrixData left  = inputData[0];
            IMatrixData right = inputData[1];

            ((int, int)first, (int, int)? second, bool outer, bool ignoreCase)match =
                ParseMatchingColumns(parameters);
            (int[][] rowMap, int[] rightOnly) =
                GetIndexMap(left, right, match.first, match.second, match.ignoreCase);

            IMatrixData combined = (IMatrixData)left.Clone();
            combined.Origin = "Combination";

            if (match.outer)
            {
                // Append one single-element entry per unmapped row of the second matrix.
                int     matchedCount = rowMap.Length;
                int[][] withRightOnly = rowMap
                                        .Concat(rightOnly.Select(rightRow => new[] { rightRow }))
                                        .ToArray();
                combined.AddEmptyRows(rightOnly.Length);
                UpdateIdColumns(combined, right, matchedCount, rightOnly, match.first, match.second);
                rowMap = withRightOnly;
            }

            if (parameters.GetParam <bool>("Add indicator").Value)
            {
                AddIndicator(combined, right, rowMap);
            }

            if (parameters.GetParam <bool>("Add original row numbers").Value)
            {
                // The index map is stored as a multi-numeric column, hence the conversion to double.
                double[][] originalRows = rowMap
                                          .Select(rows => rows.Select(row => Convert.ToDouble(row)).ToArray())
                                          .ToArray();
                combined.AddMultiNumericColumn("Original row numbers", "", originalRows);
            }

            ((int[] copy, int combine)mainSpec, int[] textCols, (int[] copy, int combine)numericSpec, int[] categoryCols) =
                ParseCopyParameters(parameters);
            SetAnnotationRows(combined, left, right, mainSpec.copy);
            AddMainColumns(combined, right, rowMap, mainSpec.copy, GetAveraging(mainSpec.combine));
            AddAnnotationColumns(combined, right, rowMap, textCols, numericSpec, categoryCols);
            return combined;
        }
Esempio n. 5
0
        /// <summary>
        /// Runs <c>ExpandSiteTable.ProcessData</c> on a two-row matrix with the main columns
        /// "Col___1", "Col___2", "Col___3" and "No expand", and checks the resulting layout:
        /// two main columns, six rows, and annotation columns repeated three times.
        /// </summary>
        public void TestSmallExample()
        {
            double[,] mainValues =
            {
                { 0.0, 1.0, 0, 5 },
                { 2.0, 3.0, 0, 5 }
            };
            List <string> mainNames = new List <string> { "Col___1", "Col___2", "Col___3", "No expand" };
            IMatrixData mdata = PerseusFactory.CreateMatrixData(mainValues, mainNames);
            mdata.ColumnDescriptions = new List <string> { "Description Col", "Col", "Col", "Description No expand" };

            double[][] multiNum =
            {
                new[] { 0.0, 1.0 },
                new[] { 2.0 }
            };
            mdata.AddMultiNumericColumn("MultiNum", "", multiNum);

            string[] stringCol = { "row1", "row2" };
            mdata.AddStringColumn("String", "", stringCol);

            IMatrixData[]   supplData = null;
            IDocumentData[] docs      = null;
            new ExpandSiteTable().ProcessData(mdata, new Parameters(), ref supplData, ref docs, CreateProcessInfo());

            // Main columns: "No expand" is expected first, followed by a single "Col" column.
            Assert.AreEqual(2, mdata.ColumnCount);
            CollectionAssert.AreEqual(new [] { "No expand", "Col" }, mdata.ColumnNames.ToArray());
            Assert.AreEqual(2, mdata.ColumnDescriptions.Count);
            CollectionAssert.AreEqual(new [] { "Description No expand", "Description Col" }, mdata.ColumnDescriptions.ToArray());

            // Rows: three times the original two.
            Assert.AreEqual(6, mdata.RowCount);

            // Annotation columns: the original values are expected three times in a row.
            Assert.AreEqual(2, mdata.StringColumnCount);
            CollectionAssert.AreEqual(new [] { "String", "Unique identifier" }, mdata.StringColumnNames);
            string[] expectedStrings = Enumerable.Repeat(stringCol, 3).SelectMany(block => block).ToArray();
            CollectionAssert.AreEqual(expectedStrings, mdata.StringColumns[0]);
            Assert.AreEqual(1, mdata.MultiNumericColumnCount);
            double[][] expectedMultiNum = Enumerable.Repeat(multiNum, 3).SelectMany(block => block).ToArray();
            CollectionAssert.AreEqual(expectedMultiNum, mdata.MultiNumericColumns[0]);
        }