/// <summary>
/// Collapses a four-row matrix on its "id" string column with <c>UniqueRows</c> and checks
/// the combined result for every column type.
/// </summary>
/// <remarks>
/// Row 0 carries the id "a" and rows 1-3 carry the id "b", so the assertions expect two rows.
/// Main and numeric columns are combined with <c>ArrayUtils.Median</c>; string, category and
/// multi-numeric columns are combined with the union helpers passed to <c>UniqueRows</c>.
/// </remarks>
public void SmallTest()
{
    IMatrixData mdata = PerseusFactory.CreateMatrixData(new double[,]
    {
        { 0, 4 },
        { 1, 5 },
        { 2, 6 },
        { 3, 7 }
    });
    mdata.AddStringColumn("id", "", new[] { "a", "b", "b", "b" });
    mdata.AddStringColumn("str", "", new[] { "a;b", "b;c", "c;d", "d;e" });
    mdata.AddCategoryColumn("cat", "", new[]
    {
        new[] { "a", "b" },
        new[] { "b", "c" },
        new[] { "c", "d" },
        new[] { "d", "e" }
    });
    // Exactly one value per matrix row. The fixture previously supplied a fifth value (4.0)
    // that did not correspond to any of the four rows.
    mdata.AddNumericColumn("num", "", new[] { 0, 1, 2, 3.0 });
    mdata.AddMultiNumericColumn("mnum", "", new[]
    {
        new[] { 0, 4d },
        new[] { 1, 5d },
        new[] { 2, 6d },
        new[] { 3, 7d }
    });

    mdata.UniqueRows(mdata.StringColumns[0], ArrayUtils.Median, UniqueRows.Union, UniqueRows.CatUnion,
        UniqueRows.MultiNumUnion);

    Assert.AreEqual(2, mdata.RowCount);
    // Main columns: the "b" group holds 1, 2, 3 (column 0) and 5, 6, 7 (column 1).
    CollectionAssert.AreEqual(new[] { 0, 2 }, mdata.Values.GetColumn(0));
    CollectionAssert.AreEqual(new[] { 4, 6 }, mdata.Values.GetColumn(1));
    CollectionAssert.AreEqual(new[] { "a;b", "b;c;d;e" }, mdata.GetStringColumn("str"));
    CollectionAssert.AreEqual(new[] { new[] { "a", "b" }, new[] { "b", "c", "d", "e" } },
        mdata.GetCategoryColumnAt(0));
    CollectionAssert.AreEqual(new[] { 0, 2 }, mdata.NumericColumns[0]);
    CollectionAssert.AreEqual(new[] { new[] { 0d, 4 }, new[] { 1d, 5, 2, 6, 3, 7 } },
        mdata.MultiNumericColumns[0]);
}
/// <summary>
/// Appends duplicates of the selected main, numerical, multi-numerical, categorical and text
/// columns to <paramref name="data"/>, giving each duplicate a name obtained from
/// <c>StringUtils.GetNextAvailableName</c>.
/// </summary>
/// <param name="data">Matrix that is modified in place.</param>
/// <param name="param">
/// Supplies the <c>int[]</c> selections "Main columns", "Numerical columns",
/// "Multi-numerical columns", "Categorical columns" and "Text columns".
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
public void ProcessData(IMatrixData data, Parameters param, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo)
{
    int[] exColInds = param.GetParam<int[]>("Main columns").Value;
    int[] numColInds = param.GetParam<int[]>("Numerical columns").Value;
    int[] multiNumColInds = param.GetParam<int[]>("Multi-numerical columns").Value;
    int[] catColInds = param.GetParam<int[]>("Categorical columns").Value;
    int[] textColInds = param.GetParam<int[]>("Text columns").Value;

    if (exColInds.Length > 0)
    {
        // Re-extract every existing main column followed by the selected ones, so the
        // duplicates end up at positions ncol, ncol + 1, ...
        int ncol = data.ColumnCount;
        data.ExtractColumns(ArrayUtils.Concat(ArrayUtils.ConsecutiveInts(ncol), exColInds));
        HashSet<string> takenMain = new HashSet<string>(data.ColumnNames);
        for (int i = 0; i < exColInds.Length; i++)
        {
            string name = StringUtils.GetNextAvailableName(data.ColumnNames[ncol + i], takenMain);
            data.ColumnNames[ncol + i] = name;
            takenMain.Add(name);
        }
    }

    // In the loops below the set of taken names is rebuilt from the matrix on every iteration,
    // so it already contains the duplicates added by earlier iterations.
    foreach (int ind in numColInds)
    {
        HashSet<string> taken = new HashSet<string>(data.NumericColumnNames);
        string name = StringUtils.GetNextAvailableName(data.NumericColumnNames[ind], taken);
        data.AddNumericColumn(name, data.NumericColumnDescriptions[ind],
            (double[])data.NumericColumns[ind].Clone());
    }

    foreach (int ind in multiNumColInds)
    {
        HashSet<string> taken = new HashSet<string>(data.MultiNumericColumnNames);
        string name = StringUtils.GetNextAvailableName(data.MultiNumericColumnNames[ind], taken);

        // Array.Clone is shallow: cloning only the outer array would leave the duplicate
        // sharing its per-row arrays with the source column. Copy each row as well.
        double[][] source = data.MultiNumericColumns[ind];
        double[][] copy = new double[source.Length][];
        for (int row = 0; row < source.Length; row++)
        {
            copy[row] = source[row] == null ? null : (double[])source[row].Clone();
        }
        data.AddMultiNumericColumn(name, data.MultiNumericColumnDescriptions[ind], copy);
    }

    foreach (int ind in catColInds)
    {
        HashSet<string> taken = new HashSet<string>(data.CategoryColumnNames);
        string name = StringUtils.GetNextAvailableName(data.CategoryColumnNames[ind], taken);
        data.AddCategoryColumn(name, data.CategoryColumnDescriptions[ind], data.GetCategoryColumnAt(ind));
    }

    foreach (int ind in textColInds)
    {
        HashSet<string> taken = new HashSet<string>(data.StringColumnNames);
        string name = StringUtils.GetNextAvailableName(data.StringColumnNames[ind], taken);
        // Use the text column's own description; ColumnDescriptions belongs to the main columns
        // and is indexed differently.
        data.AddStringColumn(name, data.StringColumnDescriptions[ind],
            (string[])data.StringColumns[ind].Clone());
    }
}
/// <summary>
/// Writes a small matrix with annotation rows and columns via <c>PerseusUtils.WriteMatrix</c>,
/// reads the text back with <c>PerseusUtils.ReadMatrix</c>, and checks the row, column and
/// annotation counts plus one string cell of the re-read matrix.
/// </summary>
public void WriteMatrixTest()
{
    // Main data: two rows, three named columns.
    double[,] mainValues = { { 1, 2, 3 }, { 3, 4, 5 } };
    List<string> mainNames = new List<string> { "col1", "col2", "col3" };
    IMatrixData written = PerseusFactory.CreateMatrixData(mainValues, mainNames);

    // Annotation rows (one entry per main column).
    string[][] categoryRow = { new[] { "cat1" }, new[] { "cat1", "cat2" }, new[] { "cat2" } };
    double[] numericRow = { -1.0, 1, 2 };
    written.AddCategoryRow("catrow", "this is catrow", categoryRow);
    written.AddNumericRow("numrow", "this is numrow", numericRow);

    // Annotation columns (one entry per row).
    written.AddStringColumn("strcol1", "this is stringcol1", new[] { "1", "2" });
    written.AddStringColumn("strcol2", "", new[] { "", "hallo" });
    written.AddNumericColumn("numcol", "", new[] { 1.0, 2.0 });
    double[][] multiNumericValues = { new[] { -2.0, 2.0 }, new double[] { } };
    written.AddMultiNumericColumn("multnumcol", "this is multnumcol", multiNumericValues);
    string[][] categoryValues = { new[] { "cat1", "cat1.1" }, new[] { "cat2", "cat1" } };
    written.AddCategoryColumn("catcol", "", categoryValues);

    // Serialize into memory and keep the output as text.
    string serialized;
    using (MemoryStream buffer = new MemoryStream())
    {
        using (StreamWriter output = new StreamWriter(buffer))
        {
            PerseusUtils.WriteMatrix(written, output);
            output.Flush();
            serialized = Encoding.UTF8.GetString(buffer.ToArray());
        }
    }

    // Parse the text back into a fresh matrix.
    IMatrixData reread = PerseusFactory.CreateMatrixData();
    PerseusUtils.ReadMatrix(
        reread,
        new ProcessInfo(new Settings(), status => { }, progress => { }, 1),
        () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(serialized))),
        "matrix1",
        '\t');

    Assert.AreEqual(2, reread.RowCount);
    Assert.AreEqual(3, reread.ColumnCount);
    Assert.AreEqual(2, reread.StringColumnCount);
    Assert.AreEqual(1, reread.NumericColumnCount);
    Assert.AreEqual(1, reread.CategoryColumnCount);
    Assert.AreEqual(1, reread.MultiNumericColumnCount);

    int strcol2Position = reread.StringColumnNames.FindIndex(col => col.Equals("strcol2"));
    Assert.AreEqual("hallo", reread.StringColumns[strcol2Position][1]);

    Assert.AreEqual(1, reread.CategoryRowCount);
    Assert.AreEqual(1, reread.NumericRowCount);
}
/// <summary>
/// Combines two matrices: starts from a clone of <c>inputData[0]</c> and adds main and
/// annotation columns taken from <c>inputData[1]</c> according to the row index map returned
/// by <c>GetIndexMap</c>.
/// </summary>
/// <param name="inputData">The two input matrices; element 0 is cloned, element 1 is the source of added data.</param>
/// <param name="parameters">
/// Read by <c>ParseMatchingColumns</c> and <c>ParseCopyParameters</c>, and directly for the
/// boolean options "Add indicator" and "Add original row numbers".
/// </param>
/// <param name="supplTables">Not used by this method.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Not used by this method.</param>
/// <returns>The combined matrix, with <c>Origin</c> set to "Combination".</returns>
public IMatrixData ProcessData(IMatrixData[] inputData, Parameters parameters, ref IMatrixData[] supplTables,
    ref IDocumentData[] documents, ProcessInfo processInfo)
{
    IMatrixData left = inputData[0];
    IMatrixData right = inputData[1];

    ((int, int) first, (int, int)? second, bool outer, bool ignoreCase) matchSpec =
        ParseMatchingColumns(parameters);
    (int[][] rowMap, int[] rightOnlyRows) =
        GetIndexMap(left, right, matchSpec.first, matchSpec.second, matchSpec.ignoreCase);

    IMatrixData combined = (IMatrixData)left.Clone();
    combined.Origin = "Combination";

    if (matchSpec.outer)
    {
        // Each unmapped row of the second matrix gets its own new row appended after the
        // rows already covered by the index map.
        int mappedRowCount = rowMap.Length;
        int[][] widenedMap = rowMap
            .Concat(rightOnlyRows.Select(rightRow => new[] { rightRow }))
            .ToArray();

        combined.AddEmptyRows(rightOnlyRows.Length);
        UpdateIdColumns(combined, right, mappedRowCount, rightOnlyRows, matchSpec.first, matchSpec.second);
        rowMap = widenedMap;
    }

    if (parameters.GetParam<bool>("Add indicator").Value)
    {
        AddIndicator(combined, right, rowMap);
    }

    if (parameters.GetParam<bool>("Add original row numbers").Value)
    {
        // One multi-numeric entry per result row, holding that row's index-map entries as doubles.
        double[][] rowNumbers = rowMap
            .Select(rows => rows.Select(row => Convert.ToDouble(row)).ToArray())
            .ToArray();
        combined.AddMultiNumericColumn("Original row numbers", "", rowNumbers);
    }

    ((int[] copy, int combine) main, int[] text, (int[] copy, int combine) numeric, int[] category) =
        ParseCopyParameters(parameters);

    SetAnnotationRows(combined, left, right, main.copy);
    AddMainColumns(combined, right, rowMap, main.copy, GetAveraging(main.combine));
    AddAnnotationColumns(combined, right, rowMap, text, numeric, category);

    return combined;
}
/// <summary>
/// Runs <c>ExpandSiteTable.ProcessData</c> with default parameters on a two-row matrix whose
/// main columns are "Col___1", "Col___2", "Col___3" and "No expand", then checks the resulting
/// main columns, descriptions, row count and annotation columns.
/// </summary>
public void TestSmallExample()
{
    // Arrange: main data with descriptions, one multi-numeric column and one string column.
    List<string> mainNames = new List<string> { "Col___1", "Col___2", "Col___3", "No expand" };
    double[,] mainValues = { { 0.0, 1.0, 0, 5 }, { 2.0, 3.0, 0, 5 } };
    IMatrixData matrix = PerseusFactory.CreateMatrixData(mainValues, mainNames);
    matrix.ColumnDescriptions = new List<string>
    {
        "Description Col",
        "Col",
        "Col",
        "Description No expand"
    };

    double[][] multiNumValues = { new[] { 0.0, 1.0 }, new[] { 2.0 } };
    matrix.AddMultiNumericColumn("MultiNum", "", multiNumValues);

    string[] rowLabels = { "row1", "row2" };
    matrix.AddStringColumn("String", "", rowLabels);

    // Act.
    IMatrixData[] supplementary = null;
    IDocumentData[] documentOutputs = null;
    ExpandSiteTable processor = new ExpandSiteTable();
    processor.ProcessData(matrix, new Parameters(), ref supplementary, ref documentOutputs, CreateProcessInfo());

    // Assert: main columns and their descriptions.
    Assert.AreEqual(2, matrix.ColumnCount);
    CollectionAssert.AreEqual(new[] { "No expand", "Col" }, matrix.ColumnNames.ToArray());
    Assert.AreEqual(2, matrix.ColumnDescriptions.Count);
    CollectionAssert.AreEqual(new[] { "Description No expand", "Description Col" },
        matrix.ColumnDescriptions.ToArray());

    // Assert: six rows are expected, with the annotation values repeated three times in order.
    Assert.AreEqual(6, matrix.RowCount);
    Assert.AreEqual(2, matrix.StringColumnCount);
    CollectionAssert.AreEqual(new[] { "String", "Unique identifier" }, matrix.StringColumnNames);

    string[] tripledLabels = rowLabels.Concat(rowLabels).Concat(rowLabels).ToArray();
    CollectionAssert.AreEqual(tripledLabels, matrix.StringColumns[0]);

    Assert.AreEqual(1, matrix.MultiNumericColumnCount);
    double[][] tripledMultiNum = multiNumValues.Concat(multiNumValues).Concat(multiNumValues).ToArray();
    CollectionAssert.AreEqual(tripledMultiNum, matrix.MultiNumericColumns[0]);
}