Example #1
0
        /// <summary>
        /// Writes a two-line, space-separated text file, loads it through
        /// <c>UnstructuredTxtUpload</c> with "Split into columns" enabled and
        /// verifies the resulting string column names and values.
        /// </summary>
        public void TestLoadingRawTxt()
        {
            var lines = new[]
            {
                "Col_1 Col_2",
                "a b",
            };
            var tmpFile = Path.GetTempFileName();

            try
            {
                File.WriteAllLines(tmpFile, lines);
                var upload     = new UnstructuredTxtUpload();
                var errString  = string.Empty;
                var parameters = upload.GetParameters(ref errString);

                Assert.IsTrue(string.IsNullOrEmpty(errString));
                parameters.GetParam <string>("File").Value = tmpFile;
                var subparam = parameters.GetParamWithSubParams <bool>("Split into columns");

                subparam.Value = true;
                // NOTE(review): 2 is presumably the index of the space separator in the choice list - confirm.
                subparam.GetSubParameters().GetParam <int>("Separator").Value = 2;
                var mdata = PerseusFactory.CreateMatrixData();

                IMatrixData[]   suppl  = null;
                IDocumentData[] supplD = null;
                upload.LoadData(mdata, parameters, ref suppl, ref supplD, new ProcessInfo(new Settings(), s => { }, i => { }, 1));
                CollectionAssert.AreEqual(new [] { "Col_1", "Col_2" }, mdata.StringColumnNames);
                CollectionAssert.AreEqual(new [] { "a" }, mdata.StringColumns[0]);
                CollectionAssert.AreEqual(new [] { "b" }, mdata.StringColumns[1]);
            }
            finally
            {
                // Path.GetTempFileName creates the file on disk; remove it even when an assertion fails.
                File.Delete(tmpFile);
            }
        }
Example #2
0
        /// <summary>
        /// Runs the embedded <c>matrix_with_supp_tables</c> script through
        /// <c>MatrixProcessingWithSupplementaryTables</c> and expects two non-null
        /// supplementary tables. Note the unconditional <c>Assert.Inconclusive</c>
        /// call that precedes the actual processing.
        /// </summary>
        public void TestMatrixProcessingWithSupplementaryTables()
        {
            var pythonFound = PluginInterop.Python.Utils.TryFindPythonExecutable(out string _);
            if (!pythonFound)
            {
                Assert.Inconclusive("Python not installed");
            }
            Assert.Inconclusive("Cannot be tested without dependency on PerseusLibS");

            // Write the embedded script to disk so it can be handed over as "Script file".
            var scriptBytes = Properties.Resources.matrix_with_supp_tables;
            var scriptPath  = Path.GetTempFileName();
            File.WriteAllText(scriptPath, Encoding.UTF8.GetString(scriptBytes));

            var columnNames = new List <string> { "col 1", "col 2" };
            var values      = new [, ] { { 0.0, 1.0 } };
            var processing  = new MatrixProcessingWithSupplementaryTables();
            var mdata       = PerseusFactory.CreateMatrixData(values, columnNames);

            var errString  = string.Empty;
            var parameters = processing.GetParameters(mdata, ref errString);
            Assert.IsTrue(string.IsNullOrEmpty(errString));
            parameters.GetParam <string>("Script file").Value = scriptPath;

            IMatrixData[]   suppTables = null;
            IDocumentData[] suppDocs   = null;
            var pinfo = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
            processing.ProcessData(mdata, parameters, ref suppTables, ref suppDocs, pinfo);

            Assert.IsTrue(string.IsNullOrEmpty(pinfo.ErrString), pinfo.ErrString);
            Assert.AreEqual(2, suppTables.Length);
            for (int t = 0; t < suppTables.Length; t++)
            {
                Assert.NotNull(suppTables[t]);
            }
        }
Example #3
0
        /// <summary>
        /// Same scenario as the plain-text upload test, but the input is written
        /// as a gzip-compressed file with a <c>.gz</c> extension before it is
        /// loaded through <c>UnstructuredTxtUpload</c>.
        /// </summary>
        public void TestLoadingRawTxtFromGzip()
        {
            var lines = new[]
            {
                "Col_1 Col_2",
                "a b",
            };
            // Path.GetTempFileName creates an empty file; the ".gz" sibling is a second file on disk.
            var baseFile = Path.GetTempFileName();
            var tmpFile  = baseFile + ".gz";

            try
            {
                using (var memory = new MemoryStream(Encoding.UTF8.GetBytes(string.Join("\n", lines))))
                    using (var outFile = File.Create(tmpFile))
                        using (var gzip = new GZipStream(outFile, CompressionMode.Compress))
                        {
                            memory.CopyTo(gzip);
                        }
                var upload     = new UnstructuredTxtUpload();
                var errString  = string.Empty;
                var parameters = upload.GetParameters(ref errString);

                Assert.IsTrue(string.IsNullOrEmpty(errString));
                parameters.GetParam <string>("File").Value = tmpFile;
                var subparam = parameters.GetParamWithSubParams <bool>("Split into columns");

                subparam.Value = true;
                // NOTE(review): 2 is presumably the index of the space separator in the choice list - confirm.
                subparam.GetSubParameters().GetParam <int>("Separator").Value = 2;
                var mdata = PerseusFactory.CreateMatrixData();

                IMatrixData[]   suppl  = null;
                IDocumentData[] supplD = null;
                upload.LoadData(mdata, parameters, ref suppl, ref supplD, new ProcessInfo(new Settings(), s => { }, i => { }, 1));
                CollectionAssert.AreEqual(new [] { "Col_1", "Col_2" }, mdata.StringColumnNames);
                CollectionAssert.AreEqual(new [] { "a" }, mdata.StringColumns[0]);
                CollectionAssert.AreEqual(new [] { "b" }, mdata.StringColumns[1]);
            }
            finally
            {
                // Remove both the compressed file and the empty placeholder, even when an assertion fails.
                File.Delete(tmpFile);
                File.Delete(baseFile);
            }
        }
        /// <summary>
        /// Reads the tab-separated <paramref name="file"/> into a scratch matrix and
        /// copies its annotation columns into <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Target that receives the annotation columns.</param>
        /// <param name="file">Path of the tab-separated file to read.</param>
        /// <param name="processInfo">Process info forwarded to the matrix reader.</param>
        private static void ReadMatrixDataInto(IDataWithAnnotationColumns data, string file, ProcessInfo processInfo)
        {
            const char separator = '\t';
            var scratch = PerseusFactory.CreateMatrixData();
            PerseusUtils.ReadMatrixFromFile(scratch, processInfo, file, separator);
            data.CopyAnnotationColumnsFrom(scratch);
        }
Example #5
0
        /// <summary>
        /// Writes a matrix containing quoted and tab-containing string values with
        /// <c>PerseusUtils.WriteMatrix</c>, reads it back with <c>PerseusUtils.ReadMatrix</c>
        /// and checks that the string column survives the round trip. The fully quoted
        /// entry is expected to come back without its surrounding quotes.
        /// </summary>
        public void TestReadWriteMatrixRoundTrip()
        {
            var mdata = PerseusFactory.CreateMatrixData();

            mdata.AddStringColumn("StringCol", "", new []
            {
                "Regular text",
                "\"Regular quoted text\"",
                "\"Quote stops\" in the middle",
                "\"Escaped\tseparator\"with extra",
            });
            mdata.AddNumericColumn("NumCol", "", mdata.StringColumns[0].Select(_ => 1.0).ToArray());
            mdata.AddCategoryColumn("CatCol", "", mdata.StringColumns[0].Select(_ => new string[0]).ToArray());
            string content;

            using (var memory = new MemoryStream())
                using (var writer = new StreamWriter(memory))
                {
                    PerseusUtils.WriteMatrix(mdata, writer);
                    writer.Flush();
                    content = Encoding.UTF8.GetString(memory.ToArray());
                }
            var mdata2      = PerseusFactory.CreateMatrixData();
            var processInfo = new ProcessInfo(new Settings(), s => { }, i => { }, 1);

            PerseusUtils.ReadMatrix(mdata2, processInfo, () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(content))), "name", '\t');

            // Build the expectation on a copy so the source matrix is not mutated through an aliased array.
            var expected = mdata.StringColumns[0].ToArray();

            expected[1] = expected[1].Trim('"');
            CollectionAssert.AreEqual(expected, mdata2.StringColumns[0]);
        }
Example #6
0
 /// <summary>
 /// Creates the shared 3x3 matrix fixture and attaches a string column named "test".
 /// </summary>
 public void Setup()
 {
     var values = new[, ]
     {
         { 0.0, 1.0, 0.0 },
         { 0.0, 0.0, 0.0 },
         { 0.0, 1.0, 0.0 }
     };
     var labels = new [] { "a", "b", "a" };

     _mdata = PerseusFactory.CreateMatrixData(values);
     _mdata.AddStringColumn("test", "", labels);
 }
Example #7
0
        /// <summary>
        /// Read supplementary files according to file paths and data types.
        /// </summary>
        /// <param name="suppFiles">Paths of the supplementary outputs, one per table.</param>
        /// <param name="supplDataTypes">Data type of each supplementary output; entry <c>i</c> describes <c>suppFiles[i]</c>.</param>
        /// <param name="processInfo">Process info forwarded to the individual readers.</param>
        /// <returns>One data object per entry in <paramref name="suppFiles"/>, in the same order.</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="suppFiles"/> or <paramref name="supplDataTypes"/> is null.</exception>
        /// <exception cref="ArgumentException">If fewer data types than files were provided.</exception>
        /// <exception cref="NotImplementedException">If a data type other than matrix or network is requested.</exception>
        public static IData[] ReadSupplementaryData(string[] suppFiles, DataType[] supplDataTypes, ProcessInfo processInfo)
        {
            if (suppFiles == null)
            {
                throw new ArgumentNullException(nameof(suppFiles));
            }
            if (supplDataTypes == null)
            {
                throw new ArgumentNullException(nameof(supplDataTypes));
            }
            // Surplus data types are tolerated (they were ignored before); missing ones would fail mid-loop.
            if (supplDataTypes.Length < suppFiles.Length)
            {
                throw new ArgumentException(
                    $"Expected at least {suppFiles.Length} data types but got {supplDataTypes.Length}.", nameof(supplDataTypes));
            }
            var numSupplTables = suppFiles.Length;

            IData[] supplData = new IData[numSupplTables];
            for (int i = 0; i < numSupplTables; i++)
            {
                switch (supplDataTypes[i])
                {
                case DataType.Matrix:
                    // Matrices are stored as a single tab-separated file.
                    var mdata = PerseusFactory.CreateMatrixData();
                    PerseusUtils.ReadMatrixFromFile(mdata, processInfo, suppFiles[i], '\t');
                    supplData[i] = mdata;
                    break;

                case DataType.Network:
                    // Networks are read through FolderFormat, which receives the path as a folder.
                    var ndata = PerseusFactory.CreateNetworkData();
                    FolderFormat.Read(ndata, suppFiles[i], processInfo);
                    supplData[i] = ndata;
                    break;

                default:
                    throw new NotImplementedException($"Data type {supplDataTypes[i]} not supported!");
                }
            }
            return(supplData);
        }
Example #8
0
        /// <summary>
        /// Generates a random matrix with default parameters, transposes it and checks
        /// that the matrix reports itself as consistent after each step.
        /// </summary>
        public void TestIsConsistent()
        {
            var errString = string.Empty;
            var pInfo     = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
            IMatrixData[]   suppl  = null;
            IDocumentData[] supplD = null;

            // Step 1: create the random matrix.
            var generator       = new CreateRandomMatrix();
            var randomParameters = generator.GetParameters(ref errString);
            Assert.IsTrue(string.IsNullOrEmpty(errString), errString);

            var mdata = PerseusFactory.CreateMatrixData();
            generator.LoadData(mdata, randomParameters, ref suppl, ref supplD, pInfo);
            Assert.IsTrue(string.IsNullOrEmpty(pInfo.ErrString), pInfo.ErrString);

            var consistentAfterLoad = mdata.IsConsistent(out string loadMessage);
            Assert.IsTrue(consistentAfterLoad, loadMessage);

            // Step 2: transpose in place and re-check.
            var transpose           = new Transpose();
            var transposeParameters = transpose.GetParameters(mdata, ref errString);
            Assert.IsTrue(string.IsNullOrEmpty(errString), errString);

            transpose.ProcessData(mdata, transposeParameters, ref suppl, ref supplD, pInfo);
            Assert.IsTrue(string.IsNullOrEmpty(pInfo.ErrString), pInfo.ErrString);

            var consistentAfterTranspose = mdata.IsConsistent(out string transposeMessage);
            Assert.IsTrue(consistentAfterTranspose, transposeMessage);
        }
        /// <summary>
        /// Runs <c>ManageCategoricalAnnotRow</c> with action 1 and the row name "Experiment"
        /// on columns "a_1", "a_2", "b_1" and expects a categorical row with the
        /// entries "a", "a", "b".
        /// </summary>
        public void TestGetParameters()
        {
            var values = new[, ]
            {
                { 0.0, 0, 0 },
                { 1, 1, 1 }
            };
            var columnNames = new List <string> { "a_1", "a_2", "b_1" };
            var mdata       = PerseusFactory.CreateMatrixData(values, columnNames);

            var processing  = new ManageCategoricalAnnotRow();
            var errorString = string.Empty;
            var parameters  = processing.GetParameters(mdata, ref errorString);
            Assert.AreEqual(string.Empty, errorString);

            // NOTE(review): action index 1 presumably derives the row from the column names - confirm.
            var action = parameters.GetParamWithSubParams <int>("Action");
            action.Value = 1;
            action.GetSubParameters().GetParam <string>("Name").Value = "Experiment";

            IMatrixData[]   suppl  = null;
            IDocumentData[] suppld = null;
            var pInfo = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
            processing.ProcessData(mdata, parameters, ref suppl, ref suppld, pInfo);

            Assert.AreEqual("Experiment", mdata.CategoryRowNames[0]);
            CollectionAssert.AreEquivalent(new [] { "a", "b" }, mdata.GetCategoryRowValuesAt(0));
            CollectionAssert.AreEqual(new [] { "a" }, mdata.GetCategoryRowEntryAt(0, 0));
            CollectionAssert.AreEqual(new [] { "a" }, mdata.GetCategoryRowEntryAt(0, 1));
            CollectionAssert.AreEqual(new [] { "b" }, mdata.GetCategoryRowEntryAt(0, 2));
        }
Example #10
0
        /// <summary>
        /// Reads network data from a folder. <c>networks.txt</c> provides the "guid" and
        /// "name" columns listing the networks; for every listed network the files
        /// <c>{guid}_nodes.txt</c> and <c>{guid}_edges.txt</c> are read and turned into a graph.
        /// </summary>
        /// <param name="ndata">Network data that receives the annotation columns and the networks.</param>
        /// <param name="outFolder">Folder containing the network files.</param>
        /// <param name="processInfo">Process info forwarded to the matrix reader.</param>
        public static void Read(INetworkData ndata, string outFolder, ProcessInfo processInfo)
        {
            var networksFile = Path.Combine(outFolder, "networks.txt");

            ReadMatrixDataInto(ndata, networksFile, processInfo);
            var listedNetworks = ndata.GetStringColumn("guid")
                                 .Zip(ndata.GetStringColumn("name"), (id, label) => new { Id = id, Label = label });
            foreach (var listed in listedNetworks)
            {
                var guid = Guid.Parse(listed.Id);

                var nodeTable = PerseusFactory.CreateDataWithAnnotationColumns();
                var edgeTable = PerseusFactory.CreateDataWithAnnotationColumns();
                ReadMatrixDataInto(nodeTable, Path.Combine(outFolder, $"{guid}_nodes.txt"), processInfo);
                ReadMatrixDataInto(edgeTable, Path.Combine(outFolder, $"{guid}_edges.txt"), processInfo);

                // One graph node per node-table row; remember both the row index and the node name.
                var graph      = new Graph();
                var nodeIndex  = new Dictionary <INode, int>();
                var nodeByName = new Dictionary <string, INode>();
                var nodeNames  = nodeTable.GetStringColumn("node");
                for (int n = 0; n < nodeTable.RowCount; n++)
                {
                    var created = graph.AddNode();
                    nodeIndex[created]       = n;
                    nodeByName[nodeNames[n]] = created;
                }

                // One graph edge per edge-table row, resolved through the node names.
                var sources   = edgeTable.GetStringColumn("source");
                var targets   = edgeTable.GetStringColumn("target");
                var edgeIndex = new Dictionary <IEdge, int>();
                for (int e = 0; e < edgeTable.RowCount; e++)
                {
                    var from = nodeByName[sources[e]];
                    var to   = nodeByName[targets[e]];
                    edgeIndex[graph.AddEdge(from, to)] = e;
                }

                var info = new NetworkInfo(graph, nodeTable, nodeIndex, edgeTable, edgeIndex, listed.Label, guid);
                ndata.AddNetworks(info);
            }
            // NOTE(review): networks.txt is read a second time after adding the networks; presumably
            // AddNetworks alters the annotation columns and this restores them - confirm before removing.
            ReadMatrixDataInto(ndata, networksFile, processInfo);
        }
        /// <summary>
        /// Matches a base table with ids "n1;n2" and "n3" against a matrix with ids
        /// "n1", "n2", "n3" and expects the copied main column to arrive as a single
        /// multi-numeric column holding all matched values per base row.
        /// </summary>
        public void TestConvertNumericToMultiNumeric()
        {
            var mBase = PerseusFactory.CreateMatrixData();

            mBase.AddStringColumn("Id", "", new [] { "n1;n2", "n3" });
            var mdata = PerseusFactory.CreateMatrixData(new[, ] {
                { 0.0 }, { 1.0 }, { 2.0 }
            });

            mdata.AddStringColumn("Id", "", new [] { "n1", "n2", "n3" });
            var match     = new MatchingRowsByName();
            var errString = string.Empty;
            var param     = match.GetParameters(new [] { mBase, mdata }, ref errString);

            param.GetParam <int[]>("Copy main columns").Value        = new[] { 0 };
            // NOTE(review): 5 is presumably the index of the "keep separate" combine option - confirm.
            param.GetParam <int>("Combine copied main values").Value = 5;
            IMatrixData[]   supplTables = null;
            IDocumentData[] documents   = null;
            var             result      = match.ProcessData(new[] { mBase, mdata }, param, ref supplTables, ref documents,
                                                            BaseTest.CreateProcessInfo());

            // Expected value goes first so a failure message reports expected/actual correctly.
            Assert.AreEqual(1, result.MultiNumericColumnCount);
            CollectionAssert.AreEqual(new [] { 0.0, 1.0 }, result.MultiNumericColumns[0][0]);
            CollectionAssert.AreEqual(new [] { 2.0 }, result.MultiNumericColumns[0][1]);
        }
Example #12
0
        /// <summary>
        /// Runs the external code file with the configured executable and loads the
        /// resulting matrix and supplementary tables from the files it wrote.
        /// </summary>
        /// <remarks>
        /// The process is invoked as <c>codeFile paramFile outFile suppFile...</c>.
        /// On a missing code file or a non-zero return value from the process,
        /// <c>processInfo.ErrString</c> is set and the method returns without reading any output.
        /// </remarks>
        /// <param name="mdata">Matrix that receives the main output.</param>
        /// <param name="param">Parameters; also serialized to a temporary file for the process.</param>
        /// <param name="supplTables">Supplementary tables; created when null.</param>
        /// <param name="documents">Not touched by this method.</param>
        /// <param name="processInfo">Receives status updates and error messages.</param>
        public void LoadData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents,
                             ProcessInfo processInfo)
        {
            var remoteExe = GetExectuable(param);

            // Validate the code file before creating any temporary files so an early return leaves nothing behind.
            if (!TryGetCodeFile(param, out string codeFile))
            {
                processInfo.ErrString = $"Code file '{codeFile}' was not found";
                return;
            }
            var paramFile = Path.GetTempFileName();

            param.ToFile(paramFile);
            var outFile = Path.GetTempFileName();

            if (supplTables == null)
            {
                supplTables = Enumerable.Range(0, NumSupplTables).Select(i => PerseusFactory.CreateMatrixData()).ToArray();
            }
            // One temporary output file per supplementary table.
            var suppFiles = supplTables.Select(table => Path.GetTempFileName()).ToArray();
            // NOTE(review): paths are passed unquoted; a path containing spaces may be split by the process launcher.
            var args      = $"{codeFile} {paramFile} {outFile} {string.Join(" ", suppFiles)}";

            Debug.WriteLine($"executing > {remoteExe} {args}");
            if (Utils.RunProcess(remoteExe, args, processInfo.Status, out string processInfoErrString) != 0)
            {
                processInfo.ErrString = processInfoErrString;
                return;
            }
            PerseusUtils.ReadMatrixFromFile(mdata, processInfo, outFile, '\t');
            for (int i = 0; i < NumSupplTables; i++)
            {
                PerseusUtils.ReadMatrixFromFile(supplTables[i], processInfo, suppFiles[i], '\t');
            }
        }
Example #13
0
        /// <summary>
        /// Matches two 2x2 matrices by column name ("col 1"/"col 2" and "col 2"/"col 3")
        /// and expects four rows with the shared column first and NaN where a matrix
        /// has no value for a column.
        /// </summary>
        public void TestSmallExample()
        {
            var first = PerseusFactory.CreateMatrixData(
                new double[, ] { { 0, 1 }, { 2, 3 } },
                new List <string> { "col 1", "col 2" });
            var second = PerseusFactory.CreateMatrixData(
                new double[, ] { { 4, 5 }, { 6, 7 } },
                new List <string> { "col 2", "col 3" });
            var inputs = new[] { first, second };

            var matching   = new MatchingColumnsByName();
            var errString  = string.Empty;
            var parameters = matching.GetParameters(inputs, ref errString);
            Assert.IsTrue(string.IsNullOrEmpty(errString));

            IMatrixData[]   supplTables = null;
            IDocumentData[] documents   = null;
            var pInfo  = BaseTest.CreateProcessInfo();
            var result = matching.ProcessData(inputs, parameters, ref supplTables, ref documents, pInfo);
            Assert.IsTrue(string.IsNullOrEmpty(pInfo.ErrString));

            Assert.AreEqual(4, result.RowCount);
            CollectionAssert.AreEqual(new [] { "col 2", "col 1", "col 3" }, result.ColumnNames);

            var sharedColumn = result.Values.GetColumn(0).ToArray();
            CollectionAssert.AreEqual(new [] { 1, 3, 4, 6.0 }, sharedColumn);

            var firstOnlyColumn = result.Values.GetColumn(1).ToArray();
            CollectionAssert.AreEqual(new [] { 0, 2, double.NaN, double.NaN }, firstOnlyColumn);

            var secondOnlyColumn = result.Values.GetColumn(2).ToArray();
            CollectionAssert.AreEqual(new [] { double.NaN, double.NaN, 5, 7 }, secondOnlyColumn);
        }
        /// <summary>
        /// Runs the embedded <c>matrix_to_network</c> script through the Python
        /// <c>NetworkFromMatrix</c> processing and expects no error. Note the
        /// unconditional <c>Assert.Inconclusive</c> call that precedes the actual processing.
        /// </summary>
        public void TestNetworkFromMatrix()
        {
            var pythonFound = PluginInterop.Python.Utils.TryFindPythonExecutable(out string _);
            if (!pythonFound)
            {
                Assert.Inconclusive("Python not installed");
            }
            Assert.Inconclusive("Cannot be tested without dependency on PerseusLibS");

            // Write the embedded script to disk so it can be handed over as "Script file".
            var scriptBytes = Properties.Resources.matrix_to_network;
            var scriptPath  = Path.GetTempFileName();
            File.WriteAllText(scriptPath, Encoding.UTF8.GetString(scriptBytes));

            var columnNames = new List <string> { "col 1", "col 2" };
            var values      = new [, ] { { 0.0, 1.0 } };
            var processing  = new PluginInterop.Python.NetworkFromMatrix();
            var mdata       = PerseusFactory.CreateMatrixData(values, columnNames);

            var errString  = string.Empty;
            var parameters = processing.GetParameters(mdata, ref errString);
            Assert.IsTrue(string.IsNullOrEmpty(errString));
            parameters.GetParam <string>("Script file").Value = scriptPath;

            var ndata = PerseusFactoryAnnColumns.CreateNetworkData();
            IData[] suppData = null;
            var pinfo = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
            processing.ProcessData(mdata, ndata, parameters, ref suppData, pinfo);

            Assert.IsTrue(string.IsNullOrEmpty(pinfo.ErrString), pinfo.ErrString);
        }
Example #15
0
        /// <summary>
        /// Writes a table with string, numeric, multi-numeric and category columns using
        /// <c>PerseusUtils.WriteDataWithAnnotationColumns</c>, reads it back as a matrix and
        /// checks the row count, the per-type column counts and one string value.
        /// </summary>
        public void WriteDataWithAnnotationColumnsTest()
        {
            // Source table with one or two columns of every annotation type.
            IDataWithAnnotationColumns source = PerseusFactory.CreateDataWithAnnotationColumns();
            source.AddStringColumn("strcol1", "this is stringcol1", new[] { "1", "2" });
            source.AddStringColumn("strcol2", "", new[] { "", "hallo" });
            source.AddNumericColumn("numcol", "", new[] { 1.0, 2.0 });
            source.AddMultiNumericColumn("multnumcol", "this is multnumcol", new[] { new[] { -2.0, 2.0 }, new double[] {} });
            source.AddCategoryColumn("catcol", "", new[] { new[] { "cat1", "cat1.1" }, new[] { "cat2", "cat1" } });

            // Serialize into memory.
            string serialized;
            using (MemoryStream buffer = new MemoryStream())
                using (StreamWriter writer = new StreamWriter(buffer))
                {
                    PerseusUtils.WriteDataWithAnnotationColumns(source, writer);
                    writer.Flush();
                    serialized = Encoding.UTF8.GetString(buffer.ToArray());
                }

            // Read the serialized text back into a fresh matrix.
            IMatrixData loaded = PerseusFactory.CreateMatrixData();
            var pInfo = new ProcessInfo(new Settings(), status => { }, progress => { }, 1, i => { });
            PerseusUtils.ReadMatrix(loaded, pInfo,
                                    () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(serialized))),
                                    "matrix1", '\t');
            IDataWithAnnotationColumns roundTripped = loaded;

            Assert.AreEqual(2, roundTripped.RowCount);
            Assert.AreEqual(2, roundTripped.StringColumnCount);
            Assert.AreEqual(1, roundTripped.NumericColumnCount);
            Assert.AreEqual(1, roundTripped.CategoryColumnCount);
            Assert.AreEqual(1, roundTripped.MultiNumericColumnCount);

            var strcol2Index = roundTripped.StringColumnNames.FindIndex(col => col.Equals("strcol2"));
            Assert.AreEqual("hallo", roundTripped.StringColumns[strcol2Index][1]);
        }
Example #16
0
        /// <summary>
        /// Reads a tab-separated matrix whose string column contains double quotes in
        /// various positions (fully quoted, partially quoted, quoted separators) and
        /// checks the parsed main, string and numeric columns.
        /// </summary>
        public void TestReadMatrixFromTabsepFileWithDoubleQuotes()
        {
            var content = string.Join("\n", new[]
            {
                "Col\tStringCol\tNumCol",
                "#!{Type}E\tT\tN",
                "-1.0\thello\t12",
                "1.0\t\"Actin family, ARP subfamily\";Actin family\t4",
                "2.0\t\"Regular quoted text\"\t4",
                "3.0\t\"Escaped\tseparator\"with extra\t4",
                "4.0\t\"Quote between separators\t\"\t4",
                "4.0\tQuote \"in\tthe\" middle\t4",
            });
            var bytes = Encoding.UTF8.GetBytes(content);

            var mdata       = PerseusFactory.CreateMatrixData();
            var processInfo = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
            PerseusUtils.ReadMatrix(mdata, processInfo, () => new StreamReader(new MemoryStream(bytes)), "name", '\t');

            // Exactly one column of each kind is expected.
            Assert.AreEqual("Col", mdata.ColumnNames.Single());
            Assert.AreEqual("StringCol", mdata.StringColumnNames.Single());
            Assert.AreEqual("NumCol", mdata.NumericColumnNames.Single());

            var expectedMain = new [] { -1.0, 1.0, 2.0, 3.0, 4.0, 4.0 };
            CollectionAssert.AreEqual(expectedMain, mdata.Values.GetColumn(0).ToArray());

            var expectedStrings = new []
            {
                "hello",
                "\"Actin family, ARP subfamily\";Actin family",
                "Regular quoted text",
                "\"Escaped\tseparator\"with extra",
                "Quote between separators",
                "Quote \"in\tthe\" middle"
            };
            CollectionAssert.AreEqual(expectedStrings, mdata.StringColumns.Single());

            var expectedNumeric = new [] { 12, 4, 4, 4, 4, 4 };
            CollectionAssert.AreEqual(expectedNumeric, mdata.NumericColumns.Single());
        }
Example #17
0
        /// <summary>
        /// Collapses a 4-row matrix to unique rows by the "id" column ("a", "b", "b", "b")
        /// and checks that main and numeric values are combined by median while string,
        /// category and multi-numeric values are combined by union.
        /// </summary>
        public void SmallTest()
        {
            IMatrixData mdata = PerseusFactory.CreateMatrixData(new double[, ]
            {
                { 0, 4 },
                { 1, 5 },
                { 2, 6 },
                { 3, 7 }
            });

            mdata.AddStringColumn("id", "", new [] { "a", "b", "b", "b" });
            mdata.AddStringColumn("str", "", new [] { "a;b", "b;c", "c;d", "d;e" });
            mdata.AddCategoryColumn("cat", "", new[] { new[] { "a", "b" }, new[] { "b", "c" }, new[] { "c", "d" }, new[] { "d", "e" } });
            // One value per matrix row; the column must have the same length as the other columns.
            mdata.AddNumericColumn("num", "", new [] { 0, 1, 2, 3.0 });
            mdata.AddMultiNumericColumn("mnum", "", new [] { new [] { 0, 4d }, new [] { 1, 5d }, new [] { 2, 6d }, new [] { 3, 7d } });
            mdata.UniqueRows(mdata.StringColumns[0], ArrayUtils.Median, UniqueRows.Union, UniqueRows.CatUnion, UniqueRows.MultiNumUnion);

            // Row "a" is kept as is; the three "b" rows collapse into one.
            Assert.AreEqual(2, mdata.RowCount);
            CollectionAssert.AreEqual(new [] { 0, 2 }, mdata.Values.GetColumn(0));
            CollectionAssert.AreEqual(new [] { 4, 6 }, mdata.Values.GetColumn(1));
            CollectionAssert.AreEqual(new [] { "a;b", "b;c;d;e" }, mdata.GetStringColumn("str"));
            CollectionAssert.AreEqual(new [] { new [] { "a", "b" }, new [] { "b", "c", "d", "e" } }, mdata.GetCategoryColumnAt(0));
            CollectionAssert.AreEqual(new [] { 0, 2 }, mdata.NumericColumns[0]);
            CollectionAssert.AreEqual(new [] { new [] { 0d, 4 }, new [] { 1d, 5, 2, 6, 3, 7 } }, mdata.MultiNumericColumns[0]);
        }
Example #18
0
        /// <summary>
        /// Matches two matrices by column name where both carry string, numeric and
        /// category annotation columns, some shared by name and some unique, and checks
        /// the merged annotation columns including the fill values for missing entries.
        /// </summary>
        public void TestSmallExampleWithAnnotationColumns()
        {
            // First matrix: unique "m1" string/numeric columns plus the shared ones.
            var first = PerseusFactory.CreateMatrixData(
                new double[, ] { { 0, 1 }, { 2, 3 } },
                new List <string> { "col 1", "col 2" });
            first.AddStringColumn("m1", "", new[] { "a", "b" });
            first.AddStringColumn("common string column", "", new[] { "c", "d" });
            first.AddNumericColumn("m1", "", new[] { 0, 1.0 });
            first.AddNumericColumn("common numeric column", "", new[] { 2, 3.0 });
            first.AddCategoryColumn("common category column", "", new [] { new [] { "cat1" }, new [] { "cat2", "cat3" } });

            // Second matrix: unique "m2" string column plus the shared ones.
            var second = PerseusFactory.CreateMatrixData(
                new double[, ] { { 4, 5 }, { 6, 7 } },
                new List <string> { "col 2", "col 3" });
            second.AddStringColumn("common string column", "", new [] { "e", "f" });
            second.AddStringColumn("m2", "", new [] { "g", "h" });
            second.AddNumericColumn("common numeric column", "", new[] { 4, 5.0 });
            second.AddCategoryColumn("common category column", "", new [] { new [] { "cat2" }, new [] { "cat1", "cat4" } });

            var inputs     = new[] { first, second };
            var matching   = new MatchingColumnsByName();
            var errString  = string.Empty;
            var parameters = matching.GetParameters(inputs, ref errString);
            Assert.IsTrue(string.IsNullOrEmpty(errString));

            IMatrixData[]   supplTables = null;
            IDocumentData[] documents   = null;
            var pInfo  = BaseTest.CreateProcessInfo();
            var result = matching.ProcessData(inputs, parameters, ref supplTables, ref documents, pInfo);
            Assert.IsTrue(string.IsNullOrEmpty(pInfo.ErrString));

            // String columns: shared column first, missing entries are empty strings.
            CollectionAssert.AreEqual(new [] { "common string column", "m1", "m2" }, result.StringColumnNames);
            CollectionAssert.AreEqual(new [] { "c", "d", "e", "f" }, result.StringColumns[0]);
            CollectionAssert.AreEqual(new [] { "a", "b", "", "" }, result.StringColumns[1]);
            CollectionAssert.AreEqual(new [] { "", "", "g", "h" }, result.StringColumns[2]);

            // Numeric columns: missing entries are NaN.
            CollectionAssert.AreEqual(new [] { "common numeric column", "m1" }, result.NumericColumnNames);
            CollectionAssert.AreEqual(new [] { 2, 3, 4, 5.0 }, result.NumericColumns[0]);
            CollectionAssert.AreEqual(new [] { 0, 1, double.NaN, double.NaN }, result.NumericColumns[1]);

            // Category column: compared row by row.
            CollectionAssert.AreEqual(new [] { "common category column" }, result.CategoryColumnNames);
            var expectedCategories = new[]
            {
                new[] { "cat1" },
                new[] { "cat2", "cat3" },
                new[] { "cat2" },
                new[] { "cat1", "cat4" }
            };
            var actualCategories = result.GetCategoryColumnAt(0);

            Assert.AreEqual(expectedCategories.Length, actualCategories.Length);
            for (int row = 0; row < actualCategories.Length; row++)
            {
                CollectionAssert.AreEqual(expectedCategories[row], actualCategories[row]);
            }
        }
Example #19
0
        /// <summary>
        /// Reads input that consists only of a header line and a type line and expects
        /// a table with zero rows.
        /// </summary>
        public void TestReadEmptyMatrixFromFile()
        {
            const string headerOnly = "Node\n#!{Type}T\n";
            var table       = PerseusFactory.CreateDataWithAnnotationColumns();
            var processInfo = BaseTest.CreateProcessInfo();

            PerseusUtils.ReadDataWithAnnotationColumns(
                table,
                processInfo,
                () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(headerOnly))),
                "test",
                '\t');

            Assert.AreEqual(0, table.RowCount);
        }
Example #20
0
        /// <summary>
        /// Writes a one-row table whose multi-numeric and string entries are null and
        /// passes as long as the writer does not throw.
        /// </summary>
        public void TestWriteMultiNumericColumnWithNulls()
        {
            var data = PerseusFactory.CreateDataWithAnnotationColumns();

            // Arrays of length one with default (null) entries.
            data.AddMultiNumericColumn("Test", "", new double[1][]);
            data.AddStringColumn("Test2", "", new string[1]);
            Assert.AreEqual(1, data.RowCount);

            // Dispose the stream and writer deterministically instead of leaving them to the GC.
            using (var memory = new MemoryStream())
                using (var writer = new StreamWriter(memory))
                {
                    PerseusUtils.WriteDataWithAnnotationColumns(data, writer);
                }
        }
        /// <summary>
        /// Builds the shared fixtures: a one-row peptide matrix (processed with
        /// <c>ExpandMultiNumeric</c> on text column 1), a two-row protein matrix, a
        /// two-row "expand" matrix, and the <c>MatchingRowsByName</c> instance with its
        /// parameters for matching "expand" against "protein main".
        /// </summary>
        public void TestInitialize()
        {
            // Peptide table with a single row referencing two protein groups.
            var peptideColumns = new List <string> { "pep_MS/MS Count" };
            peptides = PerseusFactory.CreateMatrixData(new[, ] { { 9.0f } }, peptideColumns);
            peptides.AddNumericColumn("pep_Intensity", "", new [] { 0.0 });
            peptides.AddStringColumn("pep_id", "", new [] { "35" });
            peptides.AddStringColumn("pep_Protein group IDs", "", new [] { "13;21" });
            peptides.Quality.Init(1, 1);
            peptides.Quality.Set(0, 0, 1);

            // Run ExpandMultiNumeric on the peptide table, selecting text column index 1.
            var expander      = new ExpandMultiNumeric();
            var errorString   = string.Empty;
            var expandParams  = expander.GetParameters(peptides, ref errorString);
            expandParams.GetParam <int[]>("Text columns").Value = new[] { 1 };
            IMatrixData[]   suppl = null;
            IDocumentData[] docs  = null;
            expander.ProcessData(peptides, expandParams, ref suppl, ref docs, CreateProcessInfo());

            // Protein table with one row per protein group.
            var proteinColumns = new List <string> { "prot_LFQ intensity" };
            var proteinValues  = new[, ]
            {
                { 166250000.0f },
                { 8346000.0f }
            };
            proteinMain      = PerseusFactory.CreateMatrixData(proteinValues, proteinColumns);
            proteinMain.Name = "protein main";
            proteinMain.AddStringColumn("prot_id", "", new [] { "13", "21" });
            proteinMain.AddStringColumn("prot_gene name", "", new [] { "geneA", "geneB" });

            // Hand-built table with one peptide row per protein group id.
            var expandColumns = new List <string> { "pep_MS/MS Count" };
            var expandValues  = new[, ]
            {
                { 9.0f },
                { 9.0f }
            };
            expand      = PerseusFactory.CreateMatrixData(expandValues, expandColumns);
            expand.Name = "expand";
            expand.AddNumericColumn("pep_Intensity", "", new [] { 0.0, 0.0 });
            expand.AddStringColumn("pep_id", "", new [] { "35", "35" });
            expand.AddStringColumn("pep_Protein group IDs", "", new [] { "13", "21" });

            matching = new MatchingRowsByName();
            var matchingError = string.Empty;
            parameters = matching.GetParameters(new[] { expand, proteinMain }, ref matchingError);
        }
        /// <summary>
        /// Runs <c>SummaryStatisticsRows</c> with default parameters on a 2x2 matrix that
        /// contains only NaN values and expects the matrix to stay consistent.
        /// </summary>
        public void TestSummaryStatisticsCanHandleRowWithOnlyNaNs()
        {
            var nan    = double.NaN;
            var values = new double[, ]
            {
                { nan, nan },
                { nan, nan }
            };
            var mdata = PerseusFactory.CreateMatrixData(values);

            var statistics = new SummaryStatisticsRows();
            var errString  = string.Empty;
            var parameters = statistics.GetParameters(mdata, ref errString);

            IMatrixData[]   supplData = null;
            IDocumentData[] supplDocs = null;
            var pInfo = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
            statistics.ProcessData(mdata, parameters, ref supplData, ref supplDocs, pInfo);

            var isConsistent = mdata.IsConsistent(out var message);
            Assert.IsTrue(isConsistent, message);
        }
Example #23
0
        /// <summary>
        /// Builds three random networks with 100 nodes and 150 randomly chosen edges each,
        /// removes duplicate edges via <c>UniqueRows</c> and checks that the edge and node
        /// tables stay in sync with the graph.
        /// </summary>
        public void TestNetworkUniqueRows()
        {
            // Fixed seed so a failing run can be reproduced.
            var          randGen = new Random(42);
            INetworkData ndata   = new NetworkData();

            ndata.Name        = "Random network(s)";
            ndata.Description = ndata.Name;
            var n        = 3;
            var numNodes = 100;
            var numEdges = 150;

            for (int i = 0; i < n; i++)
            {
                var graph     = new Graph();
                var nodeTable = PerseusFactory.CreateDataWithAnnotationColumns();
                var nodeIndex = new Dictionary <INode, int>();
                var edgeTable = PerseusFactory.CreateDataWithAnnotationColumns();
                var edgeIndex = new Dictionary <IEdge, int>();
                for (int j = 0; j < numNodes; j++)
                {
                    nodeIndex[graph.AddNode()] = j;
                }
                var nodeNames = Enumerable.Range(0, graph.NumberOfNodes).Select(x => $"node {x}").ToArray();
                nodeTable.AddStringColumn("Node", "", nodeNames);
                var nodes   = graph.ToArray();
                var sources = new List <string>(numEdges);
                var targets = new List <string>(numEdges);
                for (int j = 0; j < numEdges; j++)
                {
                    // Random endpoints may repeat, which produces the duplicate edges this test relies on.
                    var source = nodes[randGen.Next(0, nodes.Length)];
                    sources.Add(nodeNames[nodeIndex[source]]);
                    var target = nodes[randGen.Next(0, nodes.Length)];
                    targets.Add(nodeNames[nodeIndex[target]]);
                    edgeIndex[graph.AddEdge(source, target)] = j;
                }
                edgeTable.AddStringColumn("Source", "", sources.ToArray());
                edgeTable.AddStringColumn("Target", "", targets.ToArray());
                var network = new NetworkInfo(graph, nodeTable, nodeIndex, edgeTable, edgeIndex, $"Random {i}");
                ndata.AddNetworks(network);
            }
            foreach (var network in ndata)
            {
                network.UniqueRows(network.EdgeTable.StringColumns[0], network.EdgeTable.StringColumns[1], network.NodeTable.StringColumns[0], ArrayUtils.Median, RemoveDuplicateEdges.Union, RemoveDuplicateEdges.CatUnion, RemoveDuplicateEdges.MultiNumUnion);
                // De-duplication can only keep or reduce the number of edges.
                Assert.True(network.EdgeTable.RowCount <= numEdges);
                Assert.AreEqual(network.Graph.NumberOfEdges, network.EdgeTable.RowCount);
                Assert.AreEqual(network.Graph.NumberOfNodes, network.NodeTable.RowCount);
            }
        }
        /// <summary>
        /// Runs the configured external interpreter on the code file, exchanging data through
        /// temporary files: the matrix is written to an input file, the process is started, and the
        /// main matrix and the supplementary tables are read back from the output files.
        /// </summary>
        /// <param name="mdata">Matrix that is written out, cleared and refilled from the process output.</param>
        /// <param name="param">Parameters holding the interpreter path, code file and command line arguments.</param>
        /// <param name="supplTables">Supplementary tables; created with <c>NumSupplTables</c> entries when <c>null</c>.</param>
        /// <param name="documents">Not used by this method.</param>
        /// <param name="processInfo">Receives status updates and, on failure, the error string.</param>
        /// <remarks>
        /// On failure <c>processInfo.ErrString</c> is set and the method returns without modifying
        /// <paramref name="mdata"/>. All temporary files created here are removed before returning.
        /// </remarks>
        public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents,
                                ProcessInfo processInfo)
        {
            var remoteExe = param.GetParam<string>(InterpreterLabel).Value;

            if (string.IsNullOrWhiteSpace(remoteExe))
            {
                processInfo.ErrString = Resources.RemoteExeNotSpecified;
                return;
            }
            // Validate the code file before touching the file system so that the
            // early return does not leave temporary files behind.
            if (!TryGetCodeFile(param, out string codeFile))
            {
                processInfo.ErrString = $"Code file '{codeFile}' was not found";
                return;
            }
            if (supplTables == null)
            {
                supplTables = Enumerable.Range(0, NumSupplTables).Select(i => PerseusFactory.CreateMatrixData()).ToArray();
            }
            var inFile = Path.GetTempFileName();
            var outFile = Path.GetTempFileName();
            var suppFiles = supplTables.Select(i => Path.GetTempFileName()).ToArray();

            try
            {
                PerseusUtils.WriteMatrixToFile(mdata, inFile, AdditionalMatrices);
                var commandLineArguments = GetCommandLineArguments(param);
                var args = $"{codeFile} {commandLineArguments} {inFile} {outFile} {string.Join(" ", suppFiles)}";

                Debug.WriteLine($"executing > {remoteExe} {args}");
                if (Utils.RunProcess(remoteExe, args, processInfo.Status, out string processInfoErrString) != 0)
                {
                    processInfo.ErrString = processInfoErrString;
                    return;
                }
                mdata.Clear();
                PerseusUtils.ReadMatrixFromFile(mdata, processInfo, outFile, '\t');
                for (int i = 0; i < NumSupplTables; i++)
                {
                    PerseusUtils.ReadMatrixFromFile(supplTables[i], processInfo, suppFiles[i], '\t');
                }
            }
            finally
            {
                // Path.GetTempFileName creates the files on disk; remove them on every exit path.
                foreach (var tempFile in new[] { inFile, outFile }.Concat(suppFiles))
                {
                    try
                    {
                        File.Delete(tempFile);
                    }
                    catch (IOException)
                    {
                        // Best-effort cleanup: a locked temp file must not mask the actual result.
                    }
                    catch (System.UnauthorizedAccessException)
                    {
                        // Best-effort cleanup: see above.
                    }
                }
            }
        }
        /// <summary>
        /// An auxiliary method for testing the action of regular expressions.
        /// Limited to a single column, which should be sufficient for this purpose.
        /// Multiple rows are allowed to test the effect of one regex on several strings.
        /// </summary>
        /// <param name="regexStr">Regular expression handed to <c>ProcessTextColumns</c>.</param>
        /// <param name="stringsInit">Initial content of the single string column, one entry per row.</param>
        /// <param name="stringsExpect">Expected content of the column after processing, one entry per row.</param>
        private static void TestRegex(string regexStr, string[] stringsInit, string[] stringsExpect)
        {
            const string name = "Test";
            const bool ignoreCase = false;

            IMatrixData[]   supplTables       = null;
            IDocumentData[] documents         = null;
            List<string>    stringColumnNames = new List<string> { "Column Name" };
            List<string[]>  stringColumnsInit = new List<string[]> { stringsInit };
            Parameters param =
                new Parameters(new Parameter[] {
                new MultiChoiceParam("Columns", new[] { 0 })
                {
                    Values = stringColumnNames
                },
                new StringParam("Regular expression", regexStr), new BoolParam("Keep original columns", false),
                new BoolParam("Strings separated by semicolons are independent", false)
            });
            IMatrixData mdata = PerseusFactory.CreateNewMatrixData();

            mdata.Clear();
            mdata.Name = name;
            mdata.SetAnnotationColumns(stringColumnNames, stringColumnsInit, mdata.CategoryColumnNames, new List<string[][]>(),
                                       mdata.NumericColumnNames, mdata.NumericColumns, mdata.MultiNumericColumnNames, mdata.MultiNumericColumns);
            var ptc = new ProcessTextColumns();

            ptc.ProcessData(mdata, param, ref supplTables, ref documents, null);

            for (int rowInd = 0; rowInd < stringsInit.Length; rowInd++)
            {
                // Expected value goes first so that failure messages report the right roles.
                Assert.AreEqual(stringsExpect[rowInd], mdata.StringColumns[0][rowInd], ignoreCase);
            }
        }
        /// <summary>
        /// Applies <c>ExpandSiteTable</c> to a 2x4 matrix with three "Col___n" columns and one
        /// "No expand" column, then checks the resulting main columns, descriptions, row count,
        /// string columns and multi-numeric column.
        /// </summary>
        public void TestSmallExample()
        {
            float[,] mainValues =
            {
                { 0.0f, 1.0f, 0f, 5f },
                { 2.0f, 3.0f, 0f, 5f }
            };
            var columnNames = new List<string> { "Col___1", "Col___2", "Col___3", "No expand" };
            var mdata = PerseusFactory.CreateMatrixData(mainValues, columnNames);

            mdata.ColumnDescriptions = new List<string>
            {
                "Description Col", "Col", "Col", "Description No expand"
            };

            var multiNum = new[]
            {
                new[] { 0.0, 1.0 },
                new[] { 2.0 }
            };
            mdata.AddMultiNumericColumn("MultiNum", "", multiNum);

            var stringCol = new[] { "row1", "row2" };
            mdata.AddStringColumn("String", "", stringCol);

            var processing = new ExpandSiteTable();
            IMatrixData[] supplData = null;
            IDocumentData[] docs = null;
            processing.ProcessData(mdata, new Parameters(), ref supplData, ref docs, CreateProcessInfo());

            // Main columns and their descriptions.
            Assert.AreEqual(2, mdata.ColumnCount);
            CollectionAssert.AreEqual(new[] { "No expand", "Col" }, mdata.ColumnNames.ToArray());
            Assert.AreEqual(2, mdata.ColumnDescriptions.Count);
            CollectionAssert.AreEqual(new[] { "Description No expand", "Description Col" }, mdata.ColumnDescriptions.ToArray());

            // Each of the two input rows is expected three times in the output.
            Assert.AreEqual(6, mdata.RowCount);
            Assert.AreEqual(2, mdata.StringColumnCount);
            CollectionAssert.AreEqual(new[] { "String", "Unique identifier" }, mdata.StringColumnNames);
            var expectedStrings = Enumerable.Repeat(stringCol, 3).SelectMany(rows => rows).ToArray();
            CollectionAssert.AreEqual(expectedStrings, mdata.StringColumns[0]);
            Assert.AreEqual(1, mdata.MultiNumericColumnCount);
            var expectedMultiNum = Enumerable.Repeat(multiNum, 3).SelectMany(rows => rows).ToArray();
            CollectionAssert.AreEqual(expectedMultiNum, mdata.MultiNumericColumns[0]);
        }
        /// <summary>
        /// Loads a random matrix, adds a numeric column "Test" and a string row, then runs
        /// <c>ChangeColumnType</c> on that column and checks that "Test" ends up as the last
        /// main column with its values unchanged.
        /// </summary>
        public void TestNumericToMainWithStringRow()
        {
            var upload = new CreateRandomMatrix();
            var errString = string.Empty;
            var parameters = upload.GetParameters(ref errString);
            Assert.IsTrue(string.IsNullOrEmpty(errString), errString);

            var mdata = PerseusFactory.CreateMatrixData();
            IMatrixData[] suppl = null;
            IDocumentData[] supplD = null;
            var processInfo = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
            upload.LoadData(mdata, parameters, ref suppl, ref supplD, processInfo);

            // The numeric column simply holds the row index as a double.
            var expectedValues = new double[mdata.RowCount];
            for (int row = 0; row < expectedValues.Length; row++)
            {
                expectedValues[row] = row;
            }
            mdata.AddNumericColumn("Test", "", expectedValues);
            mdata.AddStringRow("TestRow", "", mdata.ColumnNames.ToArray());
            Assert.IsTrue(string.IsNullOrEmpty(processInfo.ErrString), processInfo.ErrString);
            Assert.IsTrue(mdata.IsConsistent(out string randomConsistency), randomConsistency);

            var changeType = new ChangeColumnType();
            parameters = changeType.GetParameters(mdata, ref errString);
            Assert.IsTrue(string.IsNullOrEmpty(errString), errString);

            // NOTE(review): the indices 1/1 presumably select "numeric" -> "main"; confirm against ChangeColumnType.
            var sourceType = parameters.GetParamWithSubParams<int>("Source type");
            sourceType.Value = 1;
            var sourceSubParams = sourceType.GetSubParameters();
            sourceSubParams.GetParam<int[]>("Columns").Value = new[] { 0 };
            sourceSubParams.GetParam<int>("Target type").Value = 1;

            changeType.ProcessData(mdata, parameters, ref suppl, ref supplD, processInfo);

            Assert.IsTrue(string.IsNullOrEmpty(processInfo.ErrString), processInfo.ErrString);
            Assert.IsTrue(mdata.IsConsistent(out var resultConsistency), resultConsistency);
            Assert.AreEqual("Test", mdata.ColumnNames.Last());
            var lastColumn = mdata.Values.GetColumn(mdata.ColumnCount - 1).ToArray();
            CollectionAssert.AreEqual(expectedValues, lastColumn);
        }
        /// <summary>
        /// Matches a four-row matrix onto a base table by the "Id" column using
        /// <c>MatchingRowsByName</c> and checks the indicator column, the resulting ids and both
        /// multi-numeric columns ("Original row numbers" and "Column 1").
        /// </summary>
        public void TestConvertNumericToMultiNumeric()
        {
            var baseTable = PerseusFactory.CreateMatrixData();
            baseTable.AddStringColumn("Id", "", new[] { "n1;n2", "n3", "n5" });
            Assert.IsTrue(baseTable.IsConsistent(out var baseConsistency), baseConsistency);

            double[,] mainValues = { { 0.0 }, { 1.0 }, { 2.0 }, { 3.0 } };
            var otherTable = PerseusFactory.CreateMatrixData(mainValues);
            otherTable.AddStringColumn("Id", "", new[] { "n1", "n2", "n3", "n4" });
            Assert.IsTrue(otherTable.IsConsistent(out var otherConsistency), otherConsistency);

            var matching = new MatchingRowsByName();
            var errString = string.Empty;
            var parameters = matching.GetParameters(new[] { baseTable, otherTable }, ref errString);
            parameters.GetParam<int[]>("Copy main columns").Value = new[] { 0 };
            parameters.GetParam<int>("Combine copied main values").Value = 5;
            parameters.GetParam<int>("Join style").Value = 1;
            parameters.GetParam<bool>("Add indicator").Value = true;
            parameters.GetParam<bool>("Add original row numbers").Value = true;

            IMatrixData[] supplTables = null;
            IDocumentData[] documents = null;
            var result = matching.ProcessData(new[] { baseTable, otherTable }, parameters, ref supplTables, ref documents,
                                              BaseTest.CreateProcessInfo());
            var indicator = result.GetCategoryColumnAt(0).Select(cats => cats.SingleOrDefault() ?? "");

            CollectionAssert.AreEqual(new[] { "+", "+", "", "+" }, indicator);
            CollectionAssert.AreEqual(new[] { "n1;n2", "n3", "n5", "n4" }, result.GetStringColumn("Id"));
            CollectionAssert.AreEqual(new[] { "Original row numbers", "Column 1" }, result.MultiNumericColumnNames);

            // Both multi-numeric columns are expected to hold the same per-row values.
            var expectedRows = new[]
            {
                new[] { 0.0, 1.0 },
                new[] { 2.0 },
                new double[0],
                new[] { 3.0 }
            };
            for (int column = 0; column < 2; column++)
            {
                for (int row = 0; row < expectedRows.Length; row++)
                {
                    CollectionAssert.AreEqual(expectedRows[row], result.MultiNumericColumns[column][row]);
                }
            }
        }
Example #29
0
        /// <summary>
        /// An auxiliary method for testing the action of regular expressions.
        /// Limited to a single column, which should be sufficient for this purpose.
        /// Multiple rows are allowed to test the effect of one regex on several strings.
        /// </summary>
        /// <param name="regexStr">Regular expression handed to <c>ProcessTextColumns</c>.</param>
        /// <param name="stringsInit">Initial content of the single string column, one entry per row.</param>
        /// <param name="stringsExpect">Expected content of the column after processing, one entry per row.</param>
        private static void TestRegex(string regexStr, string[] stringsInit, string[] stringsExpect)
        {
            const string name = "Test";

            IMatrixData[]   supplTables       = null;
            IDocumentData[] documents         = null;
            List<string>    stringColumnNames = new List<string> { "Column Name" };
            List<string[]>  stringColumnsInit = new List<string[]> { stringsInit };
            ProcessTextColumns ptc   = new ProcessTextColumns();
            IMatrixData        mdata = PerseusFactory.CreateMatrixData();

            mdata.Clear();
            mdata.Name = name;
            mdata.SetAnnotationColumns(stringColumnNames, stringColumnsInit, mdata.CategoryColumnNames, new List<string[][]>(),
                                       mdata.NumericColumnNames, mdata.NumericColumns, mdata.MultiNumericColumnNames, mdata.MultiNumericColumns);
            string     errorStr = string.Empty;
            Parameters param    = ptc.GetParameters(mdata, ref errorStr);

            param.GetParam<int[]>("Columns").Value              = new[] { 0 };
            param.GetParam<string>("Regular expression").Value  = regexStr;
            param.GetParam<bool>("Keep original columns").Value = false;
            param.GetParam<bool>("Strings separated by semicolons are independent").Value = false;
            ptc.ProcessData(mdata, param, ref supplTables, ref documents, null);
            for (int rowInd = 0; rowInd < stringsInit.Length; rowInd++)
            {
                // The caller-supplied string is the expectation; the processed column is the actual value.
                string expected = stringsExpect[rowInd];
                string actual   = mdata.StringColumns[0][rowInd];
                StringAssert.AreEqualIgnoringCase(expected, actual);
            }
        }
        /// <summary>
        /// Matches the names "a" and "B" onto a base table containing upper- and lower-case
        /// variants with the "Ignore case" option enabled, and checks that the indicator column
        /// marks "A", "a", "B" and "b" but neither "C" nor "c".
        /// </summary>
        public void TestMatchingCaseInSensitive()
        {
            var baseTable = PerseusFactory.CreateMatrixData();
            baseTable.AddStringColumn("Name", "", new[] { "A", "a", "B", "b", "C", "c" });
            Assert.IsTrue(baseTable.IsConsistent(out var baseConsistency), baseConsistency);

            var otherTable = PerseusFactory.CreateMatrixData();
            otherTable.AddStringColumn("Name", "", new[] { "a", "B" });
            Assert.IsTrue(otherTable.IsConsistent(out var otherConsistency), otherConsistency);

            var matching = new MatchingRowsByName();
            var errString = string.Empty;
            var tables = new[] { baseTable, otherTable };
            var parameters = matching.GetParameters(tables, ref errString);
            parameters.GetParam<bool>("Add indicator").Value = true;
            parameters.GetParam<bool>("Ignore case").Value = true;

            IMatrixData[] supplTables = null;
            IDocumentData[] documents = null;
            var result = matching.ProcessData(new[] { baseTable, otherTable }, parameters, ref supplTables, ref documents,
                                              BaseTest.CreateProcessInfo());

            // An empty category list is mapped to "" so that unmatched rows can be compared directly.
            var indicator = result.GetCategoryColumnAt(0)
                                  .Select(cats => cats.SingleOrDefault() ?? "")
                                  .ToArray();
            var expectedIndicator = new[] { "+", "+", "+", "+", "", "" };
            CollectionAssert.AreEqual(expectedIndicator, indicator);
        }