/// <summary>
/// Loads a small space-separated text file through <c>UnstructuredTxtUpload</c> with
/// "Split into columns" enabled and checks that the header and the single data row
/// end up in two string columns.
/// </summary>
public void TestLoadingRawTxt() {
    var lines = new[] { "Col_1 Col_2", "a b", };
    var tmpFile = Path.GetTempFileName();
    try {
        File.WriteAllLines(tmpFile, lines);
        var upload = new UnstructuredTxtUpload();
        var errString = string.Empty;
        var parameters = upload.GetParameters(ref errString);
        Assert.IsTrue(string.IsNullOrEmpty(errString));
        parameters.GetParam<string>("File").Value = tmpFile;
        var subparam = parameters.GetParamWithSubParams<bool>("Split into columns");
        subparam.Value = true;
        // NOTE(review): index 2 is presumably the "space" separator choice - confirm against the upload's parameter list.
        subparam.GetSubParameters().GetParam<int>("Separator").Value = 2;
        var mdata = PerseusFactory.CreateMatrixData();
        IMatrixData[] suppl = null;
        IDocumentData[] supplD = null;
        upload.LoadData(mdata, parameters, ref suppl, ref supplD, new ProcessInfo(new Settings(), s => { }, i => { }, 1));
        CollectionAssert.AreEqual(new[] { "Col_1", "Col_2" }, mdata.StringColumnNames);
        CollectionAssert.AreEqual(new[] { "a" }, mdata.StringColumns[0]);
        CollectionAssert.AreEqual(new[] { "b" }, mdata.StringColumns[1]);
    } finally {
        // Path.GetTempFileName creates the file on disk; remove it so repeated runs do not pile up temp files.
        File.Delete(tmpFile);
    }
}
/// <summary>
/// Runs the embedded <c>matrix_with_supp_tables</c> script through
/// <c>MatrixProcessingWithSupplementaryTables</c> and expects two non-null supplementary tables.
/// The test reports Inconclusive before any of that happens.
/// </summary>
public void TestMatrixProcessingWithSupplementaryTables() {
    bool pythonFound = PluginInterop.Python.Utils.TryFindPythonExecutable(out string _);
    if (!pythonFound) {
        Assert.Inconclusive("Python not installed");
    }
    // Unconditional: the remainder only documents the intended scenario.
    Assert.Inconclusive("Cannot be tested without dependency on PerseusLibS");

    // Materialize the embedded script as a file the plugin can point at.
    var scriptBytes = Properties.Resources.matrix_with_supp_tables;
    var scriptPath = Path.GetTempFileName();
    var scriptText = Encoding.UTF8.GetString(scriptBytes);
    File.WriteAllText(scriptPath, scriptText);

    var plugin = new MatrixProcessingWithSupplementaryTables();
    double[,] values = { { 0.0, 1.0 } };
    var columnNames = new List<string> { "col 1", "col 2" };
    var matrix = PerseusFactory.CreateMatrixData(values, columnNames);

    var error = string.Empty;
    var pluginParameters = plugin.GetParameters(matrix, ref error);
    Assert.IsTrue(string.IsNullOrEmpty(error));
    pluginParameters.GetParam<string>("Script file").Value = scriptPath;

    IMatrixData[] supplementaryTables = null;
    IDocumentData[] supplementaryDocuments = null;
    var info = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
    plugin.ProcessData(matrix, pluginParameters, ref supplementaryTables, ref supplementaryDocuments, info);

    Assert.IsTrue(string.IsNullOrEmpty(info.ErrString), info.ErrString);
    Assert.AreEqual(2, supplementaryTables.Length);
    foreach (var table in supplementaryTables) {
        Assert.NotNull(table);
    }
}
/// <summary>
/// Same scenario as the plain-text upload test, but the two input lines are written
/// gzip-compressed to a file ending in ".gz" before being loaded through
/// <c>UnstructuredTxtUpload</c>.
/// </summary>
public void TestLoadingRawTxtFromGzip() {
    var lines = new[] { "Col_1 Col_2", "a b", };
    // GetTempFileName already creates a zero-byte file; keep its path so it can be removed too.
    var baseTmpFile = Path.GetTempFileName();
    var tmpFile = baseTmpFile + ".gz";
    try {
        using (var memory = new MemoryStream(Encoding.UTF8.GetBytes(string.Join("\n", lines))))
        using (var outFile = File.Create(tmpFile))
        using (var gzip = new GZipStream(outFile, CompressionMode.Compress)) {
            memory.CopyTo(gzip);
        }
        var upload = new UnstructuredTxtUpload();
        var errString = string.Empty;
        var parameters = upload.GetParameters(ref errString);
        Assert.IsTrue(string.IsNullOrEmpty(errString));
        parameters.GetParam<string>("File").Value = tmpFile;
        var subparam = parameters.GetParamWithSubParams<bool>("Split into columns");
        subparam.Value = true;
        // NOTE(review): index 2 is presumably the "space" separator choice - confirm against the upload's parameter list.
        subparam.GetSubParameters().GetParam<int>("Separator").Value = 2;
        var mdata = PerseusFactory.CreateMatrixData();
        IMatrixData[] suppl = null;
        IDocumentData[] supplD = null;
        upload.LoadData(mdata, parameters, ref suppl, ref supplD, new ProcessInfo(new Settings(), s => { }, i => { }, 1));
        CollectionAssert.AreEqual(new[] { "Col_1", "Col_2" }, mdata.StringColumnNames);
        CollectionAssert.AreEqual(new[] { "a" }, mdata.StringColumns[0]);
        CollectionAssert.AreEqual(new[] { "b" }, mdata.StringColumns[1]);
    } finally {
        // Remove both the compressed file and the empty placeholder created by GetTempFileName.
        File.Delete(tmpFile);
        File.Delete(baseTmpFile);
    }
}
/// <summary>
/// Reads a tab-separated file into a scratch matrix and copies that matrix's
/// annotation columns into <paramref name="data"/>.
/// </summary>
/// <param name="data">Target that receives the annotation columns.</param>
/// <param name="file">Path of the tab-separated file to read.</param>
/// <param name="processInfo">Forwarded to <c>PerseusUtils.ReadMatrixFromFile</c>.</param>
private static void ReadMatrixDataInto(IDataWithAnnotationColumns data, string file, ProcessInfo processInfo) {
    const char separator = '\t';
    var scratch = PerseusFactory.CreateMatrixData();
    PerseusUtils.ReadMatrixFromFile(scratch, processInfo, file, separator);
    data.CopyAnnotationColumnsFrom(scratch);
}
/// <summary>
/// Writes a matrix with quoted / tab-containing strings via <c>PerseusUtils.WriteMatrix</c>,
/// reads it back with <c>PerseusUtils.ReadMatrix</c> and compares the string column.
/// The fully quoted entry is expected to come back without its surrounding quotes.
/// </summary>
public void TestReadWriteMatrixRoundTrip() {
    var source = PerseusFactory.CreateMatrixData();
    source.AddStringColumn("StringCol", "", new[] {
        "Regular text",
        "\"Regular quoted text\"",
        "\"Quote stops\" in the middle",
        "\"Escaped\tseparator\"with extra",
    });
    // One numeric value and one empty category set per string row.
    var ones = source.StringColumns[0].Select(_ => 1.0).ToArray();
    source.AddNumericColumn("NumCol", "", ones);
    var noCategories = source.StringColumns[0].Select(_ => new string[0]).ToArray();
    source.AddCategoryColumn("CatCol", "", noCategories);

    string serialized;
    using (var buffer = new MemoryStream())
    using (var output = new StreamWriter(buffer)) {
        PerseusUtils.WriteMatrix(source, output);
        output.Flush();
        serialized = Encoding.UTF8.GetString(buffer.ToArray());
    }

    var roundTripped = PerseusFactory.CreateMatrixData();
    var info = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
    PerseusUtils.ReadMatrix(roundTripped, info,
        () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(serialized))), "name", '\t');

    // Adjust the source column in place: the second entry is expected back without its quotes.
    var sourceStrings = source.StringColumns[0];
    sourceStrings[1] = sourceStrings[1].Trim('\"');
    CollectionAssert.AreEqual(source.StringColumns[0], roundTripped.StringColumns[0]);
}
/// <summary>
/// Builds the shared 3x3 fixture matrix and attaches a string column "test"
/// with the values a, b, a.
/// </summary>
public void Setup() {
    double[,] values = {
        { 0.0, 1.0, 0.0 },
        { 0.0, 0.0, 0.0 },
        { 0.0, 1.0, 0.0 }
    };
    string[] labels = { "a", "b", "a" };
    _mdata = PerseusFactory.CreateMatrixData(values);
    _mdata.AddStringColumn("test", "", labels);
}
/// <summary>
/// Read supplementary files according to file paths and data types.
/// </summary>
/// <param name="suppFiles">
/// Paths of the supplementary outputs. Matrix entries are read as tab-separated files via
/// <c>PerseusUtils.ReadMatrixFromFile</c>; network entries are passed to <c>FolderFormat.Read</c>.
/// </param>
/// <param name="supplDataTypes">
/// Data type of each entry of <paramref name="suppFiles"/>, by position. Must contain at least
/// as many entries as <paramref name="suppFiles"/>.
/// </param>
/// <param name="processInfo">Forwarded to the underlying readers.</param>
/// <returns>One loaded data object per entry of <paramref name="suppFiles"/>, in the same order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="suppFiles"/> or <paramref name="supplDataTypes"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="supplDataTypes"/> has fewer entries than <paramref name="suppFiles"/>.</exception>
/// <exception cref="NotImplementedException">A data type other than matrix or network was requested.</exception>
public static IData[] ReadSupplementaryData(string[] suppFiles, DataType[] supplDataTypes, ProcessInfo processInfo) {
    if (suppFiles == null) {
        throw new ArgumentNullException(nameof(suppFiles));
    }
    if (supplDataTypes == null) {
        throw new ArgumentNullException(nameof(supplDataTypes));
    }
    var numSupplTables = suppFiles.Length;
    // Fail with a descriptive message instead of an IndexOutOfRangeException halfway through the loop.
    if (supplDataTypes.Length < numSupplTables) {
        throw new ArgumentException(
            $"Expected a data type for each of the {numSupplTables} supplementary files but got {supplDataTypes.Length}.",
            nameof(supplDataTypes));
    }
    var supplData = new IData[numSupplTables];
    for (int i = 0; i < numSupplTables; i++) {
        switch (supplDataTypes[i]) {
            case DataType.Matrix: {
                var mdata = PerseusFactory.CreateMatrixData();
                PerseusUtils.ReadMatrixFromFile(mdata, processInfo, suppFiles[i], '\t');
                supplData[i] = mdata;
                break;
            }
            case DataType.Network: {
                var ndata = PerseusFactory.CreateNetworkData();
                FolderFormat.Read(ndata, suppFiles[i], processInfo);
                supplData[i] = ndata;
                break;
            }
            default:
                throw new NotImplementedException($"Data type {supplDataTypes[i]} not supported!");
        }
    }
    return supplData;
}
/// <summary>
/// Generates a matrix with <c>CreateRandomMatrix</c>, transposes it with <c>Transpose</c>
/// and checks <c>IsConsistent</c> after each step.
/// </summary>
public void TestIsConsistent() {
    IMatrixData[] suppl = null;
    IDocumentData[] supplD = null;
    var info = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
    var error = string.Empty;

    // Step 1: create the random matrix.
    var generator = new CreateRandomMatrix();
    var parameters = generator.GetParameters(ref error);
    Assert.IsTrue(string.IsNullOrEmpty(error), error);
    var matrix = PerseusFactory.CreateMatrixData();
    generator.LoadData(matrix, parameters, ref suppl, ref supplD, info);
    Assert.IsTrue(string.IsNullOrEmpty(info.ErrString), info.ErrString);
    bool consistentAfterLoad = matrix.IsConsistent(out string loadReport);
    Assert.IsTrue(consistentAfterLoad, loadReport);

    // Step 2: transpose in place.
    var transposer = new Transpose();
    parameters = transposer.GetParameters(matrix, ref error);
    Assert.IsTrue(string.IsNullOrEmpty(error), error);
    transposer.ProcessData(matrix, parameters, ref suppl, ref supplD, info);
    Assert.IsTrue(string.IsNullOrEmpty(info.ErrString), info.ErrString);
    bool consistentAfterTranspose = matrix.IsConsistent(out var transposeReport);
    Assert.IsTrue(consistentAfterTranspose, transposeReport);
}
/// <summary>
/// Runs <c>ManageCategoricalAnnotRow</c> with action 1 and name "Experiment" on columns
/// a_1, a_2, b_1 and expects a categorical row with the values a, a, b.
/// </summary>
public void TestGetParameters() {
    double[,] values = { { 0.0, 0, 0 }, { 1, 1, 1 } };
    var columnNames = new List<string> { "a_1", "a_2", "b_1" };
    var matrix = PerseusFactory.CreateMatrixData(values, columnNames);

    var plugin = new ManageCategoricalAnnotRow();
    var error = string.Empty;
    var pluginParameters = plugin.GetParameters(matrix, ref error);
    Assert.AreEqual(string.Empty, error);

    var actionParam = pluginParameters.GetParamWithSubParams<int>("Action");
    actionParam.Value = 1;
    var actionSettings = actionParam.GetSubParameters();
    actionSettings.GetParam<string>("Name").Value = "Experiment";

    IMatrixData[] suppl = null;
    IDocumentData[] suppld = null;
    var info = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
    plugin.ProcessData(matrix, pluginParameters, ref suppl, ref suppld, info);

    Assert.AreEqual("Experiment", matrix.CategoryRowNames[0]);
    CollectionAssert.AreEquivalent(new[] { "a", "b" }, matrix.GetCategoryRowValuesAt(0));
    CollectionAssert.AreEqual(new[] { "a" }, matrix.GetCategoryRowEntryAt(0, 0));
    CollectionAssert.AreEqual(new[] { "a" }, matrix.GetCategoryRowEntryAt(0, 1));
    CollectionAssert.AreEqual(new[] { "b" }, matrix.GetCategoryRowEntryAt(0, 2));
}
/// <summary>
/// Fills <paramref name="ndata"/> from a folder that contains <c>networks.txt</c> plus,
/// for every listed network, <c>{guid}_nodes.txt</c> and <c>{guid}_edges.txt</c>.
/// </summary>
/// <param name="ndata">Network collection that receives the annotation columns and the networks.</param>
/// <param name="outFolder">Folder holding the files.</param>
/// <param name="processInfo">Forwarded to the table reader.</param>
public static void Read(INetworkData ndata, string outFolder, ProcessInfo processInfo) {
    var networksFile = Path.Combine(outFolder, "networks.txt");
    ReadMatrixDataInto(ndata, networksFile, processInfo);

    var guidColumn = ndata.GetStringColumn("guid");
    var nameColumn = ndata.GetStringColumn("name");
    foreach (var entry in guidColumn.Zip(nameColumn, (id, label) => new { id, label })) {
        var guid = Guid.Parse(entry.id);

        // Per-network node and edge tables live next to networks.txt, keyed by guid.
        var nodeTable = PerseusFactory.CreateDataWithAnnotationColumns();
        var edgeTable = PerseusFactory.CreateDataWithAnnotationColumns();
        var nodesFile = Path.Combine(outFolder, $"{guid}_nodes.txt");
        ReadMatrixDataInto(nodeTable, nodesFile, processInfo);
        var edgesFile = Path.Combine(outFolder, $"{guid}_edges.txt");
        ReadMatrixDataInto(edgeTable, edgesFile, processInfo);

        var graph = new Graph();

        // One graph node per node-table row; remember row index and name for edge lookup.
        var nodeIndex = new Dictionary<INode, int>();
        var nameToNode = new Dictionary<string, INode>();
        var nodeNames = nodeTable.GetStringColumn("node");
        var nodeRows = nodeTable.RowCount;
        for (int r = 0; r < nodeRows; r++) {
            var created = graph.AddNode();
            nodeIndex[created] = r;
            nameToNode[nodeNames[r]] = created;
        }

        // One graph edge per edge-table row, connecting nodes resolved by name.
        var sources = edgeTable.GetStringColumn("source");
        var targets = edgeTable.GetStringColumn("target");
        var edgeIndex = new Dictionary<IEdge, int>();
        var edgeRows = edgeTable.RowCount;
        for (int r = 0; r < edgeRows; r++) {
            var from = nameToNode[sources[r]];
            var to = nameToNode[targets[r]];
            edgeIndex[graph.AddEdge(from, to)] = r;
        }

        var info = new NetworkInfo(graph, nodeTable, nodeIndex, edgeTable, edgeIndex, entry.label, guid);
        ndata.AddNetworks(info);
    }

    // NOTE(review): networks.txt is read a second time after all networks were added; presumably
    // this restores the annotation columns after AddNetworks - confirm before removing.
    ReadMatrixDataInto(ndata, networksFile, processInfo);
}
/// <summary>
/// Matches a three-row matrix (ids n1, n2, n3) onto a base table whose first id cell is
/// "n1;n2" using <c>MatchingRowsByName</c>, copying main column 0 with combine option 5,
/// and expects the copied values to arrive as one multi-numeric column: {0, 1} and {2}.
/// </summary>
public void TestConvertNumericToMultiNumeric() {
    var mBase = PerseusFactory.CreateMatrixData();
    mBase.AddStringColumn("Id", "", new[] { "n1;n2", "n3" });
    var mdata = PerseusFactory.CreateMatrixData(new[,] { { 0.0 }, { 1.0 }, { 2.0 } });
    mdata.AddStringColumn("Id", "", new[] { "n1", "n2", "n3" });
    var match = new MatchingRowsByName();
    var errString = string.Empty;
    var param = match.GetParameters(new[] { mBase, mdata }, ref errString);
    param.GetParam<int[]>("Copy main columns").Value = new[] { 0 };
    // NOTE(review): option 5 presumably selects "keep separate" (multi-numeric) combination - confirm against the plugin.
    param.GetParam<int>("Combine copied main values").Value = 5;
    IMatrixData[] supplTables = null;
    IDocumentData[] documents = null;
    var result = match.ProcessData(new[] { mBase, mdata }, param, ref supplTables, ref documents, BaseTest.CreateProcessInfo());
    // Expected value goes first so a failure message reads "expected 1 but was N".
    Assert.AreEqual(1, result.MultiNumericColumnCount);
    CollectionAssert.AreEqual(new[] { 0.0, 1.0 }, result.MultiNumericColumns[0][0]);
    CollectionAssert.AreEqual(new[] { 2.0 }, result.MultiNumericColumns[0][1]);
}
/// <summary>
/// Runs an external script to produce a matrix. The parameters are written to a temp file,
/// the interpreter is started with the arguments
/// <c>codeFile paramFile outFile suppFile...</c>, and afterwards the output file is read into
/// <paramref name="mdata"/> and each supplementary file into the matching entry of
/// <paramref name="supplTables"/>.
/// </summary>
/// <param name="mdata">Matrix that receives the script output.</param>
/// <param name="param">Plugin parameters; also serialized for the script.</param>
/// <param name="supplTables">Supplementary tables; created here (NumSupplTables entries) when null.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error string when the code file is missing or the process exits with a non-zero code.</param>
public void LoadData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    var remoteExe = GetExectuable(param);
    string paramFile = null;
    string outFile = null;
    var suppFiles = new string[0];
    try {
        paramFile = Path.GetTempFileName();
        param.ToFile(paramFile);
        outFile = Path.GetTempFileName();
        if (!TryGetCodeFile(param, out string codeFile)) {
            processInfo.ErrString = $"Code file '{codeFile}' was not found";
            return;
        }
        if (supplTables == null) {
            supplTables = Enumerable.Range(0, NumSupplTables).Select(i => PerseusFactory.CreateMatrixData()).ToArray();
        }
        suppFiles = supplTables.Select(i => Path.GetTempFileName()).ToArray();
        var args = $"{codeFile} {paramFile} {outFile} {string.Join(" ", suppFiles)}";
        Debug.WriteLine($"executing > {remoteExe} {args}");
        if (Utils.RunProcess(remoteExe, args, processInfo.Status, out string processInfoErrString) != 0) {
            processInfo.ErrString = processInfoErrString;
            return;
        }
        PerseusUtils.ReadMatrixFromFile(mdata, processInfo, outFile, '\t');
        for (int i = 0; i < NumSupplTables; i++) {
            PerseusUtils.ReadMatrixFromFile(supplTables[i], processInfo, suppFiles[i], '\t');
        }
    } finally {
        // Every GetTempFileName call leaves a file on disk; remove them on all exit paths.
        // Cleanup is best-effort so that it can never hide the actual result or error.
        foreach (var tempFile in new[] { paramFile, outFile }.Concat(suppFiles)) {
            if (tempFile == null) {
                continue;
            }
            try {
                File.Delete(tempFile);
            } catch (IOException e) {
                Debug.WriteLine($"could not delete temporary file '{tempFile}': {e.Message}");
            } catch (System.UnauthorizedAccessException e) {
                Debug.WriteLine($"could not delete temporary file '{tempFile}': {e.Message}");
            }
        }
    }
}
/// <summary>
/// Combines two 2x2 matrices that share only "col 2" with <c>MatchingColumnsByName</c>
/// and checks row count, column order and NaN filling of the non-shared columns.
/// </summary>
public void TestSmallExample() {
    double[,] firstValues = { { 0, 1 }, { 2, 3 } };
    double[,] secondValues = { { 4, 5 }, { 6, 7 } };
    var first = PerseusFactory.CreateMatrixData(firstValues, new List<string> { "col 1", "col 2" });
    var second = PerseusFactory.CreateMatrixData(secondValues, new List<string> { "col 2", "col 3" });
    var inputs = new[] { first, second };

    var plugin = new MatchingColumnsByName();
    var error = string.Empty;
    var pluginParameters = plugin.GetParameters(inputs, ref error);
    Assert.IsTrue(string.IsNullOrEmpty(error));

    IMatrixData[] supplTables = null;
    IDocumentData[] documents = null;
    var info = BaseTest.CreateProcessInfo();
    var combined = plugin.ProcessData(inputs, pluginParameters, ref supplTables, ref documents, info);
    Assert.IsTrue(string.IsNullOrEmpty(info.ErrString));

    Assert.AreEqual(4, combined.RowCount);
    CollectionAssert.AreEqual(new[] { "col 2", "col 1", "col 3" }, combined.ColumnNames);

    // Shared column first, then the columns that exist in only one input (NaN where missing).
    double[] sharedColumn = { 1, 3, 4, 6.0 };
    double[] firstOnlyColumn = { 0, 2, double.NaN, double.NaN };
    double[] secondOnlyColumn = { double.NaN, double.NaN, 5, 7 };
    CollectionAssert.AreEqual(sharedColumn, combined.Values.GetColumn(0).ToArray());
    CollectionAssert.AreEqual(firstOnlyColumn, combined.Values.GetColumn(1).ToArray());
    CollectionAssert.AreEqual(secondOnlyColumn, combined.Values.GetColumn(2).ToArray());
}
/// <summary>
/// Runs the embedded <c>matrix_to_network</c> script through the Python
/// <c>NetworkFromMatrix</c> plugin and expects no error.
/// The test reports Inconclusive before any of that happens.
/// </summary>
public void TestNetworkFromMatrix() {
    bool pythonFound = PluginInterop.Python.Utils.TryFindPythonExecutable(out string _);
    if (!pythonFound) {
        Assert.Inconclusive("Python not installed");
    }
    // Unconditional: the remainder only documents the intended scenario.
    Assert.Inconclusive("Cannot be tested without dependency on PerseusLibS");

    // Materialize the embedded script as a file the plugin can point at.
    var scriptBytes = Properties.Resources.matrix_to_network;
    var scriptPath = Path.GetTempFileName();
    var scriptText = Encoding.UTF8.GetString(scriptBytes);
    File.WriteAllText(scriptPath, scriptText);

    var plugin = new PluginInterop.Python.NetworkFromMatrix();
    double[,] values = { { 0.0, 1.0 } };
    var columnNames = new List<string> { "col 1", "col 2" };
    var matrix = PerseusFactory.CreateMatrixData(values, columnNames);

    var error = string.Empty;
    var pluginParameters = plugin.GetParameters(matrix, ref error);
    Assert.IsTrue(string.IsNullOrEmpty(error));
    pluginParameters.GetParam<string>("Script file").Value = scriptPath;

    var network = PerseusFactoryAnnColumns.CreateNetworkData();
    IData[] supplementary = null;
    var info = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
    plugin.ProcessData(matrix, network, pluginParameters, ref supplementary, info);
    Assert.IsTrue(string.IsNullOrEmpty(info.ErrString), info.ErrString);
}
/// <summary>
/// Serializes annotation-column-only data with <c>PerseusUtils.WriteDataWithAnnotationColumns</c>,
/// reads the text back as a matrix and checks row count, per-kind column counts and one cell.
/// </summary>
public void WriteDataWithAnnotationColumnsTest() {
    // Source data: two rows, one column of every annotation kind (two string columns).
    IDataWithAnnotationColumns source = PerseusFactory.CreateDataWithAnnotationColumns();
    source.AddStringColumn("strcol1", "this is stringcol1", new[] { "1", "2" });
    source.AddStringColumn("strcol2", "", new[] { "", "hallo" });
    source.AddNumericColumn("numcol", "", new[] { 1.0, 2.0 });
    source.AddMultiNumericColumn("multnumcol", "this is multnumcol", new[] { new[] { -2.0, 2.0 }, new double[] { } });
    source.AddCategoryColumn("catcol", "", new[] { new[] { "cat1", "cat1.1" }, new[] { "cat2", "cat1" } });

    // Write to memory and capture the text.
    string serialized;
    using (MemoryStream buffer = new MemoryStream())
    using (StreamWriter output = new StreamWriter(buffer)) {
        PerseusUtils.WriteDataWithAnnotationColumns(source, output);
        output.Flush();
        serialized = Encoding.UTF8.GetString(buffer.ToArray());
    }

    // Read the text back through the matrix reader.
    IMatrixData loadedMatrix = PerseusFactory.CreateMatrixData();
    var info = new ProcessInfo(new Settings(), status => { }, progress => { }, 1, i => { });
    PerseusUtils.ReadMatrix(loadedMatrix, info,
        () => new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(serialized))), "matrix1", '\t');

    IDataWithAnnotationColumns loaded = loadedMatrix;
    Assert.AreEqual(2, loaded.RowCount);
    Assert.AreEqual(2, loaded.StringColumnCount);
    Assert.AreEqual(1, loaded.NumericColumnCount);
    Assert.AreEqual(1, loaded.CategoryColumnCount);
    Assert.AreEqual(1, loaded.MultiNumericColumnCount);
    int strcol2Index = loaded.StringColumnNames.FindIndex(col => col.Equals("strcol2"));
    Assert.AreEqual("hallo", loaded.StringColumns[strcol2Index][1]);
}
/// <summary>
/// Reads a tab-separated table whose string cells contain double quotes in various positions
/// (fully quoted, partially quoted, quoted with an embedded tab) and pins how
/// <c>PerseusUtils.ReadMatrix</c> parses each of them.
/// </summary>
public void TestReadMatrixFromTabsepFileWithDoubleQuotes() {
    string[] fileLines = {
        "Col\tStringCol\tNumCol",
        "#!{Type}E\tT\tN",
        "-1.0\thello\t12",
        "1.0\t\"Actin family, ARP subfamily\";Actin family\t4",
        "2.0\t\"Regular quoted text\"\t4",
        "3.0\t\"Escaped\tseparator\"with extra\t4",
        "4.0\t\"Quote between separators\t\"\t4",
        "4.0\tQuote \"in\tthe\" middle\t4",
    };
    var content = Encoding.UTF8.GetBytes(string.Join("\n", fileLines));

    var matrix = PerseusFactory.CreateMatrixData();
    var info = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
    PerseusUtils.ReadMatrix(matrix, info, () => new StreamReader(new MemoryStream(content)), "name", '\t');

    // Exactly one column of each kind is expected.
    Assert.AreEqual("Col", matrix.ColumnNames.Single());
    Assert.AreEqual("StringCol", matrix.StringColumnNames.Single());
    Assert.AreEqual("NumCol", matrix.NumericColumnNames.Single());

    var expectedMain = new[] { -1.0, 1.0, 2.0, 3.0, 4.0, 4.0 };
    CollectionAssert.AreEqual(expectedMain, matrix.Values.GetColumn(0).ToArray());

    var expectedStrings = new[] {
        "hello",
        "\"Actin family, ARP subfamily\";Actin family",
        "Regular quoted text",
        "\"Escaped\tseparator\"with extra",
        "Quote between separators",
        "Quote \"in\tthe\" middle"
    };
    CollectionAssert.AreEqual(expectedStrings, matrix.StringColumns.Single());

    var expectedNumeric = new[] { 12, 4, 4, 4, 4, 4 };
    CollectionAssert.AreEqual(expectedNumeric, matrix.NumericColumns.Single());
}
/// <summary>
/// Collapses a four-row matrix by the "id" column (a, b, b, b) with <c>UniqueRows</c>:
/// main and numeric values are combined by median, the other annotation columns by union.
/// Two rows are expected to remain.
/// </summary>
public void SmallTest() {
    IMatrixData mdata = PerseusFactory.CreateMatrixData(new double[,] { { 0, 4 }, { 1, 5 }, { 2, 6 }, { 3, 7 } });
    mdata.AddStringColumn("id", "", new[] { "a", "b", "b", "b" });
    mdata.AddStringColumn("str", "", new[] { "a;b", "b;c", "c;d", "d;e" });
    mdata.AddCategoryColumn("cat", "", new[] { new[] { "a", "b" }, new[] { "b", "c" }, new[] { "c", "d" }, new[] { "d", "e" } });
    // One value per row: the matrix has four rows, so the column must have four entries.
    // The median over the "b" rows (1, 2, 3) is 2, which is what the assertion below expects.
    mdata.AddNumericColumn("num", "", new[] { 0, 1, 2, 3.0 });
    mdata.AddMultiNumericColumn("mnum", "", new[] { new[] { 0, 4d }, new[] { 1, 5d }, new[] { 2, 6d }, new[] { 3, 7d } });
    mdata.UniqueRows(mdata.StringColumns[0], ArrayUtils.Median, UniqueRows.Union, UniqueRows.CatUnion, UniqueRows.MultiNumUnion);
    Assert.AreEqual(2, mdata.RowCount);
    CollectionAssert.AreEqual(new[] { 0, 2 }, mdata.Values.GetColumn(0));
    CollectionAssert.AreEqual(new[] { 4, 6 }, mdata.Values.GetColumn(1));
    CollectionAssert.AreEqual(new[] { "a;b", "b;c;d;e" }, mdata.GetStringColumn("str"));
    CollectionAssert.AreEqual(new[] { new[] { "a", "b" }, new[] { "b", "c", "d", "e" } }, mdata.GetCategoryColumnAt(0));
    CollectionAssert.AreEqual(new[] { 0, 2 }, mdata.NumericColumns[0]);
    CollectionAssert.AreEqual(new[] { new[] { 0d, 4 }, new[] { 1d, 5, 2, 6, 3, 7 } }, mdata.MultiNumericColumns[0]);
}
/// <summary>
/// Combines two matrices with <c>MatchingColumnsByName</c> where both carry string, numeric
/// and category annotation columns, some shared by name and some unique to one matrix, and
/// checks how the annotation columns are stacked and filled.
/// </summary>
public void TestSmallExampleWithAnnotationColumns() {
    // First matrix: main columns "col 1"/"col 2" plus its annotations.
    double[,] firstValues = { { 0, 1 }, { 2, 3 } };
    var first = PerseusFactory.CreateMatrixData(firstValues, new List<string> { "col 1", "col 2" });
    first.AddStringColumn("m1", "", new[] { "a", "b" });
    first.AddStringColumn("common string column", "", new[] { "c", "d" });
    first.AddNumericColumn("m1", "", new[] { 0, 1.0 });
    first.AddNumericColumn("common numeric column", "", new[] { 2, 3.0 });
    first.AddCategoryColumn("common category column", "", new[] { new[] { "cat1" }, new[] { "cat2", "cat3" } });

    // Second matrix: main columns "col 2"/"col 3" plus its annotations.
    double[,] secondValues = { { 4, 5 }, { 6, 7 } };
    var second = PerseusFactory.CreateMatrixData(secondValues, new List<string> { "col 2", "col 3" });
    second.AddStringColumn("common string column", "", new[] { "e", "f" });
    second.AddStringColumn("m2", "", new[] { "g", "h" });
    second.AddNumericColumn("common numeric column", "", new[] { 4, 5.0 });
    second.AddCategoryColumn("common category column", "", new[] { new[] { "cat2" }, new[] { "cat1", "cat4" } });

    var inputs = new[] { first, second };
    var plugin = new MatchingColumnsByName();
    var error = string.Empty;
    var pluginParameters = plugin.GetParameters(inputs, ref error);
    Assert.IsTrue(string.IsNullOrEmpty(error));

    IMatrixData[] supplTables = null;
    IDocumentData[] documents = null;
    var info = BaseTest.CreateProcessInfo();
    var combined = plugin.ProcessData(inputs, pluginParameters, ref supplTables, ref documents, info);
    Assert.IsTrue(string.IsNullOrEmpty(info.ErrString));

    // String columns: shared column first, one-sided columns padded with "".
    CollectionAssert.AreEqual(new[] { "common string column", "m1", "m2" }, combined.StringColumnNames);
    CollectionAssert.AreEqual(new[] { "c", "d", "e", "f" }, combined.StringColumns[0]);
    CollectionAssert.AreEqual(new[] { "a", "b", "", "" }, combined.StringColumns[1]);
    CollectionAssert.AreEqual(new[] { "", "", "g", "h" }, combined.StringColumns[2]);

    // Numeric columns: one-sided column padded with NaN.
    CollectionAssert.AreEqual(new[] { "common numeric column", "m1" }, combined.NumericColumnNames);
    CollectionAssert.AreEqual(new[] { 2, 3, 4, 5.0 }, combined.NumericColumns[0]);
    CollectionAssert.AreEqual(new[] { 0, 1, double.NaN, double.NaN }, combined.NumericColumns[1]);

    // Category column: compared row by row.
    CollectionAssert.AreEqual(new[] { "common category column" }, combined.CategoryColumnNames);
    var actualCategories = combined.GetCategoryColumnAt(0);
    string[][] expectedCategories = {
        new[] { "cat1" },
        new[] { "cat2", "cat3" },
        new[] { "cat2" },
        new[] { "cat1", "cat4" }
    };
    Assert.AreEqual(expectedCategories.Length, actualCategories.Length);
    for (int row = 0; row < actualCategories.Length; row++) {
        CollectionAssert.AreEqual(expectedCategories[row], actualCategories[row]);
    }
}
/// <summary>
/// Reads a table that consists only of a header line and a type line and expects zero rows.
/// </summary>
public void TestReadEmptyMatrixFromFile() {
    var headerOnly = Encoding.UTF8.GetBytes("Node\n#!{Type}T\n");
    var table = PerseusFactory.CreateDataWithAnnotationColumns();
    var info = BaseTest.CreateProcessInfo();
    PerseusUtils.ReadDataWithAnnotationColumns(table, info,
        () => new StreamReader(new MemoryStream(headerOnly)), "test", '\t');
    Assert.AreEqual(0, table.RowCount);
}
/// <summary>
/// Writing data whose multi-numeric and string columns contain a null entry must not throw.
/// The test has no assertion on the written content; completing the write is the check.
/// </summary>
public void TestWriteMultiNumericColumnWithNulls() {
    var data = PerseusFactory.CreateDataWithAnnotationColumns();
    // new double[1][] / new string[1] each hold a single null element.
    data.AddMultiNumericColumn("Test", "", new double[1][]);
    data.AddStringColumn("Test2", "", new string[1]);
    Assert.AreEqual(1, data.RowCount);
    // Dispose the stream and writer deterministically instead of leaving them to the finalizer.
    using (var memory = new MemoryStream())
    using (var writer = new StreamWriter(memory)) {
        PerseusUtils.WriteDataWithAnnotationColumns(data, writer);
    }
}
/// <summary>
/// Prepares the fixtures for the matching tests: a one-row peptide table (run through
/// <c>ExpandMultiNumeric</c> on text column 1), a two-row protein table, a hand-built
/// "expand" table, and the <c>MatchingRowsByName</c> parameters for (expand, proteinMain).
/// </summary>
public void TestInitialize() {
    // Peptide table with a single row referencing two protein groups.
    float[,] peptideMain = { { 9.0f } };
    peptides = PerseusFactory.CreateMatrixData(peptideMain, new List<string> { "pep_MS/MS Count" });
    peptides.AddNumericColumn("pep_Intensity", "", new[] { 0.0 });
    peptides.AddStringColumn("pep_id", "", new[] { "35" });
    peptides.AddStringColumn("pep_Protein group IDs", "", new[] { "13;21" });
    peptides.Quality.Init(1, 1);
    peptides.Quality.Set(0, 0, 1);

    // Expand the peptide table on text column 1.
    var expander = new ExpandMultiNumeric();
    var expandError = string.Empty;
    var expandParameters = expander.GetParameters(peptides, ref expandError);
    expandParameters.GetParam<int[]>("Text columns").Value = new[] { 1 };
    IMatrixData[] suppl = null;
    IDocumentData[] docs = null;
    expander.ProcessData(peptides, expandParameters, ref suppl, ref docs, CreateProcessInfo());

    // Protein table with one row per protein group.
    float[,] proteinValues = { { 166250000.0f }, { 8346000.0f } };
    proteinMain = PerseusFactory.CreateMatrixData(proteinValues, new List<string> { "prot_LFQ intensity" });
    proteinMain.Name = "protein main";
    proteinMain.AddStringColumn("prot_id", "", new[] { "13", "21" });
    proteinMain.AddStringColumn("prot_gene name", "", new[] { "geneA", "geneB" });

    // Hand-built table with one peptide row per protein group.
    float[,] expandedValues = { { 9.0f }, { 9.0f } };
    expand = PerseusFactory.CreateMatrixData(expandedValues, new List<string> { "pep_MS/MS Count" });
    expand.Name = "expand";
    expand.AddNumericColumn("pep_Intensity", "", new[] { 0.0, 0.0 });
    expand.AddStringColumn("pep_id", "", new[] { "35", "35" });
    expand.AddStringColumn("pep_Protein group IDs", "", new[] { "13", "21" });

    matching = new MatchingRowsByName();
    var matchError = string.Empty;
    parameters = matching.GetParameters(new[] { expand, proteinMain }, ref matchError);
}
/// <summary>
/// Runs <c>SummaryStatisticsRows</c> with default parameters on a 2x2 matrix that contains
/// only NaN and expects the matrix to stay consistent.
/// </summary>
public void TestSummaryStatisticsCanHandleRowWithOnlyNaNs() {
    double[,] allNaN = {
        { double.NaN, double.NaN },
        { double.NaN, double.NaN }
    };
    var matrix = PerseusFactory.CreateMatrixData(allNaN);

    var plugin = new SummaryStatisticsRows();
    var error = string.Empty;
    var pluginParameters = plugin.GetParameters(matrix, ref error);

    IMatrixData[] supplData = null;
    IDocumentData[] supplDocs = null;
    var info = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
    plugin.ProcessData(matrix, pluginParameters, ref supplData, ref supplDocs, info);

    bool isConsistent = matrix.IsConsistent(out var report);
    Assert.IsTrue(isConsistent, report);
}
/// <summary>
/// Builds three random networks (100 nodes, 150 random edges each), removes duplicate edges
/// with <c>UniqueRows</c> and checks that the edge and node tables stay in sync with the graph.
/// </summary>
public void TestNetworkUniqueRows() {
    // Fixed seed: the generated networks are identical on every run, so a failure can be reproduced.
    var randGen = new Random(42);
    const int numNetworks = 3;
    const int numNodes = 100;
    const int numEdges = 150;
    INetworkData ndata = new NetworkData();
    ndata.Name = "Random network(s)";
    ndata.Description = ndata.Name;
    for (int i = 0; i < numNetworks; i++) {
        var graph = new Graph();
        var nodeTable = PerseusFactory.CreateDataWithAnnotationColumns();
        var nodeIndex = new Dictionary<INode, int>();
        var edgeTable = PerseusFactory.CreateDataWithAnnotationColumns();
        var edgeIndex = new Dictionary<IEdge, int>();
        for (int j = 0; j < numNodes; j++) {
            nodeIndex[graph.AddNode()] = j;
        }
        var nodeNames = Enumerable.Range(0, graph.NumberOfNodes).Select(x => $"node {x}").ToArray();
        nodeTable.AddStringColumn("Node", "", nodeNames);
        var nodes = graph.ToArray();
        var sources = new List<string>();
        var targets = new List<string>();
        // Random endpoints: duplicate edges and self-loops can occur, which is what UniqueRows has to handle.
        for (int j = 0; j < numEdges; j++) {
            var source = nodes[randGen.Next(0, nodes.Length)];
            sources.Add(nodeNames[nodeIndex[source]]);
            var target = nodes[randGen.Next(0, nodes.Length)];
            targets.Add(nodeNames[nodeIndex[target]]);
            edgeIndex[graph.AddEdge(source, target)] = j;
        }
        edgeTable.AddStringColumn("Source", "", sources.ToArray());
        edgeTable.AddStringColumn("Target", "", targets.ToArray());
        var network = new NetworkInfo(graph, nodeTable, nodeIndex, edgeTable, edgeIndex, $"Random {i}");
        ndata.AddNetworks(network);
    }
    foreach (var network in ndata) {
        network.UniqueRows(network.EdgeTable.StringColumns[0], network.EdgeTable.StringColumns[1], network.NodeTable.StringColumns[0], ArrayUtils.Median, RemoveDuplicateEdges.Union, RemoveDuplicateEdges.CatUnion, RemoveDuplicateEdges.MultiNumUnion);
        Assert.True(network.EdgeTable.RowCount <= numEdges,
            $"edge table has {network.EdgeTable.RowCount} rows, expected at most {numEdges}");
        Assert.True(network.EdgeTable.RowCount == network.Graph.NumberOfEdges,
            $"edge table has {network.EdgeTable.RowCount} rows but graph has {network.Graph.NumberOfEdges} edges");
        Assert.True(network.NodeTable.RowCount == network.Graph.NumberOfNodes,
            $"node table has {network.NodeTable.RowCount} rows but graph has {network.Graph.NumberOfNodes} nodes");
    }
}
/// <summary>
/// Processes a matrix with an external script. The matrix is written to a temp file, the
/// interpreter is started with the arguments
/// <c>codeFile commandLineArguments inFile outFile suppFile...</c>, and afterwards
/// <paramref name="mdata"/> is cleared and refilled from the output file; each supplementary
/// file is read into the matching entry of <paramref name="supplTables"/>.
/// </summary>
/// <param name="mdata">Input matrix; replaced by the script output on success.</param>
/// <param name="param">Plugin parameters (interpreter, code file, extra arguments).</param>
/// <param name="supplTables">Supplementary tables; created here (NumSupplTables entries) when null.</param>
/// <param name="documents">Not used by this method.</param>
/// <param name="processInfo">Receives the error string when the interpreter is not specified, the code file is missing, or the process exits with a non-zero code.</param>
public void ProcessData(IMatrixData mdata, Parameters param, ref IMatrixData[] supplTables, ref IDocumentData[] documents, ProcessInfo processInfo) {
    var remoteExe = param.GetParam<string>(InterpreterLabel).Value;
    if (string.IsNullOrWhiteSpace(remoteExe)) {
        processInfo.ErrString = Resources.RemoteExeNotSpecified;
        return;
    }
    string inFile = null;
    string outFile = null;
    var suppFiles = new string[0];
    try {
        inFile = Path.GetTempFileName();
        PerseusUtils.WriteMatrixToFile(mdata, inFile, AdditionalMatrices);
        outFile = Path.GetTempFileName();
        if (!TryGetCodeFile(param, out string codeFile)) {
            processInfo.ErrString = $"Code file '{codeFile}' was not found";
            return;
        }
        if (supplTables == null) {
            supplTables = Enumerable.Range(0, NumSupplTables).Select(i => PerseusFactory.CreateMatrixData()).ToArray();
        }
        suppFiles = supplTables.Select(i => Path.GetTempFileName()).ToArray();
        var commandLineArguments = GetCommandLineArguments(param);
        var args = $"{codeFile} {commandLineArguments} {inFile} {outFile} {string.Join(" ", suppFiles)}";
        Debug.WriteLine($"executing > {remoteExe} {args}");
        if (Utils.RunProcess(remoteExe, args, processInfo.Status, out string processInfoErrString) != 0) {
            processInfo.ErrString = processInfoErrString;
            return;
        }
        // Only discard the input once the script has succeeded.
        mdata.Clear();
        PerseusUtils.ReadMatrixFromFile(mdata, processInfo, outFile, '\t');
        for (int i = 0; i < NumSupplTables; i++) {
            PerseusUtils.ReadMatrixFromFile(supplTables[i], processInfo, suppFiles[i], '\t');
        }
    } finally {
        // Every GetTempFileName call leaves a file on disk; remove them on all exit paths.
        // Cleanup is best-effort so that it can never hide the actual result or error.
        foreach (var tempFile in new[] { inFile, outFile }.Concat(suppFiles)) {
            if (tempFile == null) {
                continue;
            }
            try {
                File.Delete(tempFile);
            } catch (IOException e) {
                Debug.WriteLine($"could not delete temporary file '{tempFile}': {e.Message}");
            } catch (System.UnauthorizedAccessException e) {
                Debug.WriteLine($"could not delete temporary file '{tempFile}': {e.Message}");
            }
        }
    }
}
/// <summary>
/// An auxiliary method for testing the action of regular expressions.
/// Limited to a single column, which should be sufficient for this purpose.
/// Multiple rows are allowed to test the effect of one regex on several strings.
/// </summary>
/// <param name="regexStr">Regular expression handed to <c>ProcessTextColumns</c>.</param>
/// <param name="stringsInit">Initial content of the single string column, one entry per row.</param>
/// <param name="stringsExpect">Expected content after processing, aligned with <paramref name="stringsInit"/>.</param>
private static void TestRegex(string regexStr, string[] stringsInit, string[] stringsExpect) {
    const string name = "Test";
    IMatrixData[] supplTables = null;
    IDocumentData[] documents = null;
    List<string> stringColumnNames = new List<string> { "Column Name" };
    List<string[]> stringColumnsInit = new List<string[]> { stringsInit };
    List<string[]> stringColumnsExpect = new List<string[]> { stringsExpect };
    Parameters param = new Parameters(new Parameter[] {
        new MultiChoiceParam("Columns", new[] { 0 }) { Values = stringColumnNames },
        new StringParam("Regular expression", regexStr),
        new BoolParam("Keep original columns", false),
        new BoolParam("Strings separated by semicolons are independent", false)
    });
    IMatrixData mdata = PerseusFactory.CreateNewMatrixData();
    mdata.Clear();
    mdata.Name = name;
    mdata.SetAnnotationColumns(stringColumnNames, stringColumnsInit, mdata.CategoryColumnNames, new List<string[][]>(), mdata.NumericColumnNames, mdata.NumericColumns, mdata.MultiNumericColumnNames, mdata.MultiNumericColumns);
    var ptc = new ProcessTextColumns();
    ptc.ProcessData(mdata, param, ref supplTables, ref documents, null);
    const bool ignoreCase = false;
    for (int rowInd = 0; rowInd < stringColumnsInit[0].Length; rowInd++) {
        // Expected value first, processed value second, so failure messages are labelled correctly.
        Assert.AreEqual(stringColumnsExpect[0][rowInd], mdata.StringColumns[0][rowInd], ignoreCase);
    }
}
/// <summary>
/// Runs <c>ExpandSiteTable</c> on a 2x4 matrix with columns Col___1..Col___3 and "No expand"
/// and checks the resulting main columns, descriptions, row count and annotation columns.
/// </summary>
public void TestSmallExample() {
    float[,] mainValues = {
        { 0.0f, 1.0f, 0f, 5f },
        { 2.0f, 3.0f, 0f, 5f }
    };
    var columnNames = new List<string> { "Col___1", "Col___2", "Col___3", "No expand" };
    var matrix = PerseusFactory.CreateMatrixData(mainValues, columnNames);
    matrix.ColumnDescriptions = new List<string> { "Description Col", "Col", "Col", "Description No expand" };

    double[][] multiNumeric = { new[] { 0.0, 1.0 }, new[] { 2.0 } };
    matrix.AddMultiNumericColumn("MultiNum", "", multiNumeric);
    string[] rowLabels = { "row1", "row2" };
    matrix.AddStringColumn("String", "", rowLabels);

    var plugin = new ExpandSiteTable();
    IMatrixData[] supplData = null;
    IDocumentData[] docs = null;
    plugin.ProcessData(matrix, new Parameters(), ref supplData, ref docs, CreateProcessInfo());

    // Main columns: the three "Col___n" columns collapse into "Col".
    Assert.AreEqual(2, matrix.ColumnCount);
    CollectionAssert.AreEqual(new[] { "No expand", "Col" }, matrix.ColumnNames.ToArray());
    Assert.AreEqual(2, matrix.ColumnDescriptions.Count);
    CollectionAssert.AreEqual(new[] { "Description No expand", "Description Col" }, matrix.ColumnDescriptions.ToArray());

    // Rows: each of the two input rows appears three times.
    Assert.AreEqual(6, matrix.RowCount);
    Assert.AreEqual(2, matrix.StringColumnCount);
    CollectionAssert.AreEqual(new[] { "String", "Unique identifier" }, matrix.StringColumnNames);
    var tripledLabels = rowLabels.Concat(rowLabels).Concat(rowLabels).ToArray();
    CollectionAssert.AreEqual(tripledLabels, matrix.StringColumns[0]);
    Assert.AreEqual(1, matrix.MultiNumericColumnCount);
    var tripledMultiNumeric = multiNumeric.Concat(multiNumeric).Concat(multiNumeric).ToArray();
    CollectionAssert.AreEqual(tripledMultiNumeric, matrix.MultiNumericColumns[0]);
}
/// <summary>
/// Adds a numeric column "Test" and a string row to a random matrix, converts that numeric
/// column with <c>ChangeColumnType</c> (source type 1, column 0, target type 1) and expects
/// it to show up as the last main column with unchanged values.
/// </summary>
public void TestNumericToMainWithStringRow() {
    IMatrixData[] suppl = null;
    IDocumentData[] supplD = null;
    var info = new ProcessInfo(new Settings(), s => { }, i => { }, 1);
    var error = string.Empty;

    // Start from a random matrix.
    var generator = new CreateRandomMatrix();
    var parameters = generator.GetParameters(ref error);
    Assert.IsTrue(string.IsNullOrEmpty(error), error);
    var matrix = PerseusFactory.CreateMatrixData();
    generator.LoadData(matrix, parameters, ref suppl, ref supplD, info);

    // Numeric column holding the row index, plus a string row holding the column names.
    var rowNumbers = Enumerable.Range(0, matrix.RowCount).Select(i => (double)i).ToArray();
    matrix.AddNumericColumn("Test", "", rowNumbers);
    matrix.AddStringRow("TestRow", "", matrix.ColumnNames.ToArray());
    Assert.IsTrue(string.IsNullOrEmpty(info.ErrString), info.ErrString);
    bool consistentBefore = matrix.IsConsistent(out string reportBefore);
    Assert.IsTrue(consistentBefore, reportBefore);

    // Convert the numeric column.
    var converter = new ChangeColumnType();
    parameters = converter.GetParameters(matrix, ref error);
    Assert.IsTrue(string.IsNullOrEmpty(error), error);
    var sourceType = parameters.GetParamWithSubParams<int>("Source type");
    sourceType.Value = 1;
    var conversion = sourceType.GetSubParameters();
    conversion.GetParam<int[]>("Columns").Value = new[] { 0 };
    conversion.GetParam<int>("Target type").Value = 1;
    converter.ProcessData(matrix, parameters, ref suppl, ref supplD, info);
    Assert.IsTrue(string.IsNullOrEmpty(info.ErrString), info.ErrString);
    bool consistentAfter = matrix.IsConsistent(out var reportAfter);
    Assert.IsTrue(consistentAfter, reportAfter);

    Assert.AreEqual("Test", matrix.ColumnNames.Last());
    int lastColumn = matrix.ColumnCount - 1;
    CollectionAssert.AreEqual(rowNumbers, matrix.Values.GetColumn(lastColumn).ToArray());
}
/// <summary>
/// Matches a four-row matrix (n1..n4) onto a three-row base table (n1;n2, n3, n5) with
/// <c>MatchingRowsByName</c>, join style 1, indicator and original row numbers enabled, and
/// checks the indicator, the resulting ids and both multi-numeric columns.
/// </summary>
public void TestConvertNumericToMultiNumeric() {
    var baseTable = PerseusFactory.CreateMatrixData();
    baseTable.AddStringColumn("Id", "", new[] { "n1;n2", "n3", "n5" });
    bool baseOk = baseTable.IsConsistent(out var baseReport);
    Assert.IsTrue(baseOk, baseReport);

    double[,] otherValues = { { 0.0 }, { 1.0 }, { 2.0 }, { 3.0 } };
    var otherTable = PerseusFactory.CreateMatrixData(otherValues);
    otherTable.AddStringColumn("Id", "", new[] { "n1", "n2", "n3", "n4" });
    bool otherOk = otherTable.IsConsistent(out var otherReport);
    Assert.IsTrue(otherOk, otherReport);

    var plugin = new MatchingRowsByName();
    var error = string.Empty;
    var settings = plugin.GetParameters(new[] { baseTable, otherTable }, ref error);
    settings.GetParam<int[]>("Copy main columns").Value = new[] { 0 };
    settings.GetParam<int>("Combine copied main values").Value = 5;
    settings.GetParam<int>("Join style").Value = 1;
    settings.GetParam<bool>("Add indicator").Value = true;
    settings.GetParam<bool>("Add original row numbers").Value = true;

    IMatrixData[] supplTables = null;
    IDocumentData[] documents = null;
    var info = BaseTest.CreateProcessInfo();
    var matched = plugin.ProcessData(new[] { baseTable, otherTable }, settings, ref supplTables, ref documents, info);

    // A row without a category entry counts as "no match" and is rendered as "".
    var indicator = matched.GetCategoryColumnAt(0).Select(cats => cats.SingleOrDefault() ?? "");
    CollectionAssert.AreEqual(new[] { "+", "+", "", "+" }, indicator);
    CollectionAssert.AreEqual(new[] { "n1;n2", "n3", "n5", "n4" }, matched.GetStringColumn("Id"));
    CollectionAssert.AreEqual(new[] { "Original row numbers", "Column 1" }, matched.MultiNumericColumnNames);

    // Column 0: original row numbers.
    var rowNumbers = matched.MultiNumericColumns[0];
    CollectionAssert.AreEqual(new[] { 0.0, 1.0 }, rowNumbers[0]);
    CollectionAssert.AreEqual(new[] { 2.0 }, rowNumbers[1]);
    CollectionAssert.AreEqual(new double[0], rowNumbers[2]);
    CollectionAssert.AreEqual(new[] { 3.0 }, rowNumbers[3]);

    // Column 1: copied main values.
    var copiedValues = matched.MultiNumericColumns[1];
    CollectionAssert.AreEqual(new[] { 0.0, 1.0 }, copiedValues[0]);
    CollectionAssert.AreEqual(new[] { 2.0 }, copiedValues[1]);
    CollectionAssert.AreEqual(new double[0], copiedValues[2]);
    CollectionAssert.AreEqual(new[] { 3.0 }, copiedValues[3]);
}
/// <summary>
/// An auxiliary method for testing the action of regular expressions.
/// Limited to a single column, which should be sufficient for this purpose.
/// Multiple rows are allowed to test the effect of one regex on several strings.
/// </summary>
/// <param name="regexStr">Regular expression handed to <c>ProcessTextColumns</c>.</param>
/// <param name="stringsInit">Initial content of the single string column, one entry per row.</param>
/// <param name="stringsExpect">Expected content after processing, aligned with <paramref name="stringsInit"/>.</param>
private static void TestRegex(string regexStr, string[] stringsInit, string[] stringsExpect) {
    const string name = "Test";
    IMatrixData[] supplTables = null;
    IDocumentData[] documents = null;
    List<string> stringColumnNames = new List<string> { "Column Name" };
    List<string[]> stringColumnsInit = new List<string[]> { stringsInit };
    List<string[]> stringColumnsExpect = new List<string[]> { stringsExpect };
    ProcessTextColumns ptc = new ProcessTextColumns();
    IMatrixData mdata = PerseusFactory.CreateMatrixData();
    mdata.Clear();
    mdata.Name = name;
    mdata.SetAnnotationColumns(stringColumnNames, stringColumnsInit, mdata.CategoryColumnNames, new List<string[][]>(), mdata.NumericColumnNames, mdata.NumericColumns, mdata.MultiNumericColumnNames, mdata.MultiNumericColumns);
    string errorStr = string.Empty;
    Parameters param = ptc.GetParameters(mdata, ref errorStr);
    param.GetParam<int[]>("Columns").Value = new[] { 0 };
    param.GetParam<string>("Regular expression").Value = regexStr;
    param.GetParam<bool>("Keep original columns").Value = false;
    param.GetParam<bool>("Strings separated by semicolons are independent").Value = false;
    ptc.ProcessData(mdata, param, ref supplTables, ref documents, null);
    for (int rowInd = 0; rowInd < stringColumnsInit[0].Length; rowInd++) {
        // "expected" is what the caller asked for, "actual" is what processing produced;
        // keeping them straight makes the failure message point at the right value.
        string expected = stringColumnsExpect[0][rowInd];
        string actual = mdata.StringColumns[0][rowInd];
        StringAssert.AreEqualIgnoringCase(expected, actual);
    }
}
/// <summary>
/// With "Ignore case" enabled, the names "a" and "B" of the second table must mark
/// A, a, B and b of the base table as matched, while C and c stay unmatched.
/// </summary>
public void TestMatchingCaseInSensitive() {
    var baseTable = PerseusFactory.CreateMatrixData();
    baseTable.AddStringColumn("Name", "", new[] { "A", "a", "B", "b", "C", "c" });
    bool baseOk = baseTable.IsConsistent(out var baseReport);
    Assert.IsTrue(baseOk, baseReport);

    var otherTable = PerseusFactory.CreateMatrixData();
    otherTable.AddStringColumn("Name", "", new[] { "a", "B" });
    bool otherOk = otherTable.IsConsistent(out var otherReport);
    Assert.IsTrue(otherOk, otherReport);

    var plugin = new MatchingRowsByName();
    var error = string.Empty;
    var settings = plugin.GetParameters(new[] { baseTable, otherTable }, ref error);
    settings.GetParam<bool>("Add indicator").Value = true;
    settings.GetParam<bool>("Ignore case").Value = true;

    IMatrixData[] supplTables = null;
    IDocumentData[] documents = null;
    var info = BaseTest.CreateProcessInfo();
    var matched = plugin.ProcessData(new[] { baseTable, otherTable }, settings, ref supplTables, ref documents, info);

    // A row without a category entry counts as "no match" and is rendered as "".
    var indicator = matched.GetCategoryColumnAt(0)
        .Select(cats => cats.SingleOrDefault() ?? "")
        .ToArray();
    CollectionAssert.AreEqual(new[] { "+", "+", "+", "+", "", "" }, indicator);
}