/// <summary>
/// Trains a Scaler transform on a tiny in-memory dataset and checks the
/// pipeline serializes and reloads to an identical output.
/// </summary>
public void TestI_ScalerTransformSerialize()
{
    using (var host = EnvHelper.NewTestEnvironment())
    {
        var inputs = new[]
        {
            new ExampleA() { X = new float[] { 1, 10, 100 } },
            new ExampleA() { X = new float[] { 2, 3, 5 } }
        };
        IDataView loader = host.CreateStreamingDataView(inputs);
        var data = host.CreateTransform("Scaler{col=X}", loader);
        // Direct cast instead of 'as': if the transform is ever not trainable this
        // fails with an InvalidCastException at the cast site rather than a
        // misleading NullReferenceException on the Estimate() call.
        ((ITrainableTransform)data).Estimate();

        // We create a specific folder in build/UnitTest which will contain the output.
        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var outData = FileHelper.GetOutputFile("outData.txt", methodName);
        var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

        var nb = DataViewUtils.ComputeRowCount(data);
        if (nb < 1)
            throw new Exception("empty view");

        // This function serializes the output data twice, once before saving the
        // pipeline, once after loading the pipeline. It checks it gives the same result.
        TestTransformHelper.SerializationTestTransform(host, outModelFilePath, data, loader, outData, outData2);
    }
}
/// <summary>
/// Builds a Scaler + tag pipeline over two in-memory rows and verifies the
/// serialization round-trip produces identical output before and after reload.
/// </summary>
public void TestTagViewTransform()
{
    using (var host = EnvHelper.NewTestEnvironment())
    {
        // Two tiny rows are enough to exercise the pipeline.
        var rows = new[]
        {
            new ExampleA() { X = new float[] { 0, 1 } },
            new ExampleA() { X = new float[] { 2, 3 } }
        };
        IDataView loader = host.CreateStreamingDataView(rows);

        // Scale X into X1, then tag the resulting view in memory.
        var pipeline = host.CreateTransform("Scaler{col=X1:X}", loader);
        pipeline = host.CreateTransform("tag{t=memory}", pipeline);

        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var dumpBefore = FileHelper.GetOutputFile("outData.txt", methodName);
        var dumpAfter = FileHelper.GetOutputFile("outData2.txt", methodName);

        // Dumps the data twice (before saving and after reloading the model)
        // and checks both dumps match.
        TestTransformHelper.SerializationTestTransform(host, modelPath, pipeline, loader, dumpBefore, dumpAfter);
    }
}
/// <summary>
/// Applies a degree-3 polynomial expansion to a small in-memory dataset and
/// verifies the transform survives a save/reload round-trip unchanged.
/// </summary>
public void TestI_PolynomialTransformSerialize()
{
    using (var host = EnvHelper.NewTestEnvironment())
    {
        var rows = new[]
        {
            new ExampleA() { X = new float[] { 1, 10, 100 } },
            new ExampleA() { X = new float[] { 2, 3, 5 } }
        };
        IDataView loader = host.CreateStreamingDataView(rows);
        var pipeline = host.CreateTransform("poly{col=poly:X d=3}", loader);

        // Output goes to a dedicated folder in build/UnitTest named after this method.
        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var dumpBefore = FileHelper.GetOutputFile("outData.txt", methodName);
        var dumpAfter = FileHelper.GetOutputFile("outData2.txt", methodName);

        // Serializes the output twice — once before saving the pipeline, once
        // after reloading it — and checks both results are identical.
        TestTransformHelper.SerializationTestTransform(host, modelPath, pipeline, loader, dumpBefore, dumpAfter);
    }
}
/// <summary>
/// Chains a Scaler and a Poly transform through ChainTrans and verifies the
/// composite pipeline serializes and reloads to the same output.
/// </summary>
public void TestChainTransformSerialize()
{
    using (var host = EnvHelper.NewTestEnvironment())
    {
        var rows = new[]
        {
            new ExampleA() { X = new float[] { 1, 10, 100 } },
            new ExampleA() { X = new float[] { 2, 3, 5 } }
        };
        IDataView loader = host.CreateStreamingDataView(rows);

        // First scale X into X4, then chain scaler (X -> X2) and poly (X2 -> X3).
        IDataTransform pipeline = host.CreateTransform("Scaler{col=X4:X}", loader);
        pipeline = host.CreateTransform("ChainTrans{ xf1=Scaler{col=X2:X} xf2=Poly{col=X3:X2} }", pipeline);

        // Output goes to a dedicated folder in build/UnitTest named after this method.
        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var dumpBefore = FileHelper.GetOutputFile("outData.txt", methodName);
        var dumpAfter = FileHelper.GetOutputFile("outData2.txt", methodName);

        TestTransformHelper.SerializationTestTransform(host, modelPath, pipeline, loader, dumpBefore, dumpAfter);
    }
}
/// <summary>
/// Runs the DBScan transform over a 2D three-class dataset, checks the output
/// schema and clustering result, then verifies the serialization round-trip.
/// </summary>
public void TestDBScanTransform()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var dataFilePath = FileHelper.GetTestFile("three_classes_2d.txt");
    var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);

    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var loader = TextLoader.Create(env,
            new TextLoader.Arguments()
            {
                HasHeader = true,
                Column = new[]
                {
                    TextLoader.Column.Parse("RowId:R4:0"),
                    TextLoader.Column.Parse("Features:R4:1-2")
                }
            },
            new MultiFileSource(dataFilePath));

        var xf = env.CreateTransform("DBScan{col=Features}", loader);

        string schema = SchemaHelper.ToString(xf.Schema);
        if (string.IsNullOrEmpty(schema))
            throw new Exception("Schema is null.");
        if (!schema.Contains("Cluster"))
            throw new Exception("Schema does not contain Cluster.");
        if (!schema.Contains("Score"))
            throw new Exception("Schema does not contain Score.");

        StreamHelper.SaveModel(env, xf, outModelFilePath);

        var saver = env.CreateSaver("Text{header=- schema=-}");
        using (var fs2 = File.Create(outputDataFilePath))
            saver.SaveData(fs2, TestTransformHelper.AddFlatteningTransform(env, xf),
                           StreamHelper.GetColumnsIndex(xf.Schema, new[] { "Features", "ClusterId", "Score" }));

        // Checking the values. Rows are kept only when they split into exactly
        // 4 tab-separated fields. Materialized once (ToList) so the file is not
        // re-read on each LINQ enumeration below.
        var lines = File.ReadAllLines(outputDataFilePath)
                        .Select(c => c.Split('\t'))
                        .Where(c => c.Length == 4)
                        .ToList();
        if (!lines.Any())
            // Fix: the previous message said "three columns" but the filter above requires four fields.
            throw new Exception(string.Format("The output file is empty or does not contain four tab-separated columns '{0}'", outputDataFilePath));

        // NOTE(review): with Features flattened into two fields, c[1] may be the
        // second Features component rather than ClusterId — verify column order.
        var clusters = lines.Select(c => c[1]).Distinct();
        if (clusters.Count() <= 1)
            throw new Exception("Only one cluster, this is unexpected.");

        // Serialization round-trip: dump before saving, dump after reloading, compare.
        var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
        var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);
        TestTransformHelper.SerializationTestTransform(env, outModelFilePath, xf, loader, outData, outData2);
    }
}
/// <summary>
/// Builds a train/test split pipeline on the iris dataset, trains a multi-class
/// logistic regression on the train tag, scores the test tag, and checks the
/// schema, row count, dumped output size and serialization round-trip.
/// </summary>
public void TrainTestPipelinePredictTransform()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var dataFilePath = FileHelper.GetTestFile("mc_iris.txt");
    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
    var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var loader = env.CreateLoader(
            "Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 header=+}",
            new MultiFileSource(dataFilePath));

        var pipe = env.CreateTransform("Concat{col=Features:Slength,Swidth}", loader);
        // Split into tagged train/test views, train on 'train', score 'test'.
        pipe = env.CreateTransform("SplitTrainTest{col=base tag=train tag=test}", pipe);
        pipe = env.CreateTransform("SelectTag{tag=unused selectTag=train}", pipe);
        pipe = env.CreateTransform(string.Format("TagTrainScore{{tag=trainP out={0} tr=mlr}}", outModelFilePath), pipe);
        pipe = env.CreateTransform("SelectTag{tag=scoredTrain selectTag=test}", pipe);
        pipe = env.CreateTransform("TagPredict{in=trainP}", pipe);

        // The schema seen through a cursor must match the pipeline schema.
        string schema = SchemaHelper.ToString(pipe.Schema);
        var cursor = pipe.GetRowCursor(i => true);
        string schema2 = SchemaHelper.ToString(cursor.Schema);
        if (schema != schema2)
            throw new Exception("Schema mismatch.");

        long count = DataViewUtils.ComputeRowCount(pipe);
        if (count != 49)
            throw new Exception(string.Format("Unexpected number of rows {0}", count));

        // Checks the outputs.
        // Fix: removed the 'columns' array that was built here but never used.
        var saver = env.CreateSaver("Text");
        using (var fs2 = File.Create(outData))
            saver.SaveData(fs2, pipe, StreamHelper.GetColumnsIndex(pipe.Schema));

        var lines = File.ReadAllLines(outData);
        if (lines.Length < 40)
            throw new Exception("Something is missing:" + string.Join("\n", lines));
        if (lines.Length > 70)
            throw new Exception("Too much data:" + string.Join("\n", lines));

        TestTransformHelper.SerializationTestTransform(env, outModelFilePath, pipe, loader, outData, outData2);
    }
}
/// <summary>
/// Runs the OPTICS ordering transform over a 2D three-class dataset, checks
/// the output schema and values, then verifies the serialization round-trip.
/// </summary>
public void TestOpticsOrderingTransform()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var dataFilePath = FileHelper.GetTestFile("three_classes_2d.txt");
    var outputDataFilePath = FileHelper.GetOutputFile("outputDataFilePath.txt", methodName);
    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);

    // Fix: the 'using' was commented out (/*using (*/), leaking the environment.
    using (var env = EnvHelper.NewTestEnvironment())
    {
        var loader = env.CreateLoader(
            "text{col=DataViewRowId:I4:0 col=Features:R4:1-2 header=+}",
            new MultiFileSource(dataFilePath));
        var xf = env.CreateTransform("OpticsOrd{col=Features epsilon=0.3 minPoints=6}", loader);

        string schema = SchemaHelper.ToString(xf.Schema);
        if (string.IsNullOrEmpty(schema))
            throw new Exception("DataViewSchema is null.");
        if (!schema.Contains("Ordering"))
            throw new Exception("DataViewSchema does not contain Ordering.");
        if (!schema.Contains("Reachability"))
            throw new Exception("DataViewSchema does not contain Reachability.");

        StreamHelper.SaveModel(env, xf, outModelFilePath);

        var saver = env.CreateSaver("Text{header=- schema=-}");
        using (var fs2 = File.Create(outputDataFilePath))
        {
            saver.SaveData(fs2, TestTransformHelper.AddFlatteningTransform(env, xf),
                           StreamHelper.GetColumnsIndex(xf.Schema, new[] { "Features", "Ordering", "Reachability" }));
        }

        // Checking the values. Rows are kept only when they split into exactly
        // 4 tab-separated fields. Materialized once (ToList) so the file is not
        // re-read on each LINQ enumeration below.
        var lines = File.ReadAllLines(outputDataFilePath)
                        .Select(c => c.Split('\t'))
                        .Where(c => c.Length == 4)
                        .ToList();
        if (!lines.Any())
            // Fix: the previous message said "three columns" but the filter above requires four fields.
            throw new Exception(string.Format("The output file is empty or does not contain four tab-separated columns '{0}'", outputDataFilePath));

        // NOTE(review): c[1] looks like a flattened Features component here, not a
        // cluster id — confirm which column this distinctness check is meant to cover.
        var clusters = lines.Select(c => c[1]).Distinct();
        if (clusters.Count() <= 1)
            throw new Exception("Only one cluster, this is unexpected.");

        // Serialization round-trip: dump before saving, dump after reloading, compare.
        var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
        var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);
        TestTransformHelper.SerializationTestTransform(env, outModelFilePath, xf, loader, outData, outData2);
    }
}
/// <summary>
/// Trains a detrend transform over a small time series and verifies the
/// serialization round-trip produces identical output.
/// </summary>
public void TestTimeSeriesDeTrendSerialize()
{
    using (var host = EnvHelper.NewTestEnvironment())
    {
        // Five points of a short series with a mild trend.
        var series = new[]
        {
            new InputOutput() { X = 7f, time = 0f },
            new InputOutput() { X = 7f, time = 1f },
            new InputOutput() { X = 9f, time = 2f },
            new InputOutput() { X = 9f, time = 3f },
            new InputOutput() { X = 8f, time = 4f },
        };
        IDataView loader = host.CreateStreamingDataView(series);
        var pipeline = host.CreateTransform("detrend{col=Y:X time=time optim=sasdcar{iter=50}}", loader);

        // Opening a cursor forces the transform to train.
        using (var cursor = pipeline.GetRowCursor(i => true))
        {
        }

        var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
        var modelPath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
        var dumpBefore = FileHelper.GetOutputFile("outData.txt", methodName);
        var dumpAfter = FileHelper.GetOutputFile("outData2.txt", methodName);

        TestTransformHelper.SerializationTestTransform(host, modelPath, pipeline, loader, dumpBefore, dumpAfter);
    }
}
/// <summary>
/// Checks that DumpView writes its side-output file while the surrounding
/// pipeline (concat + scaler) still serializes and reloads correctly.
/// </summary>
public void TestI_PassThroughTransform()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var dataFilePath = FileHelper.GetTestFile("iris.txt");
    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
    var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);
    var tempFile = FileHelper.GetOutputFile("dump.idv", methodName);

    using (var env = EnvHelper.NewTestEnvironment(conc: 1))
    {
        var loader = env.CreateLoader(
            "Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 col=Uid:TX:5 header=+}",
            new MultiFileSource(dataFilePath));
        var xf1 = env.CreateTransform("Concat{col=Feat:Slength,Swidth}", loader);
        var xf2 = env.CreateTransform("Scaler{col=Feat}", xf1);
        // DumpView writes the intermediate data to tempFile as a side effect.
        var xf3 = env.CreateTransform(string.Format("DumpView{{s=+ f={0}}}", tempFile), xf2);

        TestTransformHelper.SerializationTestTransform(env, outModelFilePath, xf3, loader, outData, outData2, false);

        if (!File.Exists(tempFile))
            // Fix: use the (message, fileName) overload — previously the path was
            // passed as the exception *message*, leaving FileName unset.
            throw new FileNotFoundException("DumpView did not create the expected dump file.", tempFile);
    }
}
/// <summary>
/// Trains a k-nearest-neighbors transform on the iris binary dataset with ids,
/// checks the returned distances are sorted and the neighbor ids are odd, then
/// verifies the serialization round-trip.
/// </summary>
/// <param name="k">Number of neighbors to request from the knn transform.</param>
/// <param name="weight">Neighbor weighting scheme (used in the output folder name).</param>
/// <param name="threads">Thread count (currently only used in the output folder name —
/// NOTE(review): confirm whether it should also configure the environment or the transform).</param>
/// <param name="distance">Distance name; "cosine" additionally scales the features.
/// NOTE(review): the distance is not forwarded to knntr — verify whether it should be.</param>
public static void TrainkNNTransformId(int k, NearestNeighborsWeights weight, int threads, string distance = "L2")
{
    var methodName = string.Format("{0}-k{1}-W{2}-T{3}-D{4}",
        System.Reflection.MethodBase.GetCurrentMethod().Name, k, weight, threads, distance);
    var dataFilePath = FileHelper.GetTestFile("iris_binary_id.txt");
    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var outData = FileHelper.GetOutputFile("outData1.txt", methodName);
    var outData2 = FileHelper.GetOutputFile("outData2.txt", methodName);

    var env = k == 1 ? EnvHelper.NewTestEnvironment(conc: 1) : EnvHelper.NewTestEnvironment();
    using (env)
    {
        var loader = env.CreateLoader(
            "Text{col=Label:R4:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 col=Uid:I8:5 header=+}",
            new MultiFileSource(dataFilePath));
        var concat = env.CreateTransform("Concat{col=Features:Slength,Swidth}", loader);
        if (distance == "cosine")
            concat = env.CreateTransform("Scaler{col=Features}", concat);
        // Fix: the 'k' parameter was ignored — the transform was hard-coded to k=5.
        concat = env.CreateTransform(string.Format("knntr{{k={0} id=Uid}}", k), concat);

        long nb = DataViewUtils.ComputeRowCount(concat);
        if (nb == 0)
            throw new System.Exception("Empty pipeline.");

        using (var cursor = concat.GetRowCursor(i => true))
        {
            var getdist = cursor.GetGetter<VBuffer<float>>(7);
            var getid = cursor.GetGetter<VBuffer<long>>(8);
            var ddist = new VBuffer<float>();
            var did = new VBuffer<long>();
            while (cursor.MoveNext())
            {
                getdist(ref ddist);
                getid(ref did);
                if (!ddist.IsDense || !did.IsDense)
                    throw new System.Exception("not dense");
                if (ddist.Count != did.Count)
                    throw new System.Exception("not the same dimension");
                for (int i = 1; i < ddist.Count; ++i)
                {
                    // Distances must come back sorted ascending.
                    if (ddist.Values[i - 1] > ddist.Values[i])
                        throw new System.Exception("not sorted");
                    // The dataset only contains odd ids.
                    if (did.Values[i] % 2 != 1)
                        throw new System.Exception("wrong id");
                }
            }
        }
        TestTransformHelper.SerializationTestTransform(env, outModelFilePath, concat, loader, outData, outData2, false);
    }
}