Esempio n. 1
0
        /// <summary>
        /// Registers the <c>graph</c> sub-command on <paramref name="cmdApp"/>.
        /// </summary>
        /// <remarks>
        /// When executed, the command reads all of standard input, deserializes it as a JSON
        /// <c>SourceUnit</c>, builds a <c>GraphContext</c> rooted at the current working directory,
        /// awaits <c>GraphRunner.Graph</c> and prints the result to standard output as indented JSON.
        /// The command's exit code is 0 on success, or 1 when standard input did not deserialize
        /// to a source unit.
        /// </remarks>
        /// <param name="cmdApp">Application the <c>graph</c> command is added to.</param>
        /// <param name="appEnvironment">Stored on the graph context as <c>HostEnvironment</c>.</param>
        /// <param name="loadContextAccessor">Stored on the graph context as <c>LoadContextAccessor</c>.</param>
        /// <param name="runtimeEnvironment">Stored on the graph context as <c>RuntimeEnvironment</c>.</param>
        public static void Register(CommandLineApplication cmdApp, Microsoft.Extensions.PlatformAbstractions.IApplicationEnvironment appEnvironment, Microsoft.Extensions.PlatformAbstractions.IAssemblyLoadContextAccessor loadContextAccessor, Microsoft.Extensions.PlatformAbstractions.IRuntimeEnvironment runtimeEnvironment)
        {
            cmdApp.Command("graph", (Action <CommandLineApplication>)(c => {
                c.Description = "Perform parsing, static analysis, semantic analysis, and type inference";

                c.HelpOption("-?|-h|--help");

                c.OnExecute((Func <System.Threading.Tasks.Task <int> >)(async() => {
                    var jsonIn     = await Console.In.ReadToEndAsync();
                    var sourceUnit = JsonConvert.DeserializeObject <SourceUnit>(jsonIn);

                    // DeserializeObject returns null for empty input or a literal JSON null;
                    // fail with a clear message instead of dereferencing null below.
                    if (sourceUnit == null)
                    {
                        Console.Error.WriteLine("graph: expected a JSON source unit on standard input.");
                        return(1);
                    }

                    // The project directory is resolved relative to the current working directory.
                    var root    = Directory.GetCurrentDirectory();
                    var dir     = Path.Combine(root, sourceUnit.Dir);
                    var context = new GraphContext
                    {
                        RootPath            = root,
                        SourceUnit          = sourceUnit,
                        ProjectDirectory    = dir,
                        HostEnvironment     = appEnvironment,
                        LoadContextAccessor = loadContextAccessor,
                        RuntimeEnvironment  = runtimeEnvironment
                    };

                    var result = await GraphRunner.Graph(context);

                    Console.WriteLine(JsonConvert.SerializeObject(result, Formatting.Indented));
                    return(0);
                }));
            }));
        }
        /// <summary>
        /// Program entry point. Constructs one runner per exercise category and executes
        /// only the Codility runner.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // These runners are constructed (in this order) but never run.
            // To execute one of them, call Run() on the corresponding element.
            IRunner[] idleRunners =
            {
                new LinkedListRunner(),
                new BTRunner(),
                new StackRunner(),
                new QueueRunner(),
                new ArrayRunner(),
                new StringRunner(),
                new HeapRunner(),
                new UnionFindRunner(),
                new GraphRunner(),
                new DPRunner()
            };

            // The only runner that is actually executed.
            IRunner activeRunner = new CodilityRunner();
            activeRunner.Run();
        }
Esempio n. 3
0
        /// <summary>
        /// Runs a single-node <c>Data.TextLoader</c> graph with <c>TrimWhitespace</c> enabled over
        /// <c>TrimData.csv</c> and checks that exactly two rows come back with the expected
        /// ID and Text values.
        /// </summary>
        public void CanSuccessfullyTrimSpaces()
        {
            string dataPath   = GetDataPath("TrimData.csv");
            string inputGraph = @"{
                'Nodes':
                [{
                        'Name': 'Data.TextLoader',
                        'Inputs': {
                            'InputFile': '$inputFile',
                            'Arguments': {
                                'UseThreads': true,
                                'HeaderFile': null,
                                'MaxRows': null,
                                'AllowQuoting': false,
                                'AllowSparse': false,
                                'InputSize': null,
                                'Separator': [
                                    ','
                                ],
                                'Column': [{
                                        'Name': 'ID',
                                        'Type': 'R4',
                                        'Source': [{
                                                'Min': 0,
                                                'Max': 0,
                                                'AutoEnd': false,
                                                'VariableEnd': false,
                                                'AllOther': false,
                                                'ForceVector': false
                                            }
                                        ],
                                        'KeyCount': null
                                    }, {
                                        'Name': 'Text',
                                        'Type': 'TX',
                                        'Source': [{
                                                'Min': 1,
                                                'Max': 1,
                                                'AutoEnd': false,
                                                'VariableEnd': false,
                                                'AllOther': false,
                                                'ForceVector': false
                                            }
                                        ],
                                        'KeyCount': null
                                    }
                                ],
                                'TrimWhitespace': true,
                                'HasHeader': true
                            }
                        },
                        'Outputs': {
                            'Data': '$data'
                        }
                    }
                ]
            }";

            // Build the runner from the graph's node list and bind the input file variable.
            var nodes       = JObject.Parse(inputGraph)[FieldNames.Nodes] as JArray;
            var graphRunner = new GraphRunner(_env, nodes);
            var fileHandle  = new SimpleFileHandle(_env, dataPath, false, false);

            graphRunner.SetInput("inputFile", fileHandle);
            graphRunner.RunAll();

            var loaded = graphRunner.GetOutput<IDataView>("data");
            Assert.NotNull(loaded);

            // Expected content, one entry per row, in row order.
            float[]  expectedIds   = { 1, 2 };
            string[] expectedTexts =
            {
                "There is a space at the end",
                "There is no space at the end"
            };

            using (var rows = loaded.GetRowCursorForAllColumns())
            {
                var readId   = rows.GetGetter<float>(rows.Schema[0]);
                var readText = rows.GetGetter<ReadOnlyMemory<char>>(rows.Schema[1]);

                for (int row = 0; row < expectedIds.Length; row++)
                {
                    Assert.True(rows.MoveNext());

                    float id = 0;
                    readId(ref id);
                    Assert.Equal(expectedIds[row], id);

                    var text = new ReadOnlyMemory<char>();
                    readText(ref text);
                    Assert.Equal(expectedTexts[row], text.ToString());
                }

                // No further rows are expected after the listed ones.
                Assert.False(rows.MoveNext());
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Runs a single-node <c>Data.TextLoader</c> graph with <c>AllowSparse</c> enabled over
        /// <c>SparseData.txt</c> and checks that exactly three rows of five float columns
        /// come back with the expected values.
        /// </summary>
        public void CanSuccessfullyRetrieveSparseData()
        {
            string dataPath   = GetDataPath("SparseData.txt");
            string inputGraph = @"
            {
                'Nodes':
                [{
                        'Name': 'Data.TextLoader',
                        'Inputs': {
                            'InputFile': '$inputFile',
                            'Arguments': {
                                'UseThreads': true,
                                'HeaderFile': null,
                                'MaxRows': null,
                                'AllowQuoting': false,
                                'AllowSparse': true,
                                'InputSize': null,
                                'Separator': [
                                    '\t'
                                ],
                                'Column': [{
                                        'Name': 'C1',
                                        'Type': 'R4',
                                        'Source': [{
                                                'Min': 0,
                                                'Max': 0,
                                                'AutoEnd': false,
                                                'VariableEnd': false,
                                                'AllOther': false,
                                                'ForceVector': false
                                            }
                                        ],
                                        'KeyCount': null
                                    }, {
                                        'Name': 'C2',
                                        'Type': 'R4',
                                        'Source': [{
                                                'Min': 1,
                                                'Max': 1,
                                                'AutoEnd': false,
                                                'VariableEnd': false,
                                                'AllOther': false,
                                                'ForceVector': false
                                            }
                                        ],
                                        'KeyCount': null
                                    }, {
                                        'Name': 'C3',
                                        'Type': 'R4',
                                        'Source': [{
                                                'Min': 2,
                                                'Max': 2,
                                                'AutoEnd': false,
                                                'VariableEnd': false,
                                                'AllOther': false,
                                                'ForceVector': false
                                            }
                                        ],
                                        'KeyCount': null
                                    }, {
                                        'Name': 'C4',
                                        'Type': 'R4',
                                        'Source': [{
                                                'Min': 3,
                                                'Max': 3,
                                                'AutoEnd': false,
                                                'VariableEnd': false,
                                                'AllOther': false,
                                                'ForceVector': false
                                            }
                                        ],
                                        'KeyCount': null
                                    }, {
                                        'Name': 'C5',
                                        'Type': 'R4',
                                        'Source': [{
                                                'Min': 4,
                                                'Max': 4,
                                                'AutoEnd': false,
                                                'VariableEnd': false,
                                                'AllOther': false,
                                                'ForceVector': false
                                            }
                                        ],
                                        'KeyCount': null
                                    }
                                ],
                                'TrimWhitespace': false,
                                'HasHeader': true
                            }
                        },
                        'Outputs': {
                            'Data': '$data'
                        }
                    }
                ]
            }";

            // Build the runner from the graph's node list and bind the input file variable.
            var nodes       = JObject.Parse(inputGraph)[FieldNames.Nodes] as JArray;
            var graphRunner = new GraphRunner(_env, nodes);
            var fileHandle  = new SimpleFileHandle(_env, dataPath, false, false);

            graphRunner.SetInput("inputFile", fileHandle);
            graphRunner.RunAll();

            var loaded = graphRunner.GetOutput<IDataView>("data");
            Assert.NotNull(loaded);

            // Expected values, one inner array per row, one element per column C1..C5.
            float[][] expectedRows =
            {
                new float[] { 1, 2, 3, 4, 5 },
                new float[] { 0, 0, 0, 4, 5 },
                new float[] { 0, 2, 0, 0, 0 }
            };

            using (var rows = loaded.GetRowCursorForAllColumns())
            {
                // One float getter per schema column, indexed by column position.
                var columnGetters = new ValueGetter<float>[5];
                for (int col = 0; col < columnGetters.Length; col++)
                {
                    columnGetters[col] = rows.GetGetter<float>(rows.Schema[col]);
                }

                foreach (float[] expected in expectedRows)
                {
                    Assert.True(rows.MoveNext());

                    for (int col = 0; col < columnGetters.Length; col++)
                    {
                        float actual = 0;
                        columnGetters[col](ref actual);
                        Assert.Equal(expected[col], actual);
                    }
                }

                // No further rows are expected after the listed ones.
                Assert.False(rows.MoveNext());
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Runs a graph containing a single <c>Data.TextLoader</c> node against a file handle for
        /// <c>fakeFile.txt</c> and asserts that the <c>data</c> output is not null.
        /// </summary>
        /// <remarks>
        /// The output is only checked for null; no rows are read from it.
        /// </remarks>
        public void CanSuccessfullyApplyATransform()
        {
            string inputGraph = @"
            {
                'Nodes':
                [{
                        'Name': 'Data.TextLoader',
                        'Inputs': {
                            'InputFile': '$inputFile',
                            'Arguments': {
                                'UseThreads': true,
                                'HeaderFile': null,
                                'MaxRows': null,
                                'AllowQuoting': true,
                                'AllowSparse': true,
                                'InputSize': null,
                                'Separator': [
                                    '\t'
                                ],
                                'Column': [{
                                        'Name': 'String1',
                                        'Type': 'TX',
                                        'Source': [{
                                                'Min': 0,
                                                'Max': 0,
                                                'AutoEnd': false,
                                                'VariableEnd': false,
                                                'AllOther': false,
                                                'ForceVector': false
                                            }
                                        ],
                                        'KeyCount': null
                                    }, {
                                        'Name': 'Number1',
                                        'Type': 'R4',
                                        'Source': [{
                                                'Min': 1,
                                                'Max': 1,
                                                'AutoEnd': false,
                                                'VariableEnd': false,
                                                'AllOther': false,
                                                'ForceVector': false
                                            }
                                        ],
                                        'KeyCount': null
                                    }
                                ],
                                'TrimWhitespace': false,
                                'HasHeader': false
                            }
                        },
                        'Outputs': {
                            'Data': '$data'
                        }
                    }
                ]
            }";

            // Build the runner from the graph's node list and bind the input file variable.
            var nodes       = JObject.Parse(inputGraph)[FieldNames.Nodes] as JArray;
            var graphRunner = new GraphRunner(_env, nodes);
            var fileHandle  = new SimpleFileHandle(_env, "fakeFile.txt", false, false);

            graphRunner.SetInput("inputFile", fileHandle);
            graphRunner.RunAll();

            var loaded = graphRunner.GetOutput<IDataView>("data");
            Assert.NotNull(loaded);
        }
Esempio n. 6
0
        /// <summary>
        /// Runs a single-node <c>Data.TextLoader</c> graph with <c>AllowQuoting</c> enabled over
        /// <c>QuotingData.csv</c> and checks that exactly three rows come back with the expected
        /// ID and Text values.
        /// </summary>
        public void CanSuccessfullyRetrieveQuotedData()
        {
            string dataPath   = GetDataPath("QuotingData.csv");
            string inputGraph = @"
            {  
               'Nodes':[  
                  {  
                     'Name':'Data.TextLoader',
                     'Inputs':{  
                        'InputFile':'$inputFile',
                        'Arguments':{  
                           'UseThreads':true,
                           'HeaderFile':null,
                           'MaxRows':null,
                           'AllowQuoting':true,
                           'AllowSparse':false,
                           'InputSize':null,
                           'Separator':[  
                              ','
                           ],
                           'Column':[  
                              {  
                                 'Name':'ID',
                                 'Type':'R4',
                                 'Source':[  
                                    {  
                                       'Min':0,
                                       'Max':0,
                                       'AutoEnd':false,
                                       'VariableEnd':false,
                                       'AllOther':false,
                                       'ForceVector':false
                                    }
                                 ],
                                 'KeyCount':null
                              },
                              {  
                                 'Name':'Text',
                                 'Type':'TX',
                                 'Source':[  
                                    {  
                                       'Min':1,
                                       'Max':1,
                                       'AutoEnd':false,
                                       'VariableEnd':false,
                                       'AllOther':false,
                                       'ForceVector':false
                                    }
                                 ],
                                 'KeyCount':null
                              }
                           ],
                           'TrimWhitespace':false,
                           'HasHeader':true
                        }
                     },
                     'Outputs':{  
                        'Data':'$data'
                     }
                  }
               ]
            }";

            // Build the runner from the graph's node list and bind the input file variable.
            var nodes       = JObject.Parse(inputGraph)[FieldNames.Nodes] as JArray;
            var graphRunner = new GraphRunner(env, nodes);
            var fileHandle  = new SimpleFileHandle(env, dataPath, false, false);

            graphRunner.SetInput("inputFile", fileHandle);
            graphRunner.RunAll();

            var loaded = graphRunner.GetOutput<IDataView>("data");
            Assert.NotNull(loaded);

            // Expected content, one entry per row, in row order.
            float[]  expectedIds   = { 1, 2, 3 };
            string[] expectedTexts =
            {
                "This text contains comma, within quotes.",
                "This text contains extra punctuations and special characters.;*<>?!@#$%^&*()_+=-{}|[]:;'",
                "This text has no quotes"
            };

            using (var rows = loaded.GetRowCursorForAllColumns())
            {
                // Getters are looked up by column index: 0 is ID, 1 is Text.
                var readId   = rows.GetGetter<float>(0);
                var readText = rows.GetGetter<ReadOnlyMemory<char>>(1);

                for (int row = 0; row < expectedIds.Length; row++)
                {
                    Assert.True(rows.MoveNext());

                    float id = 0;
                    readId(ref id);
                    Assert.Equal(expectedIds[row], id);

                    var text = new ReadOnlyMemory<char>();
                    readText(ref text);
                    Assert.Equal(expectedTexts[row], text.ToString());
                }

                // No further rows are expected after the listed ones.
                Assert.False(rows.MoveNext());
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Runs a <c>Models.PipelineSweeper</c> graph that lists two requested learners and checks
        /// that the resulting AutoML state's search space contains exactly those learners.
        /// </summary>
        /// <remarks>
        /// Training and testing data are the first 100 rows of <c>adult.train</c> and
        /// <c>adult.test</c>, loaded with a custom schema.
        /// </remarks>
        public void PipelineSweeperRequestedLearners()
        {
            const int    sampleRowCount = 100;
            const string loaderSchema   =
                "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " +
                "col=relationship:TX:7 col=race:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+";

            // Must match the 'RequestedLearners' list in the graph below.
            string[] expectedLearners = { "LogisticRegressionBinaryClassifier", "FastTreeBinaryClassifier" };

            // Get datasets
            var trainPath = GetDataPath("adult.train");
            var testPath  = GetDataPath("adult.test");
            var trainFile = new SimpleFileHandle(Env, trainPath, false, false);

#pragma warning disable 0618
            var trainInput = new ImportTextData.Input { InputFile = trainFile, CustomSchema = loaderSchema };
            var trainData  = ImportTextData.ImportText(Env, trainInput).Data.Take(sampleRowCount);

            var testFile  = new SimpleFileHandle(Env, testPath, false, false);
            var testInput = new ImportTextData.Input { InputFile = testFile, CustomSchema = loaderSchema };
            var testData  = ImportTextData.ImportText(Env, testInput).Data.Take(sampleRowCount);
#pragma warning restore 0618

            // Define entrypoint graph
            string inputGraph = @"
                {
                  'Nodes': [                                
                    {
                      'Name': 'Models.PipelineSweeper',
                      'Inputs': {
                        'TrainingData': '$TrainingData',
                        'TestingData': '$TestingData',
                        'StateArguments': {
                            'Name': 'AutoMlState',
                            'Settings': {
                                'Metric': 'Auc',
                                'Engine': {
                                    'Name': 'Rocket',
                                    'Settings' : {
                                        'TopKLearners' : 2,
                                        'SecondRoundTrialsPerLearner' : 0
                                    },
                                },
                                'TerminatorArgs': {
                                    'Name': 'IterationLimited',
                                    'Settings': {
                                        'FinalHistoryLength': 35
                                    }
                                },
                                'TrainerKind': 'SignatureBinaryClassifierTrainer',
                                'RequestedLearners' : [
                                    'LogisticRegressionBinaryClassifier',
                                    'FastTreeBinaryClassifier'
                                ]
                            }
                        },
                        'BatchSize': 5
                      },
                      'Outputs': {
                        'State': '$StateOut',
                        'Results': '$ResultsOut'
                      }
                    },
                  ]
                }";

            var nodes            = JObject.Parse(inputGraph)[FieldNames.Nodes] as JArray;
            var componentCatalog = Env.ComponentCatalog;

            var graphRunner = new GraphRunner(Env, componentCatalog, nodes);
            graphRunner.SetInput("TrainingData", trainData);
            graphRunner.SetInput("TestingData", testData);
            graphRunner.RunAll();

            var sweeperState = graphRunner.GetOutput<AutoInference.AutoMlMlState>("StateOut");
            Assert.NotNull(sweeperState);
            var searchSpace = sweeperState.GetSearchSpace();

            // Make sure only learners left are those retained.
            Assert.Equal(expectedLearners.Length, searchSpace.Item2.Length);
            Assert.True(searchSpace.Item2.All(learner => expectedLearners.Any(name => name == learner.LearnerName)));
        }
Esempio n. 8
0
        /// <summary>
        /// Runs a <c>Models.PipelineSweeper</c> graph using the <c>Rocket</c> engine and checks
        /// the number of evaluated pipelines, the best pipeline's metric, and the number of
        /// extracted result rows.
        /// </summary>
        /// <remarks>
        /// Training and testing data are the first 1000 rows of <c>adult.train</c> and
        /// <c>adult.test</c>, loaded with a custom schema. The expected iteration count (35)
        /// equals the <c>FinalHistoryLength</c> value in the graph.
        /// </remarks>
        public void PipelineSweeperRocketEngine()
        {
            // Get datasets
            var          pathData        = GetDataPath("adult.train");
            var          pathDataTest    = GetDataPath("adult.test");
            const int    numOfSampleRows = 1000;
            // Keep in sync with 'FinalHistoryLength' in the graph below.
            const int    numIterations   = 35;
            const string schema          =
                "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " +
                "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+";
            var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false);

#pragma warning disable 0618
            var datasetTrain = ImportTextData.ImportText(Env,
                                                         new ImportTextData.Input {
                InputFile = inputFileTrain, CustomSchema = schema
            }).Data.Take(numOfSampleRows);
            var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false);
            var datasetTest   = ImportTextData.ImportText(Env,
                                                          new ImportTextData.Input {
                InputFile = inputFileTest, CustomSchema = schema
            }).Data.Take(numOfSampleRows);
#pragma warning restore 0618
            // Define entrypoint graph
            string inputGraph = @"
                {
                  'Nodes': [                                
                    {
                      'Name': 'Models.PipelineSweeper',
                      'Inputs': {
                        'TrainingData': '$TrainingData',
                        'TestingData': '$TestingData',
                        'StateArguments': {
                            'Name': 'AutoMlState',
                            'Settings': {
                                'Metric': 'Auc',
                                'Engine': {
                                    'Name': 'Rocket',
                                    'Settings' : {
                                        'TopKLearners' : 2,
                                        'SecondRoundTrialsPerLearner' : 5
                                    },
                                },
                                'TerminatorArgs': {
                                    'Name': 'IterationLimited',
                                    'Settings': {
                                        'FinalHistoryLength': 35
                                    }
                                },
                                'TrainerKind': 'SignatureBinaryClassifierTrainer'
                            }
                        },
                        'BatchSize': 5
                      },
                      'Outputs': {
                        'State': '$StateOut',
                        'Results': '$ResultsOut'
                      }
                    },
                  ]
                }";

            JObject graph   = JObject.Parse(inputGraph);
            var     catalog = Env.ComponentCatalog;

            var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray);
            runner.SetInput("TrainingData", datasetTrain);
            runner.SetInput("TestingData", datasetTest);
            runner.RunAll();

            var autoMlState = runner.GetOutput <AutoInference.AutoMlMlState>("StateOut");
            Assert.NotNull(autoMlState);
            var allPipelines = autoMlState.GetAllEvaluatedPipelines();
            var bestPipeline = autoMlState.GetBestPipeline();
            // Expected value goes first so a failure message labels the two numbers correctly.
            Assert.Equal(numIterations, allPipelines.Length);
            Assert.True(bestPipeline.PerformanceSummary.MetricValue > 0.1);

            var results = runner.GetOutput <IDataView>("ResultsOut");
            Assert.NotNull(results);
            var rows = PipelinePattern.ExtractResults(Env, results,
                                                      "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel");
            // Assert.Equal reports both values on failure, unlike Assert.True on a comparison.
            Assert.Equal(numIterations, rows.Length);
        }
Esempio n. 9
0
        /// <summary>
        /// Runs the Models.PipelineSweeper entry point on the iris dataset as a multiclass problem,
        /// limited to two requested learners and two iterations, then checks the returned AutoML
        /// state (pipeline count and best micro-accuracy) and the results data view.
        /// </summary>
        public void PipelineSweeperMultiClassClassification()
        {
            // Get datasets
            // TODO (agoswami) : For now we use the same dataset for train and test since the repo does not have a separate test file for the iris dataset.
            // In the future the PipelineSweeper Macro will have an option to take just one dataset as input, and do the train-test split internally.
            var pathData = GetDataPath(@"iris.txt");
            var pathDataTest = GetDataPath(@"iris.txt");
            // Must match 'FinalHistoryLength' in the graph below.
            const int numIterations = 2;
            const string schema = "col=Species:R4:0 col=SepalLength:R4:1 col=SepalWidth:R4:2 col=PetalLength:R4:3 col=PetalWidth:R4:4";
            var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false);

#pragma warning disable 0618
            var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input {
                InputFile = inputFileTrain, CustomSchema = schema
            }).Data;
            var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false);
            var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input {
                InputFile = inputFileTest, CustomSchema = schema
            }).Data;
#pragma warning restore 0618

            // Define entrypoint graph
            string inputGraph = @"
                {
                  'Nodes': [
                    {
                      'Name': 'Models.PipelineSweeper',
                      'Inputs': {
                        'TrainingData': '$TrainingData',
                        'TestingData': '$TestingData',
                        'LabelColumns': ['Species'],
                        'StateArguments': {
                            'Name': 'AutoMlState',
                            'Settings': {
                                'Metric': 'AccuracyMicro',
                                'Engine': {
                                    'Name': 'Defaults'
                                },
                                'TerminatorArgs': {
                                    'Name': 'IterationLimited',
                                    'Settings': {
                                        'FinalHistoryLength': 2
                                    }
                                },
                                'TrainerKind': 'SignatureMultiClassClassifierTrainer',
                                'RequestedLearners' : [
                                    'LogisticRegressionClassifier',
                                    'StochasticDualCoordinateAscentClassifier'
                                ]
                            }
                        },
                        'BatchSize': 1
                      },
                      'Outputs': {
                        'State': '$StateOut',
                        'Results': '$ResultsOut'
                      }
                    },
                  ]
                }";

            JObject graphJson = JObject.Parse(inputGraph);
            var catalog = Env.ComponentCatalog;
            var runner = new GraphRunner(Env, catalog, graphJson[FieldNames.Nodes] as JArray);
            runner.SetInput("TrainingData", datasetTrain);
            runner.SetInput("TestingData", datasetTest);
            runner.RunAll();

            var autoMlState = runner.GetOutput<AutoInference.AutoMlMlState>("StateOut");
            Assert.NotNull(autoMlState);
            var allPipelines = autoMlState.GetAllEvaluatedPipelines();
            var bestPipeline = autoMlState.GetBestPipeline();
            // Expected value goes first so a failure reports the two numbers the right way round.
            Assert.Equal(numIterations, allPipelines.Length);

            var bestMicroAccuracyTrain = bestPipeline.PerformanceSummary.TrainingMetricValue;
            var bestMicroAccuracyTest = bestPipeline.PerformanceSummary.MetricValue;
            Assert.True((0.97 < bestMicroAccuracyTrain) && (bestMicroAccuracyTrain < 0.99));
            Assert.True((0.97 < bestMicroAccuracyTest) && (bestMicroAccuracyTest < 0.99));

            var results = runner.GetOutput<IDataView>("ResultsOut");
            Assert.NotNull(results);
            var rows = PipelinePattern.ExtractResults(Env, results,
                                                      "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel");
            Assert.Equal(numIterations, rows.Length);
            Assert.True(rows.All(r => r.MetricValue > 0.9));
        }
Esempio n. 10
0
        /// <summary>
        /// Runs the Models.PipelineSweeper entry point with the UniformRandom engine for four
        /// iterations on the first 1000 rows of adult.tiny.with-schema.txt (used for both training
        /// and testing), then checks the AutoML state and the results data view.
        /// </summary>
        public void PipelineSweeperBasic()
        {
            // Get datasets
            var pathData = GetDataPath("adult.tiny.with-schema.txt");
            var pathDataTest = GetDataPath("adult.tiny.with-schema.txt");
            const int numOfSampleRows = 1000;
            // Must match 'FinalHistoryLength' in the graph below.
            const int numIterations = 4;
            var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false);

#pragma warning disable 0618
            var datasetTrain = ImportTextData.ImportText(Env,
                                                         new ImportTextData.Input {
                InputFile = inputFileTrain
            }).Data.Take(numOfSampleRows);
            var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false);
            var datasetTest = ImportTextData.ImportText(Env,
                                                        new ImportTextData.Input {
                InputFile = inputFileTest
            }).Data.Take(numOfSampleRows);
#pragma warning restore 0618
            // Define entrypoint graph
            string inputGraph = @"
                {
                  'Nodes': [                                
                    {
                      'Name': 'Models.PipelineSweeper',
                      'Inputs': {
                        'TrainingData': '$TrainingData',
                        'TestingData': '$TestingData',
                        'StateArguments': {
                            'Name': 'AutoMlState',
                            'Settings': {
                                'Metric': 'Auc',
                                'Engine': {
                                    'Name': 'UniformRandom'
                                },
                                'TerminatorArgs': {
                                    'Name': 'IterationLimited',
                                    'Settings': {
                                        'FinalHistoryLength': 4
                                    }
                                },
                                'TrainerKind': 'SignatureBinaryClassifierTrainer'
                            }
                        },
                        'BatchSize': 2
                      },
                      'Outputs': {
                        'State': '$StateOut',
                        'Results': '$ResultsOut'
                      }
                    },
                  ]
                }";

            JObject graph = JObject.Parse(inputGraph);
            var catalog = Env.ComponentCatalog;

            var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray);
            runner.SetInput("TrainingData", datasetTrain);
            runner.SetInput("TestingData", datasetTest);
            runner.RunAll();

            var autoMlState = runner.GetOutput<AutoInference.AutoMlMlState>("StateOut");
            Assert.NotNull(autoMlState);
            var allPipelines = autoMlState.GetAllEvaluatedPipelines();
            var bestPipeline = autoMlState.GetBestPipeline();
            // Expected value goes first so a failure reports the two numbers the right way round.
            Assert.Equal(numIterations, allPipelines.Length);
            Assert.True(bestPipeline.PerformanceSummary.MetricValue > 0.1);

            var results = runner.GetOutput<IDataView>("ResultsOut");
            Assert.NotNull(results);
            var rows = PipelinePattern.ExtractResults(Env, results,
                                                      "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel");
            Assert.Equal(numIterations, rows.Length);
            Assert.True(rows.All(r => r.TrainingMetricValue > 0.1));
        }
Esempio n. 11
0
        /// <summary>
        /// Runs the Models.PipelineSweeper entry point with explicit column roles (label, weight,
        /// name and text-feature columns) on the first 100 rows of adult.train / adult.test,
        /// restricted to two requested binary learners and two iterations, then checks the AutoML
        /// state (pipeline count and best train/test AUC) and the results data view.
        /// </summary>
        public void PipelineSweeperRoles()
        {
            // Get datasets
            var pathData = GetDataPath("adult.train");
            var pathDataTest = GetDataPath("adult.test");
            const int numOfSampleRows = 100;
            // Must match 'FinalHistoryLength' in the graph below.
            const int numIterations = 2;
            const string schema =
                "sep=, col=age:R4:0 col=workclass:TX:1 col=fnlwgt:R4:2 col=education:TX:3 col=education_num:R4:4 col=marital_status:TX:5 col=occupation:TX:6 " +
                "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=Features:R4:10-12 col=native_country:TX:13 col=IsOver50K_:R4:14 header=+";
            var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false);

#pragma warning disable 0618
            var datasetTrain = ImportTextData.ImportText(Env,
                                                         new ImportTextData.Input {
                InputFile = inputFileTrain, CustomSchema = schema
            }).Data.Take(numOfSampleRows);
            var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false);
            var datasetTest = ImportTextData.ImportText(Env,
                                                        new ImportTextData.Input {
                InputFile = inputFileTest, CustomSchema = schema
            }).Data.Take(numOfSampleRows);
#pragma warning restore 0618

            // Define entrypoint graph
            string inputGraph = @"
                {
                  'Nodes': [
                    {
                      'Name': 'Models.PipelineSweeper',
                      'Inputs': {
                        'TrainingData': '$TrainingData',
                        'TestingData': '$TestingData',
                        'LabelColumns': ['IsOver50K_'],
                        'WeightColumns': ['education_num'],
                        'NameColumns': ['education'],
                        'TextFeatureColumns': ['workclass', 'marital_status', 'occupation'],
                        'StateArguments': {
                            'Name': 'AutoMlState',
                            'Settings': {
                                'Metric': 'Auc',
                                'Engine': {
                                    'Name': 'Defaults'
                                },
                                'TerminatorArgs': {
                                    'Name': 'IterationLimited',
                                    'Settings': {
                                        'FinalHistoryLength': 2
                                    }
                                },
                                'TrainerKind': 'SignatureBinaryClassifierTrainer',
                                'RequestedLearners' : [
                                    'LogisticRegressionBinaryClassifier',
                                    'FastTreeBinaryClassifier'
                                ]
                            }
                        },
                        'BatchSize': 1
                      },
                      'Outputs': {
                        'State': '$StateOut',
                        'Results': '$ResultsOut'
                      }
                    },
                  ]
                }";

            JObject graphJson = JObject.Parse(inputGraph);
            var catalog = Env.ComponentCatalog;
            var runner = new GraphRunner(Env, catalog, graphJson[FieldNames.Nodes] as JArray);
            runner.SetInput("TrainingData", datasetTrain);
            runner.SetInput("TestingData", datasetTest);
            runner.RunAll();

            var autoMlState = runner.GetOutput<AutoInference.AutoMlMlState>("StateOut");
            Assert.NotNull(autoMlState);
            var allPipelines = autoMlState.GetAllEvaluatedPipelines();
            var bestPipeline = autoMlState.GetBestPipeline();
            // Expected value goes first so a failure reports the two numbers the right way round.
            Assert.Equal(numIterations, allPipelines.Length);

            var trainAuc = bestPipeline.PerformanceSummary.TrainingMetricValue;
            var testAuc = bestPipeline.PerformanceSummary.MetricValue;
            Assert.True((0.94 < trainAuc) && (trainAuc < 0.95));
            Assert.True((0.815 < testAuc) && (testAuc < 0.825));

            var results = runner.GetOutput<IDataView>("ResultsOut");
            Assert.NotNull(results);
            var rows = PipelinePattern.ExtractResults(Env, results,
                                                      "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel");
            Assert.Equal(numIterations, rows.Length);
            Assert.True(rows.All(r => r.TrainingMetricValue > 0.1));
        }
Esempio n. 12
0
        /// <summary>
        /// Checks that entry point nodes survive a ToJson() round trip: the PipelineSweeper graph
        /// is parsed into an <c>EntryPointGraph</c>, each node is serialized back to JSON, and the
        /// re-serialized nodes are run on the first 1000 rows of adult.train / adult.test for ten
        /// iterations. The results view must contain one row per iteration.
        /// </summary>
        public void PipelineSweeperSerialization()
        {
            // Get datasets
            var pathData = GetDataPath("adult.train");
            var pathDataTest = GetDataPath("adult.test");
            const int numOfSampleRows = 1000;
            // Must match 'FinalHistoryLength' in the graph below.
            const int numIterations = 10;
            const string schema =
                "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " +
                "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+";
            var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false);

#pragma warning disable 0618
            var datasetTrain = ImportTextData.ImportText(Env,
                                                         new ImportTextData.Input {
                InputFile = inputFileTrain, CustomSchema = schema
            }).Data.Take(numOfSampleRows);
            var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false);
            var datasetTest = ImportTextData.ImportText(Env,
                                                        new ImportTextData.Input {
                InputFile = inputFileTest, CustomSchema = schema
            }).Data.Take(numOfSampleRows);
#pragma warning restore 0618

            // Define entrypoint graph
            string inputGraph = @"
                {
                  'Nodes': [
                    {
                      'Name': 'Models.PipelineSweeper',
                      'Inputs': {
                        'TrainingData': '$TrainingData',
                        'TestingData': '$TestingData',
                        'StateArguments': {
                            'Name': 'AutoMlState',
                            'Settings': {
                                'Metric': 'Auc',
                                'Engine': {
                                    'Name': 'UniformRandom'
                                },
                                'TerminatorArgs': {
                                    'Name': 'IterationLimited',
                                    'Settings': {
                                        'FinalHistoryLength': 10
                                    }
                                },
                                'TrainerKind': 'SignatureBinaryClassifierTrainer'
                            }
                        },
                        'BatchSize': 5
                      },
                      'Outputs': {
                        'State': '$StateOut',
                        'Results': '$ResultsOut'
                      }
                    },
                  ]
                }";

            JObject graphJson = JObject.Parse(inputGraph);
            var catalog = Env.ComponentCatalog;
            var graph = new EntryPointGraph(Env, catalog, graphJson[FieldNames.Nodes] as JArray);
            // Test if ToJson() works properly.
            var nodes = new JArray(graph.AllNodes.Select(node => node.ToJson()));
            var runner = new GraphRunner(Env, catalog, nodes);
            runner.SetInput("TrainingData", datasetTrain);
            runner.SetInput("TestingData", datasetTest);
            runner.RunAll();

            var results = runner.GetOutput<IDataView>("ResultsOut");
            Assert.NotNull(results);
            var rows = PipelinePattern.ExtractResults(Env, results,
                                                      "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel");
            // Assert.Equal reports both values on failure; Assert.True(a == b) only says "False".
            Assert.Equal(numIterations, rows.Length);
        }
Esempio n. 13
0
        /// <summary>
        /// Parses an experiment graph from JSON, binds its inputs, runs every node, and delivers
        /// its outputs either to files or back to the native caller.
        /// </summary>
        /// <param name="penv">Native environment block; supplies the seed, python path, max slots and is passed to the native send helpers.</param>
        /// <param name="env">Host environment used to register the "RunGraph" host.</param>
        /// <param name="graphStr">JSON text with a "nodes" array and optional "inputs" / "outputs" objects mapping variable names to paths.</param>
        /// <param name="cdata">Number of native data source blocks in <paramref name="ppdata"/>.</param>
        /// <param name="ppdata">Native data source blocks; consumed in order by data view inputs that have an empty path.</param>
        /// <remarks>
        /// Throws a host exception when the JSON cannot be parsed, when an input/output entry is not a
        /// simple value, when a model port has no file path, or when a port kind is not supported.
        /// </remarks>
        private static void RunGraphCore(EnvironmentBlock *penv, IHostEnvironment env, string graphStr, int cdata, DataSourceBlock **ppdata)
        {
            Contracts.AssertValue(env);

            var host = env.Register("RunGraph", penv->seed, null);
            JObject graph;

            try
            {
                graph = JObject.Parse(graphStr);
            }
            catch (JsonReaderException ex)
            {
                throw host.Except(ex, "Failed to parse experiment graph: {0}", ex.Message);
            }

            var runner = new GraphRunner(host, graph["nodes"] as JArray);

            var dvNative = new IDataView[cdata];

            try
            {
                // Wrap every native data block; the finally block below disposes whatever was created.
                for (int i = 0; i < cdata; i++)
                {
                    dvNative[i] = new NativeDataView(host, ppdata[i]);
                }

                // Setting inputs.
                var jInputs = graph["inputs"] as JObject;
                if (graph["inputs"] != null && jInputs == null)
                {
                    throw host.Except("Unexpected value for 'inputs': {0}", graph["inputs"]);
                }

                // Index of the next native data view to hand to a data view input without a path.
                int iDv = 0;
                if (jInputs != null)
                {
                    foreach (var kvp in jInputs)
                    {
                        var pathValue = kvp.Value as JValue;
                        if (pathValue == null)
                        {
                            throw host.Except("Invalid value for input: {0}", kvp.Value);
                        }

                        var path = pathValue.Value<string>();
                        var varName = kvp.Key;
                        var type = runner.GetPortDataKind(varName);

                        switch (type)
                        {
                            case TlcModule.DataKind.FileHandle:
                                var fh = new SimpleFileHandle(host, path, false, false);
                                runner.SetInput(varName, fh);
                                break;

                            case TlcModule.DataKind.DataView:
                                IDataView dv;
                                if (!string.IsNullOrWhiteSpace(path))
                                {
                                    // The loader is picked from the file extension; anything that is
                                    // neither .txt nor .dprep goes to the binary loader.
                                    var extension = Path.GetExtension(path);
                                    if (extension == ".txt")
                                    {
                                        dv = TextLoader.LoadFile(host, new TextLoader.Options(), new MultiFileSource(path));
                                    }
                                    else if (extension == ".dprep")
                                    {
                                        dv = LoadDprepFile(BytesToString(penv->pythonPath), path);
                                    }
                                    else
                                    {
                                        dv = new BinaryLoader(host, new BinaryLoader.Arguments(), path);
                                    }
                                }
                                else
                                {
                                    Contracts.Assert(iDv < dvNative.Length);
                                    // prefetch all columns
                                    dv = dvNative[iDv++];
                                    var prefetch = new int[dv.Schema.Count];
                                    for (int i = 0; i < prefetch.Length; i++)
                                    {
                                        prefetch[i] = i;
                                    }
                                    dv = new CacheDataView(host, dv, prefetch);
                                }
                                runner.SetInput(varName, dv);
                                break;

                            case TlcModule.DataKind.PredictorModel:
                                PredictorModel pm;
                                if (!string.IsNullOrWhiteSpace(path))
                                {
                                    using (var fs = File.OpenRead(path))
                                    {
                                        pm = new PredictorModelImpl(host, fs);
                                    }
                                }
                                else
                                {
                                    throw host.Except("Model must be loaded from a file");
                                }
                                runner.SetInput(varName, pm);
                                break;

                            case TlcModule.DataKind.TransformModel:
                                TransformModel tm;
                                if (!string.IsNullOrWhiteSpace(path))
                                {
                                    using (var fs = File.OpenRead(path))
                                    {
                                        tm = new TransformModelImpl(host, fs);
                                    }
                                }
                                else
                                {
                                    throw host.Except("Model must be loaded from a file");
                                }
                                runner.SetInput(varName, tm);
                                break;

                            default:
                                throw host.Except("Port type {0} not supported", type);
                        }
                    }
                }
                runner.RunAll();

                // Reading outputs.
                using (var ch = host.Start("Reading outputs"))
                {
                    var jOutputs = graph["outputs"] as JObject;
                    if (jOutputs != null)
                    {
                        foreach (var kvp in jOutputs)
                        {
                            var pathValue = kvp.Value as JValue;
                            if (pathValue == null)
                            {
                                // This loop walks the outputs, so the message must say "output".
                                throw host.Except("Invalid value for output: {0}", kvp.Value);
                            }
                            var path = pathValue.Value<string>();
                            var varName = kvp.Key;
                            var type = runner.GetPortDataKind(varName);

                            switch (type)
                            {
                                case TlcModule.DataKind.FileHandle:
                                    var fh = runner.GetOutput<IFileHandle>(varName);
                                    throw host.ExceptNotSupp("File handle outputs not yet supported.");

                                case TlcModule.DataKind.DataView:
                                    var idv = runner.GetOutput<IDataView>(varName);
                                    if (path == CSR_MATRIX)
                                    {
                                        SendViewToNativeAsCsr(ch, penv, idv);
                                    }
                                    else if (!string.IsNullOrWhiteSpace(path))
                                    {
                                        SaveIdvToFile(idv, path, host);
                                    }
                                    else
                                    {
                                        // No path: hand the view back to the native caller as a data frame.
                                        var infos = ProcessColumns(ref idv, penv->maxSlots, host);
                                        SendViewToNativeAsDataFrame(ch, penv, idv, infos);
                                    }
                                    break;

                                case TlcModule.DataKind.PredictorModel:
                                    var pm = runner.GetOutput<PredictorModel>(varName);
                                    if (!string.IsNullOrWhiteSpace(path))
                                    {
                                        SavePredictorModelToFile(pm, path, host);
                                    }
                                    else
                                    {
                                        throw host.Except("Returning in-memory models is not supported");
                                    }
                                    break;

                                case TlcModule.DataKind.TransformModel:
                                    var tm = runner.GetOutput<TransformModel>(varName);
                                    if (!string.IsNullOrWhiteSpace(path))
                                    {
                                        // File.Create truncates an existing file. File.OpenWrite would keep
                                        // stale trailing bytes when the new model is shorter than the old one.
                                        using (var fs = File.Create(path))
                                        {
                                            tm.Save(host, fs);
                                        }
                                    }
                                    else
                                    {
                                        throw host.Except("Returning in-memory models is not supported");
                                    }
                                    break;

                                case TlcModule.DataKind.Array:
                                    var objArray = runner.GetOutput<object[]>(varName);
                                    if (objArray is PredictorModel[])
                                    {
                                        var modelArray = (PredictorModel[])objArray;
                                        // Save each model separately; the path is used as a format string
                                        // that receives the model's index.
                                        for (var i = 0; i < modelArray.Length; i++)
                                        {
                                            var modelPath = string.Format(CultureInfo.InvariantCulture, path, i);
                                            SavePredictorModelToFile(modelArray[i], modelPath, host);
                                        }
                                    }
                                    else
                                    {
                                        // First() would throw on an empty array and hide the real error,
                                        // so fall back to the array's own type when there is no element.
                                        var firstElement = objArray.FirstOrDefault();
                                        throw host.Except("DataKind.Array type {0} not supported",
                                                          firstElement == null ? objArray.GetType() : firstElement.GetType());
                                    }
                                    break;

                                default:
                                    throw host.Except("Port type {0} not supported", type);
                            }
                        }
                    }
                }
            }
            finally
            {
                // The raw data view is disposable so it lets go of unmanaged raw pointers before we return.
                for (int i = 0; i < dvNative.Length; i++)
                {
                    var view = dvNative[i];
                    if (view == null)
                    {
                        // Construction failed part-way through; later slots were never filled.
                        continue;
                    }
                    host.Assert(view is IDisposable);
                    var disp = (IDisposable)dvNative[i];
                    disp.Dispose();
                }
            }
        }
        /// <summary>
        /// Creates a <c>GraphRunner</c> from the stored environment and graph and calls
        /// <c>RunAll</c> on it.
        /// </summary>
        public void Run()
        {
            new GraphRunner(_env, _graph).RunAll();
        }
Esempio n. 15
0
        /// <summary>
        /// Compares two route-measure result files pair by pair and prints a summary to the console.
        /// </summary>
        /// <param name="args">
        /// <c>args[1]</c> and <c>args[2]</c> are the origin and target file names under
        /// <c>Constants.PATH_OUTPUTS</c>; <c>args[3]</c> and <c>args[4]</c> select the importer for
        /// each file ("java" picks the Java importer, anything else the C# one); the optional
        /// <c>args[5]</c> names a graph file under <c>Constants.PATH_GRAPH</c>. <c>args[0]</c> is not read.
        /// </param>
        /// <exception cref="ArgumentException">
        /// Fewer than five arguments were given, or a pair of the origin file is missing from the target file.
        /// </exception>
        public static void Compare(string[] args)
        {
            if (args.Length < 5)
            {
                throw new ArgumentException("You must inform two output file names and two file tipes");
            }

            // The graph is optional; without it the per-route GraphRunner details are not printed.
            Graph graph = args.Length >= 6 ? Import.LoadCityFromText(Constants.PATH_GRAPH + args[5]) : null;

            var path = Constants.PATH_OUTPUTS;

            // All numeric parsing and formatting below relies on the invariant culture.
            Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;

            ILineImporter javaImporter = new JavaResultLineImporter();
            ILineImporter csImporter = new CSResultLineImporter();

            ILineImporter originImporter = args[3] == "java" ? javaImporter : csImporter;
            var measuresOrigin = Import.LoadRouteMeasuresFromTxt(path + args[1], originImporter);
            ILineImporter targetImporter = args[4] == "java" ? javaImporter : csImporter;
            var measuresTarget = Import.LoadRouteMeasuresFromTxt(path + args[2], targetImporter);

            Console.WriteLine("Outputs imported");

            var matchingRoutes = 0;
            var statusMismatches = new List<Tuple<RouteMeasures, RouteMeasures>>();
            var distanceMismatches = new List<Tuple<RouteMeasures, RouteMeasures>>();

            // Signed sums (origin minus target), split by whether the distances agree.
            double minutesEqual = 0;
            double minutesDifferent = 0;
            double minutesAll = 0;

            double distanceAll = 0;
            double distanceEqual = 0;
            double distanceDifferent = 0;

            foreach (var originEntry in measuresOrigin)
            {
                if (!measuresTarget.ContainsKey(originEntry.Key))
                {
                    throw new ArgumentException("The paths don't represent the same OD file, because the pair " +
                                                originEntry.Key + " in the origin file couldn't be found in the target path");
                }

                RouteMeasures fromOrigin = originEntry.Value;
                RouteMeasures fromTarget = measuresTarget[originEntry.Key];

                // Routes whose status differs are only recorded; they do not contribute to the sums.
                if (fromOrigin.Status != fromTarget.Status)
                {
                    statusMismatches.Add(new Tuple<RouteMeasures, RouteMeasures>(fromOrigin, fromTarget));
                    continue;
                }

                var distanceGap = fromOrigin.Distance - fromTarget.Distance;
                var minutesGap = (fromOrigin.DeltaTime - fromTarget.DeltaTime).TotalMinutes;

                if (Math.Abs(distanceGap) < Constants.DISTANCE_DIFFERENCE_THRESHOLD)
                {
                    matchingRoutes++;
                    minutesEqual += minutesGap;
                    distanceEqual += distanceGap;
                }
                else
                {
                    minutesDifferent += minutesGap;
                    distanceMismatches.Add(new Tuple<RouteMeasures, RouteMeasures>(fromOrigin, fromTarget));
                    distanceDifferent += distanceGap;
                }

                minutesAll += minutesGap;
                distanceAll += distanceGap;
            }

            Console.WriteLine();

            GraphRunner graphRunner = graph != null ? new GraphRunner(graph) : null;

            // One line per route whose distances disagree, e.g. "source,target: d1 <> d2 = d1-d2".
            foreach (var mismatch in distanceMismatches)
            {
                RouteMeasures left = mismatch.Item1;
                RouteMeasures right = mismatch.Item2;

                string distanceText =
                    left.SourceId + "," + left.TargetId + ": " +
                    left.Distance.ToString() + " <> " +
                    right.Distance.ToString() + " = " +
                    (left.Distance - right.Distance).ToString();

                string runnerText;
                if (graphRunner != null)
                {
                    runnerText = "\n\t GraphRunner => " + graphRunner.PathSize(left).ToString() + " <> " + graphRunner.PathSize(right).ToString();
                }
                else
                {
                    runnerText = "-";
                }

                Console.WriteLine(distanceText + runnerText);
                Console.WriteLine();
            }

            Console.WriteLine();

            // Summary (user-facing text is Portuguese). Negative values mean the right-hand (target) results are larger.
            Console.WriteLine("Resultados negativos simbolizam que os valores para os resultados da direita são maiores");
            Console.WriteLine("Resultados iguais       : " + matchingRoutes);
            Console.WriteLine("Resultados diferentes   : " + (distanceMismatches.Count + statusMismatches.Count));
            Console.WriteLine("\tStatus diferentes     : " + statusMismatches.Count);
            Console.WriteLine("\tDistâncias diferentes : " + distanceMismatches.Count);
            Console.WriteLine("Diferença de distância  : " + distanceAll);
            Console.WriteLine("\tCaminhos iguais       : " + distanceEqual);
            Console.WriteLine("\tCaminhos diferentes   : " + distanceDifferent);
            Console.WriteLine("Diferença de tempo      : " + minutesAll);
            Console.WriteLine("\tCaminhos iguais       : " + minutesEqual);
            Console.WriteLine("\tCaminhos diferentes   : " + minutesDifferent);
        }