/// <summary>
/// Runs the CreateInstances command on the <c>breastCancerConst</c> dataset with the text
/// instance writer, then calls <c>CheckEquality</c> on the instances file and the stats file.
/// </summary>
public void TestCreateTextInstancesConstant()
{
    TestDataset dataset = TestDatasets.breastCancerConst;
    string trainPath = GetDataPath(dataset.trainFilename);

    // Output names are prefixed with the test name.
    string namePrefix = TestContext.TestName + "-";
    string instancesName = namePrefix + "BreastCancer.txt";
    string statsFileName = namePrefix + "BreastCancer.stats.txt";
    string instancesPath = DeleteOutputPath(Dir, instancesName);
    string statsPath = DeleteOutputPath(Dir, statsFileName);

    string commandLine = string.Format(
        "c=CreateInstances {0} {1} cifile={2} cistatsfile={3}",
        trainPath, dataset.extraSettings, instancesPath, statsPath)
        + " writer=TextInstanceWriter{stats+} disableTracking+";

    var parsedArgs = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(commandLine, parsedArgs));
    RunExperiments.Run(parsedArgs);

    foreach (string producedName in new[] { instancesName, statsFileName })
    {
        CheckEquality(Dir, producedName);
    }

    Done();
}
/// <summary>
/// Generic method for parsing arguments using CmdParser. If there's a problem, it throws an
/// InvalidOperationException, with a message giving usage. A null, empty or whitespace-only
/// settings string is accepted and leaves <paramref name="args"/> untouched.
/// </summary>
/// <param name="env">The host environment</param>
/// <param name="args">The argument object</param>
/// <param name="settings">The settings string (for example, "threshold-")</param>
/// <param name="name">The name is used for error reporting only</param>
public static void ParseArguments(IHostEnvironment env, object args, string settings, string name = null)
{
    if (string.IsNullOrWhiteSpace(settings))
    {
        return;
    }

    string errorMsg = null; // non-null errorMsg will indicate an error
    try
    {
        string err = null;
        string helpText;

        // Only the first reported error is kept; it is followed by the usage text.
        if (!CmdParser.ParseArguments(env, settings, args, e => { err = err ?? e; }, out helpText))
        {
            errorMsg = err + (!string.IsNullOrWhiteSpace(name) ? "\nUSAGE FOR '" + name + "':\n" : "\nUSAGE:\n") + helpText;
        }
    }
    catch (Exception e)
    {
        Contracts.Assert(false);

        // Pass the caught exception along as the inner exception so its type and stack trace
        // are not lost; the message text is the same as before.
        throw Contracts.Except(e, "Unexpected exception thrown while parsing: " + e.Message);
    }

    if (errorMsg != null)
    {
        throw Contracts.Except(errorMsg);
    }
}
/// <summary>
/// Creates an <c>ExtractInstances</c> for <paramref name="filename"/> whose arguments object
/// has been run through <c>CmdParser.ParseArguments</c> with <paramref name="settings"/>.
/// </summary>
/// <param name="filename">Passed to the <c>ExtractInstances</c> constructor.</param>
/// <param name="settings">Settings string handed to the parser; defaults to null.</param>
private Instances CreateExtractInstances(string filename, string settings = null)
{
    var extractArgs = new ExtractInstances.Arguments();

    // The parser's return value is not inspected here.
    CmdParser.ParseArguments(settings, extractArgs);

    var extractor = new ExtractInstances(extractArgs, filename);
    return extractor;
}
/// <summary>
/// Handles the "list" verb: parses <paramref name="args"/> into the list options and
/// executes a <c>NuGetListCommand</c> built from them.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <param name="options">Options container; its <c>List</c> member receives the parsed values.</param>
/// <returns>1 when the arguments cannot be parsed, otherwise 0.</returns>
protected virtual int HandleListCommand(string[] args, DefaultOptions options)
{
    var op = options.List;
    ConfigureServices(op);

    if (!CmdParser.ParseArguments(args, op))
    {
        return 1;
    }

    var command = new NuGetListCommand
    {
        Url = op.Url,
        UserName = op.UserName,
        UserPassword = op.UserPassword,

        // Forward the requested version, as the install and update handlers do; previously
        // a value supplied by the user was never copied to the command.
        SharePointVersion = op.SharePointVersion
    };

    // Fall back to O365 when no version was supplied.
    if (string.IsNullOrEmpty(op.SharePointVersion))
    {
        command.SharePointVersion = SharePointRuntimVersions.O365;
    }

    command.Execute();
    return 0;
}
/// <summary>
/// Parses arguments using CmdParser. If there's a problem, it throws an InvalidOperationException,
/// with a message giving usage. A null, empty or whitespace-only settings string is a no-op.
/// </summary>
/// <param name="env">The host environment</param>
/// <param name="args">The argument object</param>
/// <param name="settings">The settings string</param>
/// <param name="name">The name is used for error reporting only</param>
private static void ParseArguments(IHostEnvironment env, object args, string settings, string name)
{
    Contracts.AssertValue(args);
    Contracts.AssertNonEmpty(name);

    if (string.IsNullOrWhiteSpace(settings))
    {
        return;
    }

    // Stays null unless the parser reports a failure.
    string failure = null;
    try
    {
        string firstError = null;
        string usage;
        bool succeeded = CmdParser.ParseArguments(
            env, settings, args, message => { firstError = firstError ?? message; }, out usage);

        if (!succeeded)
        {
            failure = firstError + Environment.NewLine + "Usage For '" + name + "':" + Environment.NewLine + usage;
        }
    }
    catch (Exception ex)
    {
        Contracts.Assert(false);
        throw Contracts.Except(ex, "Unexpected exception thrown while parsing: " + ex.Message);
    }

    // Thrown outside the try block so the catch-all above does not re-wrap it.
    if (failure == null)
    {
        return;
    }

    throw Contracts.Except(failure);
}
/// <summary>
/// Parses <paramref name="args"/>, hands the resulting command to a <c>Controller</c> and
/// asserts that <c>CheckCommandLineArguments</c> returns false.
/// </summary>
public void ParseCreateTableArgumentsTestShouldFail(string[] args)
{
    var parsedCommand = CmdParser.ParseArguments(args);
    var controller = new Controller(parsedCommand);

    bool argumentsAccepted = controller.CheckCommandLineArguments();

    Assert.That(argumentsAccepted, Is.False);
}
/// <summary>
/// Executes "create-table" with the bucket argument "hopefullynonexistingbucket" and asserts
/// that <c>Controller.Execute</c> returns false.
/// </summary>
public void CreateTableNonExistingBucketTest()
{
    string[] commandLine =
    {
        "create-table", "hopefullynonexistingbucket", testTableName, testFilename, "-t", testToken
    };

    var parsedCommand = CmdParser.ParseArguments(commandLine);
    var controller = new Controller(parsedCommand);
    var executed = controller.Execute();

    Assert.That(executed, Is.False);
}
/// <summary>
/// Round-trip test: builds <c>SimpleArg</c> objects whose text fields contain braces, quotes
/// and whitespace, converts them to settings strings, parses those strings back with
/// <c>CmdParser.ParseArguments</c> and asserts equality with the originals.
/// </summary>
public void ArgumentParseTest()
{
    var env = new ConsoleEnvironment(seed: 42);

    var first = new SimpleArg()
    {
        required = -2,
        text1 = "}",
        text2 = "{",
        text3 = " ",
        text4 = "\n",
    };

    var second = new SimpleArg()
    {
        required = -2,
        text1 = "{!@# $%^}&*{()}",
        text2 = "=",
        text3 = "\t",
        text4 = @"\\",
    };

    // Nests the first two objects as sub-components.
    var third = new SimpleArg()
    {
        required = -2,
        text1 = "\r\n",
        text2 = "\"",
        text3 = "\" \" ",
        text4 = "{/a=2 /b=3 /c=4}",
        sub1 = CreateComponentFactory("S1", first.ToString(env)),
        sub2 = CreateComponentFactory("S2", second.ToString(env)),
    };

    var outer = new SimpleArg()
    {
        required = -2,
        once = 2,
        text2 = "Testing",
        text3 = "a=7",
        sub1 = CreateComponentFactory("S1", first.ToString(env)),
        sub2 = CreateComponentFactory("S2", second.ToString(env)),
        sub3 = CreateComponentFactory("S3", third.ToString(env)),
    };

    // Whole object.
    var roundTripped = new SimpleArg();
    CmdParser.ParseArguments(env, outer.ToString(env), roundTripped);
    Assert.Equal(outer, roundTripped);

    // Each sub-component's settings string must parse back to the object it was built from.
    string sub1Settings = ((ICommandLineComponentFactory)outer.sub1).GetSettingsString();
    roundTripped = new SimpleArg();
    CmdParser.ParseArguments(env, sub1Settings, roundTripped);
    Assert.Equal(first, roundTripped);

    string sub2Settings = ((ICommandLineComponentFactory)outer.sub2).GetSettingsString();
    roundTripped = new SimpleArg();
    CmdParser.ParseArguments(env, sub2Settings, roundTripped);
    Assert.Equal(second, roundTripped);

    string sub3Settings = ((ICommandLineComponentFactory)outer.sub3).GetSettingsString();
    roundTripped = new SimpleArg();
    CmdParser.ParseArguments(env, sub3Settings, roundTripped);
    Assert.Equal(third, roundTripped);
}
/// <summary>
/// Parses <paramref name="args"/> and asserts both that the controller accepts the arguments
/// and that the parsed command is a <c>CreateTableCommand</c>.
/// </summary>
public void ParseCreateTableArgumentsTest(string[] args)
{
    var parsedCommand = CmdParser.ParseArguments(args);
    var controller = new Controller(parsedCommand);

    bool argumentsAccepted = controller.CheckCommandLineArguments();

    Assert.That(argumentsAccepted, Is.True);
    Assert.That(parsedCommand, Is.InstanceOf(typeof(CreateTableCommand)));
}
/// <summary>
/// Executes "create-table" with the file argument "sdfsdfsdf" and asserts that
/// <c>Controller.Execute</c> returns false.
/// </summary>
public void BadFileTest()
{
    string[] commandLine = { "create-table", "main", "testtable", "sdfsdfsdf", "-t", testToken };

    var parsedCommand = CmdParser.ParseArguments(commandLine);
    var controller = new Controller(parsedCommand);
    var executed = controller.Execute();

    Assert.That(executed, Is.False);
}
/// <summary>
/// Parses the argument array in <c>args.Item1</c> and checks that every property of the
/// resulting <c>WriteTableCommand</c> equals the corresponding property of <c>args.Item2</c>.
/// </summary>
/// <param name="args">Pair of (command-line arguments, expected command).</param>
public void WriteTableParseArgsOK(Tuple<string[], WriteTableCommand> args)
{
    var actual = (WriteTableCommand)CmdParser.ParseArguments(args.Item1);
    WriteTableCommand expected = args.Item2;

    actual.ShouldHave().AllProperties().EqualTo(expected);
}
/// <summary>
/// Executes "create-table" with the test bucket, table name, file and token fields and
/// asserts that <c>Controller.Execute</c> returns true.
/// </summary>
public void CreateTableOKTest()
{
    string[] commandLine = { "create-table", testBucketId, testTableName, testFilename, "-t", testToken };

    var parsedCommand = CmdParser.ParseArguments(commandLine);
    var controller = new Controller(parsedCommand);
    var executed = controller.Execute();

    Assert.That(executed, Is.True);
}
/// <summary>
/// Executes the same "create-table" command twice on one controller and asserts that the
/// second execution returns false.
/// </summary>
public void CreateOnExistingTableTestShouldFail()
{
    string[] commandLine =
    {
        "create-table", testBucketId, "existingTableTest", testFilename, "-t", testToken
    };

    var parsedCommand = CmdParser.ParseArguments(commandLine);
    var controller = new Controller(parsedCommand);

    // First execution; its result is not asserted on.
    controller.Execute();
    var repeatedResult = controller.Execute();

    Assert.That(repeatedResult, Is.False);
}
/// <summary>
/// Creates a <c>TextLoader</c> over <paramref name="dataPath"/> using a fixed 21-column
/// schema (Id and Date as TX, the remaining columns as R4), with a header row and ','
/// as the separator.
/// </summary>
/// <param name="dataPath">Path wrapped in a <c>MultiFileSource</c> for the loader.</param>
/// <returns>The constructed loader, typed as <c>IDataView</c>.</returns>
public static IDataView GetKcHouseDataView(string dataPath)
{
    const string schema = "col=Id:TX:0 col=Date:TX:1 col=Label:R4:2 col=Bedrooms:R4:3 col=Bathrooms:R4:4 col=SqftLiving:R4:5 col=SqftLot:R4:6 col=Floors:R4:7 col=Waterfront:R4:8 col=View:R4:9 col=Condition:R4:10 col=Grade:R4:11 col=SqftAbove:R4:12 col=SqftBasement:R4:13 col=YearBuilt:R4:14 col=YearRenovated:R4:15 col=Zipcode:R4:16 col=Lat:R4:17 col=Long:R4:18 col=SqftLiving15:R4:19 col=SqftLot15:R4:20 header+ sep=,";

    var loaderArgs = new TextLoader.Arguments();

    // Parse in its own statement so the call always runs, whatever Assert does with its argument.
    bool schemaParsed = CmdParser.ParseArguments(s_environment, schema, loaderArgs);
    s_environment.Assert(schemaParsed);

    var source = new MultiFileSource(dataPath);
    return new TextLoader(s_environment, loaderArgs, source);
}
/// <summary>
/// Parses <paramref name="args"/>, builds a <c>Controller</c> from the result and asserts
/// that its <c>_currentLogLevel</c> equals <c>LogLevel.debug</c>.
/// </summary>
public void ParseVerboseArgTest(string[] args)
{
    var parsedCommand = CmdParser.ParseArguments(args);
    var controller = new Controller(parsedCommand);

    Assert.That(controller._currentLogLevel, Is.EqualTo(LogLevel.debug));
}
/// <summary>
/// Handles the "install" verb: parses <paramref name="args"/> into the install options and
/// executes a <c>DefaultInstallCommand</c> built from them.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <param name="options">Options container; its <c>Install</c> member receives the parsed values.</param>
/// <returns>1 when the arguments cannot be parsed, otherwise 0.</returns>
protected virtual int HandleInstallCommand(string[] args, DefaultOptions options)
{
    var installOptions = options.Install;
    ConfigureServices(installOptions);

    if (!CmdParser.ParseArguments(args, installOptions))
    {
        return 1;
    }

    // A user-supplied source goes first, followed by the default repositories.
    var packageSources = new List<string>();
    if (!string.IsNullOrEmpty(installOptions.Source))
    {
        packageSources.Add(installOptions.Source);
    }
    packageSources.AddRange(DefaultValues.DefaultNuGetRepositories);

    var installCommand = new DefaultInstallCommand();
    installCommand.PackageSources = packageSources;
    installCommand.Url = installOptions.Url;
    installCommand.Id = installOptions.Id;
    installCommand.Version = installOptions.Version;
    installCommand.PreRelease = installOptions.PreRelease;
    installCommand.UserName = installOptions.UserName;
    installCommand.UserPassword = installOptions.UserPassword;
    installCommand.SharePointVersion = installOptions.SharePointVersion;
    installCommand.SharePointEdition = installOptions.SharePointEdition;
    installCommand.SharePointApi = installOptions.SharePointApi;
    installCommand.ToolId = installOptions.ToolId;
    installCommand.ToolVersion = installOptions.ToolVersion;
    installCommand.Force = installOptions.Force;

    // Fall back to O365 when no version was supplied.
    if (string.IsNullOrEmpty(installOptions.SharePointVersion))
    {
        installCommand.SharePointVersion = SharePointRuntimVersions.O365;
    }

    installCommand.Execute();
    return 0;
}
/// <summary>
/// Runs the CreateInstances command on the <c>adult</c> dataset, writing train, test and
/// validation instances plus their stats files, then calls <c>CheckEquality</c> on each of
/// the six outputs. The test file is passed for both /test and /valid.
/// </summary>
public void TestCreateTextInstances()
{
    TestDataset dataset = TestDatasets.adult;
    string trainPath = GetDataPath(dataset.trainFilename);
    string testPath = GetDataPath(dataset.testFilename);

    // Output names are prefixed with the test name.
    string namePrefix = TestContext.TestName + "-";
    string trainName = namePrefix + "Adult-Train.txt";
    string trainStatsName = namePrefix + "Adult-Train.stats.txt";
    string testName = namePrefix + "Adult-Test.txt";
    string testStatsName = namePrefix + "Adult-Test.stats.txt";
    string validName = namePrefix + "Adult-Valid.txt";
    string validStatsName = namePrefix + "Adult-Valid.stats.txt";

    string trainOut = DeleteOutputPath(Dir, trainName);
    string trainStatsOut = DeleteOutputPath(Dir, trainStatsName);
    string testOut = DeleteOutputPath(Dir, testName);
    string testStatsOut = DeleteOutputPath(Dir, testStatsName);
    string validOut = DeleteOutputPath(Dir, validName);
    string validStatsOut = DeleteOutputPath(Dir, validStatsName);

    string commandLine = string.Format(
        "/c=CreateInstances {0} /test={1} /valid={1} /cacheinst=- {2} " +
        "/cifile={3} /cistatsfile={4} /citestfile={5} /citeststatsfile={6} /civalidfile={7} /civalidstatsfile={8}",
        trainPath, testPath, dataset.extraSettings,
        trainOut, trainStatsOut, testOut, testStatsOut, validOut, validStatsOut)
        + " /writer TextInstanceWriter{/stats=+} /disableTracking=+";

    var parsedArgs = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(commandLine, parsedArgs));
    RunExperiments.Run(parsedArgs);

    string[] producedNames =
    {
        trainName, trainStatsName, testName, testStatsName, validName, validStatsName
    };
    foreach (string producedName in producedNames)
    {
        CheckEquality(Dir, producedName);
    }

    Done();
}
/// <summary>
/// Runs CreateInstances with a rank-3 PCA transform on the <c>mnistTiny28</c> training data,
/// calls <c>CheckEquality</c> on the output, and then verifies that the summed absolute value
/// of each output feature is strictly decreasing in the feature index.
/// </summary>
public void TestPcaTransform()
{
    // Force Microsoft.ML.Runtime.PCA assembly to be loaded into the AppDomain so
    // ReflectionUtils.FindClassCore does not return null when called by ReflectionUtils.CreateInstance
    Assert.AreEqual(typeof(PCAPredictor).Name, "PCAPredictor");

    const int rank = 3;

    string trainPath = GetDataPath(TestDatasets.mnistTiny28.trainFilename);
    string outputName = TestContext.TestName + "-Train.txt";
    string outputPath = DeleteOutputPath(Dir, outputName);

    string transformSpec = string.Format("/inst Trans{{trans=pca {{k={0} seed=1}}}}", rank);
    string commandLine = string.Format(
        "/c CreateInstances {0} {1} /rs=1 /cifile={2}",
        trainPath, transformSpec, outputPath);

    var parsedArgs = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(commandLine, parsedArgs));
    RunExperiments.Run(parsedArgs);
    CheckEquality(Dir, outputName);

    // Verify the scales of the transformed features decrease with respect to the feature index
    var written = new TlcTextInstances(new TlcTextInstances.Arguments(), outputPath);
    var l1Norms = new double[rank];
    foreach (Instance row in written)
    {
        Assert.IsTrue(row.Features.Count == rank);

        int valueCount = row.Features.Values.Length;
        for (int slot = 0; slot < valueCount; slot++)
        {
            l1Norms[slot] += Math.Abs(row.Features.Values[slot]);
        }
    }

    // Each norm must be strictly larger than the one after it.
    for (int next = 1; next < l1Norms.Length; next++)
    {
        Assert.IsTrue(l1Norms[next - 1] > l1Norms[next]);
    }

    Done();
}
/// <summary>
/// Runs CreateInstances twice on the <c>mnistTiny28</c> data: once with MinMaxNormalizer
/// given through /norm, once with it given inside the /inst Trans{...} chain. The train and
/// test outputs of the two runs are then passed pairwise to <c>CompareInstances</c>.
/// </summary>
public void TestCreateTextInstancesWithNormalization()
{
    TestDataset dataset = TestDatasets.mnistTiny28;
    string trainPath = GetDataPath(dataset.trainFilename);
    string testPath = GetDataPath(dataset.testFilename);

    string namePrefix = TestContext.TestName + "-";
    string separateTrainOut = DeleteOutputPath(Dir, namePrefix + "Norm-Separate-Train.txt");
    string separateTestOut = DeleteOutputPath(Dir, namePrefix + "Norm-Separate-Test.txt");
    string chainedTrainOut = DeleteOutputPath(Dir, namePrefix + "Norm-Trans-Train.txt");
    string chainedTestOut = DeleteOutputPath(Dir, namePrefix + "Norm-Trans-Test.txt");

    string transformSpec = "inst=Trans{trans=RFF {rng=1}}";

    // Run 1: normalizer passed through /norm.
    string separateCommandLine = string.Format(
        "/c=CreateInstances {0} /test={1} /norm=MinMaxNormalizer /{2} /cifile={3} /citestfile={4}",
        trainPath, testPath, transformSpec, separateTrainOut, separateTestOut);
    var separateArgs = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(separateCommandLine, separateArgs));
    RunExperiments.Run(separateArgs);

    // Run 2: normalizer placed inside the Trans{...} chain.
    string chainedCommandLine = string.Format(
        "/c=CreateInstances {0} /test={1} /inst Trans{{trans=MinMaxNormalizer {2}}} /cifile={3} /citestfile={4}",
        trainPath, testPath, transformSpec, chainedTrainOut, chainedTestOut);
    var chainedArgs = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(chainedCommandLine, chainedArgs));
    RunExperiments.Run(chainedArgs);

    CompareInstances(
        new TlcTextInstances(new TlcTextInstances.Arguments(), separateTrainOut),
        new TlcTextInstances(new TlcTextInstances.Arguments(), chainedTrainOut));

    CompareInstances(
        new TlcTextInstances(new TlcTextInstances.Arguments(), separateTestOut),
        new TlcTextInstances(new TlcTextInstances.Arguments(), chainedTestOut));

    Done();
}
// [TestCase("blablabla")]
/// <summary>
/// Writes a temporary CSV file, runs "write-table" against <paramref name="nonExistingTableId"/>
/// and asserts that <c>Controller.Execute</c> returns false.
/// </summary>
/// <param name="nonExistingTableId">Table id passed as the write-table target.</param>
public void UpdateNonExistingTableTest(string nonExistingTableId)
{
    string updateFileName = "filetoupdate" + DateTime.Now.Ticks + ".csv";
    string[] args = new string[] { "write-table", nonExistingTableId, updateFileName, "-t", testToken };

    FileInfo updateFileInfo = new FileInfo(updateFileName);
    try
    {
        // creates test file
        string[] dataToUpdate = PrepareDataToUpdate(100);
        System.IO.File.WriteAllLines(updateFileInfo.FullName, dataToUpdate);

        var command = CmdParser.ParseArguments(args);
        Controller ctrl = new Controller(command);
        bool result = ctrl.Execute();

        Assert.That(result, Is.False);
    }
    finally
    {
        // Remove the file even when the assertion fails or something above throws, so failed
        // runs do not leave files behind. FileInfo.Delete does nothing if the file is absent.
        updateFileInfo.Delete();
    }
}
/// <summary>
/// Process a script to be parsed (from the input resource): parses <paramref name="text"/>
/// into a clone of <paramref name="defaults"/>, writes its ToString and settings forms, then
/// re-parses the settings string into a second clone and reports any mismatch as a BUG line.
/// </summary>
/// <param name="wrt">Writer receiving all output, including parser error lines.</param>
/// <param name="text">The settings text to parse.</param>
/// <param name="defaults">Default arguments; cloned for each parse and used as the baseline for GetSettings.</param>
private static void Process(IndentingTextWriter wrt, string text, ArgsBase defaults)
{
    var env = new TlcEnvironment(seed: 42);

    using (wrt.Nest())
    {
        // Pass 1: parse the raw script text.
        var parsed = defaults.Clone();
        using (wrt.Nest())
        {
            bool firstParseOk = CmdParser.ParseArguments(env, text, parsed, msg => wrt.WriteLine("*** {0}", msg));
            if (!firstParseOk)
            {
                wrt.WriteLine("*** Failed!");
            }
        }

        string parsedText = parsed.ToString();
        wrt.WriteLine("ToString: {0}", parsedText);

        string parsedSettings = CmdParser.GetSettings(env, parsed, defaults, SettingsFlags.None);
        wrt.WriteLine("Settings: {0}", parsedSettings);

        // Pass 2: the generated settings string must parse back to the same state.
        var reparsed = defaults.Clone();
        using (wrt.Nest())
        {
            bool secondParseOk = CmdParser.ParseArguments(env, parsedSettings, reparsed, msg => wrt.WriteLine("*** BUG: {0}", msg));
            if (!secondParseOk)
            {
                wrt.WriteLine("*** BUG: parsing result of GetSettings failed!");
            }
        }

        string reparsedText = reparsed.ToString();
        if (!string.Equals(parsedText, reparsedText))
        {
            wrt.WriteLine("*** BUG: ToString Mismatch: {0}", reparsedText);
        }

        string reparsedSettings = CmdParser.GetSettings(env, reparsed, defaults, SettingsFlags.None);
        if (!string.Equals(parsedSettings, reparsedSettings))
        {
            wrt.WriteLine("*** BUG: Settings Mismatch: {0}", reparsedSettings);
        }
    }
}
/// <summary>
/// Runs an experiment on the breast-cancer training data with /ev=MulticlassTester and
/// expects <c>RunExperiments.Run</c> to throw an <c>AggregateException</c>; calls
/// <c>Fail</c> if it does not.
/// </summary>
public void TestCrossValidationWithInvalidTester()
{
    string commandLine = GetDataPath(TestDatasets.breastCancer.trainFilename)
        + " /ev=MulticlassTester /o z.txt /threads=+ /disableTracking=+";

    var parsedArgs = new TLCArguments();
    CmdParser.ParseArguments(commandLine, parsedArgs);

    AggregateException expected = null;
    try
    {
        RunExperiments.Run(parsedArgs);
    }
    catch (AggregateException ex)
    {
        expected = ex;
    }

    if (expected != null)
    {
        Log("Caught expected exception: {0}", expected);
    }
    else
    {
        Fail("Expected exception!");
    }

    Done();
}
/// <summary>
/// Handles the "update" verb: parses <paramref name="args"/> into the update options and
/// executes a <c>DefaultUpdateCommand</c> built from them.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <param name="options">Options container; its <c>Update</c> member receives the parsed values.</param>
/// <returns>1 when the arguments cannot be parsed, otherwise 0.</returns>
protected virtual int HandleUpdateCommand(string[] args, DefaultOptions options)
{
    var updateOptions = options.Update;
    ConfigureServices(updateOptions);

    if (!CmdParser.ParseArguments(args, updateOptions))
    {
        return 1;
    }

    var updateCommand = new DefaultUpdateCommand();
    updateCommand.Source = updateOptions.Source;
    updateCommand.Url = updateOptions.Url;
    updateCommand.Id = updateOptions.Id;
    updateCommand.Version = updateOptions.Version;
    updateCommand.PreRelease = updateOptions.PreRelease;
    updateCommand.UserName = updateOptions.UserName;
    updateCommand.UserPassword = updateOptions.UserPassword;
    updateCommand.SharePointVersion = updateOptions.SharePointVersion;
    updateCommand.SharePointEdition = updateOptions.SharePointEdition;
    updateCommand.SharePointApi = updateOptions.SharePointApi;
    updateCommand.Force = updateOptions.Force;

    // Fall back to O365 when no version was supplied.
    if (string.IsNullOrEmpty(updateOptions.SharePointVersion))
    {
        updateCommand.SharePointVersion = SharePointRuntimVersions.O365;
    }

    updateCommand.Execute();
    return 0;
}
/// <summary>
/// Handles the "push" verb: parses <paramref name="args"/> into the push options, resolves
/// the package path and executes a <c>DefaultNuGetPushCommand</c> with the package stream.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <param name="options">Options container; its <c>Push</c> member receives the parsed values.</param>
/// <returns>
/// 1 when the arguments cannot be parsed or the package file does not exist, otherwise 0.
/// </returns>
protected virtual int HandlePushCommand(string[] args, DefaultOptions options)
{
    var op = options.Push;
    ConfigureServices(op);

    if (!CmdParser.ParseArguments(args, op))
    {
        return 1;
    }

    Info(string.Format("Resolving package path [{0}]", op.Package));
    var packageFileFullPath = Path.GetFullPath(op.Package);
    Info(string.Format("Resolved package path [{0}] into [{1}]", op.Package, packageFileFullPath));

    if (!File.Exists(packageFileFullPath))
    {
        Info(string.Format("File does not exist:[{0}]", packageFileFullPath));

        // Stop here with a failure code; previously execution continued and the open call
        // below threw on the missing file.
        return 1;
    }

    // Opened read-only so a read-only package file can still be pushed.
    using (var stream = File.OpenRead(packageFileFullPath))
    {
        var command = new DefaultNuGetPushCommand
        {
            Source = op.Source,
            ApiKey = op.ApiKey,
            Package = stream
        };

        command.Execute();
    }

    return 0;
}
/// <summary>
/// Saves a data model with CreateInstances using Text{text=1,2,3}, then runs CreateInstances
/// again with that model as input but with Text{text=1,2}. The second run is expected to
/// throw an exception whose base exception is an <c>InvalidOperationException</c>.
/// </summary>
public void TestFeatureHandlerIncorrectMapping()
{
    string trainData = GetDataPath(TestDatasets.breastCancer.trainFilename);
    string dataModelFile = DeleteOutputPath(Dir, TestContext.TestName + "-data-model.zip");
    string ciFile = DeleteOutputPath(Dir, TestContext.TestName + "-ci.tsv");

    // First run: writes the data model.
    string argsString = string.Format(
        "/c CreateInstances {0} /inst Text{{text=1,2,3}} /m {1} /cifile {2}",
        trainData, dataModelFile, ciFile);
    var args = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(argsString, args));
    RunExperiments.Run(args);

    // Second run: reads the model back with a different text column list.
    string ciFailFile = DeleteOutputPath(Dir, TestContext.TestName + "-ci-fail.tsv");
    argsString = string.Format(
        "/c CreateInstances {0} /inst Text{{text=1,2}} /im {1} /cifile {2}",
        trainData, dataModelFile, ciFailFile);
    args = new TLCArguments();
    Assert.IsTrue(CmdParser.ParseArguments(argsString, args));

    // Capture the exception and assert outside the try block. With Assert.Fail inside the
    // try, the catch-all intercepted the assertion's own exception and the test reported a
    // type-check failure instead of "expected to throw".
    Exception thrown = null;
    try
    {
        RunExperiments.Run(args);
    }
    catch (Exception ex)
    {
        thrown = ex;
    }

    Assert.IsNotNull(thrown, "Expected to throw with different input model format");
    Assert.IsTrue(thrown.GetBaseException() is InvalidOperationException);
    Done();
}
/// <summary>
/// Entry point of the client: stores the raw arguments, reads the log-file and output-format
/// options in a first parsing pass, then parses the verb and dispatches to the matching
/// Handle*Command method.
/// </summary>
/// <param name="args">
/// Command-line arguments. A null or empty array is replaced by the single verb "help".
/// </param>
/// <returns>The result of the dispatched handler; 0 if no handler ran.</returns>
public int Run(string[] args)
{
    Args = args;

    // A null array is treated like an empty one instead of throwing inside Any().
    if (args == null || !args.Any())
    {
        args = new string[1] { "help" };
    }

    var result = 0;
    var options = new DefaultOptions();

    // get the log file location
    var preDefaultArgs = new EmptySubOptionsBase();
    CmdParser.ParseArguments(args, preDefaultArgs);

    if (!string.IsNullOrEmpty(preDefaultArgs.LogFile))
    {
        LogFile = preDefaultArgs.LogFile;
    }

    // Ordinal, case-insensitive compare: ToLower() depends on the current culture (under a
    // Turkish culture "JSON" does not lower-case to "json"). A null Output compares unequal.
    if (string.Equals(preDefaultArgs.Output, "json", StringComparison.OrdinalIgnoreCase))
    {
        IsJsonOutput = true;
    }

    if (!CmdParser.ParseArguments(args, options, (verb, subOption) =>
    {
        // One-time setup, done the first time the verb callback runs.
        if (!hadFirstRun)
        {
            ConfigureServiceContainer();
            AppDomain.CurrentDomain.AssemblyResolve += CurrentDomain_AssemblyResolve;
            hadFirstRun = true;
        }

        // The working directory is the folder containing this assembly.
        var currentFilePath = GetType().Assembly.Location;
        var currentFolderPath = new DirectoryInfo(currentFilePath).Parent.FullName;

        Info(string.Format("Metapack client v{0}", GetCurrentClientVersion()));
        Info(string.Format("Working directory: [{0}]", currentFolderPath));

        WorkingDirectory = currentFolderPath;
        ParseAppConfig();

        // Dispatch on whichever sub-options object is non-null.
        if (options.List != null)
        {
            result = HandleListCommand(args, options);
        }
        else if (options.Install != null)
        {
            result = HandleInstallCommand(args, options);
        }
        else if (options.Update != null)
        {
            result = HandleUpdateCommand(args, options);
        }
        else if (options.Push != null)
        {
            result = HandlePushCommand(args, options);
        }
        else if (options.Version != null)
        {
            result = HandleVersionCommand(args, options);
        }
        else if (IsHelp)
        {
            Info(options.GetUsage());
            result = 0;
        }
        else
        {
            result = HandleMissedCommand(options);
        }
    }))
    {
        if (!IsHelp)
        {
            HandleWrongArgumentParsing();
        }
    }

    return result;
}
/// <summary>
/// Trains a linear SVM on the <c>adultText</c> training data into a temporary model file,
/// reloads the predictor, and checks that predictions made on instances built line by line
/// through an instance factory equal the predictions made on instances created by
/// <c>RunExperiments.CreateTestData</c> for the same test file.
/// </summary>
public void FactoryExampleTest()
{
    var dataset = TestDatasets.adultText;
    string dataFilename = GetDataPath(dataset.trainFilename);
    string testDataFilename = GetDataPath(dataset.testFilename);

    ///********* Training a model *******//
    string modelFilename = Path.GetTempFileName();
    try
    {
        TLCArguments cmd = new TLCArguments();
        Assert.True(CmdParser.ParseArguments(dataset.extraSettings, cmd));
        cmd.command = Command.Train;
        cmd.modelfile = modelFilename;
        cmd.datafile = dataFilename;
        cmd.instancesSettings = dataset.settings;
        cmd.classifierName = TestLearners.linearSVM.Trainer;
        RunExperiments.Run(cmd);

        // Load and make predictions with a previously saved model.
        IDataModel dataModel;
        IDataStats dataStats;
        var predictor = (IDistributionPredictor<Instance, Float, Float>)PredictorUtils.LoadPredictor(
            out dataModel, out dataStats, modelFilename);
        var instanceFactory = ReflectionUtilsOld.CreateInstanceOld<IInstanceFactory, SignatureInstances>(
            cmd.instancesClass, cmd.instancesSettings, null, dataModel);

        bool headerSkip = true;
        List<Float> outputs = new List<Float>();
        List<Float> probabilities = new List<Float>();
        using (StreamReader reader = new StreamReader(testDataFilename))
        {
            List<string> features = new List<string>();
            string text;
            long line = 0;
            while ((text = reader.ReadLine()) != null)
            {
                ++line;
                if (string.IsNullOrWhiteSpace(text))
                {
                    continue;
                }

                string[] cols = text.Split(',');
                Assert.True(cols.Length == 15);

                if (headerSkip)
                {
                    // skip header line
                    headerSkip = false;
                    continue;
                }

                features.Clear();

                // Add in the "max dimensionality"
                features.Add("15");
                for (int col = 0; col < cols.Length; col++)
                {
                    string s = cols[col].Trim();
                    switch (col)
                    {
                        case 0:
                        case 2:
                        case 4:
                        case 10:
                        case 11:
                        case 12:
                        case 14:
                            // numeric feature or label -- add if non-zero
                            Float val = InstancesUtils.FloatParse(s);
                            if (val == 0) // Beware of NaNs - they should be recorded!
                            {
                                // Skips the Add below: zero-valued numeric columns are omitted.
                                continue;
                            }
                            break;
                    }

                    features.Add(col + ":" + s);
                }

                Instance instance = instanceFactory.ProduceInstance(line, features.ToArray());
                Float rawOutput, probability;
                probability = predictor.PredictDistribution(instance, out rawOutput);
                outputs.Add(rawOutput);
                probabilities.Add(probability);
            }
        }

        // Reference predictions from instances created by RunExperiments.CreateTestData.
        List<Float> originalOutputs = new List<Float>();
        List<Float> originalProbabilities = new List<Float>();
        var env = new LocalEnvironment(SysRandom.Wrap(RunExperiments.GetRandom(cmd)));
        Instances instances = RunExperiments.CreateTestData(cmd, testDataFilename, dataModel, null, env);
        foreach (Instance instance in instances)
        {
            Float rawOutput, probability;
            probability = predictor.PredictDistribution(instance, out rawOutput);
            originalOutputs.Add(rawOutput);
            originalProbabilities.Add(probability);
        }

        CollectionAssert.Equal(outputs, originalOutputs);
        CollectionAssert.Equal(probabilities, originalProbabilities);
    }
    finally
    {
        // Delete the temp model even when training or an assertion fails, so failed runs do
        // not leave temp files behind. File.Delete does nothing if the file is absent.
        File.Delete(modelFilename);
    }

    Done();
}
// This method is called if only a datafile is specified, without a loader/term and value columns.
// It determines the type of the Value column and returns the appropriate TextLoader component factory.
//
// filename:  the lookup file; column 0 is read as the term and column 1 as the value.
// keyValues: false -> the Value column is declared numeric and the file is not scanned;
//            true  -> the file is scanned once to find the smallest and largest key value.
// host:      used for assertions, progress channels and exception creation.
//
// Any exception raised during the scan is wrapped by host.Except with a message naming the file.
private static IComponentFactory <IMultiStreamSource, IDataLoader> GetLoaderFactory(string filename, bool keyValues, IHost host)
{
    Contracts.AssertValue(host);

    // If the user specified non-key values, we define the value column to be numeric.
    if (!keyValues)
    {
        return(ComponentFactoryUtils.CreateFromFunction <IMultiStreamSource, IDataLoader>(
                   (env, files) => TextLoader.Create(
                       env, new TextLoader.Arguments() { Column = new[] { new TextLoader.Column("Term", DataKind.TX, 0), new TextLoader.Column("Value", DataKind.Num, 1) } }, files)));
    }

    // If the user specified key values, we scan the values to determine the range of the key type.
    // min/max start inverted (min at the largest ulong, max at the smallest), so "min > max"
    // after the scan means that no valid key value was seen.
    ulong min = ulong.MaxValue;
    ulong max = ulong.MinValue;
    try
    {
        // Both columns are read as text for the scan; values are parsed manually below.
        var txtArgs = new TextLoader.Arguments();
        bool parsed = CmdParser.ParseArguments(host, "col=Term:TX:0 col=Value:TX:1", txtArgs);
        host.Assert(parsed);
        var data = TextLoader.ReadFile(host, txtArgs, new MultiFileSource(filename));
        using (var cursor = data.GetRowCursor(c => true))
        {
            var getTerm = cursor.GetGetter <DvText>(0);
            var getVal = cursor.GetGetter <DvText>(1);
            DvText txt = default(DvText);
            using (var ch = host.Start("Creating Text Lookup Loader"))
            {
                long countNonKeys = 0;
                while (cursor.MoveNext())
                {
                    getVal(ref txt);
                    ulong res;
                    // Try to parse the text as a key value between 1 and ulong.MaxValue. If this succeeds and res>0,
                    // we update max and min accordingly. If res==0 it means the value is missing, in which case we ignore it for
                    // computing max and min.
                    if (Conversions.Instance.TryParseKey(ref txt, 1, ulong.MaxValue, out res))
                    {
                        if (res < min && res != 0) { min = res; }
                        if (res > max) { max = res; }
                    }
                    // If parsing as key did not succeed, the value can still be 0, so we try parsing it as a ulong. If it succeeds,
                    // then the value is 0, and we update min accordingly.
                    else if (Conversions.Instance.TryParse(ref txt, out res))
                    {
                        ch.Assert(res == 0);
                        min = 0;
                    }
                    //If parsing as a ulong fails, we increment the counter for the non-key values.
                    else
                    {
                        var term = default(DvText);
                        getTerm(ref term);
                        // Only the first five offending terms get their own warning; the rest
                        // are covered by the summary warning after the loop.
                        if (countNonKeys < 5) { ch.Warning("Term '{0}' in mapping file is mapped to non key value '{1}'", term, txt); }
                        countNonKeys++;
                    }
                }
                if (countNonKeys > 0) { ch.Warning("Found {0} non key values in the file '{1}'", countNonKeys, filename); }
                if (min > max)
                {
                    // No valid key was found: fall back to the range 0 .. uint.MaxValue - 1.
                    min = 0;
                    max = uint.MaxValue - 1;
                    ch.Warning("did not find any valid key values in the file '{0}'", filename);
                }
                else
                {
                    ch.Info("Found key values in the range {0} to {1} in the file '{2}'", min, max, filename);
                }
                ch.Done();
            }
        }
    }
    catch (Exception e)
    {
        throw host.Except(e, "Failed to parse the lookup file '{0}' in TermLookupTransform", filename);
    }

    // Choose the Value column's type and key range from the width (max - min) of the observed range:
    //   below int.MaxValue  -> U4 with a KeyRange built from min and max;
    //   below uint.MaxValue -> U4 with a KeyRange built from min only;
    //   otherwise           -> U8 with a KeyRange built from min only.
    TextLoader.Column valueColumn = new TextLoader.Column("Value", DataKind.U4, 1);
    if (max - min < (ulong)int.MaxValue) { valueColumn.KeyRange = new KeyRange(min, max); }
    else if (max - min < (ulong)uint.MaxValue) { valueColumn.KeyRange = new KeyRange(min); }
    else
    {
        valueColumn.Type = DataKind.U8;
        valueColumn.KeyRange = new KeyRange(min);
    }

    return(ComponentFactoryUtils.CreateFromFunction <IMultiStreamSource, IDataLoader>(
               (env, files) => TextLoader.Create(
                   env, new TextLoader.Arguments() { Column = new[] { new TextLoader.Column("Term", DataKind.TX, 0), valueColumn } }, files)));
}
/// <summary>
/// Parses <paramref name="graphStr"/> into <c>RunGraphArgs</c>, builds a <c>GraphRunner</c>
/// from the "nodes" array of the JSON graph, binds the graph's "inputs", runs all nodes and
/// then handles the graph's "outputs".
/// </summary>
/// <remarks>
/// For each entry of "inputs"/"outputs" the key is the graph variable name and the value is a
/// path. A blank path on a DataView port means the data is exchanged in memory: inputs take
/// the next native data view, outputs are sent back through <paramref name="penv"/>.
/// Models must always have a path.
/// </remarks>
/// <param name="penv">Native environment block (thread limit; target for in-memory outputs).</param>
/// <param name="env">Host environment used for argument parsing and to register the host.</param>
/// <param name="graphStr">Settings string parsed into <c>RunGraphArgs</c>.</param>
/// <param name="cdata">Number of native data sources in <paramref name="ppdata"/>.</param>
/// <param name="ppdata">Native data source blocks, wrapped as <c>NativeDataView</c> instances.</param>
private static void RunGraphCore(EnvironmentBlock* penv, IHostEnvironment env, string graphStr, int cdata, DataSourceBlock** ppdata)
{
    Contracts.AssertValue(env);

    var args = new RunGraphArgs();
    string err = null;
    if (!CmdParser.ParseArguments(env, graphStr, args, e => err = err ?? e))
    {
        throw env.Except(err);
    }

    // NOTE(review): maxThreadsAllowed is computed but not read again in this method —
    // confirm whether it was meant to be passed on.
    int? maxThreadsAllowed = Math.Min(args.parallel > 0 ? args.parallel.Value : penv->maxThreadsAllowed, penv->maxThreadsAllowed);
    maxThreadsAllowed = penv->maxThreadsAllowed > 0 ? maxThreadsAllowed : args.parallel;
    var host = env.Register("RunGraph", args.randomSeed, null);

    JObject graph;
    try
    {
        graph = JObject.Parse(args.graph);
    }
    catch (JsonReaderException ex)
    {
        throw host.Except(ex, "Failed to parse experiment graph: {0}", ex.Message);
    }

    var runner = new GraphRunner(host, graph["nodes"] as JArray);
    var dvNative = new IDataView[cdata];
    try
    {
        for (int i = 0; i < cdata; i++)
        {
            dvNative[i] = new NativeDataView(host, ppdata[i]);
        }

        // Setting inputs.
        var jInputs = graph["inputs"] as JObject;
        if (graph["inputs"] != null && jInputs == null)
        {
            throw host.Except("Unexpected value for 'inputs': {0}", graph["inputs"]);
        }

        // Index of the next native data view to hand to an input with a blank path.
        int iDv = 0;
        if (jInputs != null)
        {
            foreach (var kvp in jInputs)
            {
                var pathValue = kvp.Value as JValue;
                if (pathValue == null)
                {
                    throw host.Except("Invalid value for input: {0}", kvp.Value);
                }

                var path = pathValue.Value<string>();
                var varName = kvp.Key;
                var type = runner.GetPortDataKind(varName);

                switch (type)
                {
                    case TlcModule.DataKind.FileHandle:
                        var fh = new SimpleFileHandle(host, path, false, false);
                        runner.SetInput(varName, fh);
                        break;

                    case TlcModule.DataKind.DataView:
                        IDataView dv;
                        if (!string.IsNullOrWhiteSpace(path))
                        {
                            // The loader is chosen by file extension; anything that is not
                            // .txt or .dprep goes to the binary loader.
                            var extension = Path.GetExtension(path);
                            if (extension == ".txt")
                            {
                                dv = TextLoader.LoadFile(host, new TextLoader.Options(), new MultiFileSource(path));
                            }
                            else if (extension == ".dprep")
                            {
                                dv = DataFlow.FromDPrepFile(path).ToDataView();
                            }
                            else
                            {
                                dv = new BinaryLoader(host, new BinaryLoader.Arguments(), path);
                            }
                        }
                        else
                        {
                            Contracts.Assert(iDv < dvNative.Length);
                            // prefetch all columns
                            dv = dvNative[iDv++];
                            var prefetch = new int[dv.Schema.Count];
                            for (int i = 0; i < prefetch.Length; i++)
                            {
                                prefetch[i] = i;
                            }
                            dv = new CacheDataView(host, dv, prefetch);
                        }
                        runner.SetInput(varName, dv);
                        break;

                    case TlcModule.DataKind.PredictorModel:
                        PredictorModel pm;
                        if (!string.IsNullOrWhiteSpace(path))
                        {
                            using (var fs = File.OpenRead(path))
                                pm = new PredictorModelImpl(host, fs);
                        }
                        else
                        {
                            throw host.Except("Model must be loaded from a file");
                        }
                        runner.SetInput(varName, pm);
                        break;

                    case TlcModule.DataKind.TransformModel:
                        TransformModel tm;
                        if (!string.IsNullOrWhiteSpace(path))
                        {
                            using (var fs = File.OpenRead(path))
                                tm = new TransformModelImpl(host, fs);
                        }
                        else
                        {
                            throw host.Except("Model must be loaded from a file");
                        }
                        runner.SetInput(varName, tm);
                        break;

                    default:
                        throw host.Except("Port type {0} not supported", type);
                }
            }
        }

        runner.RunAll();

        // Reading outputs.
        using (var ch = host.Start("Reading outputs"))
        {
            var jOutputs = graph["outputs"] as JObject;
            if (jOutputs != null)
            {
                foreach (var kvp in jOutputs)
                {
                    var pathValue = kvp.Value as JValue;
                    if (pathValue == null)
                    {
                        // Message corrected: this loop validates outputs, not inputs.
                        throw host.Except("Invalid value for output: {0}", kvp.Value);
                    }

                    var path = pathValue.Value<string>();
                    var varName = kvp.Key;
                    var type = runner.GetPortDataKind(varName);

                    switch (type)
                    {
                        case TlcModule.DataKind.FileHandle:
                            var fh = runner.GetOutput<IFileHandle>(varName);
                            throw host.ExceptNotSupp("File handle outputs not yet supported.");

                        case TlcModule.DataKind.DataView:
                            var idv = runner.GetOutput<IDataView>(varName);
                            if (!string.IsNullOrWhiteSpace(path))
                            {
                                SaveIdvToFile(idv, path, host);
                            }
                            else
                            {
                                var infos = ProcessColumns(ref idv, args.maxSlots, host);
                                SendViewToNative(ch, penv, idv, infos);
                            }
                            break;

                        case TlcModule.DataKind.PredictorModel:
                            var pm = runner.GetOutput<PredictorModel>(varName);
                            if (!string.IsNullOrWhiteSpace(path))
                            {
                                SavePredictorModelToFile(pm, path, host);
                            }
                            else
                            {
                                throw host.Except("Returning in-memory models is not supported");
                            }
                            break;

                        case TlcModule.DataKind.TransformModel:
                            var tm = runner.GetOutput<TransformModel>(varName);
                            if (!string.IsNullOrWhiteSpace(path))
                            {
                                // File.Create truncates an existing file. File.OpenWrite does
                                // not, so overwriting a longer file left stale bytes after
                                // the newly written model.
                                using (var fs = File.Create(path))
                                    tm.Save(host, fs);
                            }
                            else
                            {
                                throw host.Except("Returning in-memory models is not supported");
                            }
                            break;

                        case TlcModule.DataKind.Array:
                            var objArray = runner.GetOutput<object[]>(varName);
                            if (objArray is PredictorModel[])
                            {
                                var modelArray = (PredictorModel[])objArray;
                                // Save each model separately
                                // (path is used as a format string; {0} receives the index).
                                for (var i = 0; i < modelArray.Length; i++)
                                {
                                    var modelPath = string.Format(CultureInfo.InvariantCulture, path, i);
                                    SavePredictorModelToFile(modelArray[i], modelPath, host);
                                }
                            }
                            else
                            {
                                throw host.Except("DataKind.Array type {0} not supported", objArray.First().GetType());
                            }
                            break;

                        default:
                            throw host.Except("Port type {0} not supported", type);
                    }
                }
            }
        }
    }
    finally
    {
        // The raw data view is disposable so it lets go of unmanaged raw pointers before we return.
        for (int i = 0; i < dvNative.Length; i++)
        {
            var view = dvNative[i];
            if (view == null)
            {
                // Slot never filled (construction failed part-way through the array).
                continue;
            }

            host.Assert(view is IDisposable);
            var disp = (IDisposable)dvNative[i];
            disp.Dispose();
        }
    }
}
// This method is called if only a datafile is specified, without a loader/term and value columns. // It determines the type of the Value column and returns the appropriate TextLoader component factory. private static IComponentFactory <IMultiStreamSource, IDataLoader> GetLoaderFactory(string filename, bool keyValues, IHost host) { Contracts.AssertValue(host); // If the user specified non-key values, we define the value column to be numeric. if (!keyValues) { return(ComponentFactoryUtils.CreateFromFunction <IMultiStreamSource, IDataLoader>( (env, files) => TextLoader.Create( env, new TextLoader.Arguments() { Column = new[] { new TextLoader.Column("Term", DataKind.TX, 0), new TextLoader.Column("Value", DataKind.Num, 1) } }, files))); } // If the user specified key values, we scan the values to determine the range of the key type. ulong min = ulong.MaxValue; ulong max = ulong.MinValue; try { var txtArgs = new TextLoader.Arguments(); bool parsed = CmdParser.ParseArguments(host, "col=Term:TX:0 col=Value:TX:1", txtArgs); host.Assert(parsed); var data = TextLoader.ReadFile(host, txtArgs, new MultiFileSource(filename)); using (var cursor = data.GetRowCursor(c => true)) { var getTerm = cursor.GetGetter <ReadOnlyMemory <char> >(0); var getVal = cursor.GetGetter <ReadOnlyMemory <char> >(1); ReadOnlyMemory <char> txt = default; using (var ch = host.Start("Creating Text Lookup Loader")) { long countNonKeys = 0; while (cursor.MoveNext()) { getVal(ref txt); ulong res; // Try to parse the text as a key value between 1 and ulong.MaxValue. If this succeeds and res>0, // we update max and min accordingly. If res==0 it means the value is missing, in which case we ignore it for // computing max and min. if (Runtime.Data.Conversion.Conversions.Instance.TryParseKey(in txt, 1, ulong.MaxValue, out res)) { if (res < min && res != 0) { min = res; } if (res > max) { max = res; } } // If parsing as key did not succeed, the value can still be 0, so we try parsing it as a ulong. 
If it succeeds, // then the value is 0, and we update min accordingly. else if (Runtime.Data.Conversion.Conversions.Instance.TryParse(in txt, out res)) { ch.Assert(res == 0); min = 0; }