/// <summary>
/// Run TrainTest, CV, and TrainSaveTest for a single predictor on a single dataset.
/// </summary>
protected void RunOneAllTests(PredictorAndArgs predictor, TestDataset dataset,
    string[] extraSettings = null, string extraTag = "", bool summary = false,
    int digitsOfPrecision = DigitsOfPrecision, NumberParseOption parseOption = NumberParseOption.Default)
{
    Contracts.Assert(IsActive);

    // Train-test first, then cross-validation over the same data.
    Run_TrainTest(predictor, dataset, extraSettings, extraTag,
        summary: summary, digitsOfPrecision: digitsOfPrecision, parseOption: parseOption);
    Run_CV(predictor, dataset, extraSettings, extraTag,
        useTest: true, digitsOfPrecision: digitsOfPrecision, parseOption: parseOption);
}
/// <summary>
/// Run a train-test unit test.
/// </summary>
protected void Run_TrainTest(PredictorAndArgs predictor, TestDataset dataset,
    string[] extraSettings = null, string extraTag = "", bool expectFailure = false,
    bool summary = false, bool saveAsIni = false, int digitsOfPrecision = DigitsOfPrecision,
    NumberParseOption parseOption = NumberParseOption.Default)
{
    var ctx = new RunContext(this, Cmd.TrainTest, predictor, dataset, extraSettings, extraTag,
        expectFailure: expectFailure, summary: summary, saveAsIni: saveAsIni);
    Run(ctx, digitsOfPrecision, parseOption);
}
/// <summary>
/// Run TrainTest and CV for a set of predictors on a set of datasets.
/// </summary>
protected void RunAllTests(
    IList<PredictorAndArgs> predictors, IList<TestDataset> datasets,
    string[] extraSettings = null, string extraTag = "", bool summary = false,
    int digitsOfPrecision = DigitsOfPrecision, NumberParseOption parseOption = NumberParseOption.Default)
{
    Contracts.Assert(IsActive);

    // Datasets form the outer loop so all predictors run against one dataset
    // before moving on to the next.
    foreach (var data in datasets)
    {
        foreach (var pred in predictors)
            RunOneAllTests(pred, data, extraSettings, extraTag, summary, digitsOfPrecision, parseOption);
    }
}
/// <summary>
/// Run a cross-validation unit test, over the training set, unless
/// <paramref name="useTest"/> is set.
/// </summary>
protected void Run_CV(PredictorAndArgs predictor, TestDataset dataset,
    string[] extraSettings = null, string extraTag = "", bool useTest = false,
    int digitsOfPrecision = DigitsOfPrecision, NumberParseOption parseOption = NumberParseOption.Default)
{
    if (useTest)
    {
        // REVIEW: It is very strange to use the *test* set in
        // cross validation. Should this just be deprecated outright?
        // Clone so the caller's dataset object is not mutated.
        dataset = dataset.Clone();
        dataset.trainFilename = dataset.testFilename;
    }

    Run(new RunContext(this, Cmd.CV, predictor, dataset, extraSettings, extraTag),
        digitsOfPrecision, parseOption);
}
/// <summary>
/// Compare the output file <paramref name="name"/> in <paramref name="dir"/> against the
/// baseline file <paramref name="nameBase"/>, optionally normalizing the output first.
/// </summary>
/// <returns>True if the files match within the configured numeric tolerance.</returns>
protected bool CheckEqualityCore(string dir, string name, string nameBase, bool normalize,
    int digitsOfPrecision = DigitsOfPrecision, NumberParseOption parseOption = NumberParseOption.Default)
{
    Contracts.Assert(IsActive);
    Contracts.AssertValue(dir); // Can be empty.
    Contracts.AssertNonEmpty(name);
    Contracts.AssertNonEmpty(nameBase);

    // These asserts are necessary since some tests were attempting to combine the
    // baseline directory with an absolute path, leading to an output file being
    // compared with itself.
    Contracts.Assert(!Path.IsPathRooted(name), "file name should not be a full path");
    Contracts.Assert(!Path.IsPathRooted(nameBase), "file nameBase should not be a full path");

    string relPath = Path.Combine(dir, name);
    string basePath = GetBaselinePath(dir, nameBase);
    string outPath = GetOutputPath(dir, name);

    if (!CheckOutFile(outPath))
        return false;

    // Normalize the output file (strip volatile content such as paths and times).
    if (normalize)
        Normalize(outPath);

    if (!CheckBaseFile(basePath))
        return false;

    bool matched = CheckEqualityFromPathsCore(relPath, basePath, outPath,
        digitsOfPrecision: digitsOfPrecision, parseOption: parseOption);

    // No need to keep the raw (unnormalized) output file once the comparison passed.
    if (normalize && matched)
        File.Delete(outPath + RawSuffix);

    return matched;
}
/// <summary>
/// Compare the numbers embedded in the two lines with tolerance, then replace every
/// number in both lines with the token "%Number%" so the caller can compare the
/// remaining text exactly.
/// </summary>
/// <returns>
/// False only when both lines contain the same count of numbers and at least one pair
/// differs beyond tolerance; true otherwise (a count mismatch is left for the caller's
/// textual comparison to catch).
/// </returns>
private bool GetNumbersFromFile(ref string firstString, ref string secondString,
    int digitsOfPrecision, NumberParseOption parseOption)
{
    MatchCollection matches1 = _matchNumbers.Matches(firstString);
    MatchCollection matches2 = _matchNumbers.Matches(secondString);

    if (matches1.Count == matches2.Count
        && !MatchNumberWithTolerance(matches1, matches2, digitsOfPrecision, parseOption))
    {
        return false;
    }

    firstString = _matchNumbers.Replace(firstString, "%Number%");
    secondString = _matchNumbers.Replace(secondString, "%Number%");
    return true;
}
/// <summary>
/// Compare the captured numbers pairwise with tolerance, parsing as single or double
/// precision per <paramref name="parseOption"/>. The two collections are assumed to have
/// equal counts (the caller verifies this).
/// </summary>
/// <returns>True if every pair agrees to <paramref name="digitsOfPrecision"/> digits.</returns>
/// <exception cref="ArgumentException">If <paramref name="parseOption"/> is unrecognized.</exception>
private bool MatchNumberWithTolerance(MatchCollection firstCollection, MatchCollection secondCollection,
    int digitsOfPrecision, NumberParseOption parseOption)
{
    // BUGFIX: parse with the invariant culture. Baseline files use '.' as the decimal
    // separator; the culture-sensitive Parse overloads would misread or reject those
    // numbers on machines whose current culture uses ',' (CA1305).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    switch (parseOption)
    {
        case NumberParseOption.UseSingle:
            for (int i = 0; i < firstCollection.Count; i++)
            {
                float f1 = float.Parse(firstCollection[i].ToString(), invariant);
                float f2 = float.Parse(secondCollection[i].ToString(), invariant);
                if (!CompareNumbersWithTolerance(f1, f2, i, digitsOfPrecision))
                    return false;
            }
            return true;

        case NumberParseOption.UseDouble:
            for (int i = 0; i < firstCollection.Count; i++)
            {
                double d1 = double.Parse(firstCollection[i].ToString(), invariant);
                double d2 = double.Parse(secondCollection[i].ToString(), invariant);
                if (!CompareNumbersWithTolerance(d1, d2, i, digitsOfPrecision))
                    return false;
            }
            return true;

        default:
            throw new ArgumentException($"Invalid {nameof(NumberParseOption)}", nameof(parseOption));
    }
}
/// <summary>
/// Compare the output file at <paramref name="outPath"/> line by line against the baseline
/// at <paramref name="basePath"/>, treating embedded numbers as equal when they agree to
/// <paramref name="digitsOfPrecision"/> digits and ignoring all whitespace. The first
/// <paramref name="skip"/> output lines are skipped, and the baseline is advanced until it
/// lines up with the output.
/// </summary>
/// <returns>True if the files match; false (after calling Fail) otherwise.</returns>
protected bool CheckEqualityFromPathsCore(string relPath, string basePath, string outPath, int skip = 0,
    int digitsOfPrecision = DigitsOfPrecision, NumberParseOption parseOption = NumberParseOption.Default)
{
    Contracts.Assert(skip >= 0);

    using (StreamReader baseline = OpenReader(basePath))
    using (StreamReader result = OpenReader(outPath))
    {
        int count = 0;
        if (skip > 0)
        {
            // Skip the first 'skip' lines of the output...
            string line2;
            do
            {
                line2 = result.ReadLine();
                if (line2 == null)
                {
                    Fail("Output is shorter than the skip value of {0}!", skip);
                    return false;
                }
                count++;
            } while (count <= skip);

            // ...then advance the baseline until it matches the current output line.
            string line1;
            do
            {
                line1 = baseline.ReadLine();
                if (line1 == null)
                {
                    Fail("Couldn't match output file line to a line in the baseline!");
                    return false;
                }
            } while (line1 != line2);
        }

        for (; ; )
        {
            // Read lines while we can.
            string line1 = baseline.ReadLine();
            string line2 = result.ReadLine();

            if (line1 == null && line2 == null)
            {
                Log("Output matches baseline: '{0}'", relPath);
                return true;
            }

            // BUGFIX: if exactly one file has ended, report the length mismatch here.
            // The old code fell through to GetNumbersFromFile/Replace with a null line and
            // threw a NullReferenceException before its own "different lengths" Fail ran.
            if (line1 == null || line2 == null)
            {
                Fail("Output and baseline different lengths: '{0}'", relPath);
                return false;
            }

            count++;
            var inRange = GetNumbersFromFile(ref line1, ref line2, digitsOfPrecision, parseOption);

            // Compare the number-scrubbed lines with all whitespace removed.
            var line1Core = line1.Replace(" ", "").Replace("\t", "");
            var line2Core = line2.Replace(" ", "").Replace("\t", "");
            if (!inRange || line1Core != line2Core)
            {
                Fail("Output and baseline mismatch at line {1}, expected '{2}' but got '{3}' : '{0}'",
                    relPath, count, line1, line2);
                return false;
            }
        }
    }
}
/// <summary>
/// Check whether two files are the same ignoring volatile differences (path, dates, times, etc).
/// Returns true if the check passes.
/// </summary>
protected bool CheckEqualityNormalized(string dir, string name, string nameBase = null,
    int digitsOfPrecision = DigitsOfPrecision, NumberParseOption parseOption = NumberParseOption.Default)
    => CheckEqualityCore(dir, name, nameBase ?? name, true, digitsOfPrecision, parseOption);
/// <summary>
/// Run the predictor with given args and check if it adds up: build the maml command line
/// from <paramref name="ctx"/>, execute it, compare console/prediction/summary outputs to
/// baselines, and for TrainTest additionally re-run a Test command against the saved model
/// to verify the loaded model scores identically.
/// </summary>
protected void Run(RunContext ctx, int digitsOfPrecision = DigitsOfPrecision, NumberParseOption parseOption = NumberParseOption.Default)
{
    Contracts.Assert(IsActive);
    List<string> args = new List<string>();
    if (ctx.Command != Cmd.Test)
        AddIfNotEmpty(args, ctx.Predictor.Trainer, "tr");

    string dataName = ctx.Command == Cmd.Test ? ctx.Dataset.testFilename : ctx.Dataset.trainFilename;
    AddIfNotEmpty(args, GetDataPath(dataName), "data");
    AddIfNotEmpty(args, 1, "seed");
    //AddIfNotEmpty(args, false, "threads");
    Log("Running '{0}' on '{1}'", ctx.Predictor.Trainer.Kind, ctx.Dataset.name);

    string dir = ctx.BaselineDir;
    if (ctx.Command == Cmd.TrainTest)
        AddIfNotEmpty(args, GetDataPath(ctx.Dataset.testFilename), "test");
    if (ctx.Command == Cmd.TrainTest || ctx.Command == Cmd.Train)
        AddIfNotEmpty(args, GetDataPath(ctx.Dataset.validFilename), "valid");

    // Add in the loader args, and keep a location so we can backtrack and remove it later.
    int loaderArgIndex = -1;
    string loaderArgs = GetLoaderTransformSettings(ctx.Dataset);
    if (!string.IsNullOrWhiteSpace(loaderArgs))
    {
        loaderArgIndex = args.Count;
        args.Add(loaderArgs);
    }

    // Add in the dataset transforms. These need to come before the predictor imposed transforms.
    if (ctx.Dataset.mamlExtraSettings != null)
        args.AddRange(ctx.Dataset.mamlExtraSettings);

    // Model file output, used only for train/traintest.
    var modelPath = ctx.Command == Cmd.Train || ctx.Command == Cmd.TrainTest ? ctx.ModelPath() : null;
    AddIfNotEmpty(args, modelPath, "out");

    string basePrefix = ctx.BaselineNamePrefix;

    // Predictions output, for all types of commands except train.
    OutputPath predOutPath = ctx.Command == Cmd.Train ? null : ctx.InitPath(".txt");
    AddIfNotEmpty(args, predOutPath, "dout");

    if (ctx.Predictor.MamlArgs != null)
        args.AddRange(ctx.Predictor.MamlArgs);

    // If CV, do not run the CV in multiple threads.
    if (ctx.Command == Cmd.CV)
        args.Add("threads-");

    if (ctx.ExtraArgs != null)
    {
        foreach (string arg in ctx.ExtraArgs)
            args.Add(arg);
    }

    AddIfNotEmpty(args, ctx.Predictor.Scorer, "scorer");
    if (ctx.Command != Cmd.Test)
        AddIfNotEmpty(args, ctx.Predictor.Tester, "eval");
    else
        AddIfNotEmpty(args, ctx.ModelOverride.Path, "in");

    string runcmd = string.Join(" ", args.Where(a => !string.IsNullOrWhiteSpace(a)));
    Log("  Running as: {0} {1}", ctx.Command, runcmd);

    int res;
    if (basePrefix == null)
    {
        // Not capturing into a specific log.
        Log("*** Start raw predictor output");
        res = MainForTest(_env, LogWriter, string.Join(" ", ctx.Command, runcmd), ctx.BaselineProgress);
        Log("*** End raw predictor output, return={0}", res);
        return;
    }

    var consOutPath = ctx.StdoutPath();
    TestCore(ctx, ctx.Command.ToString(), runcmd, digitsOfPrecision: digitsOfPrecision, parseOption: parseOption);
    bool matched = consOutPath.CheckEqualityNormalized(digitsOfPrecision, parseOption: parseOption);

    if (modelPath != null && (ctx.Summary || ctx.SaveAsIni))
    {
        // Save the predictor summary and compare it to baseline.
        string str = string.Format("SavePredictorAs in={{{0}}}", modelPath.Path);
        List<string> files = new List<string>();

        if (ctx.Summary)
        {
            var summaryName = basePrefix + "-summary.txt";
            files.Add(summaryName);
            var summaryPath = DeleteOutputPath(dir, summaryName);
            str += string.Format(" sum={{{0}}}", summaryPath);
            Log("  Saving summary with: {0}", str);
        }

        if (ctx.SaveAsIni)
        {
            var iniName = basePrefix + ".ini";
            files.Add(iniName);
            var iniPath = DeleteOutputPath(dir, iniName);
            str += string.Format(" ini={{{0}}}", iniPath);
            Log("  Saving ini file: {0}", str);
        }

        MainForTest(_env, LogWriter, str);
        files.ForEach(file => CheckEqualityNormalized(dir, file, digitsOfPrecision: digitsOfPrecision, parseOption: parseOption));
    }

    if (ctx.Command == Cmd.Train || ctx.Command == Cmd.Test || ctx.ExpectedToFail)
        return;

    // ResultProcessor output
    if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) // -rp.txt files are not getting generated for Non-Windows Os
    {
        string rpName = basePrefix + "-rp.txt";
        string rpOutPath = DeleteOutputPath(dir, rpName);

        string[] rpArgs = null;
        // BUGFIX: restored the statement terminator here — the flattened source had
        // "rpArgs = new string[] { "opf+" } } ;", which does not compile.
        if (ctx.Command == Cmd.CV && ctx.ExtraArgs != null && ctx.ExtraArgs.Any(arg => arg.Contains("opf+")))
            rpArgs = new string[] { "opf+" };

        // Run result processor on the console output.
        RunResultProcessorTest(new string[] { consOutPath.Path }, rpOutPath, rpArgs);
        CheckEqualityNormalized(dir, rpName, digitsOfPrecision: digitsOfPrecision, parseOption: parseOption);
    }

    // Check the prediction output against its baseline.
    Contracts.Assert(predOutPath != null);
    predOutPath.CheckEquality(digitsOfPrecision: digitsOfPrecision, parseOption: parseOption);

    if (ctx.Command == Cmd.TrainTest)
    {
        // Adjust the args so that we no longer have the loader and transform
        // arguments in there.
        if (loaderArgIndex >= 0)
            args.RemoveAt(loaderArgIndex);

        bool foundOut = false;
        List<int> toRemove = new List<int>();
        HashSet<string> removeArgs = new HashSet<string>();
        removeArgs.Add("tr=");
        removeArgs.Add("data=");
        removeArgs.Add("valid=");
        removeArgs.Add("norm=");
        removeArgs.Add("cali=");
        removeArgs.Add("numcali=");
        removeArgs.Add("xf=");
        removeArgs.Add("cache-");
        removeArgs.Add("sf=");
        removeArgs.Add("loader=");

        for (int i = 0; i < args.Count; ++i)
        {
            if (string.IsNullOrWhiteSpace(args[i]))
                continue;
            if (removeArgs.Any(x => args[i].StartsWith(x)))
                toRemove.Add(i);
            if (args[i].StartsWith("out="))
            {
                args[i] = string.Format("in={0}", args[i].Substring(4));
                // BUGFIX: set foundOut only when the out= argument is actually rewritten.
                // Previously it was set unconditionally at the end of the loop body, which
                // made the assert below vacuously true for any non-empty arg list.
                foundOut = true;
            }
            if (args[i].StartsWith("test="))
                args[i] = string.Format("data={0}", args[i].Substring(5));
        }
        Contracts.Assert(foundOut);

        toRemove.Reverse();
        foreach (int i in toRemove)
            args.RemoveAt(i);
        runcmd = string.Join(" ", args.Where(a => !string.IsNullOrWhiteSpace(a)));

        // Redirect output to the individual log and run the test.
        var ctx2 = ctx.TestCtx();
        OutputPath consOutPath2 = ctx2.StdoutPath();
        TestCore(ctx2, "Test", runcmd, digitsOfPrecision, parseOption);

        if (CheckTestOutputMatchesTrainTest(consOutPath.Path, consOutPath2.Path, 1))
            File.Delete(consOutPath2.Path);
        else if (matched)
        {
            // The TrainTest output matched the baseline, but the SaveLoadTest output did not, so
            // append some stuff to the .txt output so comparing output to baselines in BeyondCompare
            // will show the issue.
            using (var writer = OpenWriter(consOutPath.Path, true))
            {
                writer.WriteLine("*** Unit Test Failure! ***");
                writer.WriteLine("Loaded predictor test results differ! Compare baseline with {0}", consOutPath2.Path);
                writer.WriteLine("*** Unit Test Failure! ***");
            }
        }

        // REVIEW: There is nothing analogous to the old predictor output comparison here.
        // The MAML command does not "export" the result of its training programmatically,
        // which would allow us to verify that the result of the trained model is the same
        // as its programmatically loaded counterpart.
    }
}