/// <summary>
/// Entry point. Draws random samples from every log found under the input folder, writes them
/// to a binary sample file, shuffles that file and finally saves a descriptor for it.
/// </summary>
/// <param name="args">
/// Command-line arguments in <c>key=value</c> form: <c>dir=&lt;dir&gt;</c>,
/// <c>num-samples=&lt;numTuples&gt;</c> and <c>output-file=&lt;outputFile&gt;</c>. All three are required.
/// </param>
static void Main(string[] args)
{
    //Parse the input arguments
    const string inputFolderArg = "dir=";
    const string numSamplesArg = "num-samples=";
    const string outputFileArg = "output-file=";

    string inputFolder = null;
    int numSamples = 0;
    string outputFilename = null;

    foreach (string arg in args)
    {
        // The three prefixes are mutually exclusive, so an else-if chain is equivalent to independent checks.
        // Ordinal comparison: these are fixed machine-readable keys, not culture-sensitive text.
        if (arg.StartsWith(inputFolderArg, StringComparison.Ordinal))
        {
            inputFolder = arg.Substring(inputFolderArg.Length);
        }
        else if (arg.StartsWith(numSamplesArg, StringComparison.Ordinal))
        {
            // A malformed number must not crash the tool: fall back to 0 so the check below prints the usage text.
            if (!int.TryParse(arg.Substring(numSamplesArg.Length), out numSamples))
            {
                numSamples = 0;
            }
        }
        else if (arg.StartsWith(outputFileArg, StringComparison.Ordinal))
        {
            outputFilename = arg.Substring(outputFileArg.Length);
        }
    }

    //check all required arguments are set (empty values and non-positive sample counts are rejected too)
    if (string.IsNullOrEmpty(inputFolder) || numSamples <= 0 || string.IsNullOrEmpty(outputFilename))
    {
        Console.WriteLine("ERROR. Usage: SimionLogToOfflineTraining " + inputFolderArg + "<dir> "
            + numSamplesArg + "<numTuples> " + outputFileArg + "<outputFile>");
        return;
    }

    // Directory.GetFiles throws on a missing directory; report it instead of crashing.
    if (!Directory.Exists(inputFolder))
    {
        Console.WriteLine("ERROR. The provided directory does not exist: " + inputFolder);
        return;
    }

    //check/set correct file extensions to output files
    if (!outputFilename.EndsWith(Extensions.SampleFileDescriptor))
    {
        outputFilename += Extensions.SampleFileDescriptor;
    }
    string outputBinaryFilename = Herd.Utils.RemoveExtension(outputFilename, 2) + Extensions.SampleBinaryFile;

    //Traverse all the log files
    string[] logDescriptorFiles = Directory.GetFiles(inputFolder, "*" + Herd.Files.Extensions.LogDescriptor,
        SearchOption.AllDirectories);
    int numFiles = logDescriptorFiles.Length;
    if (numFiles == 0)
    {
        Console.WriteLine("ERROR. No log files found in the provided directory: " + inputFolder);
        return;
    }

    // Integer division: the requested total is split evenly among the log files (remainder is dropped).
    int numDesiredSamplesPerFile = numSamples / numFiles;

    //We use the first descriptor as the common descriptor used for tuples. We are assuming they all have the same variables. BEWARE!!
    Log.Descriptor commonDescriptor = new Log.Descriptor(logDescriptorFiles[0]);
    Sampler sampler = new Sampler(commonDescriptor);
    int numSavedSamples = 0;

    using (FileStream outputStream = File.Create(outputBinaryFilename))
    using (BinaryWriter writer = new BinaryWriter(outputStream))
    {
        Console.WriteLine("STARTED: Drawing " + numSamples + " samples from log files in folder " + inputFolder);

        foreach (string logDescriptorFilename in logDescriptorFiles)
        {
            Log.Descriptor logDescriptor = new Log.Descriptor(logDescriptorFilename);
            string folder = Path.GetDirectoryName(logDescriptorFilename);

            Log.Data data = new Log.Data();
            // Path.Combine instead of a hard-coded "\\" so the path is valid on every platform.
            data.Load(Path.Combine(folder, logDescriptor.BinaryLogFile));

            if (!data.SuccessfulLoad)
            {
                Console.WriteLine("File " + logDescriptorFilename + " skipped (binary log could not be loaded)");
                continue;
            }

            // NOTE(review): only the summed variable counts are compared, so logs whose state/action/reward
            // split differs but whose total matches would still be accepted -- confirm this is intended.
            int numSamplerVariables = sampler.State.Length + sampler.Action.Length + sampler.Reward.Length;
            int numLogVariables = logDescriptor.StateVariables.Count + logDescriptor.ActionVariables.Count
                + logDescriptor.RewardVariables.Count;
            if (numSamplerVariables != numLogVariables)
            {
                Console.WriteLine("File " + logDescriptorFilename + " skipped (missmatched variables in log)");
                continue;
            }

            int totalNumSamples = GetTotalNumSamples(data);
            // Never draw more than (total - 1) samples from one log; clamp at 0 so an empty log
            // cannot produce a negative count.
            int actualNumSamples = Math.Max(0, Math.Min(totalNumSamples - 1, numDesiredSamplesPerFile));
            // Avoid a division by zero (Infinity/NaN in the output) for empty logs.
            double percentSampled = totalNumSamples > 0
                ? 100 * ((double)actualNumSamples / (double)totalNumSamples)
                : 0.0;

            Console.Write("Log file " + Path.GetFileName(logDescriptorFilename) + ":");
            for (int i = 0; i < actualNumSamples; i++)
            {
                // Only samples that were successfully drawn are written and counted.
                if (sampler.DrawRandomSample(data))
                {
                    sampler.SaveLastSampleToBinaryFile(writer);
                    numSavedSamples++;
                }
            }
            Console.WriteLine(string.Format(" sampled ({0:0.00}%)", percentSampled));
        }
    }

    if (numSavedSamples == 0)
    {
        // Nothing to shuffle or describe; say so instead of exiting silently.
        Console.WriteLine("ERROR. No samples could be drawn from the log files in: " + inputFolder);
        return;
    }

    //Shuffle the samples in the file
    Console.WriteLine("Shuffling samples in file");
    ShuffleSamplesInFile(outputBinaryFilename, sampler);

    //Save the descriptor
    sampler.SaveSampleFileDescriptor(outputFilename, Path.GetFileName(outputBinaryFilename), numSavedSamples);
    Console.WriteLine("FINISHED: " + numSavedSamples + " samples were drawn from the log files and saved\nDescriptor: "
        + outputFilename + "\nBinary data: " + outputBinaryFilename);
}