This class is not memory based, so very long files can be used without running out of memory. This dataset uses an Encog binary training file as a buffer. When used with a slower-access dataset, such as CSV, XML or SQL, where parsing must occur, this dataset can be used to load from the slower dataset and then train at much higher speeds. If you are going to create a binary file by using the Add methods, you must first call BeginLoad to cause Encog to open an output file. Once the data has been loaded, call EndLoad. You can also use the BinaryDataLoader class, with a CODEC, to load many other popular external formats. The binary files produced by this class are in the Encog binary training format, and can be used with any Encog platform. Encog binary files are stored using "little endian" numbers.
Inheritance: IMLDataSet
        public void TestBufferData()
        {
            // Writes the XOR input/ideal rows into a file-backed data set, then
            // passes that data set to the shared XOR verification helper.

            // Remove any existing file at Filename before writing.
            File.Delete(Filename);

            var buffered = new BufferedMLDataSet(Filename);
            buffered.BeginLoad(2, 1);

            int row = 0;
            while (row < XOR.XORInput.Length)
            {
                // Input is constructed before ideal, then both are appended as one record.
                buffered.Add(new BasicMLData(XOR.XORInput[row]), new BasicMLData(XOR.XORIdeal[row]));
                row++;
            }

            buffered.EndLoad();
            XOR.TestXORDataSet(buffered);
        }
 /// <summary>
 /// Convert a CSV file to a binary training file. Any existing file at
 /// <paramref name="binFile"/> is deleted first.
 /// </summary>
 /// <param name="csvFile">The CSV file.</param>
 /// <param name="format">The format.</param>
 /// <param name="binFile">The binary file.</param>
 /// <param name="inputCount">The number of input values.</param>
 /// <param name="outputCount">The number of output values.</param>
 /// <param name="headers">True, if there are headers on the CSV.</param>
 /// <param name="expectSignificance">Should a significance column be expected.</param>
 public static void ConvertCSV2Binary(String csvFile, CSVFormat format,
                                      String binFile, int inputCount, int outputCount,
                                      bool headers, bool  expectSignificance)
 {
     new FileInfo(binFile).Delete();

     // Forward the caller's header flag to the CSV reader rather than a fixed
     // "false", so a header row is not parsed as a data row.
     var csv = new CSVMLDataSet(csvFile,
                                inputCount, outputCount, headers, format, expectSignificance);
     var buffer = new BufferedMLDataSet(binFile);
     buffer.BeginLoad(inputCount, outputCount);
     foreach (IMLDataPair pair in csv)
     {
         buffer.Add(pair);
     }
     buffer.EndLoad();
 }
        public void Train(bool useGui)
        {
            // Trains the persisted network on the normalized data file and saves it back.
            // useGui selects TrainDialog; otherwise TrainConsole runs with _config.TrainingMinutes.

            // The network file must already exist; nothing is created here.
            if (!_config.TrainedNetworkFile.Exists)
            {
                throw new EncogError(@"Can't find neural network file, please generate data");
            }

            var network = (BasicNetwork) EncogDirectoryPersistence.LoadObject(_config.TrainedNetworkFile);

            // Convert the normalized CSV into a binary file, using the network's
            // input and output counts as the column counts.
            Console.WriteLine(@"Converting training file to binary");

            string csvPath = _config.NormalizedDataFile.ToString();
            string binPath = _config.BinaryFile.ToString();
            EncogUtility.ConvertCSV2Binary(csvPath, CSVFormat.English, binPath,
                                           network.InputCount, network.OutputCount,
                                           false, false);

            var trainingSet = new BufferedMLDataSet(_config.BinaryFile.ToString());

            if (!useGui)
            {
                EncogUtility.TrainConsole(network, trainingSet, _config.TrainingMinutes);
            }
            else
            {
                EncogUtility.TrainDialog(network, trainingSet);
            }

            Console.WriteLine(@"Training complete, saving network...");
            EncogDirectoryPersistence.SaveObject(_config.TrainedNetworkFile, network);
        }
 /// <summary>
 /// Save the training set to an EGB file. Note that the supplied
 /// <paramref name="data"/> set is closed once it has been written.
 /// </summary>
 /// <param name="egbFile">The EGB file to save to.</param>
 /// <param name="data">The training data to save.</param>
 public static void SaveEGB(FileInfo egbFile, IMLDataSet data)
 {
     var binary = new BufferedMLDataSet(egbFile.ToString());
     binary.Load(data);

     // Close the buffered set created here so it does not stay open after the save.
     binary.Close();

     // Kept from the original behavior: callers may rely on the source being closed.
     data.Close();
 }
 /// <summary>
 /// Load an EGB file to memory.
 /// </summary>
 /// <param name="filename">The file to load.</param>
 /// <returns>A memory data set.</returns>
 public static IMLDataSet LoadEGB2Memory(FileInfo filename)
 {
     var buffer = new BufferedMLDataSet(filename.ToString());
     var result = buffer.LoadToMemory();

     // The buffered set is only needed for the copy; close it before returning.
     buffer.Close();
     return result;
 }
        /// <summary>
        /// Evaluate disk. Generates a random training set, writes it to a temporary
        /// EGB file, runs a record-read loop until the stopwatch reaches
        /// 10 * Evaluate.Milis milliseconds, then reports the iteration count
        /// divided by 100000 and stores it as the binary score.
        /// </summary>
        private void EvalBinary()
        {
            FileInfo file = FileUtil.CombinePath( new FileInfo(Path.GetTempPath()), "temp.egb" );

            BasicMLDataSet training = RandomTrainingFactory.Generate(
                1000, 10000, 10, 10, -1, 1);

            // create the binary file

            if (file.Exists)
            {
                file.Delete();
            }

            var training2 = new BufferedMLDataSet(file.ToString());
            training2.Load(training);

            const long stop = (10*Evaluate.Milis);
            int record = 0;

            IMLDataPair pair;

            var watch = new Stopwatch();
            watch.Start();

            int iterations = 0;
            while(true)
            {
                iterations++;
                // NOTE(review): this reads from the in-memory "training" set, not the
                // file-backed "training2" - confirm which one this benchmark should time.
                pair = training[record++];
                if(record >= training.Count)
                    record = 0;

                // Only consult the stopwatch every 256 iterations.
                if((iterations & 0xff) == 0 && watch.ElapsedMilliseconds >= stop)
                    break;
            }

            training2.Close();

            iterations /= 100000;

            _report.Report(Steps, Step3,
                          "Disk(binary) dataset, result: "
                          + Format.FormatInteger(iterations));

            // FileInfo caches its state from the first Exists query above; refresh it
            // so the temp file created since then is actually seen and deleted.
            file.Refresh();
            if (file.Exists)
            {
                file.Delete();
            }
            _binaryScore = iterations;
        }
 /// <summary>
 /// Drop a child dataset from this instance's list of additional datasets.
 /// The removal is performed while holding a lock on this instance.
 /// </summary>
 /// <param name="child">The additional dataset to remove.</param>
 public void RemoveAdditional(BufferedMLDataSet child)
 {
     lock (this)
     {
         this._additional.Remove(child);
     }
 }
 /// <summary>
 /// Open an additional training set over the same file. The new set records
 /// this instance as its owner and is tracked in the list of additional sets.
 /// </summary>
 /// <returns>An additional training set.</returns>
 public IMLDataSet OpenAdditional()
 {
     var result = new BufferedMLDataSet(_file) {_owner = this};

     // RemoveAdditional mutates the same list under lock(this); take the same
     // lock here so concurrent open/remove calls cannot corrupt the list.
     lock (this)
     {
         _additional.Add(result);
     }
     return result;
 }
        /// <summary>
        /// Called to generate the training file. Deletes any existing training file,
        /// then feeds every file in the source directory whose name starts with
        /// "collected" and ends with ".csv" through ProcessFile into a new
        /// buffered data set.
        /// </summary>
        public void Generate()
        {
            string[] list = Directory.GetFiles(_path);

            _trainingFile.Delete();
            var output = new BufferedMLDataSet(_trainingFile.ToString());
            output.BeginLoad(Config.InputWindow, 1);

            foreach (string file in list)
            {
                var fn = new FileInfo(file);

                // File names are identifiers, not linguistic text: compare ordinally
                // so the match does not depend on the current culture.
                if (fn.Name.StartsWith("collected", StringComparison.Ordinal)
                    && fn.Name.EndsWith(".csv", StringComparison.Ordinal))
                {
                    ProcessFile(file, output);
                }
            }

            output.EndLoad();
            output.Close();
        }
Exemple #10
0
 /// <summary>
 /// Open an additional data set built from the same constructor argument as this
 /// one. The new set gets this instance assigned to its owner field and is added
 /// to this instance's list of additional sets.
 /// </summary>
 /// <returns>The newly created additional data set.</returns>
 public IMLDataSet OpenAdditional()
 {
     BufferedMLDataSet item = new BufferedMLDataSet(this.xb44380e048627945) {
         _x071bde1041617fce = this
     };

     // The removal path mutates this list under lock(this); use the same lock
     // for the add so the two cannot interleave.
     lock (this)
     {
         this.xaa0d3e5126463e13.Add(item);
     }
     return item;
 }
 /// <summary>
 /// Read an EGB file into an in-memory data set, closing the file-backed
 /// set before returning.
 /// </summary>
 /// <param name="filename">The file to load.</param>
 /// <returns>A memory data set.</returns>
 public static IMLDataSet LoadEGB2Memory(FileInfo filename)
 {
     string path = filename.ToString();
     var egb = new BufferedMLDataSet(path);

     var inMemory = egb.LoadToMemory();
     egb.Close();

     return inMemory;
 }
 /// <summary>
 /// Construct the buffered enumerator. Stores the owning data set and
 /// starts the current-position counter at zero.
 /// </summary>
 /// <param name="owner">The object that created this enumeration.</param>
 public BufferedNeuralDataSetEnumerator(BufferedMLDataSet owner)
 {
     _current = 0;
     _data = owner;
 }
Exemple #13
0
 /// <summary>
 /// Convert selected columns of a CSV file into a binary training file.
 /// Any existing file at <paramref name="binFile"/> is deleted first.
 /// </summary>
 /// <param name="csvFile">The CSV file to read.</param>
 /// <param name="format">The CSV format passed to the reader.</param>
 /// <param name="binFile">The binary file to write.</param>
 /// <param name="input">CSV column indexes copied into the input data, in order.</param>
 /// <param name="ideal">CSV column indexes copied into the ideal data, in order.</param>
 /// <param name="headers">Header flag passed to the CSV reader.</param>
 public static void ConvertCSV2Binary(FileInfo csvFile, CSVFormat format, FileInfo binFile, int[] input, int[] ideal, bool headers)
 {
     binFile.Delete();

     // Structured equivalent of the decompiled goto chain: open the reader, open
     // the buffer, copy one record per CSV row, then finish the load.
     ReadCSV csv = new ReadCSV(csvFile.ToString(), headers, format);
     try
     {
         BufferedMLDataSet buffer = new BufferedMLDataSet(binFile.ToString());
         buffer.BeginLoad(input.Length, ideal.Length);

         while (csv.Next())
         {
             BasicMLData inputData = new BasicMLData(input.Length);
             BasicMLData idealData = new BasicMLData(ideal.Length);

             for (int i = 0; i < input.Length; i++)
             {
                 inputData[i] = csv.GetDouble(input[i]);
             }

             for (int i = 0; i < ideal.Length; i++)
             {
                 idealData[i] = csv.GetDouble(ideal[i]);
             }

             buffer.Add(inputData, idealData);
         }

         buffer.EndLoad();
     }
     finally
     {
         // Release the CSV reader even if a row fails to parse.
         csv.Close();
     }
 }
Exemple #14
0
 /// <summary>
 /// Convert a CSV file to a binary training file. Any existing file at
 /// <paramref name="binFile"/> is deleted first.
 /// </summary>
 /// <param name="csvFile">The CSV file.</param>
 /// <param name="format">The CSV format.</param>
 /// <param name="binFile">The binary file.</param>
 /// <param name="inputCount">The number of input values.</param>
 /// <param name="outputCount">The number of output values.</param>
 /// <param name="headers">True, if there are headers on the CSV.</param>
 /// <param name="expectSignificance">Should a significance column be expected.</param>
 public static void ConvertCSV2Binary(string csvFile, CSVFormat format, string binFile, int inputCount, int outputCount, bool headers, bool expectSignificance)
 {
     new FileInfo(binFile).Delete();

     // Forward the caller's header flag rather than a fixed "false".
     CSVMLDataSet source = new CSVMLDataSet(csvFile, inputCount, outputCount, headers, format, expectSignificance);
     BufferedMLDataSet target = new BufferedMLDataSet(binFile);
     target.BeginLoad(inputCount, outputCount);

     // The decompiler's always-true guard around this loop has been removed.
     foreach (IMLDataPair pair in source)
     {
         target.Add(pair);
     }
     target.EndLoad();
 }
Exemple #15
0
 /// <summary>
 /// Remove a child data set from this instance's list of additional sets,
 /// holding a lock on this instance while the list is modified.
 /// </summary>
 /// <param name="child">The additional data set to remove.</param>
 public void RemoveAdditional(BufferedMLDataSet child)
 {
     lock (this)
     {
         xaa0d3e5126463e13.Remove(child);
     }
 }
        /// <summary>
        /// Process the individual training file. Each CSV row contributes one
        /// window entry (column 1 plus the normalized fast/slow differences); once
        /// the window is full, one input/ideal record is appended to the output.
        /// </summary>
        /// <param name="file">The training file to process.</param>
        /// <param name="output">The data set to output to.</param>
        protected void ProcessFile(string file, BufferedMLDataSet output)
        {
            var inputData = new BasicMLData(output.InputSize);
            var idealData = new BasicMLData(output.IdealSize);

            var csv = new ReadCSV(file, true, CSVFormat.English);
            try
            {
                while (csv.Next())
                {
                    // Slot 0 holds the column-1 value; slots 1..InputWindow hold the differences.
                    var a = new double[Config.InputWindow + 1];
                    double close = csv.GetDouble(1);

                    const int fastIndex = 2;
                    const int slowIndex = fastIndex + Config.InputWindow;

                    a[0] = close;
                    // Iterate over the configured window width instead of a literal 3,
                    // matching how the array and slowIndex are sized above.
                    for (int i = 0; i < Config.InputWindow; i++)
                    {
                        double fast = csv.GetDouble(fastIndex + i);
                        double slow = csv.GetDouble(slowIndex + i);
                        double diff = _fieldDifference.Normalize((fast - slow)/Config.PipSize);
                        a[i + 1] = diff;
                    }
                    _window.Add(a);

                    if (_window.IsFull())
                    {
                        double max = (_window.CalculateMax(0, Config.InputWindow) - close)/Config.PipSize;
                        double min = (_window.CalculateMin(0, Config.InputWindow) - close)/Config.PipSize;

                        // Keep whichever move has the larger magnitude, sign included.
                        double o = Math.Abs(max) > Math.Abs(min) ? max : min;

                        a = _window.GetLast();
                        for (int i = 0; i < Config.InputWindow; i++)
                        {
                            inputData[i] = a[i + 1];
                        }

                        o = _fieldOutcome.Normalize(o);
                        idealData[0] = o;

                        output.Add(inputData, idealData);
                    }
                }
            }
            finally
            {
                // Release the CSV reader; this method is called once per input file.
                csv.Close();
            }
        }
        /// <summary>
        /// Convert a CSV file to binary. Any existing file at
        /// <paramref name="binFile"/> is deleted first.
        /// </summary>
        /// <param name="csvFile">The CSV file to convert.</param>
        /// <param name="format">The format.</param>
        /// <param name="binFile">The binary file.</param>
        /// <param name="input">CSV column indexes copied into the input data, in order.</param>
        /// <param name="ideal">CSV column indexes copied into the ideal data, in order.</param>
        /// <param name="headers">True, if headers are present.</param>
        public static void ConvertCSV2Binary(FileInfo csvFile, CSVFormat format,
                                             FileInfo binFile, int[] input, int[] ideal, bool headers)
        {
            binFile.Delete();
            var csv = new ReadCSV(csvFile.ToString(), headers, format);

            try
            {
                var buffer = new BufferedMLDataSet(binFile.ToString());
                buffer.BeginLoad(input.Length, ideal.Length);
                while (csv.Next())
                {
                    var inputData = new BasicMLData(input.Length);
                    var idealData = new BasicMLData(ideal.Length);

                    // handle input data
                    for (int i = 0; i < input.Length; i++)
                    {
                        inputData[i] = csv.GetDouble(input[i]);
                    }

                    // handle ideal data
                    for (int i = 0; i < ideal.Length; i++)
                    {
                        idealData[i] = csv.GetDouble(ideal[i]);
                    }

                    // add to dataset
                    buffer.Add(inputData, idealData);
                }
                buffer.EndLoad();
            }
            finally
            {
                // Release the CSV reader even if a row fails to parse.
                csv.Close();
            }
        }
Exemple #18
0
        private void TrainCommand()
        {
            // Loads a method and a training file named by the first two command
            // arguments, trains to the prompted error level, and saves the method back.
            string methodFile = _cmd.Args[0];
            string trainingFile = _cmd.Args[1];

            // Prompt order is preserved: type, args, then maxError.
            string type = _cmd.PromptString("type", "rprop");
            string args = _cmd.PromptString("args", "");
            double maxError = _cmd.PromptDouble("maxError", 0.01);

            BufferedMLDataSet dataSet = new BufferedMLDataSet(trainingFile);
            IMLMethod method = (IMLMethod) EncogDirectoryPersistence.LoadObject(new FileInfo(methodFile));
            IMLTrain train = new MLTrainFactory().Create(method, dataSet, type, args);

            _sw.Start();
            EncogUtility.TrainToError(train, maxError);

            Console.WriteLine(@"Saving machine learning method");
            EncogDirectoryPersistence.SaveObject(new FileInfo(methodFile), method);
        }