/// <summary>
        ///     Analyze the file. Walks every row of the file identified by the
        ///     HeaderDatasourceRawFile script property and records the total
        ///     row count and the number of rows the reader reports as having
        ///     missing values.
        /// </summary>
        private void AnalyzeFile()
        {
            ScriptProperties prop = _analyst.Script.Properties;

            // get filenames, headers & format
            String sourceID = prop.GetPropertyString(
                ScriptProperties.HeaderDatasourceRawFile);

            FileInfo  sourceFile = _analyst.Script.ResolveFilename(sourceID);
            CSVFormat format     = _analyst.Script.DetermineFormat();
            bool      headers    = _analyst.Script.ExpectInputHeaders(sourceID);

            // read the file
            _rowCount     = 0;
            _missingCount = 0;

            var csv = new ReadCSV(sourceFile.ToString(), headers, format);

            try
            {
                while (csv.Next())
                {
                    _rowCount++;
                    if (csv.HasMissing())
                    {
                        _missingCount++;
                    }
                }
            }
            finally
            {
                // release the reader even when reading a row fails
                csv.Close();
            }
        }
Exemple #2
0
 /// <summary>
 /// Record the source file together with its header flag and CSV format.
 /// Handy when pre-existing stats should be used to normalize something and
 /// the analyze step is skipped.
 /// </summary>
 ///
 /// <param name="file">The source file.</param>
 /// <param name="headers">True when headers are expected in the file.</param>
 /// <param name="format">The CSV format of the file.</param>
 public void SetSourceFile(FileInfo file, bool headers, CSVFormat format)
 {
     InputFilename = file;
     ExpectInputHeaders = headers;
     InputFormat = format;
 }
Exemple #3
0
 /// <summary>
 ///     Create an analyzed field for the given script. The field starts with
 ///     an empty class map and an instance count of zero, and takes its
 ///     format from the script.
 /// </summary>
 /// <param name="theScript">The script being analyzed.</param>
 /// <param name="name">The name of the field.</param>
 public AnalyzedField(AnalystScript theScript, String name)
     : base(name)
 {
     _classMap = new Dictionary<String, AnalystClassItem>();
     _instances = 0;

     // keep the script and ask it for the format used by this field
     _script = theScript;
     _fmt = theScript.DetermineFormat();
 }
Exemple #4
0
        /// <summary>
        /// Write a matrix to XML. The row and column counts are emitted as
        /// attributes of a "Matrix" tag, and each matrix row becomes one row
        /// tag holding the comma-separated values of that row.
        /// </summary>
        /// <param name="matrix">The matrix to save.</param>
        /// <param name="xmlOut">The XML writer.</param>
        public static void SaveMatrix(Matrix matrix, WriteXML xmlOut)
        {
            xmlOut.AddAttribute(PersistorUtil.ATTRIBUTE_MATRIX_ROWS, "" + matrix.Rows);
            xmlOut.AddAttribute(PersistorUtil.ATTRIBUTE_MATRIX_COLS, "" + matrix.Cols);
            xmlOut.BeginTag("Matrix");

            CSVFormat numberFormat = CSVFormat.EG_FORMAT;

            for (int r = 0; r < matrix.Rows; r++)
            {
                var rowText = new StringBuilder();

                for (int c = 0; c < matrix.Cols; c++)
                {
                    // values after the first are preceded by a comma
                    if (c != 0)
                    {
                        rowText.Append(',');
                    }

                    rowText.Append(numberFormat.Format(matrix[r, c], 20));
                }

                xmlOut.BeginTag(PersistorUtil.ROW);
                xmlOut.AddText(rowText.ToString());
                xmlOut.EndTag();
            }

            // closes the "Matrix" tag
            xmlOut.EndTag();
        }
        /// <summary>
        /// Load a CSV file into a memory dataset. For every row the first
        /// <paramref name="inputSize"/> columns become the input and the next
        /// <paramref name="idealSize"/> columns become the ideal data.
        /// </summary>
        ///
        /// <param name="format">The CSV format to use.</param>
        /// <param name="filename">The filename to load.</param>
        /// <param name="headers">True if there is a header line.</param>
        /// <param name="inputSize">The input size.  Input always comes first in a file.</param>
        /// <param name="idealSize">The ideal size, 0 for unsupervised.</param>
        /// <returns>An IMLDataSet that holds the contents of the CSV file.</returns>
        public static IMLDataSet LoadCSVTOMemory(CSVFormat format, String filename,
                                                 bool headers, int inputSize, int idealSize)
        {
            var result = new BasicMLDataSet();
            var csv    = new ReadCSV(filename, headers, format);

            try
            {
                while (csv.Next())
                {
                    BasicMLData ideal = null;
                    int         index = 0;

                    var input = new BasicMLData(inputSize);
                    for (int i = 0; i < inputSize; i++)
                    {
                        input[i] = csv.GetDouble(index++);
                    }

                    // the ideal stays null when no ideal columns are requested
                    if (idealSize > 0)
                    {
                        ideal = new BasicMLData(idealSize);
                        for (int i = 0; i < idealSize; i++)
                        {
                            ideal[i] = csv.GetDouble(index++);
                        }
                    }

                    IMLDataPair pair = new BasicMLDataPair(input, ideal);
                    result.Add(pair);
                }
            }
            finally
            {
                // the reader was previously left open; always release it
                csv.Close();
            }

            return result;
        }
        /// <summary>
        /// Run the generate command: the configured source CSV file is
        /// converted to the configured target file via
        /// EncogUtility.ConvertCSV2Binary, using the input and ideal fields
        /// determined from the source headers.
        /// </summary>
        ///
        /// <param name="args">Not used by this command.</param>
        /// <returns>Always false.</returns>
        public override sealed bool ExecuteCommand(String args)
        {
            // look up the configured file ids and the format of the source
            String srcId = Prop.GetPropertyString(ScriptProperties.GenerateConfigSourceFile);
            String dstId = Prop.GetPropertyString(ScriptProperties.GenerateConfigTargetFile);
            CSVFormat csvFormat = Analyst.Script.DetermineInputFormat(srcId);

            EncogLogging.Log(EncogLogging.LevelDebug, "Beginning generate");
            EncogLogging.Log(EncogLogging.LevelDebug, "source file:" + srcId);
            EncogLogging.Log(EncogLogging.LevelDebug, "target file:" + dstId);

            FileInfo srcFile = Script.ResolveFilename(srcId);
            FileInfo dstFile = Script.ResolveFilename(dstId);

            // flag the target as a generated file
            Script.MarkGenerated(dstId);

            // read the headers of the source file
            bool hasHeaders = Script.ExpectInputHeaders(srcId);
            var headerList = new CSVHeaders(srcFile, hasHeaders, csvFormat);

            int[] inputFields = DetermineInputFields(headerList);
            int[] idealFields = DetermineIdealFields(headerList);

            EncogUtility.ConvertCSV2Binary(srcFile, csvFormat, dstFile,
                                           inputFields, idealFields, hasHeaders);
            return false;
        }
Exemple #7
0
        /// <summary>
        ///     Convert a binary file to CSV. Expects exactly two command
        ///     arguments (source and target); the CSV format comes from the
        ///     "format" option. The target file is deleted before converting.
        /// </summary>
        private void ConvertEGB2CSV()
        {
            if (_cmd.Args.Count != 2)
            {
                Console.WriteLine(@"Must specify a source and target.");
                return;
            }

            String binaryPath = _cmd.Args[0];
            String csvPath = _cmd.Args[1];

            // translate the textual format option into a CSVFormat
            var formatName = _cmd.PromptString("format", "decpnt|comma");
            AnalystFileFormat fileFormat = ConvertStringConst.String2AnalystFileFormat(formatName);
            CSVFormat csvFormat = ConvertStringConst.ConvertToCSVFormat(fileFormat);

            new FileInfo(csvPath).Delete();

            IDataSetCODEC codec = new CSVDataCODEC(csvPath, csvFormat, false);
            var loader = new BinaryDataLoader(codec);
            loader.Status = new ConsoleStatusReportable();

            _sw.Start();
            loader.Binary2External(binaryPath);
        }
Exemple #8
0
        /// <summary>
        ///     Convert a CSV file to binary. Expects exactly two command
        ///     arguments (source and target) and a non-zero "inputCount"
        ///     option. The target file is deleted before converting.
        /// </summary>
        private void ConvertCSV2EGB()
        {
            if (_cmd.Args.Count != 2)
            {
                Console.WriteLine(@"Must specify a source and target.");
                return;
            }

            String csvPath = _cmd.Args[0];
            String binaryPath = _cmd.Args[1];
            bool hasHeaders = _cmd.PromptBoolean("headers", true);
            int inputs = _cmd.PromptInteger("inputCount", 0);
            int outputs = _cmd.PromptInteger("outputCount", 0);

            // an input count of zero is rejected
            if (inputs == 0)
            {
                Console.WriteLine(@"Must specify an input count.");
                return;
            }

            // translate the textual format option into a CSVFormat
            var formatName = _cmd.PromptString("format", "decpnt|comma");
            AnalystFileFormat fileFormat = ConvertStringConst.String2AnalystFileFormat(formatName);
            CSVFormat csvFormat = ConvertStringConst.ConvertToCSVFormat(fileFormat);

            new FileInfo(binaryPath).Delete();

            IDataSetCODEC codec = new CSVDataCODEC(csvPath, csvFormat, hasHeaders,
                                                   inputs, outputs, false);
            var loader = new BinaryDataLoader(codec);
            loader.Status = new ConsoleStatusReportable();

            _sw.Start();
            loader.External2Binary(binaryPath);
        }
Exemple #9
0
        public void SetFormatTest()
        {
            // Verifies that SetFormat grows the Formats collection one entry
            // at a time and that an existing entry can be replaced with null.
            CSVFormat sampleFormat = createFormat();
            CSVWriter writer = new CSVWriter();

            // A fresh writer must not hold any formats
            int actual = writer.Formats.Count();

            Assert.IsTrue(actual == 0, $"Error: {actual} was returned when 0 were expected");

            // Fill slots 0..9, checking the count after each insertion
            for (int slot = 0; slot < 10; slot++)
            {
                int expected = slot + 1;

                writer.SetFormat(slot, sampleFormat);

                actual = writer.Formats.Count();
                Assert.IsTrue(actual == expected, $"Error: {actual} was returned when {expected} were expected");
            }

            // Overwrite one randomly chosen slot with null
            int rnd = RandomInt(0, 9);

            writer.SetFormat(rnd, null);

            Assert.IsTrue(writer.Formats[rnd] == null, $"Error: Format [{rnd}] returned a non-null value when NULL was expected");
        }
Exemple #10
0
        /// <summary>
        /// Convert a CSV file to binary. Any existing binary file is deleted
        /// first. For each CSV row the columns listed in
        /// <paramref name="input"/> and <paramref name="ideal"/> are copied
        /// into the binary dataset.
        /// </summary>
        /// <param name="csvFile">The CSV file to convert.</param>
        /// <param name="format">The format.</param>
        /// <param name="binFile">The binary file.</param>
        /// <param name="input">The CSV column indexes used as input values.</param>
        /// <param name="ideal">The CSV column indexes used as ideal values.</param>
        /// <param name="headers">True, if headers are present.</param>
        public static void ConvertCSV2Binary(FileInfo csvFile, CSVFormat format,
                                             FileInfo binFile, int[] input, int[] ideal, bool headers)
        {
            binFile.Delete();
            var csv = new ReadCSV(csvFile.ToString(), headers, format);

            try
            {
                var buffer = new BufferedMLDataSet(binFile.ToString());

                try
                {
                    buffer.BeginLoad(input.Length, ideal.Length);
                    while (csv.Next())
                    {
                        var inputData = new BasicMLData(input.Length);
                        var idealData = new BasicMLData(ideal.Length);

                        // handle input data
                        for (int i = 0; i < input.Length; i++)
                        {
                            inputData[i] = csv.GetDouble(input[i]);
                        }

                        // handle ideal data
                        for (int i = 0; i < ideal.Length; i++)
                        {
                            idealData[i] = csv.GetDouble(ideal[i]);
                        }

                        // add to dataset
                        buffer.Add(inputData, idealData);
                    }
                    buffer.EndLoad();
                }
                finally
                {
                    // close the binary file even when a row fails to convert
                    buffer.Close();
                }
            }
            finally
            {
                // the reader is closed last, as before, but now on every path
                csv.Close();
            }
        }
Exemple #11
0
        /// <summary>
        ///     Analyze the data. Stores the input settings, performs the basic
        ///     counts and then checks that the number of input headings equals
        ///     either the analyst's input count or its input+output count.
        /// </summary>
        /// <param name="theAnalyst">The analyst to use.</param>
        /// <param name="inputFile">The input file.</param>
        /// <param name="headers">True if headers are present.</param>
        /// <param name="format">The format the file is in.</param>
        public void Analyze(EncogAnalyst theAnalyst,
                            FileInfo inputFile, bool headers, CSVFormat format)
        {
            InputFilename = inputFile;
            ExpectInputHeaders = headers;
            Format = format;
            _analyst = theAnalyst;

            Analyzed = true;

            PerformBasicCounts();

            _inputCount = _analyst.DetermineInputCount();
            _outputCount = _analyst.DetermineOutputCount();

            int columnCount = InputHeadings.Length;
            _idealCount = columnCount - _inputCount;

            int inputPlusOutput = _inputCount + _outputCount;
            bool matchesInput = columnCount == _inputCount;
            bool matchesInputPlusOutput = columnCount == inputPlusOutput;

            if (matchesInput || matchesInputPlusOutput)
            {
                return;
            }

            throw new AnalystError("Invalid number of columns("
                                   + columnCount + "), must match input("
                                   + _inputCount + ") count or input+output("
                                   + inputPlusOutput + ") count.");
        }
Exemple #12
0
        /// <summary>
        /// Save the dataset to a CSV file. Each pair becomes one line holding
        /// its input values followed by its ideal values (when the pair has
        /// ideal data), formatted with EncogFramework.DefaultPrecision.
        /// </summary>
        /// <param name="targetFile">The target file.</param>
        /// <param name="format">The format to use.</param>
        /// <param name="set">The data set.</param>
        /// <exception cref="EncogError">Wraps any IOException raised while writing.</exception>
        public static void SaveCSV(FileInfo targetFile, CSVFormat format, IMLDataSet set)
        {
            try
            {
                // using guarantees the writer is flushed and closed on every path
                using (var file = new StreamWriter(targetFile.ToString()))
                {
                    foreach (IMLDataPair data in set)
                    {
                        var line = new StringBuilder();

                        for (int i = 0; i < data.Input.Count; i++)
                        {
                            double d = data.Input[i];
                            BasicFile.AppendSeparator(line, format);
                            line.Append(format.Format(d, EncogFramework.DefaultPrecision));
                        }

                        // a pair without ideal data contributes input columns only
                        if (data.Ideal != null)
                        {
                            for (int i = 0; i < data.Ideal.Count; i++)
                            {
                                double d = data.Ideal[i];
                                BasicFile.AppendSeparator(line, format);
                                line.Append(format.Format(d, EncogFramework.DefaultPrecision));
                            }
                        }

                        file.WriteLine(line.ToString());
                    }
                }
            }
            catch (IOException ex)
            {
                throw new EncogError(ex);
            }
        }
Exemple #13
0
        /// <inheritdoc />
        /// <remarks>
        /// Runs the balance command: the configured source file is processed
        /// by a BalanceCSV task on the configured balance field and written to
        /// the configured target file. The balance field must exist and be a
        /// class field, otherwise an AnalystError is thrown.
        /// </remarks>
        public override sealed bool ExecuteCommand(String args)
        {
            // get filenames
            String sourceID = Prop.GetPropertyString(
                ScriptProperties.BalanceConfigSourceFile);
            String targetID = Prop.GetPropertyString(
                ScriptProperties.BalanceConfigTargetFile);

            EncogLogging.Log(EncogLogging.LevelDebug, "Beginning balance");
            EncogLogging.Log(EncogLogging.LevelDebug, "source file:" + sourceID);
            EncogLogging.Log(EncogLogging.LevelDebug, "target file:" + targetID);

            FileInfo sourceFile = Script.ResolveFilename(sourceID);
            FileInfo targetFile = Script.ResolveFilename(targetID);

            // get other config data
            int countPer = Prop.GetPropertyInt(
                ScriptProperties.BalanceConfigCountPer);
            String targetFieldStr = Prop.GetPropertyString(
                ScriptProperties.BalanceConfigBalanceField);
            DataField targetFieldDf = Analyst.Script.FindDataField(
                targetFieldStr);

            if (targetFieldDf == null)
            {
                throw new AnalystError("Can't find balance target field: "
                                       + targetFieldStr);
            }
            if (!targetFieldDf.Class)
            {
                throw new AnalystError("Can't balance on non-class field: "
                                       + targetFieldStr);
            }

            int targetFieldIndex = Analyst.Script
                                   .FindDataFieldIndex(targetFieldDf);

            // mark generated
            Script.MarkGenerated(targetID);

            // get format (the previously fetched output format was never used)
            CSVFormat inputFormat = Script.DetermineFormat();

            // prepare to balance
            var balance = new BalanceCSV {
                Script = Script
            };

            Analyst.CurrentQuantTask = balance;
            try
            {
                balance.Report = new AnalystReportBridge(Analyst);

                bool headers = Script.ExpectInputHeaders(sourceID);

                balance.Analyze(sourceFile, headers, inputFormat);
                balance.ProduceOutputHeaders = true;
                balance.Process(targetFile, targetFieldIndex, countPer);
            }
            finally
            {
                // never leave a finished or failed task registered as current
                Analyst.CurrentQuantTask = null;
            }

            return balance.ShouldStop();
        }
Exemple #14
0
        /// <inheritdoc />
        /// <remarks>
        /// Runs the randomize command: the configured source file is analyzed
        /// and processed by a ShuffleCSV task, which writes the configured
        /// target file.
        /// </remarks>
        public override sealed bool ExecuteCommand(String args)
        {
            // get filenames
            String sourceID = Prop.GetPropertyString(
                ScriptProperties.RandomizeConfigSourceFile);
            String targetID = Prop.GetPropertyString(
                ScriptProperties.RandomizeConfigTargetFile);

            EncogLogging.Log(EncogLogging.LevelDebug, "Beginning randomize");
            EncogLogging.Log(EncogLogging.LevelDebug, "source file:" + sourceID);
            EncogLogging.Log(EncogLogging.LevelDebug, "target file:" + targetID);

            FileInfo sourceFile = Script.ResolveFilename(sourceID);
            FileInfo targetFile = Script.ResolveFilename(targetID);

            // get formats
            CSVFormat format = Script.DetermineFormat();

            // mark generated
            Script.MarkGenerated(targetID);

            // prepare to shuffle
            var norm = new ShuffleCSV {
                Script = Script
            };

            Analyst.CurrentQuantTask = norm;
            try
            {
                norm.Report = new AnalystReportBridge(Analyst);
                bool headers = Script.ExpectInputHeaders(sourceID);

                norm.Analyze(sourceFile, headers, format);
                norm.Process(targetFile);
            }
            finally
            {
                // never leave a finished or failed task registered as current
                Analyst.CurrentQuantTask = null;
            }

            return norm.ShouldStop();
        }
 /// <summary>
 ///     Construct a CSV source from a filename. The format parameter specifies
 ///     the separator character to use, as well as the number format.
 /// </summary>
 /// <param name="file">The filename.</param>
 /// <param name="headers">The headers flag, stored as given.</param>
 /// <param name="format">The CSV format.</param>
 public CSVDataSource(string file, bool headers, CSVFormat format)
 {
     _format = format;
     _headers = headers;
     _file = file;
 }
 /// <summary>
 ///     Construct a CSV source from a filename. Allows a delimiter character to
 ///     be specified; the format is built from CSVFormat.DecimalCharacter and
 ///     that delimiter.
 /// </summary>
 /// <param name="file">The filename.</param>
 /// <param name="headers">The headers flag, stored as given.</param>
 /// <param name="delim">The delimiter.</param>
 public CSVDataSource(string file, bool headers, char delim)
 {
     _file = file;
     _headers = headers;
     _format = new CSVFormat(CSVFormat.DecimalCharacter, delim);
 }
        /// <summary>
        /// Determine the input format for the specified file. When the file
        /// is the one named by the HeaderDatasourceRawFile property, the
        /// HeaderDatasourceSourceFormat property is used; otherwise the
        /// SetupConfigCSVFormat property is used.
        /// </summary>
        ///
        /// <param name="sourceID">The file.</param>
        /// <returns>The input format.</returns>
        public CSVFormat DetermineInputFormat(String sourceID)
        {
            String rawID = Properties.GetPropertyString(
                ScriptProperties.HeaderDatasourceRawFile);

            if (sourceID.Equals(rawID))
            {
                return Properties.GetPropertyCSVFormat(
                    ScriptProperties.HeaderDatasourceSourceFormat);
            }

            return Properties.GetPropertyCSVFormat(
                ScriptProperties.SetupConfigCSVFormat);
        }
 /// <summary>
 ///     Append a separator. The separator is only appended when the line is
 ///     not empty and does not already end with the separator. This is used to
 ///     build comma (or other) separated lists.
 /// </summary>
 /// <param name="line">The line to append to.</param>
 /// <param name="format">The format whose separator is used.</param>
 public static void AppendSeparator(StringBuilder line,
                                    CSVFormat format)
 {
     int length = line.Length;

     // Inspect only the last character: this avoids copying the whole
     // builder into a string on every call and makes the check ordinal
     // instead of culture-sensitive.
     if ((length > 0) && (line[length - 1] != format.Separator))
     {
         line.Append(format.Separator);
     }
 }
 /// <summary>
 /// Construct this data set from a CSV file, remembering the column layout,
 /// the header flag and the CSV format to use.
 /// </summary>
 /// <param name="filename">The CSV filename to read.</param>
 /// <param name="inputSize">The number of columns that make up the input set.</param>
 /// <param name="idealSize">The number of columns that make up the ideal set.</param>
 /// <param name="headers">True if headers are present on the first line.</param>
 /// <param name="format">The format to use.</param>
 public CSVNeuralDataSet(String filename, int inputSize, int idealSize,
                         bool headers, CSVFormat format)
 {
     // where and how to read
     this.filename = filename;
     this.headers = headers;
     this.format = format;

     // column layout
     this.inputSize = inputSize;
     this.idealSize = idealSize;
 }
Exemple #20
0
        /// <summary>
        /// Construct a loaded row from an IMLData. Every value is formatted
        /// with a precision argument of 5; the extra positions are allocated
        /// but left unset.
        /// </summary>
        /// <param name="format">The format to store the numbers in.</param>
        /// <param name="data">The data to use.</param>
        /// <param name="extra">The extra positions to allocate.</param>
        public LoadedRow(CSVFormat format, IMLData data, int extra)
        {
            int valueCount = data.Count;
            var cells = new String[valueCount + extra];

            int position = 0;
            while (position < valueCount)
            {
                cells[position] = format.Format(data[position], 5);
                position++;
            }

            _data = cells;
        }
Exemple #21
0
        /// <summary>
        /// Construct a loaded row from an array. Every value is formatted
        /// with a precision argument of 5; the extra positions are allocated
        /// but left unset.
        /// </summary>
        /// <param name="format">The format to store the numbers in.</param>
        /// <param name="data">The data to use.</param>
        /// <param name="extra">The extra positions to allocate.</param>
        public LoadedRow(CSVFormat format, double[] data, int extra)
        {
            var cells = new String[data.Length + extra];

            int position = 0;
            foreach (double value in data)
            {
                cells[position] = format.Format(value, 5);
                position++;
            }

            _data = cells;
        }
Exemple #22
0
        /// <summary>
        ///     Analyze the file: store the input settings, mark this object
        ///     as analyzed and perform the basic counts.
        /// </summary>
        /// <param name="inputFile">The name of the input file.</param>
        /// <param name="headers">True, if headers are expected.</param>
        /// <param name="format">The format.</param>
        public void Analyze(FileInfo inputFile, bool headers, CSVFormat format)
        {
            // remember what to read and how
            InputFilename = inputFile;
            ExpectInputHeaders = headers;
            Format = format;

            Analyzed = true;
            PerformBasicCounts();
        }
Exemple #23
0
        /// <summary>
        ///     Load financial data. Downloads CSV data for the ticker from
        ///     the URL produced by BuildURL and rewrites it to the output
        ///     file, one line per downloaded row.
        /// </summary>
        /// <param name="ticker">The ticker symbol.</param>
        /// <param name="output">The output file.</param>
        /// <param name="outputFormat">The output format.</param>
        /// <param name="from">Starting date.</param>
        /// <param name="to">Ending date.</param>
        /// <exception cref="QuantError">Wraps any WebException raised by the download.</exception>
        public void LoadAllData(String ticker, String output, CSVFormat outputFormat, DateTime from,
                                DateTime to)
        {
            try
            {
                Uri        urlData  = BuildURL(ticker, from, to);
                WebRequest httpData = WebRequest.Create(urlData);

                // using blocks release the response, its stream and the output
                // writer even when a row fails to parse or a write fails
                using (var responseData = (HttpWebResponse)httpData.GetResponse())
                {
                    if (responseData != null)
                    {
                        using (Stream istreamData = responseData.GetResponseStream())
                        {
                            var csvData = new ReadCSV(istreamData, true, CSVFormat.English);

                            using (TextWriter tw = new StreamWriter(output))
                            {
                                // NOTE(review): the header line always uses commas, while the
                                // data lines use outputFormat.Separator - confirm this is intended.
                                tw.WriteLine("date,time,open price,high price,low price,close price,volume,adjusted price");

                                while (csvData.Next())
                                {
                                    DateTime date          = csvData.GetDate("date");
                                    double   adjustedClose = csvData.GetDouble("adj close");
                                    double   open          = csvData.GetDouble("open");
                                    double   close         = csvData.GetDouble("close");
                                    double   high          = csvData.GetDouble("high");
                                    double   low           = csvData.GetDouble("low");
                                    var      volume        = (long)csvData.GetDouble("volume");

                                    var line = new StringBuilder();
                                    line.Append(NumericDateUtil.DateTime2Long(date));
                                    line.Append(outputFormat.Separator);
                                    line.Append(NumericDateUtil.Time2Int(date));
                                    line.Append(outputFormat.Separator);
                                    line.Append(outputFormat.Format(open, Precision));
                                    line.Append(outputFormat.Separator);
                                    line.Append(outputFormat.Format(high, Precision));
                                    line.Append(outputFormat.Separator);
                                    line.Append(outputFormat.Format(low, Precision));
                                    line.Append(outputFormat.Separator);
                                    line.Append(outputFormat.Format(close, Precision));
                                    line.Append(outputFormat.Separator);
                                    line.Append(volume);
                                    line.Append(outputFormat.Separator);
                                    line.Append(outputFormat.Format(adjustedClose, Precision));
                                    tw.WriteLine(line.ToString());
                                }
                            }
                        }
                    }
                }
            }
            catch (WebException ex)
            {
                throw new QuantError(ex);
            }
        }
Exemple #24
0
        /// <summary>
        ///     Process, and sort the files: store the input settings, read
        ///     the input file, sort the data and write the output file.
        /// </summary>
        /// <param name="inputFile">The input file.</param>
        /// <param name="outputFile">The output file.</param>
        /// <param name="headers">True, if headers are to be used.</param>
        /// <param name="format">The format of the file.</param>
        public void Process(FileInfo inputFile, FileInfo outputFile, bool headers, CSVFormat format)
        {
            // remember what to read and how
            InputFilename = inputFile;
            ExpectInputHeaders = headers;
            Format = format;

            // read -> sort -> write
            ReadInputFile();
            SortData();
            WriteOutputFile(outputFile);
        }
        /// <summary>
        /// Parses one named column of a CSV file and returns its values as a
        /// list of doubles. The file is read with headers enabled. Only one
        /// column can be returned per call.
        /// </summary>
        /// <param name="file">The file.</param>
        /// <param name="formatused">The CSV format used by the file.</param>
        /// <param name="Name">The name of the column to parse.</param>
        /// <returns>The values of the column, one per row, in file order.</returns>
        public static List <double> QuickParseCSV(string file, CSVFormat formatused, string Name)
        {
            List <double> returnedArrays = new List <double>();
            ReadCSV       csv            = new ReadCSV(file, true, formatused);

            try
            {
                while (csv.Next())
                {
                    returnedArrays.Add(csv.GetDouble(Name));
                }
            }
            finally
            {
                // the reader was previously left open; always release it
                csv.Close();
            }

            return returnedArrays;
        }
Exemple #26
0
        /// <summary>
        /// Build a VersatileMLDataSet over a CSV file that is read with
        /// headers, using a CSVFormat constructed from '.' and a tab
        /// character. The same format is assigned to the data set's
        /// NormHelper.
        /// </summary>
        /// <param name="filename">The CSV file to use as the data source.</param>
        /// <returns>The data set backed by the CSV data source.</returns>
        public VersatileMLDataSet LoadDataSetFromCsv(string filename)
        {
            var tabFormat = new CSVFormat('.', '\t');
            var source = new CSVDataSource(filename, true, tabFormat);

            var result = new VersatileMLDataSet(source);
            result.NormHelper.Format = tabFormat;

            return result;
        }
        /// <summary>
        /// Prepare the output file, write headers if needed. Any existing
        /// file is deleted first. When no output format has been set, the
        /// input format is used. Headings are written quoted and separated
        /// by the output format's separator; when no input headings are
        /// available, names of the form "field:N" are generated instead.
        /// </summary>
        ///
        /// <param name="outputFile">The name of the output file.</param>
        /// <returns>The output stream for the text file.</returns>
        /// <exception cref="QuantError">Wraps any IOException raised while preparing the file.</exception>
        public StreamWriter PrepareOutputFile(FileInfo outputFile)
        {
            try
            {
                outputFile.Delete();
                var tw = new StreamWriter(outputFile.OpenWrite());
                if (_outputFormat == null)
                {
                    _outputFormat = _inputFormat;
                }

                // write headers, if needed
                if (_produceOutputHeaders)
                {
                    var line = new StringBuilder();

                    if (_inputHeadings != null)
                    {
                        foreach (String str  in  _inputHeadings)
                        {
                            if (line.Length > 0)
                            {
                                line.Append(_outputFormat.Separator);
                            }
                            line.Append("\"");
                            line.Append(str);
                            line.Append("\"");
                        }
                    }
                    else
                    {
                        for (int i = 0; i < _columnCount; i++)
                        {
                            // generated names need the same separator as real
                            // headings; without it they run together in one column
                            if (line.Length > 0)
                            {
                                line.Append(_outputFormat.Separator);
                            }
                            line.Append("\"field:");
                            line.Append(i + 1);
                            line.Append("\"");
                        }
                    }
                    tw.WriteLine(line.ToString());
                }

                return tw;
            }
            catch (IOException e)
            {
                throw new QuantError(e);
            }
        }
Exemple #28
0
        /// <summary>
        /// Convert a CSV file to a binary training file. Any existing binary
        /// file is deleted first; the CSV is loaded through a CSVMLDataSet
        /// and every pair is copied into the binary file.
        /// </summary>
        /// <param name="csvFile">The CSV file.</param>
        /// <param name="format">The format.</param>
        /// <param name="binFile">The binary file.</param>
        /// <param name="inputCount">The number of input values.</param>
        /// <param name="outputCount">The number of output values.</param>
        /// <param name="headers">True, if there are headers on the CSV.</param>
        /// <param name="expectSignificance">Should a significance column be expected.</param>
        public static void ConvertCSV2Binary(String csvFile, CSVFormat format,
                                             String binFile, int inputCount, int outputCount,
                                             bool headers, bool expectSignificance)
        {
            new FileInfo(binFile).Delete();

            // honour the caller's header flag; it was previously ignored and
            // the file was always read as if it had no header row
            var csv = new CSVMLDataSet(csvFile,
                                       inputCount, outputCount, headers, format, expectSignificance);
            var buffer = new BufferedMLDataSet(binFile);

            try
            {
                buffer.BeginLoad(inputCount, outputCount);
                foreach (IMLDataPair pair in csv)
                {
                    buffer.Add(pair);
                }
                buffer.EndLoad();
            }
            finally
            {
                // close the binary file, as the FileInfo-based overload does
                buffer.Close();
            }
        }
Exemple #29
0
        /// <summary>
        /// Analyze the data. This counts the records and prepares the data to be
        /// processed: every row of the input file is loaded, its fields are
        /// extracted through the analyst and the result is stored as a
        /// ClusterRow in the in-memory data set.
        /// </summary>
        ///
        /// <param name="theAnalyst">The analyst to use.</param>
        /// <param name="inputFile">The input file to analyze.</param>
        /// <param name="headers">True, if the input file has headers.</param>
        /// <param name="format">The format of the input file.</param>
        public void Analyze(EncogAnalyst theAnalyst,
                            FileInfo inputFile, bool headers, CSVFormat format)
        {
            InputFilename      = inputFile;
            ExpectInputHeaders = headers;
            InputFormat        = format;

            Analyzed = true;
            _analyst = theAnalyst;

            // default the output format to the input format
            if (OutputFormat == null)
            {
                OutputFormat = InputFormat;
            }

            _data = new BasicMLDataSet();
            ResetStatus();
            int recordCount = 0;

            int outputLength = _analyst.DetermineTotalColumns();
            var csv          = new ReadCSV(InputFilename.ToString(),
                                           ExpectInputHeaders, InputFormat);

            try
            {
                ReadHeaders(csv);

                _analystHeaders = new CSVHeaders(InputHeadings);

                while (csv.Next() && !ShouldStop())
                {
                    UpdateStatus(true);

                    var row = new LoadedRow(csv, 1);

                    double[] inputArray = AnalystNormalizeCSV.ExtractFields(
                        _analyst, _analystHeaders, csv, outputLength, true);
                    var input = new ClusterRow(inputArray, row);
                    _data.Add(input);

                    recordCount++;
                }
                RecordCount = recordCount;
                Count       = csv.ColumnCount;

                // headers are read a second time once the rows have been consumed
                ReadHeaders(csv);
            }
            finally
            {
                // release the reader even when a row fails to load
                csv.Close();
            }

            ReportDone(true);
        }
        /// <summary>
        /// Construct this data set from a CSV file. The settings are stored
        /// and the file is loaded into this data set immediately through a
        /// CSVDataCODEC and a MemoryDataLoader.
        /// </summary>
        /// <param name="filename">The CSV filename to read.</param>
        /// <param name="inputSize">The number of columns that make up the input set.</param>
        /// <param name="idealSize">The number of columns that make up the ideal set.</param>
        /// <param name="headers">True if headers are present on the first line.</param>
        /// <param name="format">The format to use.</param>
        /// <param name="expectSignificance">Passed through to the CSV codec.</param>
        public CSVMLDataSet(String filename, int inputSize,
                            int idealSize, bool headers, CSVFormat format, bool expectSignificance)
        {
            _filename = filename;
            _inputSize = inputSize;
            _idealSize = idealSize;
            _format = format;
            _headers = headers;

            // load the file contents into this instance
            IDataSetCODEC codec = new CSVDataCODEC(filename, format, headers,
                                                   inputSize, idealSize, expectSignificance);
            var load = new MemoryDataLoader(codec);
            load.Result = this;

            load.External2Memory();
        }