/// <summary>
        /// Run the generate command: look up the configured source and target
        /// files, work out which CSV columns are input and which are ideal, and
        /// hand everything to <c>EncogUtility.ConvertCSV2Binary</c>.
        /// </summary>
        ///
        /// <param name="args">Command arguments; this implementation does not read them.</param>
        /// <returns>Always <c>false</c>.</returns>
        public override sealed bool ExecuteCommand(String args)
        {
            // logical file ids come from the script properties
            String sourceId = Prop.GetPropertyString(
                ScriptProperties.GenerateConfigSourceFile);
            String targetId = Prop.GetPropertyString(
                ScriptProperties.GenerateConfigTargetFile);
            CSVFormat csvFormat = Analyst.Script.DetermineInputFormat(sourceId);

            EncogLogging.Log(EncogLogging.LevelDebug, "Beginning generate");
            EncogLogging.Log(EncogLogging.LevelDebug, "source file:" + sourceId);
            EncogLogging.Log(EncogLogging.LevelDebug, "target file:" + targetId);

            // turn the ids into real files
            FileInfo sourcePath = Script.ResolveFilename(sourceId);
            FileInfo targetPath = Script.ResolveFilename(targetId);

            // flag the target as a generated file
            Script.MarkGenerated(targetId);

            // parse the column headers of the source file
            bool hasHeaders = Script.ExpectInputHeaders(sourceId);
            var columnHeaders = new CSVHeaders(sourcePath, hasHeaders, csvFormat);

            int[] inputColumns = DetermineInputFields(columnHeaders);
            int[] idealColumns = DetermineIdealFields(columnHeaders);

            EncogUtility.ConvertCSV2Binary(
                sourcePath, csvFormat, targetPath,
                inputColumns, idealColumns, hasHeaders);

            return false;
        }
        /// <summary>
        /// Collect the indexes of every header column whose matching normalized
        /// field is flagged as an input.
        /// </summary>
        ///
        /// <param name="headerList">The headers.</param>
        /// <returns>The indexes of the input fields, in header order.</returns>
        private int[] DetermineInputFields(CSVHeaders headerList)
        {
            var inputColumns = new List<int>();

            for (int column = 0; column < headerList.Size(); column++)
            {
                String headerName = headerList.GetBaseHeader(column);
                int timeSlice = headerList.GetSlice(column);
                AnalystField match =
                    Analyst.Script.FindNormalizedField(headerName, timeSlice);

                // columns with no normalized field, or a non-input one, are skipped
                if (match == null || !match.Input)
                {
                    continue;
                }

                inputColumns.Add(column);
            }

            return inputColumns.ToArray();
        }
Exemple #3
0
        /// <summary>
        ///     Generate the fields using header values. One
        ///     <c>AnalyzedField</c> is created per CSV column, named after the
        ///     header at the same index.
        /// </summary>
        /// <param name="csv">The CSV file to use.</param>
        /// <exception cref="AnalystError">
        ///     Thrown when there are fewer headers than columns.
        /// </exception>
        private void GenerateFieldsFromHeaders(ReadCSV csv)
        {
            var h = new CSVHeaders(csv.ColumnNames);
            int columnCount = csv.ColumnCount;

            // The previous in-loop test compared the loop index with the very
            // count the array was sized from, so it could never fire. Compare
            // against the number of headers instead, before any state changes.
            // Extra headers beyond the column count are still tolerated, as before.
            if (h.Size() < columnCount)
            {
                throw new AnalystError(
                          "CSV header count does not match column count");
            }

            _fields = new AnalyzedField[columnCount];
            for (int i = 0; i < _fields.Length; i++)
            {
                _fields[i] = new AnalyzedField(_script, h.GetHeader(i));
            }
        }
Exemple #4
0
        /// <summary>
        /// Prepare the output file, write headers if needed. Any existing file
        /// is deleted first. When <c>ProduceOutputHeaders</c> is set, a single
        /// header line is written: every input heading, followed by an
        /// <c>Output:</c> column for each normalized field that is an output
        /// and not ignored.
        /// </summary>
        ///
        /// <param name="outputFile">The output file.</param>
        ///<returns>The file to write to. The caller owns (and must close) the writer.</returns>
        /// <exception cref="QuantError">Wraps any <c>IOException</c> raised while preparing the file.</exception>
        private new StreamWriter PrepareOutputFile(FileInfo outputFile)
        {
            StreamWriter tw = null;
            bool prepared = false;

            try
            {
                outputFile.Delete();
                tw = new StreamWriter(outputFile.OpenWrite());

                // write headers, if needed
                if (ProduceOutputHeaders)
                {
                    var line = new StringBuilder();

                    // handle provided fields, not all may be used, but all should
                    // be displayed
                    foreach (String heading in InputHeadings)
                    {
                        AppendSeparator(line, Format);
                        line.Append("\"");
                        line.Append(heading);
                        line.Append("\"");
                    }

                    // now add the output fields that will be generated
                    foreach (AnalystField field in _analyst.Script.Normalize.NormalizedFields)
                    {
                        if (field.Output && !field.Ignored)
                        {
                            AppendSeparator(line, Format);
                            line.Append("\"Output:");
                            line.Append(CSVHeaders.TagColumn(field.Name, 0,
                                                             field.TimeSlice, false));
                            line.Append("\"");
                        }
                    }

                    tw.WriteLine(line.ToString());
                }

                prepared = true;
                return tw;
            }
            catch (IOException e)
            {
                throw new QuantError(e);
            }
            finally
            {
                // If anything failed after the file was opened, release the
                // handle instead of leaking it; the caller never sees the writer.
                if (!prepared && tw != null)
                {
                    try
                    {
                        tw.Dispose();
                    }
                    catch (IOException)
                    {
                        // best effort: keep the original failure visible to the caller
                    }
                }
            }
        }
Exemple #5
0
        /// <summary>
        /// Analyze the data. This counts the records and prepares the data to be
        /// processed: every CSV row is encoded with
        /// <c>AnalystNormalizeCSV.ExtractFields</c> (output fields skipped) and
        /// stored in the data set as a <c>ClusterRow</c>.
        /// </summary>
        ///
        /// <param name="theAnalyst">The analyst to use.</param>
        /// <param name="inputFile">The input file to analyze.</param>
        /// <param name="headers">True, if the input file has headers.</param>
        /// <param name="format">The format of the input file.</param>
        public void Analyze(EncogAnalyst theAnalyst,
                            FileInfo inputFile, bool headers, CSVFormat format)
        {
            InputFilename      = inputFile;
            ExpectInputHeaders = headers;
            InputFormat        = format;

            Analyzed = true;
            _analyst = theAnalyst;

            // default the output format to the input format
            if (OutputFormat == null)
            {
                OutputFormat = InputFormat;
            }

            _data = new BasicMLDataSet();
            ResetStatus();
            int recordCount = 0;

            int outputLength = _analyst.DetermineTotalColumns();
            var csv          = new ReadCSV(InputFilename.ToString(),
                                           ExpectInputHeaders, InputFormat);

            try
            {
                ReadHeaders(csv);

                _analystHeaders = new CSVHeaders(InputHeadings);

                while (csv.Next() && !ShouldStop())
                {
                    UpdateStatus(true);

                    var row = new LoadedRow(csv, 1);

                    double[] inputArray = AnalystNormalizeCSV.ExtractFields(
                        _analyst, _analystHeaders, csv, outputLength, true);

                    // A null result means the row is to be skipped (e.g. the
                    // missing-value handler rejected it); do not store or count it.
                    if (inputArray == null)
                    {
                        continue;
                    }

                    var input = new ClusterRow(inputArray, row);
                    _data.Add(input);

                    recordCount++;
                }
                RecordCount = recordCount;
                Count       = csv.ColumnCount;

                // NOTE(review): headers are read a second time after the rows have
                // been consumed; kept as in the original - confirm it is needed.
                ReadHeaders(csv);
            }
            finally
            {
                // always release the reader, even when a row fails to parse
                csv.Close();
            }

            ReportDone(true);
        }
Exemple #6
0
        /// <summary>
        /// Analyze the input. Stores the supplied settings, runs the basic
        /// counts, records the file and output column counts, and builds the
        /// header lookup and the time-series helper.
        /// </summary>
        ///
        /// <param name="theAnalyst">The analyst to use.</param>
        /// <param name="inputFile">The input file.</param>
        /// <param name="headers">True if headers are present.</param>
        /// <param name="format">The format.</param>
        public void Analyze(EncogAnalyst theAnalyst,
                            FileInfo inputFile, bool headers, CSVFormat format)
        {
            // remember what the caller asked for
            InputFilename = inputFile;
            ExpectInputHeaders = headers;
            Format = format;
            Analyzed = true;
            _analyst = theAnalyst;

            PerformBasicCounts();

            // column bookkeeping
            _fileColumns = InputHeadings.Length;
            _outputColumns = _analyst.DetermineOutputFieldCount();

            // header lookup plus the time-series helper built on top of it
            _analystHeaders = new CSVHeaders(InputHeadings);
            _series = new TimeSeriesUtil(
                _analyst,
                false,
                _analystHeaders.Headers);
        }
Exemple #7
0
        /// <summary>
        /// Write the header line: one quoted, tagged column name for every
        /// column that each normalized field needs.
        /// </summary>
        ///
        /// <param name="tw">The output stream.</param>
        private void WriteHeaders(StreamWriter tw)
        {
            var headerLine = new StringBuilder();

            foreach (AnalystField field in _analyst.Script.Normalize.NormalizedFields)
            {
                int columnCount = field.ColumnsNeeded;

                for (int column = 0; column < columnCount; column++)
                {
                    AppendSeparator(headerLine, InputFormat);

                    // the final argument is true only when the field spans several columns
                    String tagged = CSVHeaders.TagColumn(
                        field.Name, column, field.TimeSlice, columnCount > 1);

                    headerLine.Append('\"');
                    headerLine.Append(tagged);
                    headerLine.Append('\"');
                }
            }

            tw.WriteLine(headerLine.ToString());
        }
Exemple #8
0
        /// <summary>
        /// Add headings for a raw file. One quoted heading is appended for each
        /// column this field needs, each preceded by a separator.
        /// </summary>
        ///
        /// <param name="line">The line to write the raw headings to.</param>
        /// <param name="prefix">The prefix to place; may be null for no prefix.</param>
        /// <param name="format">The format to use.</param>
        public void AddRawHeadings(StringBuilder line,
                                   String prefix, CSVFormat format)
        {
            int columnCount = ColumnsNeeded;
            int column = 0;

            while (column < columnCount)
            {
                String tagged = CSVHeaders.TagColumn(
                    _name, column, _timeSlice, columnCount > 1);

                BasicFile.AppendSeparator(line, format);

                // StringBuilder.Append(string) leaves the builder unchanged for
                // a null argument, so a null prefix simply contributes nothing.
                line.Append('\"')
                    .Append(prefix)
                    .Append(tagged)
                    .Append('\"');

                column++;
            }
        }
Exemple #9
0
        /// <summary>
        ///     Analyze the file. Stores the supplied settings, reads the file's
        ///     headers, calls <c>Init</c> on every normalized field, and builds
        ///     the time-series helper.
        /// </summary>
        /// <param name="inputFilename">The input file.</param>
        /// <param name="expectInputHeaders">True, if input headers are present.</param>
        /// <param name="inputFormat">The format.</param>
        /// <param name="theAnalyst">The analyst to use.</param>
        public void Analyze(FileInfo inputFilename,
                            bool expectInputHeaders, CSVFormat inputFormat,
                            EncogAnalyst theAnalyst)
        {
            // remember what the caller asked for
            InputFilename = inputFilename;
            Format = inputFormat;
            ExpectInputHeaders = expectInputHeaders;
            _analyst = theAnalyst;
            Analyzed = true;

            // header lookup built straight from the file
            _analystHeaders = new CSVHeaders(
                inputFilename, expectInputHeaders, inputFormat);

            // initialise each normalized field before it is used
            foreach (AnalystField normalizedField in _analyst.Script.Normalize.NormalizedFields)
            {
                normalizedField.Init();
            }

            _series = new TimeSeriesUtil(
                _analyst,
                true,
                _analystHeaders.Headers);
        }
        /// <summary>
        /// Collect the indexes of every header column whose matching normalized
        /// field is flagged as an output. When the configured method type is
        /// <c>MLMethodFactory.TypeSOM</c> there are no ideal fields and an empty
        /// array is returned.
        /// </summary>
        ///
        /// <param name="headerList">The headers.</param>
        /// <returns>The indexes of the ideal fields, in header order.</returns>
        private int[] DetermineIdealFields(CSVHeaders headerList)
        {
            String methodType = Prop.GetPropertyString(
                ScriptProperties.MlConfigType);

            // non-supervised methods have no ideal columns
            if (methodType.Equals(MLMethodFactory.TypeSOM))
            {
                return new int[0];
            }

            var idealColumns = new List<int>();

            for (int column = 0; column < headerList.Size(); column++)
            {
                String headerName = headerList.GetBaseHeader(column);
                int timeSlice = headerList.GetSlice(column);
                AnalystField match =
                    Analyst.Script.FindNormalizedField(headerName, timeSlice);

                // columns with no normalized field, or a non-output one, are skipped
                if (match == null || !match.Output)
                {
                    continue;
                }

                idealColumns.Add(column);
            }

            return idealColumns.ToArray();
        }
Exemple #11
0
        /// <summary>
        /// Extract fields from the current CSV row into a numeric array for
        /// machine learning. Fields whose action is <c>Ignore</c> are skipped,
        /// as are output fields when <paramref name="skipOutput"/> is true.
        /// A value of <c>"?"</c> or an empty string is treated as missing and
        /// passed to the script's missing-value handler.
        /// </summary>
        ///
        /// <param name="analyst">The analyst to use.</param>
        /// <param name="headers">The headers for the input data.</param>
        /// <param name="csv">The CSV that holds the input data.</param>
        /// <param name="outputLength">The length of the returned array.</param>
        /// <param name="skipOutput">True if the output should be skipped.</param>
        /// <returns>
        /// The encoded data, or <c>null</c> when the missing-value handler
        /// returns null (the whole row should be skipped).
        /// </returns>
        public static double[] ExtractFields(EncogAnalyst analyst,
                                             CSVHeaders headers, ReadCSV csv,
                                             int outputLength, bool skipOutput)
        {
            var encoded = new double[outputLength];
            int next = 0;

            foreach (AnalystField field in analyst.Script.Normalize.NormalizedFields)
            {
                // ignored fields, and output fields when asked, contribute nothing
                if (field.Action == NormalizationAction.Ignore
                    || (field.Output && skipOutput))
                {
                    continue;
                }

                int column = headers.Find(field.Name);
                String raw = csv.Get(column);

                bool isKnown = !(raw.Equals("?") || raw.Length == 0);

                if (isKnown)
                {
                    if (field.Action == NormalizationAction.Normalize)
                    {
                        // single numeric value: parse, then normalize
                        double parsed = csv.Format.Parse(raw.Trim());
                        double normalized = field.Normalize(parsed);
                        encoded[next++] = normalized;
                    }
                    else
                    {
                        // any other action encodes to one or more values
                        double[] values = field.Encode(raw.Trim());

                        foreach (double value in values)
                        {
                            encoded[next++] = value;
                        }
                    }

                    continue;
                }

                // missing value: let the configured handler decide
                IHandleMissingValues handler = analyst.Script.Normalize.MissingValues;
                double[] substitute = handler.HandleMissing(analyst, field);

                // a null answer means the entire row is to be dropped
                if (substitute == null)
                {
                    return null;
                }

                // the handler's values stand in for the missing ones
                foreach (double value in substitute)
                {
                    encoded[next++] = value;
                }
            }

            return encoded;
        }