Пример #1
0
        /// <summary>
        /// Extract fields from a file into a numeric array for machine learning.
        /// </summary>
        ///
        /// <param name="analyst">The analyst to use.</param>
        /// <param name="headers">The headers for the input data.</param>
        /// <param name="csv">The CSV that holds the input data.</param>
        /// <param name="outputLength">The length of the returned array.</param>
        /// <param name="skipOutput">True if the output should be skipped.</param>
        /// <returns>The encoded data.</returns>
        public static double[] ExtractFields(EncogAnalyst analyst,
                                             CSVHeaders headers, ReadCSV csv,
                                             int outputLength, bool skipOutput)
        {
            var output      = new double[outputLength];
            int outputIndex = 0;

            foreach (AnalystField stat in analyst.Script.Normalize.NormalizedFields)
            {
                if (stat.Action == NormalizationAction.Ignore)
                {
                    continue;
                }

                if (stat.Output && skipOutput)
                {
                    continue;
                }

                int    index = headers.Find(stat.Name);
                String str   = csv.Get(index);

                // is this an unknown value?
                if (str.Equals("?") || str.Length == 0)
                {
                    IHandleMissingValues handler = analyst.Script.Normalize.MissingValues;
                    double[]             d       = handler.HandleMissing(analyst, stat);

                    // should we skip the entire row
                    if (d == null)
                    {
                        return(null);
                    }

                    // copy the returned values in place of the missing values
                    for (int i = 0; i < d.Length; i++)
                    {
                        output[outputIndex++] = d[i];
                    }
                }
                else
                {
                    // known value

                    if (stat.Action == NormalizationAction.Normalize)
                    {
                        double d = csv.Format.Parse(str.Trim());
                        d = stat.Normalize(d);
                        output[outputIndex++] = d;
                    }
                    else
                    {
                        double[] d = stat.Encode(str.Trim());

                        foreach (double element in d)
                        {
                            output[outputIndex++] = element;
                        }
                    }
                }
            }

            return(output);
        }
Пример #2
0
        /// <summary>
        /// Process the file.
        /// </summary>
        ///
        /// <param name="outputFile">The output file.</param>
        /// <param name="method">THe method to use.</param>
        public void Process(FileInfo outputFile, IMLMethod method)
        {
            var csv = new ReadCSV(InputFilename.ToString(),
                                  ExpectInputHeaders, Format);

            IMLData output;

            foreach (AnalystField field in _analyst.Script.Normalize.NormalizedFields)
            {
                field.Init();
            }

            int outputLength = _analyst.DetermineTotalInputFieldCount();

            StreamWriter tw = PrepareOutputFile(outputFile);

            ResetStatus();
            while (csv.Next())
            {
                UpdateStatus(false);
                var row = new LoadedRow(csv, _outputColumns);

                double[] inputArray = AnalystNormalizeCSV.ExtractFields(_analyst,
                                                                        _analystHeaders, csv, outputLength, true);
                if (_series.TotalDepth > 1)
                {
                    inputArray = _series.Process(inputArray);
                }

                if (inputArray != null)
                {
                    IMLData input = new BasicMLData(inputArray);

                    // evaluation data
                    if ((method is IMLClassification) &&
                        !(method is IMLRegression))
                    {
                        // classification only?
                        var tmp = new BasicMLData(1);
                        tmp[0] = ((IMLClassification)method).Classify(input);
                        output = tmp;
                    }
                    else
                    {
                        // regression
                        output = ((IMLRegression)method).Compute(input);
                    }

                    // skip file data
                    int index       = _fileColumns;
                    int outputIndex = 0;


                    // display output
                    foreach (AnalystField field  in  _analyst.Script.Normalize.NormalizedFields)
                    {
                        if (_analystHeaders.Find(field.Name) != -1)
                        {
                            if (field.Output)
                            {
                                if (field.Classify)
                                {
                                    // classification
                                    ClassItem cls = field.DetermineClass(
                                        outputIndex, output);
                                    outputIndex += field.ColumnsNeeded;
                                    if (cls == null)
                                    {
                                        row.Data[index++] = "?Unknown?";
                                    }
                                    else
                                    {
                                        row.Data[index++] = cls.Name;
                                    }
                                }
                                else
                                {
                                    // regression
                                    double n = output[outputIndex++];
                                    n = field.DeNormalize(n);
                                    row.Data[index++] = Format
                                                        .Format(n, Precision);
                                }
                            }
                        }
                    }
                }

                WriteRow(tw, row);
            }
            ReportDone(false);
            tw.Close();
            csv.Close();
        }