Exemplo n.º 1
0
        /// <summary>
        /// Extract fields from a file into a numeric array for machine learning.
        /// </summary>
        ///
        /// <param name="analyst">The analyst to use.</param>
        /// <param name="headers">The headers for the input data.</param>
        /// <param name="csv">The CSV that holds the input data.</param>
        /// <param name="outputLength">The length of the returned array.</param>
        /// <param name="skipOutput">True if the output should be skipped.</param>
        /// <returns>The encoded data.</returns>
        public static double[] ExtractFields(EncogAnalyst analyst,
                                             CSVHeaders headers, ReadCSV csv,
                                             int outputLength, bool skipOutput)
        {
            var output      = new double[outputLength];
            int outputIndex = 0;

            foreach (AnalystField stat in analyst.Script.Normalize.NormalizedFields)
            {
                if (stat.Action == NormalizationAction.Ignore)
                {
                    continue;
                }

                if (stat.Output && skipOutput)
                {
                    continue;
                }

                int    index = headers.Find(stat.Name);
                String str   = csv.Get(index);

                // is this an unknown value?
                if (str.Equals("?") || str.Length == 0)
                {
                    IHandleMissingValues handler = analyst.Script.Normalize.MissingValues;
                    double[]             d       = handler.HandleMissing(analyst, stat);

                    // should we skip the entire row
                    if (d == null)
                    {
                        return(null);
                    }

                    // copy the returned values in place of the missing values
                    for (int i = 0; i < d.Length; i++)
                    {
                        output[outputIndex++] = d[i];
                    }
                }
                else
                {
                    // known value

                    if (stat.Action == NormalizationAction.Normalize)
                    {
                        double d = csv.Format.Parse(str.Trim());
                        d = stat.Normalize(d);
                        output[outputIndex++] = d;
                    }
                    else
                    {
                        double[] d = stat.Encode(str.Trim());

                        foreach (double element in d)
                        {
                            output[outputIndex++] = element;
                        }
                    }
                }
            }

            return(output);
        }