/// <summary>
/// Execute the generate command: convert the configured source CSV file
/// into the binary target file, using the analyst script to decide which
/// columns are input and which are ideal.
/// </summary>
/// <param name="args">The command arguments (not used by this command).</param>
/// <returns>Always false; this command never requests a stop.</returns>
public override sealed bool ExecuteCommand(String args)
{
    // resolve the configured source and target file identifiers
    String sourceId = Prop.GetPropertyString(
        ScriptProperties.GenerateConfigSourceFile);
    String targetId = Prop.GetPropertyString(
        ScriptProperties.GenerateConfigTargetFile);
    CSVFormat csvFormat = Analyst.Script.DetermineInputFormat(sourceId);

    EncogLogging.Log(EncogLogging.LevelDebug, "Beginning generate");
    EncogLogging.Log(EncogLogging.LevelDebug, "source file:" + sourceId);
    EncogLogging.Log(EncogLogging.LevelDebug, "target file:" + targetId);

    FileInfo sourceFile = Script.ResolveFilename(sourceId);
    FileInfo targetFile = Script.ResolveFilename(targetId);

    // record that the target file is produced by this script
    Script.MarkGenerated(targetId);

    // read the source headers and map them to input/ideal column indexes
    bool hasHeaders = Script.ExpectInputHeaders(sourceId);
    var headers = new CSVHeaders(sourceFile, hasHeaders, csvFormat);
    int[] inputColumns = DetermineInputFields(headers);
    int[] idealColumns = DetermineIdealFields(headers);

    EncogUtility.ConvertCSV2Binary(sourceFile, csvFormat, targetFile,
                                   inputColumns, idealColumns, hasHeaders);
    return false;
}
/// <summary>
/// Determine the input fields.
/// </summary>
/// <param name="headerList">The headers.</param>
/// <returns>The indexes of the input fields.</returns>
private int[] DetermineInputFields(CSVHeaders headerList)
{
    var inputIndexes = new List<Int32>();

    for (int idx = 0; idx < headerList.Size(); idx++)
    {
        String baseName = headerList.GetBaseHeader(idx);
        int slice = headerList.GetSlice(idx);
        AnalystField field = Analyst.Script
            .FindNormalizedField(baseName, slice);

        // keep only columns that map to a normalized input field
        if (field != null && field.Input)
        {
            inputIndexes.Add(idx);
        }
    }

    return inputIndexes.ToArray();
}
/// <summary>
/// Generate the fields using header values.
/// </summary>
/// <param name="csv">The CSV file to use.</param>
/// <exception cref="AnalystError">If there are fewer headers than columns.</exception>
private void GenerateFieldsFromHeaders(ReadCSV csv)
{
    var h = new CSVHeaders(csv.ColumnNames);

    // Every column must have a header to name its analyzed field. The
    // original guard compared the loop index against ColumnCount inside a
    // loop bounded by ColumnCount, so it could never fire; check the
    // header count up front instead.
    if (h.Size() < csv.ColumnCount)
    {
        throw new AnalystError(
            "CSV header count does not match column count");
    }

    _fields = new AnalyzedField[csv.ColumnCount];
    for (int i = 0; i < _fields.Length; i++)
    {
        _fields[i] = new AnalyzedField(_script, h.GetHeader(i));
    }
}
/// <summary>
/// Prepare the output file, write headers if needed.
/// </summary>
/// <param name="outputFile">The output file.</param>
/// <returns>The open writer, positioned after any header line. The caller
/// owns the writer and is responsible for closing it.</returns>
/// <exception cref="QuantError">If an I/O error occurs.</exception>
private new StreamWriter PrepareOutputFile(FileInfo outputFile)
{
    try
    {
        outputFile.Delete();
        var tw = new StreamWriter(outputFile.OpenWrite());
        try
        {
            // write headers, if needed
            if (ProduceOutputHeaders)
            {
                var line = new StringBuilder();

                // handle provided fields, not all may be used, but all should
                // be displayed
                foreach (String heading in InputHeadings)
                {
                    AppendSeparator(line, Format);
                    line.Append("\"");
                    line.Append(heading);
                    line.Append("\"");
                }

                // now add the output fields that will be generated
                foreach (AnalystField field in
                         _analyst.Script.Normalize.NormalizedFields)
                {
                    if (field.Output && !field.Ignored)
                    {
                        AppendSeparator(line, Format);
                        line.Append("\"Output:");
                        line.Append(CSVHeaders.TagColumn(field.Name, 0,
                                                         field.TimeSlice, false));
                        line.Append("\"");
                    }
                }

                tw.WriteLine(line.ToString());
            }

            return tw;
        }
        catch
        {
            // don't leak the open stream when header writing fails
            tw.Dispose();
            throw;
        }
    }
    catch (IOException e)
    {
        throw new QuantError(e);
    }
}
/// <summary>
/// Analyze the data. This counts the records and prepares the data to be
/// processed.
/// </summary>
///
/// <param name="theAnalyst">The analyst to use.</param>
/// <param name="inputFile">The input file to analyze.</param>
/// <param name="headers">True, if the input file has headers.</param>
/// <param name="format">The format of the input file.</param>
public void Analyze(EncogAnalyst theAnalyst, FileInfo inputFile,
                    bool headers, CSVFormat format)
{
    // capture the configuration for this analysis pass
    InputFilename = inputFile;
    ExpectInputHeaders = headers;
    InputFormat = format;
    Analyzed = true;
    _analyst = theAnalyst;

    // default the output format to match the input format
    if (OutputFormat == null)
    {
        OutputFormat = InputFormat;
    }

    _data = new BasicMLDataSet();
    ResetStatus();
    int recordCount = 0;

    // total number of normalized columns each row expands into
    int outputLength = _analyst.DetermineTotalColumns();
    var csv = new ReadCSV(InputFilename.ToString(),
                          ExpectInputHeaders, InputFormat);
    ReadHeaders(csv);

    _analystHeaders = new CSVHeaders(InputHeadings);

    // load every row, normalizing/encoding its fields into a numeric array
    while (csv.Next() && !ShouldStop())
    {
        UpdateStatus(true);

        var row = new LoadedRow(csv, 1);

        double[] inputArray = AnalystNormalizeCSV.ExtractFields(
            _analyst, _analystHeaders, csv, outputLength, true);
        var input = new ClusterRow(inputArray, row);
        _data.Add(input);

        recordCount++;
    }
    RecordCount = recordCount;
    Count = csv.ColumnCount;

    // NOTE(review): ReadHeaders was already called before the loop; this
    // second call after the file has been fully read looks redundant —
    // confirm it is intentional before removing.
    ReadHeaders(csv);
    csv.Close();
    ReportDone(true);
}
/// <summary>
/// Analyze the data. This counts the records and prepares the data to be
/// processed.
/// </summary>
/// <param name="theAnalyst">The analyst to use.</param>
/// <param name="inputFile">The input file.</param>
/// <param name="headers">True if headers are present.</param>
/// <param name="format">The format.</param>
public void Analyze(EncogAnalyst theAnalyst, FileInfo inputFile,
                    bool headers, CSVFormat format)
{
    // record the configuration for this analysis pass
    _analyst = theAnalyst;
    InputFilename = inputFile;
    Format = format;
    ExpectInputHeaders = headers;
    Analyzed = true;

    // count the records and capture the input headings
    PerformBasicCounts();

    _fileColumns = InputHeadings.Length;
    _outputColumns = _analyst.DetermineOutputFieldCount();
    _analystHeaders = new CSVHeaders(InputHeadings);
    _series = new TimeSeriesUtil(_analyst, false, _analystHeaders.Headers);
}
/// <summary>
/// Write the headers.
/// </summary>
/// <param name="tw">The output stream.</param>
private void WriteHeaders(StreamWriter tw)
{
    var header = new StringBuilder();

    foreach (AnalystField field in _analyst.Script.Normalize.NormalizedFields)
    {
        // a single normalized field may expand into several output columns
        int columnCount = field.ColumnsNeeded;
        for (int column = 0; column < columnCount; column++)
        {
            AppendSeparator(header, InputFormat);
            header.Append('\"');
            header.Append(CSVHeaders.TagColumn(field.Name, column,
                                               field.TimeSlice,
                                               columnCount > 1));
            header.Append('\"');
        }
    }

    tw.WriteLine(header.ToString());
}
/// <summary>
/// Add headings for a raw file.
/// </summary>
/// <param name="line">The line to write the raw headings to.</param>
/// <param name="prefix">The prefix to place.</param>
/// <param name="format">The format to use.</param>
public void AddRawHeadings(StringBuilder line, String prefix,
                           CSVFormat format)
{
    int needed = ColumnsNeeded;

    for (int column = 0; column < needed; column++)
    {
        BasicFile.AppendSeparator(line, format);
        line.Append('\"');

        // prepend the optional prefix to each column heading
        if (prefix != null)
        {
            line.Append(prefix);
        }

        line.Append(CSVHeaders.TagColumn(_name, column, _timeSlice,
                                         needed > 1));
        line.Append('\"');
    }
}
/// <summary>
/// Analyze the file.
/// </summary>
/// <param name="inputFilename">The input file.</param>
/// <param name="expectInputHeaders">True, if input headers are present.</param>
/// <param name="inputFormat">The format.</param>
/// <param name="theAnalyst">The analyst to use.</param>
public void Analyze(FileInfo inputFilename, bool expectInputHeaders,
                    CSVFormat inputFormat, EncogAnalyst theAnalyst)
{
    // capture the configuration for this pass
    _analyst = theAnalyst;
    InputFilename = inputFilename;
    Format = inputFormat;
    ExpectInputHeaders = expectInputHeaders;
    Analyzed = true;

    _analystHeaders = new CSVHeaders(inputFilename, expectInputHeaders,
                                     inputFormat);

    // reset every normalized field before the data is scanned
    foreach (AnalystField field in _analyst.Script.Normalize.NormalizedFields)
    {
        field.Init();
    }

    _series = new TimeSeriesUtil(_analyst, true, _analystHeaders.Headers);
}
/// <summary>
/// Determine the ideal fields.
/// </summary>
/// <param name="headerList">The headers.</param>
/// <returns>The indexes of the ideal fields.</returns>
private int[] DetermineIdealFields(CSVHeaders headerList)
{
    // unsupervised (SOM) configurations have no ideal data
    String type = Prop.GetPropertyString(ScriptProperties.MlConfigType);
    if (type.Equals(MLMethodFactory.TypeSOM))
    {
        return new int[0];
    }

    var idealIndexes = new List<Int32>();

    for (int idx = 0; idx < headerList.Size(); idx++)
    {
        String baseName = headerList.GetBaseHeader(idx);
        int slice = headerList.GetSlice(idx);
        AnalystField field = Analyst.Script
            .FindNormalizedField(baseName, slice);

        // keep only columns that map to a normalized output field
        if (field != null && field.Output)
        {
            idealIndexes.Add(idx);
        }
    }

    return idealIndexes.ToArray();
}
/// <summary>
/// Extract fields from a file into a numeric array for machine learning.
/// </summary>
///
/// <param name="analyst">The analyst to use.</param>
/// <param name="headers">The headers for the input data.</param>
/// <param name="csv">The CSV that holds the input data.</param>
/// <param name="outputLength">The length of the returned array.</param>
/// <param name="skipOutput">True if the output should be skipped.</param>
/// <returns>The encoded data, or null if the missing-value handler
/// indicated the entire row should be skipped.</returns>
public static double[] ExtractFields(EncogAnalyst analyst,
                                     CSVHeaders headers, ReadCSV csv,
                                     int outputLength, bool skipOutput)
{
    var output = new double[outputLength];
    int outputIndex = 0;
    foreach (AnalystField stat in analyst.Script.Normalize.NormalizedFields)
    {
        // ignored fields contribute no columns at all
        if (stat.Action == NormalizationAction.Ignore)
        {
            continue;
        }

        // optionally omit output (ideal) fields, e.g. when clustering
        if (stat.Output && skipOutput)
        {
            continue;
        }

        int index = headers.Find(stat.Name);
        String str = csv.Get(index);

        // is this an unknown value? ("?" or empty marks a missing cell)
        if (str.Equals("?") || str.Length == 0)
        {
            IHandleMissingValues handler = analyst.Script.Normalize.MissingValues;
            double[] d = handler.HandleMissing(analyst, stat);

            // should we skip the entire row
            if (d == null)
            {
                return(null);
            }

            // copy the returned values in place of the missing values
            for (int i = 0; i < d.Length; i++)
            {
                output[outputIndex++] = d[i];
            }
        }
        else
        {
            // known value
            if (stat.Action == NormalizationAction.Normalize)
            {
                // numeric field: parse then scale to the normalized range
                double d = csv.Format.Parse(str.Trim());
                d = stat.Normalize(d);
                output[outputIndex++] = d;
            }
            else
            {
                // class field: encode to one or more columns
                double[] d = stat.Encode(str.Trim());

                foreach (double element in d)
                {
                    output[outputIndex++] = element;
                }
            }
        }
    }

    return(output);
}