/// <summary>
/// Analyze the data. This counts the records and prepares the data to be
/// processed: each CSV row is extracted into a normalized field array and
/// stored in the in-memory dataset as a ClusterRow.
/// </summary>
/// <param name="theAnalyst">The analyst to use.</param>
/// <param name="inputFile">The input file to analyze.</param>
/// <param name="headers">True, if the input file has headers.</param>
/// <param name="format">The format of the input file.</param>
public void Analyze(EncogAnalyst theAnalyst, FileInfo inputFile,
                    bool headers, CSVFormat format)
{
    InputFilename = inputFile;
    ExpectInputHeaders = headers;
    InputFormat = format;
    Analyzed = true;
    _analyst = theAnalyst;

    // Default the output format to the input format when none was set.
    if (OutputFormat == null)
    {
        OutputFormat = InputFormat;
    }

    _data = new BasicMLDataSet();
    ResetStatus();
    int recordCount = 0;

    int outputLength = _analyst.DetermineTotalColumns();
    var csv = new ReadCSV(InputFilename.ToString(),
                          ExpectInputHeaders, InputFormat);
    try
    {
        ReadHeaders(csv);

        _analystHeaders = new CSVHeaders(InputHeadings);

        while (csv.Next() && !ShouldStop())
        {
            UpdateStatus(true);

            var row = new LoadedRow(csv, 1);

            double[] inputArray = AnalystNormalizeCSV.ExtractFields(
                _analyst, _analystHeaders, csv, outputLength, true);
            var input = new ClusterRow(inputArray, row);
            _data.Add(input);

            recordCount++;
        }

        RecordCount = recordCount;
        Count = csv.ColumnCount;

        // NOTE(review): headers were already read before the loop; this
        // second call looks redundant — confirm before removing.
        ReadHeaders(csv);
    }
    finally
    {
        // Fix: ensure the CSV reader is always closed, even when reading
        // or field extraction throws (the original leaked it on error).
        csv.Close();
    }
    ReportDone(true);
}
/// <summary>
/// Construct the time-series utility, capturing the analyst's lag/lead
/// configuration and building a lookup from column heading to index.
/// </summary>
/// <param name="theAnalyst">The analyst to use.</param>
/// <param name="includeOutput">Should output fields be included.</param>
/// <param name="headings">The column headings.</param>
public TimeSeriesUtil(EncogAnalyst theAnalyst, bool includeOutput,
                      IEnumerable<string> headings)
{
    _buffer = new List<double[]>();
    _headingMap = new Dictionary<String, Int32>();
    _analyst = theAnalyst;

    _lagDepth = _analyst.LagDepth;
    _leadDepth = _analyst.LeadDepth;
    // One row for "now" plus the lag and lead windows.
    _totalDepth = _lagDepth + _leadDepth + 1;

    if (includeOutput)
    {
        _inputSize = _analyst.DetermineTotalColumns();
    }
    else
    {
        _inputSize = _analyst.DetermineTotalInputFieldCount();
    }

    _outputSize = _analyst.DetermineInputCount()
                  + _analyst.DetermineOutputCount();

    // Map each upper-cased heading to its column position.
    // NOTE(review): ToUpper() is culture-sensitive; lookups elsewhere must
    // upper-case the same way or keys may not match (e.g. under tr-TR).
    int index = 0;
    foreach (String heading in headings)
    {
        _headingMap[heading.ToUpper()] = index;
        index++;
    }
}
/// <summary>
/// Analyze the data. This counts the records and prepares the data to be
/// processed: each CSV row is extracted into a normalized field array and
/// stored in the in-memory dataset as an unsupervised BasicMLDataPair.
/// </summary>
/// <param name="theAnalyst">The analyst to use.</param>
/// <param name="inputFile">The input file to analyze.</param>
/// <param name="headers">True, if the input file has headers.</param>
/// <param name="format">The format of the input file.</param>
public void Analyze(EncogAnalyst theAnalyst, FileInfo inputFile,
                    bool headers, CSVFormat format)
{
    InputFilename = inputFile;
    ExpectInputHeaders = headers;
    Format = format;
    Analyzed = true;
    _analyst = theAnalyst;

    _data = new BasicMLDataSet();
    ResetStatus();
    int recordCount = 0;

    int outputLength = _analyst.DetermineTotalColumns();
    var csv = new ReadCSV(InputFilename.ToString(),
                          ExpectInputHeaders, Format);
    try
    {
        ReadHeaders(csv);

        _analystHeaders = new CSVHeaders(InputHeadings);

        while (csv.Next() && !ShouldStop())
        {
            UpdateStatus(true);

            double[] inputArray = AnalystNormalizeCSV.ExtractFields(
                _analyst, _analystHeaders, csv, outputLength, true);
            IMLData input = new BasicMLData(inputArray);
            _data.Add(new BasicMLDataPair(input));

            recordCount++;
        }

        RecordCount = recordCount;
        Count = csv.ColumnCount;

        // NOTE(review): headers were already read before the loop; this
        // second call looks redundant — confirm before removing.
        ReadHeaders(csv);
    }
    finally
    {
        // Fix: ensure the CSV reader is always closed, even when reading
        // or field extraction throws (the original leaked it on error).
        csv.Close();
    }
    ReportDone(true);
}
/// <summary>
/// Normalize the input file. Write to the specified file.
/// </summary>
/// <param name="file">The file to write to.</param>
/// <exception cref="EncogError">If the file has not been analyzed yet.</exception>
/// <exception cref="QuantError">Wraps any IOException raised while processing.</exception>
public void Normalize(FileInfo file)
{
    // Guard: analysis must have populated _analyst before we can normalize.
    if (_analyst == null)
    {
        throw new EncogError(
            "Can't normalize yet, file has not been analyzed.");
    }

    ReadCSV reader = null;
    StreamWriter writer = null;

    try
    {
        reader = new ReadCSV(InputFilename.ToString(),
                             ExpectInputHeaders, InputFormat);

        file.Delete();
        writer = new StreamWriter(file.OpenWrite());

        // Emit the header row first, when configured to do so.
        if (ProduceOutputHeaders)
        {
            WriteHeaders(writer);
        }

        ResetStatus();
        int totalColumns = _analyst.DetermineTotalColumns();

        // Normalize each row and stream it to the output file.
        while (reader.Next() && !ShouldStop())
        {
            UpdateStatus(false);

            double[] normalized = ExtractFields(
                _analyst, _analystHeaders, reader, totalColumns, false);

            // Time-series mode: Process may return null (the null check
            // below suggests it withholds rows — presumably while its
            // lag/lead window fills; confirm against TimeSeriesUtil).
            if (_series.TotalDepth > 1)
            {
                normalized = _series.Process(normalized);
            }

            if (normalized != null)
            {
                var line = new StringBuilder();
                NumberList.ToList(OutputFormat, line, normalized);
                writer.WriteLine(line);
            }
        }
    }
    catch (IOException e)
    {
        throw new QuantError(e);
    }
    finally
    {
        ReportDone(false);

        // Close both resources, logging (not propagating) close failures.
        if (reader != null)
        {
            try
            {
                reader.Close();
            }
            catch (Exception ex)
            {
                EncogLogging.Log(ex);
            }
        }

        if (writer != null)
        {
            try
            {
                writer.Close();
            }
            catch (Exception ex)
            {
                EncogLogging.Log(ex);
            }
        }
    }
}
/// <summary>
/// Construct the time-series utility, capturing the analyst's lag/lead
/// configuration and building a lookup from column heading to index.
/// </summary>
/// <param name="theAnalyst">The analyst to use.</param>
/// <param name="includeOutput">Should output fields be included.</param>
/// <param name="headings">The column headings.</param>
public TimeSeriesUtil(EncogAnalyst theAnalyst, bool includeOutput,
                      IEnumerable<string> headings)
{
    _buffer = new List<double[]>();
    _headingMap = new Dictionary<String, Int32>();
    _analyst = theAnalyst;

    _lagDepth = _analyst.LagDepth;
    _leadDepth = _analyst.LeadDepth;
    // One row for "now" plus the lag and lead windows.
    _totalDepth = _lagDepth + _leadDepth + 1;

    if (includeOutput)
    {
        _inputSize = _analyst.DetermineTotalColumns();
    }
    else
    {
        _inputSize = _analyst.DetermineTotalInputFieldCount();
    }

    _outputSize = _analyst.DetermineInputCount()
                  + _analyst.DetermineOutputCount();

    // Map each upper-cased heading to its column position.
    // NOTE(review): ToUpper() is culture-sensitive; lookups elsewhere must
    // upper-case the same way or keys may not match (e.g. under tr-TR).
    int index = 0;
    foreach (String heading in headings)
    {
        _headingMap[heading.ToUpper()] = index;
        index++;
    }
}