/// <summary> /// Analyze the data. This counts the records and prepares the data to be /// processed. /// </summary> /// /// <param name="theAnalyst">The analyst to use.</param> /// <param name="inputFile">The input file to analyze.</param> /// <param name="headers">True, if the input file has headers.</param> /// <param name="format">The format of the input file.</param> public void Analyze(EncogAnalyst theAnalyst, FileInfo inputFile, bool headers, CSVFormat format) { InputFilename = inputFile; ExpectInputHeaders = headers; InputFormat = format; Analyzed = true; _analyst = theAnalyst; if (OutputFormat == null) { OutputFormat = InputFormat; } _data = new BasicMLDataSet(); ResetStatus(); int recordCount = 0; int outputLength = _analyst.DetermineTotalColumns(); var csv = new ReadCSV(InputFilename.ToString(), ExpectInputHeaders, InputFormat); ReadHeaders(csv); _analystHeaders = new CSVHeaders(InputHeadings); while (csv.Next() && !ShouldStop()) { UpdateStatus(true); var row = new LoadedRow(csv, 1); double[] inputArray = AnalystNormalizeCSV.ExtractFields( _analyst, _analystHeaders, csv, outputLength, true); var input = new ClusterRow(inputArray, row); _data.Add(input); recordCount++; } RecordCount = recordCount; Count = csv.ColumnCount; ReadHeaders(csv); csv.Close(); ReportDone(true); }
/// <summary> /// Process the file. /// </summary> /// /// <param name="outputFile">The output file.</param> /// <param name="method">THe method to use.</param> public void Process(FileInfo outputFile, IMLMethod method) { var csv = new ReadCSV(InputFilename.ToString(), ExpectInputHeaders, Format); IMLData output; foreach (AnalystField field in _analyst.Script.Normalize.NormalizedFields) { field.Init(); } int outputLength = _analyst.DetermineTotalInputFieldCount(); StreamWriter tw = PrepareOutputFile(outputFile); ResetStatus(); while (csv.Next()) { UpdateStatus(false); var row = new LoadedRow(csv, _outputColumns); double[] inputArray = AnalystNormalizeCSV.ExtractFields(_analyst, _analystHeaders, csv, outputLength, true); if (_series.TotalDepth > 1) { inputArray = _series.Process(inputArray); } if (inputArray != null) { IMLData input = new BasicMLData(inputArray); // evaluation data if ((method is IMLClassification) && !(method is IMLRegression)) { // classification only? var tmp = new BasicMLData(1); tmp[0] = ((IMLClassification)method).Classify(input); output = tmp; } else { // regression output = ((IMLRegression)method).Compute(input); } // skip file data int index = _fileColumns; int outputIndex = 0; // display output foreach (AnalystField field in _analyst.Script.Normalize.NormalizedFields) { if (_analystHeaders.Find(field.Name) != -1) { if (field.Output) { if (field.Classify) { // classification ClassItem cls = field.DetermineClass( outputIndex, output); outputIndex += field.ColumnsNeeded; if (cls == null) { row.Data[index++] = "?Unknown?"; } else { row.Data[index++] = cls.Name; } } else { // regression double n = output[outputIndex++]; n = field.DeNormalize(n); row.Data[index++] = Format .Format(n, Precision); } } } } } WriteRow(tw, row); } ReportDone(false); tw.Close(); csv.Close(); }