/// <summary>
/// Analyze the raw source file: count the total number of rows and the
/// number of rows that contain missing values.
/// </summary>
private void AnalyzeFile()
{
    ScriptProperties prop = _analyst.Script.Properties;

    // Resolve the raw data source file, its CSV format and whether a
    // header row is expected.
    String sourceID = prop.GetPropertyString(
        ScriptProperties.HeaderDatasourceRawFile);
    FileInfo sourceFile = _analyst.Script.ResolveFilename(sourceID);
    CSVFormat format = _analyst.Script.DetermineFormat();
    bool headers = _analyst.Script.ExpectInputHeaders(sourceID);

    // Scan the file, counting rows and rows with missing fields.
    _rowCount = 0;
    _missingCount = 0;
    var csv = new ReadCSV(sourceFile.ToString(), headers, format);
    try
    {
        while (csv.Next())
        {
            _rowCount++;
            if (csv.HasMissing())
            {
                _missingCount++;
            }
        }
    }
    finally
    {
        // BUG FIX: close the reader even if an exception is thrown
        // mid-scan; previously the file handle leaked on failure.
        csv.Close();
    }
}
/// <summary>
/// Set the source file. This is useful if you want to use pre-existing stats
/// to normalize something and skip the analyze step.
/// </summary>
/// <param name="file">The file to use.</param>
/// <param name="headers">True, if headers are to be expected.</param>
/// <param name="format">The format of the CSV file.</param>
public void SetSourceFile(FileInfo file, bool headers, CSVFormat format)
{
    // Record the file configuration directly; no analysis is performed.
    ExpectInputHeaders = headers;
    InputFormat = format;
    InputFilename = file;
}
/// <summary>
/// Construct an analyzed field.
/// </summary>
/// <param name="theScript">The script being analyzed.</param>
/// <param name="name">The name of the field.</param>
public AnalyzedField(AnalystScript theScript, String name) : base(name)
{
    _script = theScript;
    // The field uses the same CSV format the script was configured with.
    _fmt = theScript.DetermineFormat();
    _instances = 0;
    _classMap = new Dictionary<String, AnalystClassItem>();
}
/// <summary>
/// Save the specified matrix.
/// </summary>
/// <param name="matrix">The matrix to save.</param>
/// <param name="xmlOut">The XML writer.</param>
public static void SaveMatrix(Matrix matrix, WriteXML xmlOut)
{
    // Attributes are registered before the tag they describe is opened.
    xmlOut.AddAttribute(PersistorUtil.ATTRIBUTE_MATRIX_ROWS, "" + matrix.Rows);
    xmlOut.AddAttribute(PersistorUtil.ATTRIBUTE_MATRIX_COLS, "" + matrix.Cols);
    xmlOut.BeginTag("Matrix");

    CSVFormat format = CSVFormat.EG_FORMAT;

    // Emit one row tag per matrix row; cells are comma-separated numbers
    // formatted to 20 digits of precision.
    for (int row = 0; row < matrix.Rows; row++)
    {
        var builder = new StringBuilder();
        for (int col = 0; col < matrix.Cols; col++)
        {
            if (col > 0)
            {
                builder.Append(',');
            }
            builder.Append(format.Format(matrix[row, col], 20));
        }

        xmlOut.BeginTag(PersistorUtil.ROW);
        xmlOut.AddText(builder.ToString());
        xmlOut.EndTag();
    }

    xmlOut.EndTag();
}
/// <summary>
/// Load a CSV file into a memory dataset.
/// </summary>
/// <param name="format">The CSV format to use.</param>
/// <param name="filename">The filename to load.</param>
/// <param name="headers">True if there is a header line.</param>
/// <param name="inputSize">The input size. Input always comes first in a file.</param>
/// <param name="idealSize">The ideal size, 0 for unsupervised.</param>
/// <returns>A NeuralDataSet that holds the contents of the CSV file.</returns>
public static IMLDataSet LoadCSVTOMemory(CSVFormat format, String filename,
    bool headers, int inputSize, int idealSize)
{
    var result = new BasicMLDataSet();
    var csv = new ReadCSV(filename, headers, format);
    try
    {
        while (csv.Next())
        {
            int index = 0;

            // Input columns always come first in the file.
            var input = new BasicMLData(inputSize);
            for (int i = 0; i < inputSize; i++)
            {
                input[i] = csv.GetDouble(index++);
            }

            // Ideal columns follow, only when the set is supervised.
            BasicMLData ideal = null;
            if (idealSize > 0)
            {
                ideal = new BasicMLData(idealSize);
                for (int i = 0; i < idealSize; i++)
                {
                    ideal[i] = csv.GetDouble(index++);
                }
            }

            IMLDataPair pair = new BasicMLDataPair(input, ideal);
            result.Add(pair);
        }
    }
    finally
    {
        // BUG FIX: the reader was never closed, leaking the file handle.
        csv.Close();
    }
    return result;
}
/// <summary>
/// Execute the generate command: convert the configured CSV source file
/// into an Encog binary training file.
/// </summary>
/// <param name="args">Not used.</param>
/// <returns>False; this command never requests a stop.</returns>
public override sealed bool ExecuteCommand(String args)
{
    // Resolve source/target filenames from the script properties.
    String sourceID = Prop.GetPropertyString(
        ScriptProperties.GenerateConfigSourceFile);
    String targetID = Prop.GetPropertyString(
        ScriptProperties.GenerateConfigTargetFile);
    CSVFormat format = Analyst.Script.DetermineInputFormat(sourceID);

    EncogLogging.Log(EncogLogging.LevelDebug, "Beginning generate");
    EncogLogging.Log(EncogLogging.LevelDebug, "source file:" + sourceID);
    EncogLogging.Log(EncogLogging.LevelDebug, "target file:" + targetID);

    FileInfo sourceFile = Script.ResolveFilename(sourceID);
    FileInfo targetFile = Script.ResolveFilename(targetID);

    // Record that the target file is produced by this script.
    Script.MarkGenerated(targetID);

    // Determine which columns feed the input and ideal vectors.
    bool headers = Script.ExpectInputHeaders(sourceID);
    var headerList = new CSVHeaders(sourceFile, headers, format);
    int[] inputFields = DetermineInputFields(headerList);
    int[] idealFields = DetermineIdealFields(headerList);

    EncogUtility.ConvertCSV2Binary(sourceFile, format, targetFile,
        inputFields, idealFields, headers);
    return false;
}
/// <summary>
/// Convert an EGB binary file to a CSV file, prompting the user for the
/// desired CSV number format.
/// </summary>
private void ConvertEGB2CSV()
{
    if (_cmd.Args.Count != 2)
    {
        Console.WriteLine(@"Must specify a source and target.");
        return;
    }

    String sourceFile = _cmd.Args[0];
    String targetFile = _cmd.Args[1];

    // Ask the user which decimal-point/separator convention to emit.
    AnalystFileFormat fileFormat =
        ConvertStringConst.String2AnalystFileFormat(
            _cmd.PromptString("format", "decpnt|comma"));
    CSVFormat format = ConvertStringConst.ConvertToCSVFormat(fileFormat);

    // Remove any stale output, then stream the binary data out as CSV.
    new FileInfo(targetFile).Delete();
    IDataSetCODEC codec = new CSVDataCODEC(targetFile, format, false);
    var loader = new BinaryDataLoader(codec)
    {
        Status = new ConsoleStatusReportable()
    };
    _sw.Start();
    loader.Binary2External(sourceFile);
}
/// <summary>
/// Convert a CSV file into an EGB binary training file, prompting the user
/// for headers, column counts and the CSV format.
/// </summary>
private void ConvertCSV2EGB()
{
    if (_cmd.Args.Count != 2)
    {
        Console.WriteLine(@"Must specify a source and target.");
        return;
    }

    String sourceFile = _cmd.Args[0];
    String targetFile = _cmd.Args[1];

    // Gather the shape of the CSV from the user.
    bool headers = _cmd.PromptBoolean("headers", true);
    int inputCount = _cmd.PromptInteger("inputCount", 0);
    int outputCount = _cmd.PromptInteger("outputCount", 0);

    if (inputCount == 0)
    {
        Console.WriteLine(@"Must specify an input count.");
        return;
    }

    AnalystFileFormat fileFormat =
        ConvertStringConst.String2AnalystFileFormat(
            _cmd.PromptString("format", "decpnt|comma"));
    CSVFormat format = ConvertStringConst.ConvertToCSVFormat(fileFormat);

    // Remove any stale target, then load the CSV into binary form.
    new FileInfo(targetFile).Delete();
    IDataSetCODEC codec = new CSVDataCODEC(sourceFile, format, headers,
        inputCount, outputCount, false);
    var loader = new BinaryDataLoader(codec)
    {
        Status = new ConsoleStatusReportable()
    };
    _sw.Start();
    loader.External2Binary(targetFile);
}
/// <summary>
/// Verify CSVWriter.SetFormat: formats accumulate as they are assigned to
/// successive indexes, and assigning null to a slot stores null.
/// </summary>
public void SetFormatTest()
{
    CSVFormat format = createFormat();
    CSVWriter csvw = new CSVWriter();

    // A fresh writer must start with no formats at all.
    int actual = csvw.Formats.Count();
    Assert.IsTrue(actual == 0, $"Error: {actual} was returned when 0 were expected");

    // Assign ten formats, one index at a time; the count must track.
    int expected = 0;
    while (expected < 10)
    {
        csvw.SetFormat(expected, format);
        expected++;
        actual = csvw.Formats.Count();
        Assert.IsTrue(actual == expected, $"Error: {actual} was returned when {expected} were expected");
    }

    // Overwriting a random slot with null must be observable as null.
    int rnd = RandomInt(0, 9);
    csvw.SetFormat(rnd, null);
    Assert.IsTrue(csvw.Formats[rnd] == null, $"Error: Format [{rnd}] returned a non-null value when NULL was expected");
}
/// <summary>
/// Convert a CSV file to binary.
/// </summary>
/// <param name="csvFile">The CSV file to convert.</param>
/// <param name="format">The format.</param>
/// <param name="binFile">The binary file.</param>
/// <param name="input">Column indexes that form the input vector.</param>
/// <param name="ideal">Column indexes that form the ideal vector.</param>
/// <param name="headers">True, if headers are present.</param>
public static void ConvertCSV2Binary(FileInfo csvFile, CSVFormat format,
    FileInfo binFile, int[] input, int[] ideal, bool headers)
{
    binFile.Delete();

    var csv = new ReadCSV(csvFile.ToString(), headers, format);
    var buffer = new BufferedMLDataSet(binFile.ToString());
    try
    {
        buffer.BeginLoad(input.Length, ideal.Length);
        while (csv.Next())
        {
            var inputData = new BasicMLData(input.Length);
            var idealData = new BasicMLData(ideal.Length);

            // handle input data
            for (int i = 0; i < input.Length; i++)
            {
                inputData[i] = csv.GetDouble(input[i]);
            }

            // handle ideal data (comment previously mislabeled this loop
            // as "input data")
            for (int i = 0; i < ideal.Length; i++)
            {
                idealData[i] = csv.GetDouble(ideal[i]);
            }

            // add to dataset
            buffer.Add(inputData, idealData);
        }
        buffer.EndLoad();
    }
    finally
    {
        // BUG FIX: release both resources even when conversion fails
        // partway; previously a mid-load exception leaked both handles.
        buffer.Close();
        csv.Close();
    }
}
/// <summary>
/// Analyze the data. This counts the records and prepares the data to be
/// processed.
/// </summary>
/// <param name="theAnalyst">The analyst to use.</param>
/// <param name="inputFile">The input file.</param>
/// <param name="headers">True if headers are present.</param>
/// <param name="format">The format the file is in.</param>
public void Analyze(EncogAnalyst theAnalyst, FileInfo inputFile,
    bool headers, CSVFormat format)
{
    _analyst = theAnalyst;
    InputFilename = inputFile;
    ExpectInputHeaders = headers;
    Format = format;
    Analyzed = true;

    PerformBasicCounts();

    _inputCount = _analyst.DetermineInputCount();
    _outputCount = _analyst.DetermineOutputCount();
    _idealCount = InputHeadings.Length - _inputCount;

    // The file must contain either exactly the input columns, or the
    // input columns followed by the output columns.
    int columns = InputHeadings.Length;
    bool inputOnly = columns == _inputCount;
    bool inputAndOutput = columns == (_inputCount + _outputCount);
    if (!inputOnly && !inputAndOutput)
    {
        throw new AnalystError("Invalid number of columns(" + columns
            + "), must match input(" + _inputCount
            + ") count or input+output(" + (_inputCount + _outputCount)
            + ") count.");
    }
}
/// <summary>
/// Save the dataset to a CSV file.
/// </summary>
/// <param name="targetFile">The target file.</param>
/// <param name="format">The format to use.</param>
/// <param name="set">The data set.</param>
public static void SaveCSV(FileInfo targetFile, CSVFormat format, IMLDataSet set)
{
    try
    {
        // BUG FIX: "using" guarantees the writer is flushed and closed
        // even if formatting a row throws; previously the stream leaked.
        using (var file = new StreamWriter(targetFile.ToString()))
        {
            foreach (IMLDataPair data in set)
            {
                var line = new StringBuilder();

                // Input columns first, then ideal columns, separated
                // per the supplied format.
                for (int i = 0; i < data.Input.Count; i++)
                {
                    double d = data.Input[i];
                    BasicFile.AppendSeparator(line, format);
                    line.Append(format.Format(d, EncogFramework.DefaultPrecision));
                }

                for (int i = 0; i < data.Ideal.Count; i++)
                {
                    double d = data.Ideal[i];
                    BasicFile.AppendSeparator(line, format);
                    line.Append(format.Format(d, EncogFramework.DefaultPrecision));
                }

                file.WriteLine(line);
            }
        }
    }
    catch (IOException ex)
    {
        throw new EncogError(ex);
    }
}
/// <inheritdoc />
public override sealed bool ExecuteCommand(String args)
{
    // get filenames
    String sourceID = Prop.GetPropertyString(
        ScriptProperties.BalanceConfigSourceFile);
    String targetID = Prop.GetPropertyString(
        ScriptProperties.BalanceConfigTargetFile);

    EncogLogging.Log(EncogLogging.LevelDebug, "Beginning balance");
    EncogLogging.Log(EncogLogging.LevelDebug, "source file:" + sourceID);
    EncogLogging.Log(EncogLogging.LevelDebug, "target file:" + targetID);

    FileInfo sourceFile = Script.ResolveFilename(sourceID);
    FileInfo targetFile = Script.ResolveFilename(targetID);

    // get other config data: the per-class row cap and the field to
    // balance on.
    int countPer = Prop.GetPropertyInt(
        ScriptProperties.BalanceConfigCountPer);
    String targetFieldStr = Prop.GetPropertyString(
        ScriptProperties.BalanceConfigBalanceField);
    DataField targetFieldDf = Analyst.Script.FindDataField(
        targetFieldStr);

    // Balancing only makes sense on a categorical (class) field, so
    // validate both that the field exists and that it is a class field.
    if (targetFieldDf == null)
    {
        throw new AnalystError("Can't find balance target field: "
            + targetFieldStr);
    }
    if (!targetFieldDf.Class)
    {
        throw new AnalystError("Can't balance on non-class field: "
            + targetFieldStr);
    }

    int targetFieldIndex = Analyst.Script
        .FindDataFieldIndex(targetFieldDf);

    // mark generated
    Script.MarkGenerated(targetID);

    // get formats
    // NOTE(review): outputFormat is computed but never used below —
    // BalanceCSV presumably derives its output format itself; confirm.
    CSVFormat inputFormat = Script.DetermineFormat();
    CSVFormat outputFormat = Script.DetermineFormat();

    // prepare to normalize: register the task with the analyst so it can
    // be cancelled, wire up progress reporting, then run the balance.
    var balance = new BalanceCSV { Script = Script };
    Analyst.CurrentQuantTask = balance;
    balance.Report = new AnalystReportBridge(Analyst);

    bool headers = Script.ExpectInputHeaders(sourceID);
    balance.Analyze(sourceFile, headers, inputFormat);
    balance.ProduceOutputHeaders = true;
    balance.Process(targetFile, targetFieldIndex, countPer);

    // Deregister the task and report whether a stop was requested.
    Analyst.CurrentQuantTask = null;
    return(balance.ShouldStop());
}
/// <inheritdoc />
public override sealed bool ExecuteCommand(String args)
{
    // Resolve the source and target filenames from the script.
    String sourceID = Prop.GetPropertyString(
        ScriptProperties.RandomizeConfigSourceFile);
    String targetID = Prop.GetPropertyString(
        ScriptProperties.RandomizeConfigTargetFile);

    EncogLogging.Log(EncogLogging.LevelDebug, "Beginning randomize");
    EncogLogging.Log(EncogLogging.LevelDebug, "source file:" + sourceID);
    EncogLogging.Log(EncogLogging.LevelDebug, "target file:" + targetID);

    FileInfo sourceFile = Script.ResolveFilename(sourceID);
    FileInfo targetFile = Script.ResolveFilename(targetID);

    CSVFormat format = Script.DetermineFormat();

    // Record that the target file is produced by this script.
    Script.MarkGenerated(targetID);

    // Register the shuffle task so it can be cancelled, wire up progress
    // reporting, then shuffle the source rows into the target file.
    var shuffler = new ShuffleCSV { Script = Script };
    Analyst.CurrentQuantTask = shuffler;
    shuffler.Report = new AnalystReportBridge(Analyst);
    shuffler.Analyze(sourceFile, Script.ExpectInputHeaders(sourceID), format);
    shuffler.Process(targetFile);
    Analyst.CurrentQuantTask = null;

    return shuffler.ShouldStop();
}
/// <summary>
/// Construct a CSV source from a filename. Allows a delimiter character to
/// be specified.
/// </summary>
/// <param name="file">The filename.</param>
/// <param name="headers">The headers.</param>
/// <param name="format">The format.</param>
public CSVDataSource(string file, bool headers, CSVFormat format)
{
    // Only record configuration here; nothing is read yet.
    _format = format;
    _headers = headers;
    _file = file;
}
/// <summary>
/// Construct a CSV source from a filename. The format parameter specifies
/// the separator character to use, as well as the number format.
/// </summary>
/// <param name="file">The filename.</param>
/// <param name="headers">The headers.</param>
/// <param name="delim">The delimiter.</param>
public CSVDataSource(string file, bool headers, char delim)
{
    _file = file;
    _headers = headers;
    // Combine the default decimal character with the caller-supplied
    // column delimiter.
    _format = new CSVFormat(CSVFormat.DecimalCharacter, delim);
}
/// <summary>
/// Determine the input format for the specified file.
/// </summary>
/// <param name="sourceID">The file.</param>
/// <returns>The input format.</returns>
public CSVFormat DetermineInputFormat(String sourceID)
{
    // The raw datasource keeps its declared source format; any other
    // (generated) file uses the format configured for the setup step.
    String rawID = Properties.GetPropertyString(
        ScriptProperties.HeaderDatasourceRawFile);

    if (sourceID.Equals(rawID))
    {
        return Properties.GetPropertyCSVFormat(
            ScriptProperties.HeaderDatasourceSourceFormat);
    }

    return Properties.GetPropertyCSVFormat(
        ScriptProperties.SetupConfigCSVFormat);
}
/// <summary>
/// Append a separator. The separator will only be appended if the line is
/// not empty. This is used to build comma(or other) separated lists.
/// </summary>
/// <param name="line">The line to append to.</param>
/// <param name="format">The format to use.</param>
public static void AppendSeparator(StringBuilder line, CSVFormat format)
{
    // Nothing to do for an empty line, or when the line already ends
    // with the separator (avoids doubled separators).
    string separator = format.Separator + "";
    bool needsSeparator = line.Length > 0
        && !line.ToString().EndsWith(separator);
    if (needsSeparator)
    {
        line.Append(format.Separator);
    }
}
/// <summary>
/// Construct this data set using a comma as a delimiter.
/// </summary>
/// <param name="filename">The CSV filename to read.</param>
/// <param name="inputSize">The number of columns that make up the input set.</param>
/// <param name="idealSize">The number of columns that make up the ideal set.</param>
/// <param name="headers">True if headers are present on the first line.</param>
/// <param name="format">The format to use.</param>
public CSVNeuralDataSet(String filename, int inputSize, int idealSize,
    bool headers, CSVFormat format)
{
    // Capture the configuration; the file itself is read elsewhere.
    this.headers = headers;
    this.format = format;
    this.idealSize = idealSize;
    this.inputSize = inputSize;
    this.filename = filename;
}
/// <summary>
/// Construct a loaded row from an IMLData.
/// </summary>
/// <param name="format">The format to store the numbers in.</param>
/// <param name="data">The data to use.</param>
/// <param name="extra">The extra positions to allocate.</param>
public LoadedRow(CSVFormat format, IMLData data, int extra)
{
    // Allocate room for every data element plus the requested extra
    // trailing columns (left null).
    _data = new String[data.Count + extra];
    for (int index = 0; index < data.Count; index++)
    {
        // Five digits of precision, matching the row storage convention.
        _data[index] = format.Format(data[index], 5);
    }
}
/// <summary>
/// Construct a loaded row from an array.
/// </summary>
/// <param name="format">The format to store the numbers in.</param>
/// <param name="data">The data to use.</param>
/// <param name="extra">The extra positions to allocate.</param>
public LoadedRow(CSVFormat format, double[] data, int extra)
{
    // Allocate room for every array element plus the requested extra
    // trailing columns (left null).
    _data = new String[data.Length + extra];
    for (int index = 0; index < data.Length; index++)
    {
        // Five digits of precision, matching the row storage convention.
        _data[index] = format.Format(data[index], 5);
    }
}
/// <summary>
/// Analyze the file.
/// </summary>
/// <param name="inputFile">The name of the input file.</param>
/// <param name="headers">True, if headers are expected.</param>
/// <param name="format">The format.</param>
public void Analyze(FileInfo inputFile, bool headers, CSVFormat format)
{
    // Record the file configuration before counting.
    Format = format;
    ExpectInputHeaders = headers;
    InputFilename = inputFile;
    Analyzed = true;

    // Count rows/columns so later passes know the file's dimensions.
    PerformBasicCounts();
}
/// <summary>
/// Load financial data.
/// </summary>
/// <param name="ticker">The ticker symbol.</param>
/// <param name="output">The output file.</param>
/// <param name="outputFormat">The output format.</param>
/// <param name="from">Starting date.</param>
/// <param name="to">Ending date.</param>
public void LoadAllData(String ticker, String output, CSVFormat outputFormat,
    DateTime from, DateTime to)
{
    try
    {
        Uri urlData = BuildURL(ticker, from, to);
        WebRequest httpData = WebRequest.Create(urlData);
        var responseData = (HttpWebResponse) httpData.GetResponse();

        if (responseData != null)
        {
            // BUG FIX: the response, its stream and the output writer were
            // never disposed when an error occurred mid-download; "using"
            // guarantees cleanup on every path.
            using (responseData)
            using (Stream istreamData = responseData.GetResponseStream())
            using (TextWriter tw = new StreamWriter(output))
            {
                // The remote feed is always English-formatted CSV with a
                // header row.
                var csvData = new ReadCSV(istreamData, true, CSVFormat.English);

                tw.WriteLine("date,time,open price,high price,low price,close price,volume,adjusted price");

                while (csvData.Next())
                {
                    // Pull the raw values out of the downloaded CSV.
                    DateTime date = csvData.GetDate("date");
                    double adjustedClose = csvData.GetDouble("adj close");
                    double open = csvData.GetDouble("open");
                    double close = csvData.GetDouble("close");
                    double high = csvData.GetDouble("high");
                    double low = csvData.GetDouble("low");
                    var volume = (long) csvData.GetDouble("volume");

                    // Re-emit them using the caller's requested format.
                    var line = new StringBuilder();
                    line.Append(NumericDateUtil.DateTime2Long(date));
                    line.Append(outputFormat.Separator);
                    line.Append(NumericDateUtil.Time2Int(date));
                    line.Append(outputFormat.Separator);
                    line.Append(outputFormat.Format(open, Precision));
                    line.Append(outputFormat.Separator);
                    line.Append(outputFormat.Format(high, Precision));
                    line.Append(outputFormat.Separator);
                    line.Append(outputFormat.Format(low, Precision));
                    line.Append(outputFormat.Separator);
                    line.Append(outputFormat.Format(close, Precision));
                    line.Append(outputFormat.Separator);
                    line.Append(volume);
                    line.Append(outputFormat.Separator);
                    line.Append(outputFormat.Format(adjustedClose, Precision));
                    tw.WriteLine(line.ToString());
                }
            }
        }
    }
    catch (WebException ex)
    {
        throw new QuantError(ex);
    }
}
/// <summary>
/// Process, and sort the files.
/// </summary>
/// <param name="inputFile">The input file.</param>
/// <param name="outputFile">The output file.</param>
/// <param name="headers">True, if headers are to be used.</param>
/// <param name="format">The format of the file.</param>
public void Process(FileInfo inputFile, FileInfo outputFile, bool headers,
    CSVFormat format)
{
    // Configure the input side.
    Format = format;
    ExpectInputHeaders = headers;
    InputFilename = inputFile;

    // Load everything into memory, sort it, then write the result out.
    ReadInputFile();
    SortData();
    WriteOutputFile(outputFile);
}
/// <summary>
/// parses one column of a csv and returns an array of doubles.
/// you can only return one double array with this method.
/// </summary>
/// <param name="file">The file.</param>
/// <param name="formatused">The formatused.</param>
/// <param name="Name">The name of the column to parse.</param>
/// <returns>The values of the named column, in file order.</returns>
public static List<double> QuickParseCSV(string file, CSVFormat formatused, string Name)
{
    var returnedArrays = new List<double>();

    // Headers are required (true) so the column can be located by name.
    var csv = new ReadCSV(file, true, formatused);
    try
    {
        while (csv.Next())
        {
            returnedArrays.Add(csv.GetDouble(Name));
        }
    }
    finally
    {
        // BUG FIX: the reader was never closed, leaking the file handle.
        csv.Close();
    }

    return returnedArrays;
}
/// <summary>
/// Load a tab-separated CSV file (with headers, '.' as the decimal point)
/// into a versatile dataset, configuring its normalization helper with the
/// same format.
/// </summary>
/// <param name="filename">The CSV file to load.</param>
/// <returns>The loaded dataset.</returns>
public VersatileMLDataSet LoadDataSetFromCsv(string filename)
{
    // Tab-delimited with '.' as the decimal character.
    var format = new CSVFormat('.', '\t');
    var source = new CSVDataSource(filename, true, format);

    var dataSet = new VersatileMLDataSet(source);
    // The normalization helper must parse numbers the same way.
    dataSet.NormHelper.Format = format;
    return dataSet;
}
/// <summary>
/// Prepare the output file, write headers if needed.
/// </summary>
/// <param name="outputFile">The name of the output file.</param>
/// <returns>The output stream for the text file.</returns>
public StreamWriter PrepareOutputFile(FileInfo outputFile)
{
    try
    {
        // Start from an empty file.
        outputFile.Delete();
        var tw = new StreamWriter(outputFile.OpenWrite());

        // Default the output format to the input format.
        if (_outputFormat == null)
        {
            _outputFormat = _inputFormat;
        }

        // write headers, if needed
        if (_produceOutputHeaders)
        {
            var line = new StringBuilder();

            if (_inputHeadings != null)
            {
                // Echo the original headings, quoted and separated.
                foreach (String str in _inputHeadings)
                {
                    if (line.Length > 0)
                    {
                        line.Append(_outputFormat.Separator);
                    }
                    line.Append("\"");
                    line.Append(str);
                    line.Append("\"");
                }
            }
            else
            {
                // No headings known: generate "field:1","field:2",...
                for (int i = 0; i < _columnCount; i++)
                {
                    // BUG FIX: generated headers were concatenated with no
                    // separator between them ("field:1""field:2"...).
                    if (i > 0)
                    {
                        line.Append(_outputFormat.Separator);
                    }
                    line.Append("\"field:");
                    line.Append(i + 1);
                    line.Append("\"");
                }
            }

            tw.WriteLine(line.ToString());
        }

        return tw;
    }
    catch (IOException e)
    {
        throw new QuantError(e);
    }
}
/// <summary>
/// Convert a CSV file to a binary training file.
/// </summary>
/// <param name="csvFile">The CSV file.</param>
/// <param name="format">The format.</param>
/// <param name="binFile">The binary file.</param>
/// <param name="inputCount">The number of input values.</param>
/// <param name="outputCount">The number of output values.</param>
/// <param name="headers">True, if there are headers on the CSV.</param>
/// <param name="expectSignificance">Should a significance column be expected.</param>
public static void ConvertCSV2Binary(String csvFile, CSVFormat format,
    String binFile, int inputCount, int outputCount, bool headers,
    bool expectSignificance)
{
    new FileInfo(binFile).Delete();

    // BUG FIX: the headers argument was previously ignored — "false" was
    // hard-coded, so a headed CSV had its header row parsed as data.
    var csv = new CSVMLDataSet(csvFile, inputCount, outputCount,
        headers, format, expectSignificance);

    var buffer = new BufferedMLDataSet(binFile);
    buffer.BeginLoad(inputCount, outputCount);
    foreach (IMLDataPair pair in csv)
    {
        buffer.Add(pair);
    }
    buffer.EndLoad();

    // BUG FIX: close the buffered dataset (the FileInfo-based overload of
    // this conversion does), releasing the underlying binary file.
    buffer.Close();
}
/// <summary>
/// Analyze the data. This counts the records and prepares the data to be
/// processed.
/// </summary>
/// <param name="theAnalyst">The analyst to use.</param>
/// <param name="inputFile">The input file to analyze.</param>
/// <param name="headers">True, if the input file has headers.</param>
/// <param name="format">The format of the input file.</param>
public void Analyze(EncogAnalyst theAnalyst, FileInfo inputFile,
    bool headers, CSVFormat format)
{
    // Record the file configuration and mark this object as analyzed.
    InputFilename = inputFile;
    ExpectInputHeaders = headers;
    InputFormat = format;
    Analyzed = true;
    _analyst = theAnalyst;

    // Default the output format to the input format.
    if (OutputFormat == null)
    {
        OutputFormat = InputFormat;
    }

    _data = new BasicMLDataSet();
    ResetStatus();
    int recordCount = 0;

    int outputLength = _analyst.DetermineTotalColumns();
    var csv = new ReadCSV(InputFilename.ToString(),
        ExpectInputHeaders, InputFormat);
    ReadHeaders(csv);

    _analystHeaders = new CSVHeaders(InputHeadings);

    // Load every row into memory as a ClusterRow, normalizing the fields
    // via the analyst. The loop stops early if a stop is requested.
    while (csv.Next() && !ShouldStop())
    {
        UpdateStatus(true);

        // Keep the raw row (with one extra slot) alongside the
        // normalized field values.
        var row = new LoadedRow(csv, 1);

        double[] inputArray = AnalystNormalizeCSV.ExtractFields(
            _analyst, _analystHeaders, csv, outputLength, true);
        var input = new ClusterRow(inputArray, row);
        _data.Add(input);

        recordCount++;
    }
    RecordCount = recordCount;
    Count = csv.ColumnCount;

    // NOTE(review): ReadHeaders is called a second time after the data
    // has been consumed — presumably to refresh header state; confirm
    // this is intentional before removing.
    ReadHeaders(csv);
    csv.Close();
    ReportDone(true);
}
/// <summary>
/// Construct this data set using a comma as a delimiter.
/// </summary>
/// <param name="filename">The CSV filename to read.</param>
/// <param name="inputSize">The number of columns that make up the input set.</param>
/// <param name="idealSize">The number of columns that make up the ideal set.</param>
/// <param name="headers">True if headers are present on the first line.</param>
/// <param name="format">The format to use.</param>
/// <param name="expectSignificance">True if a significance column is expected.</param>
public CSVMLDataSet(String filename, int inputSize, int idealSize,
    bool headers, CSVFormat format, bool expectSignificance)
{
    // Remember the configuration for later inspection.
    _filename = filename;
    _inputSize = inputSize;
    _idealSize = idealSize;
    _format = format;
    _headers = headers;

    // Decode the CSV straight into this dataset's memory.
    IDataSetCODEC codec = new CSVDataCODEC(filename, format, headers,
        inputSize, idealSize, expectSignificance);
    var loader = new MemoryDataLoader(codec);
    loader.Result = this;
    loader.External2Memory();
}