Analyze() public method

Analyze the data. This counts the records and prepares the data to be processed.
public Analyze ( FileInfo inputFile, bool headers, CSVFormat format ) : void
inputFile System.IO.FileInfo The input file to process.
headers bool True if headers are present.
format Encog.Util.CSV.CSVFormat The format of the CSV file.
return void
Example #1
0
        /// <summary>
        /// Balances a generated CSV file that has a header row and checks both
        /// the lines written to the output file and the per-class counts.
        /// </summary>
        public void TestBalanceCSVHeaders()
        {
            GenerateTestFile(true);
            try
            {
                var norm = new BalanceCSV();
                norm.Analyze(InputName, true, CSVFormat.English);
                norm.Process(OutputName, 1, 2);

                // The using block releases the file handle even when an
                // assertion fails, so the cleanup below can delete the file.
                using (var tr = new StreamReader(OutputName.ToString()))
                {
                    Assert.AreEqual("\"a\",\"b\"", tr.ReadLine());
                    Assert.AreEqual("one,1", tr.ReadLine());
                    Assert.AreEqual("two,1", tr.ReadLine());
                    Assert.AreEqual("four,2", tr.ReadLine());
                    Assert.AreEqual("five,2", tr.ReadLine());
                    Assert.AreEqual("six,3", tr.ReadLine());
                }

                Assert.AreEqual(2, norm.Counts["1"]);
                Assert.AreEqual(2, norm.Counts["2"]);
                Assert.AreEqual(1, norm.Counts["3"]);
            }
            finally
            {
                // Remove the temporary files whether the test passed or failed.
                InputName.Delete();
                OutputName.Delete();
            }
        }
        /// <inheritdoc />
        public sealed override bool ExecuteCommand(String args)
        {
            // get filenames
            String sourceID = Prop.GetPropertyString(
                ScriptProperties.BalanceConfigSourceFile);
            String targetID = Prop.GetPropertyString(
                ScriptProperties.BalanceConfigTargetFile);

            EncogLogging.Log(EncogLogging.LevelDebug, "Beginning balance");
            EncogLogging.Log(EncogLogging.LevelDebug, "source file:" + sourceID);
            EncogLogging.Log(EncogLogging.LevelDebug, "target file:" + targetID);

            FileInfo sourceFile = Script.ResolveFilename(sourceID);
            FileInfo targetFile = Script.ResolveFilename(targetID);

            // get other config data
            int countPer = Prop.GetPropertyInt(
                ScriptProperties.BalanceConfigCountPer);
            String targetFieldStr = Prop.GetPropertyString(
                ScriptProperties.BalanceConfigBalanceField);

            // the balance field must exist in the script and be a class field
            DataField targetFieldDf = Analyst.Script.FindDataField(
                targetFieldStr);
            if (targetFieldDf == null)
            {
                throw new AnalystError("Can't find balance target field: "
                                       + targetFieldStr);
            }
            if (!targetFieldDf.Class)
            {
                throw new AnalystError("Can't balance on non-class field: "
                                       + targetFieldStr);
            }

            int targetFieldIndex = Analyst.Script
                                          .FindDataFieldIndex(targetFieldDf);

            // mark generated
            Script.MarkGenerated(targetID);

            // get the format used to read the source file
            CSVFormat inputFormat = Script.DetermineFormat();

            // prepare to balance
            var balance = new BalanceCSV {Script = Script};
            Analyst.CurrentQuantTask = balance;
            try
            {
                balance.Report = new AnalystReportBridge(Analyst);

                bool headers = Script.ExpectInputHeaders(sourceID);
                balance.Analyze(sourceFile, headers, inputFormat);
                balance.ProduceOutputHeaders = true;
                balance.Process(targetFile, targetFieldIndex, countPer);
            }
            finally
            {
                // Always detach the task from the analyst, even when analysis
                // or processing throws, so no stale task reference is left.
                Analyst.CurrentQuantTask = null;
            }

            return balance.ShouldStop();
        }