Process() public method

Process and balance the data.
public Process ( FileInfo outputFile, int targetField, int countPer ) : void
outputFile System.IO.FileInfo The output file to write data to.
targetField int The index of the field to balance on.
countPer int The desired count per class.
return void
Example #1
0
        /// <summary>
        /// Balances a generated CSV file that has a header row on field index 1
        /// with a count of 2 per class, then checks the written lines and the
        /// per-class counts reported by <c>BalanceCSV.Counts</c>.
        /// </summary>
        public void TestBalanceCSVHeaders()
        {
            GenerateTestFile(true);

            try
            {
                var norm = new BalanceCSV();
                norm.Analyze(InputName, true, CSVFormat.English);
                norm.Process(OutputName, 1, 2);

                // The using statement closes the reader even when an assertion
                // throws, so the output file is not left locked.
                using (var tr = new StreamReader(OutputName.ToString()))
                {
                    Assert.AreEqual("\"a\",\"b\"", tr.ReadLine());
                    Assert.AreEqual("one,1", tr.ReadLine());
                    Assert.AreEqual("two,1", tr.ReadLine());
                    Assert.AreEqual("four,2", tr.ReadLine());
                    Assert.AreEqual("five,2", tr.ReadLine());
                    Assert.AreEqual("six,3", tr.ReadLine());
                }

                Assert.AreEqual(2, norm.Counts["1"]);
                Assert.AreEqual(2, norm.Counts["2"]);
                Assert.AreEqual(1, norm.Counts["3"]);
            }
            finally
            {
                // Remove the temporary files whether the test passed or failed,
                // so a failing run does not leave files behind.
                InputName.Delete();
                OutputName.Delete();
            }
        }
        /// <inheritdoc />
        /// <remarks>
        /// Reads the balance configuration (source file, target file, count per
        /// class and balance field) from the script properties, validates that
        /// the balance field exists and is a class field, then runs a
        /// <c>BalanceCSV</c> over the source file and writes the result to the
        /// target file. Returns the value of <c>ShouldStop()</c> on the balance
        /// task.
        /// </remarks>
        /// <exception cref="AnalystError">
        /// The configured balance field cannot be found, or it is not a class
        /// field.
        /// </exception>
        public override sealed bool ExecuteCommand(String args)
        {
            // get filenames
            String sourceID = Prop.GetPropertyString(
                ScriptProperties.BalanceConfigSourceFile);
            String targetID = Prop.GetPropertyString(
                ScriptProperties.BalanceConfigTargetFile);

            EncogLogging.Log(EncogLogging.LevelDebug, "Beginning balance");
            EncogLogging.Log(EncogLogging.LevelDebug, "source file:" + sourceID);
            EncogLogging.Log(EncogLogging.LevelDebug, "target file:" + targetID);

            FileInfo sourceFile = Script.ResolveFilename(sourceID);
            FileInfo targetFile = Script.ResolveFilename(targetID);

            // get other config data
            int countPer = Prop.GetPropertyInt(
                ScriptProperties.BalanceConfigCountPer);
            String targetFieldStr = Prop.GetPropertyString(
                ScriptProperties.BalanceConfigBalanceField);
            DataField targetFieldDf = Analyst.Script.FindDataField(
                targetFieldStr);
            if (targetFieldDf == null)
            {
                throw new AnalystError("Can't find balance target field: "
                                       + targetFieldStr);
            }
            if (!targetFieldDf.Class)
            {
                throw new AnalystError("Can't balance on non-class field: "
                                       + targetFieldStr);
            }

            int targetFieldIndex = Analyst.Script
                                          .FindDataFieldIndex(targetFieldDf);

            // mark generated
            Script.MarkGenerated(targetID);

            // get format (only the input format is consumed by this method)
            CSVFormat inputFormat = Script.DetermineFormat();

            // prepare to balance
            var balance = new BalanceCSV {Script = Script};
            Analyst.CurrentQuantTask = balance;
            try
            {
                balance.Report = new AnalystReportBridge(Analyst);

                bool headers = Script.ExpectInputHeaders(sourceID);
                balance.Analyze(sourceFile, headers, inputFormat);
                balance.ProduceOutputHeaders = true;
                balance.Process(targetFile, targetFieldIndex, countPer);
            }
            finally
            {
                // Always clear the current task, even when Analyze/Process
                // throws, so the analyst is not left pointing at a dead task.
                Analyst.CurrentQuantTask = null;
            }

            return balance.ShouldStop();
        }