/// <summary>
/// Loads the record set stored in <c>weirdColumnResource</c> using the configuration returned by
/// <c>GetWeirdConfig()</c>, trains a "NaiveBayesClassifier" on (description, "Fish" predicted value)
/// pairs, and checks that "Seattle Lounge" is categorized as "Trout".
/// </summary>
public void WeirdColumnTest()
{
    // The using block spans the whole test so the stream is still open while the record set is
    // being enumerated, and is released even if an assertion fails.
    using (Stream weirdStream = TestUtils.RetrieveResource(weirdColumnResource))
    {
        var weirdConfig = GetWeirdConfig();
        CCRecordSet recordSet = CCRecordSet.FromStream(weirdStream, weirdConfig);

        var nbc = ClassifierFactory.GetClassifierByName<string>("NaiveBayesClassifier");
        var trainingData = recordSet
            .Select(rec => new KeyValuePair<string, string>(rec.Description, rec.PredictedValues["Fish"]))
            .ToList();
        nbc.Train(trainingData);

        // Expected value first, actual second, so a failure message reads correctly.
        Assert.AreEqual("Trout", nbc.Categorize("Seattle Lounge").Category);
    }
}
/// <summary>
/// Round-trips a record set through a file: reads it from the <c>fullChargeList</c> resource,
/// serializes it to <c>serializationOutput</c>, reads it back with <c>CCRecordSet.FromFile</c>,
/// and checks that both record sets contain equivalent records.
/// </summary>
public void TestRecordSetFromCsv()
{
    // Remove any file left behind by an earlier run; otherwise the existence check below could
    // pass without this run having written anything.
    if (File.Exists(serializationOutput))
    {
        File.Delete(serializationOutput);
    }

    // Keep the source stream open for the whole test and release it deterministically.
    using (Stream s = TestUtils.RetrieveResource(fullChargeList))
    {
        CCRecordSet recordSet = CCRecordSet.FromStream(s, config);

        recordSet.SerializeToFile(serializationOutput);
        Assert.IsTrue(File.Exists(serializationOutput));

        CCRecordSet newRecordSet = CCRecordSet.FromFile(serializationOutput, config);
        CollectionAssert.AreEquivalent(recordSet.ToList(), newRecordSet.ToList());
    }
}
/// <summary>
/// Loads the record set from the <c>fullChargeList</c> resource and passes it to
/// <c>Updater.ClassifyAndUpdate</c> together with a one-element list holding the set's own
/// first record, i.e. a record that is already present in the set.
/// </summary>
/// <remarks>
/// NOTE(review): the test contains no assertion, so it passes whenever
/// <c>ClassifyAndUpdate</c> returns without throwing. Confirm whether a post-condition
/// (for example an unchanged record count) should be asserted.
/// </remarks>
public void DuplicateDetectionTest()
{
    var recordReader = new CsvRecordReader("USBank");

    // Dispose the resource stream once the update has run, even if it throws.
    using (Stream stream = TestUtils.RetrieveResource(fullChargeList))
    {
        CCRecordSet recordSet = CCRecordSet.FromStream(stream, config);

        // Re-submit the first existing record as the "new" input.
        var duplicateRecord = new List<CCRecord>() { recordSet.ToArray()[0] };

        Updater.ClassifyAndUpdate(recordSet, duplicateRecord, recordReader, config);
    }
}
/// <summary>
/// Builds a period summary from the <c>fullChargeList</c> records using the period specifier
/// registered under <c>Consts.PERIOD_SPECIFIER_MONTH</c>, the aggregation registered under
/// <c>Consts.AGGREGATION_AVG</c> and the first mock predicted column, then checks the
/// "Period start" values of the report and one cell in each of its first four rows.
/// </summary>
public void TestGrouping()
{
    // Keep the stream open while the record set is converted and release it afterwards.
    using (Stream s = TestUtils.RetrieveResource(fullChargeList))
    {
        CCRecordSet recordSet = CCRecordSet.FromStream(s, config);

        var periodSpec = Selectors.periodSpecifiers[Consts.PERIOD_SPECIFIER_MONTH];
        var aggregation = Selectors.aggreations[Consts.AGGREGATION_AVG];
        var predictedColumns = TestUtils.GetMockC3PredictedColumns();

        var report = Transforms.GetPeriodSummary(
            recordSet.ToDataTable(),
            periodSpec,
            predictedColumns[0],
            aggregation);

        // Built from components rather than parsed from "M/d/yyyy" text, so the expected values
        // do not depend on the culture of the machine running the test.
        var expectedHeaders = new DateTime[]
        {
            new DateTime(2004, 1, 1),
            new DateTime(2004, 2, 1),
            new DateTime(2004, 3, 1),
            new DateTime(2004, 4, 1)
        };
        var actualHeaders = report.AsEnumerable()
            .Select(row => row["Period start"])
            .Cast<DateTime>()
            .ToArray();
        CollectionAssert.AreEqual(expectedHeaders, actualHeaders);

        // One cell per row, stepping one column to the right for each successive row.
        TestUtils.AssertApproximatelyEqual((decimal)report.Rows[0][1], 65.73384615m);
        TestUtils.AssertApproximatelyEqual((decimal)report.Rows[1][2], 44.94272727m);
        TestUtils.AssertApproximatelyEqual((decimal)report.Rows[2][3], 32.17625m);
        TestUtils.AssertApproximatelyEqual((decimal)report.Rows[3][4], 111.4819697m);
    }
}
/// <summary>
/// Trains "NaiveBayesClassifier" instances on the records from the <c>fullChargeList</c>
/// resource and checks their predictions: first on (description, "Category") pairs, then,
/// with a newly obtained classifier, on (description, "Owner") pairs.
/// </summary>
public void NaiveBayesClassificationIntegrationTest()
{
    // The stream stays open for the whole test because the records are enumerated twice,
    // and it is disposed even when an assertion fails.
    using (Stream oldRecordsStream = TestUtils.RetrieveResource(fullChargeList))
    {
        CCRecordSet records = CCRecordSet.FromStream(oldRecordsStream, config);

        // Pass 1: predict the "Category" value from the description.
        var nbc = ClassifierFactory.GetClassifierByName<string>("NaiveBayesClassifier");
        var trainingData = records
            .Select(rec => new KeyValuePair<string, string>(rec.Description, rec.PredictedValues["Category"]))
            .ToList();
        nbc.Train(trainingData);

        // Expected value first, actual second, so failure messages read correctly.
        Assert.AreEqual("GROC", nbc.Categorize("Trader Joe's").Category);
        Assert.AreEqual("TRANS", nbc.Categorize("Shell Oil 27440482209 Seattle Wa").Category);

        // Pass 2: a separately obtained classifier predicts the "Owner" value.
        trainingData = records
            .Select(rec => new KeyValuePair<string, string>(rec.Description, rec.PredictedValues["Owner"]))
            .ToList();
        nbc = ClassifierFactory.GetClassifierByName<string>("NaiveBayesClassifier");
        nbc.Train(trainingData);

        Assert.AreEqual("Bob", nbc.Categorize("Radio Shack 00133652 Knoxville").Category);
    }
}
/// <summary>
/// Calls <c>CCRecordSet.FromStream</c> on the <c>invalidValueResource</c> resource with the
/// shared <c>config</c>.
/// </summary>
/// <remarks>
/// NOTE(review): no assertion is visible in the method body. Presumably the expected outcome
/// (for example an expected-exception attribute) is declared outside this method; confirm.
/// </remarks>
public void TestRecordSetWithInvalidValues()
{
    // The using block releases the stream whether FromStream returns or throws.
    using (Stream s = TestUtils.RetrieveResource(invalidValueResource))
    {
        // The resulting record set is not inspected; only the call itself is exercised.
        CCRecordSet.FromStream(s, config);
    }
}