/// <summary>
/// Trains a "NaiveBayesClassifier" on the "Fish" predicted column of the records loaded from
/// <c>weirdColumnResource</c> and checks that "Seattle Lounge" is categorized as "Trout".
/// </summary>
public void WeirdColumnTest()
{
    // Dispose the resource stream deterministically. The using block spans the whole test so the
    // stream is still open if the record set happens to read from it lazily.
    using (Stream weirdStream = TestUtils.RetrieveResource(weirdColumnResource))
    {
        var weirdConfig = GetWeirdConfig();
        CCRecordSet recordSet = CCRecordSet.FromStream(weirdStream, weirdConfig);

        var nbc = ClassifierFactory.GetClassifierByName<string>("NaiveBayesClassifier");
        nbc.Train(recordSet
            .Select(rec => new KeyValuePair<string, string>(rec.Description, rec.PredictedValues["Fish"]))
            .ToList());

        // Expected value goes first so a failure message reports expected/actual the right way round.
        Assert.AreEqual("Trout", nbc.Categorize("Seattle Lounge").Category);
    }
}
/// <summary>
/// Trains a "NaiveBayesClassifier" over <c>Genre</c> on the pairs parsed from <c>bookExample</c>
/// and checks the genre assigned to three sample titles.
/// </summary>
public void NaiveBayesClassifierSimpleTest()
{
    var records = KeyValuePairFromTsv(bookExample);
    var classifier = ClassifierFactory.GetClassifierByName<Genre>("NaiveBayesClassifier");
    classifier.Train(records);

    // Expected value goes first so a failure message reports expected/actual the right way round.
    Assert.AreEqual(Genre.INTERIORDECORATING, classifier.Categorize("Curtains and Drapes").Category);
    Assert.AreEqual(Genre.ENGINEERING, classifier.Categorize("The Ventilation of Bridges").Category);
    Assert.AreEqual(Genre.ACCOUNTING, classifier.Categorize("Tax Accounting").Category);
}
/// <summary>
/// Loads the records from <c>fullChargeList</c>, trains a "NaiveBayesClassifier" on the
/// "Category" column and then a fresh one on the "Owner" column, and checks the category
/// predicted for sample descriptions against each.
/// </summary>
public void NaiveBayesClassificationIntegrationTest()
{
    // Dispose the resource stream deterministically. The using block spans the whole test because
    // the record set is enumerated twice and may still need the stream open.
    using (Stream oldRecordsStream = TestUtils.RetrieveResource(fullChargeList))
    {
        CCRecordSet records = CCRecordSet.FromStream(oldRecordsStream, config);

        // First pass: predict the "Category" column from the description.
        var nbc = ClassifierFactory.GetClassifierByName<string>("NaiveBayesClassifier");
        var trainingData = records
            .Select(rec => new KeyValuePair<string, string>(rec.Description, rec.PredictedValues["Category"]))
            .ToList();
        nbc.Train(trainingData);

        // Expected value goes first so a failure message reports expected/actual the right way round.
        Assert.AreEqual("GROC", nbc.Categorize("Trader Joe's").Category);
        Assert.AreEqual("TRANS", nbc.Categorize("Shell Oil 27440482209 Seattle Wa").Category);

        // Second pass: a new classifier instance trained on the "Owner" column.
        trainingData = records
            .Select(rec => new KeyValuePair<string, string>(rec.Description, rec.PredictedValues["Owner"]))
            .ToList();
        nbc = ClassifierFactory.GetClassifierByName<string>("NaiveBayesClassifier");
        nbc.Train(trainingData);

        Assert.AreEqual("Bob", nbc.Categorize("Radio Shack 00133652 Knoxville").Category);
    }
}
/// <summary>
/// Trains one classifier per predicted column listed in <paramref name="config"/>, using the
/// descriptions and existing column values from <paramref name="oldRecordSet"/>, then passes
/// the new records and the trained classifiers to <c>AppendRecords</c>.
/// </summary>
/// <param name="oldRecordSet">Existing records used as training data; also the value returned.</param>
/// <param name="newRecords">Records handed to <c>AppendRecords</c> together with the classifiers.</param>
/// <param name="reader">Not referenced by this method.</param>
/// <param name="config">Supplies the predicted columns (<c>config.columns</c>) to train on.</param>
/// <returns>The same <paramref name="oldRecordSet"/> instance that was passed in.</returns>
internal static CCRecordSet ClassifyAndUpdate(CCRecordSet oldRecordSet, List<CCRecord> newRecords, IRecordReader reader, C3Configuration config)
{
    var trainedByColumn = new Dictionary<C3PredictedColumn, IClassifier<string>>();

    foreach (C3PredictedColumn column in config.columns)
    {
        // Pair every existing description with the value already recorded for this column.
        var samples =
            (from rec in oldRecordSet
             select new KeyValuePair<string, string>(rec.Description, rec.PredictedValues[column.columnName]))
            .ToList();

        var columnClassifier = ClassifierFactory.GetClassifierByName<string>(column.classifierName);

        Utils.Log(LoggingSeverity.DEBUG, $"Training {column.classifierName} on column '{column.columnName}' with {samples.Count} records");

        columnClassifier.Train(samples);
        trainedByColumn.Add(column, columnClassifier);
    }

    AppendRecords(oldRecordSet, newRecords, trainedByColumn);

    return oldRecordSet;
}