示例#1
0
        /// <summary>
        /// Trains the classifier registered as "NaiveBayesClassifier" on the "Fish" predicted
        /// column of the records loaded from <c>weirdColumnResource</c>, then checks that the
        /// description "Seattle Lounge" is categorized as "Trout".
        /// </summary>
        public void WeirdColumnTest()
        {
            // Scope the resource stream so it is disposed whether the test passes or fails.
            using (Stream weirdStream = TestUtils.RetrieveResource(weirdColumnResource))
            {
                var         weirdConfig = GetWeirdConfig();
                CCRecordSet recordSet   = CCRecordSet.FromStream(weirdStream, weirdConfig);
                var         nbc         = ClassifierFactory.GetClassifierByName <string>("NaiveBayesClassifier");

                // Each training sample pairs a record description with its "Fish" value.
                nbc.Train(recordSet.Select(rec => new KeyValuePair <string, string>(rec.Description, rec.PredictedValues["Fish"])).ToList());

                // Expected value first, so a failure message labels expected/actual correctly.
                Assert.AreEqual("Trout", nbc.Categorize("Seattle Lounge").Category);
            }
        }
示例#2
0
        /// <summary>
        /// Trains the classifier registered as "NaiveBayesClassifier" on the key/value pairs
        /// parsed from <c>bookExample</c> and checks that three sample titles are categorized
        /// into the expected <c>Genre</c> values.
        /// </summary>
        public void NaiveBayesClassifierSimpleTest()
        {
            var records    = KeyValuePairFromTsv(bookExample);
            var classifier = ClassifierFactory.GetClassifierByName <Genre>("NaiveBayesClassifier");

            classifier.Train(records);

            // Expected value first, so a failure message labels expected/actual correctly.
            Assert.AreEqual(Genre.INTERIORDECORATING, classifier.Categorize("Curtains and Drapes").Category);
            Assert.AreEqual(Genre.ENGINEERING, classifier.Categorize("The Ventilation of Bridges").Category);
            Assert.AreEqual(Genre.ACCOUNTING, classifier.Categorize("Tax Accounting").Category);
        }
示例#3
0
        /// <summary>
        /// Loads the records from <c>fullChargeList</c> and trains two separate
        /// "NaiveBayesClassifier" instances on them: one on the "Category" predicted column and
        /// one on the "Owner" predicted column. Each is checked against known descriptions.
        /// </summary>
        public void NaiveBayesClassificationIntegrationTest()
        {
            // Scope the resource stream so it is disposed whether the test passes or fails.
            // The using block spans every use of the record set built from the stream.
            using (Stream oldRecordsStream = TestUtils.RetrieveResource(fullChargeList))
            {
                CCRecordSet records      = CCRecordSet.FromStream(oldRecordsStream, config);
                var         nbc          = ClassifierFactory.GetClassifierByName <string>("NaiveBayesClassifier");
                var         trainingData = records
                                           .Select(rec => new KeyValuePair <string, string>(rec.Description, rec.PredictedValues["Category"]))
                                           .ToList();

                nbc.Train(trainingData);

                // Expected value first, so a failure message labels expected/actual correctly.
                Assert.AreEqual("GROC", nbc.Categorize("Trader Joe's").Category);
                Assert.AreEqual("TRANS", nbc.Categorize("Shell Oil 27440482209 Seattle Wa").Category);

                // Second pass: a fresh classifier instance trained on the "Owner" column.
                trainingData = records
                               .Select(rec => new KeyValuePair <string, string>(rec.Description, rec.PredictedValues["Owner"]))
                               .ToList();
                nbc = ClassifierFactory.GetClassifierByName <string>("NaiveBayesClassifier");
                nbc.Train(trainingData);
                Assert.AreEqual("Bob", nbc.Categorize("Radio Shack 00133652 Knoxville").Category);
            }
        }
示例#4
0
        /// <summary>
        /// Builds one trained classifier per predicted column listed in
        /// <paramref name="config"/>, then passes the existing record set, the new records and
        /// the classifiers to <c>AppendRecords</c>.
        /// </summary>
        /// <param name="oldRecordSet">Existing records; their descriptions and predicted values are the training data.</param>
        /// <param name="newRecords">Records forwarded to <c>AppendRecords</c> together with the trained classifiers.</param>
        /// <param name="reader">Not referenced by this method body.</param>
        /// <param name="config">Configuration whose <c>columns</c> name each predicted column and the classifier to use for it.</param>
        /// <returns>The same <paramref name="oldRecordSet"/> instance that was passed in.</returns>
        internal static CCRecordSet ClassifyAndUpdate(CCRecordSet oldRecordSet, List <CCRecord> newRecords,
                                                      IRecordReader reader, C3Configuration config)
        {
            var classifiersByColumn = new Dictionary <C3PredictedColumn, IClassifier <string> >();

            foreach (C3PredictedColumn column in config.columns)
            {
                // Each sample pairs a record description with that record's value for this column.
                var samples =
                    (from record in oldRecordSet
                     select new KeyValuePair <string, string>(record.Description, record.PredictedValues[column.columnName]))
                    .ToList();

                var trained = ClassifierFactory.GetClassifierByName <string>(column.classifierName);

                Utils.Log(LoggingSeverity.DEBUG,
                          $"Training {column.classifierName} on column '{column.columnName}' with {samples.Count} records");

                trained.Train(samples);
                classifiersByColumn.Add(column, trained);
            }

            AppendRecords(oldRecordSet, newRecords, classifiersByColumn);

            return oldRecordSet;
        }