Exemple #1
0
        /// <summary>
        /// Creates a <see cref="CCRecordSet"/> for <paramref name="config"/> and populates its
        /// <c>records</c> with everything loaded from <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Text source handed to <c>LoadCCRecordsFromTextReader</c>.</param>
        /// <param name="config">Configuration used both to construct the set and to load the records.</param>
        /// <returns>The newly constructed record set with its <c>records</c> assigned.</returns>
        private static CCRecordSet FromReader(TextReader reader, C3Configuration config)
        {
            var result = new CCRecordSet(config);

            // Materialize the loaded sequence into a list before storing it on the set.
            var loaded = result.LoadCCRecordsFromTextReader(reader, config);
            result.records = loaded.ToList();

            return result;
        }
Exemple #2
0
        /// <summary>
        /// Stream-based entry point: reads the existing record set from <paramref name="oldFileStream"/>,
        /// reads the new records from <paramref name="newFileStream"/> via <paramref name="reader"/>,
        /// and forwards both to the record-set overload of <c>ClassifyAndUpdate</c>.
        /// </summary>
        /// <param name="oldFileStream">Stream passed to <c>CCRecordSet.FromStream</c>.</param>
        /// <param name="newFileStream">Stream passed to <c>reader.ReadFromStream</c>.</param>
        /// <param name="reader">Reader that produces the list of new records.</param>
        /// <param name="config">Configuration passed to every call made here.</param>
        /// <returns>Whatever the record-set overload returns.</returns>
        internal static CCRecordSet ClassifyAndUpdate(Stream oldFileStream, Stream newFileStream, IRecordReader reader, C3Configuration config)
        {
            // The old stream is consumed first, then the new one.
            CCRecordSet existingSet = CCRecordSet.FromStream(oldFileStream, config);
            List<CCRecord> incoming = reader.ReadFromStream(newFileStream, config);

            return ClassifyAndUpdate(existingSet, incoming, reader, config);
        }
Exemple #3
0
        /// <summary>
        /// Trains one classifier per column in <c>config.columns</c> using the records already in
        /// <paramref name="oldRecordSet"/>, then hands the new records and the trained classifiers
        /// to <c>AppendRecords</c>.
        /// </summary>
        /// <param name="oldRecordSet">Existing records; used as training data and passed to <c>AppendRecords</c>.</param>
        /// <param name="newRecords">Records passed on to <c>AppendRecords</c>.</param>
        /// <param name="reader">Not used by this overload.</param>
        /// <param name="config">Supplies the predicted columns and the classifier name for each.</param>
        /// <returns>The same <paramref name="oldRecordSet"/> instance that was passed in.</returns>
        internal static CCRecordSet ClassifyAndUpdate(CCRecordSet oldRecordSet, List <CCRecord> newRecords,
                                                      IRecordReader reader, C3Configuration config)
        {
            var trainedByColumn = new Dictionary<C3PredictedColumn, IClassifier<string>>();

            foreach (C3PredictedColumn column in config.columns)
            {
                var columnName     = column.columnName;
                var classifierName = column.classifierName;

                // Each training sample pairs a record's description with its value for this column.
                var samples =
                    (from rec in oldRecordSet
                     select new KeyValuePair<string, string>(rec.Description, rec.PredictedValues[columnName]))
                    .ToList();

                var trained = ClassifierFactory.GetClassifierByName<string>(classifierName);
                Utils.Log(LoggingSeverity.DEBUG,
                          $"Training {classifierName} on column '{columnName}' with {samples.Count} records");
                trained.Train(samples);

                trainedByColumn.Add(column, trained);
            }

            AppendRecords(oldRecordSet, newRecords, trainedByColumn);
            return oldRecordSet;
        }
Exemple #4
0
        /// <summary>
        /// Classifies every record in <paramref name="newRecords"/> with the classifiers in
        /// <paramref name="columnMap"/> and adds it to <paramref name="oldRecords"/>.
        /// </summary>
        /// <remarks>
        /// The whole batch is checked for duplicates before any record is classified or added, so a
        /// duplicate no longer leaves <paramref name="oldRecords"/> partially updated. New records are
        /// compared only against a snapshot of <paramref name="oldRecords"/> taken on entry; equivalent
        /// records within <paramref name="newRecords"/> itself are not detected.
        /// </remarks>
        /// <param name="oldRecords">Record set that receives the new records via <c>AddRecord</c>.</param>
        /// <param name="newRecords">Records to classify and append; their <c>PredictedValues</c> are overwritten per column.</param>
        /// <param name="columnMap">Classifier to run for each predicted column.</param>
        /// <exception cref="ApplicationException">
        /// A record in <paramref name="newRecords"/> is equivalent (per <c>UncategorizedCCRecordComparer</c>)
        /// to a record already in <paramref name="oldRecords"/>.
        /// </exception>
        internal static void AppendRecords(CCRecordSet oldRecords, List <CCRecord> newRecords,
                                           Dictionary <C3PredictedColumn, IClassifier <string> > columnMap)
        {
            Utils.Log(LoggingSeverity.DEBUG, $"Classifying {newRecords.Count} new records");
            var recordHash = oldRecords.ToHashSet(new UncategorizedCCRecordComparer());

            // Pass 1: validate the entire batch up front so a failure does not leave a half-applied update.
            foreach (var newRecord in newRecords)
            {
                if (recordHash.Contains(newRecord))
                {
                    throw new ApplicationException($"Attempt to add record {newRecord} to recordset, but equivalent record already exists.");
                }
            }

            // Pass 2: no duplicates were found, so classify each record and append it.
            foreach (var newRecord in newRecords)
            {
                foreach (var columnAndClassifier in columnMap)
                {
                    var classification = columnAndClassifier.Value.Categorize(newRecord.Description);
                    newRecord.PredictedValues[columnAndClassifier.Key.columnName] = classification.Category;
                }

                oldRecords.AddRecord(newRecord);
            }
        }