/// <summary>
/// Creates a record set for <paramref name="config"/> and fills it with the
/// records loaded from <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Text source handed to <c>LoadCCRecordsFromTextReader</c>.</param>
/// <param name="config">Configuration used both to construct the set and to load the records.</param>
/// <returns>The newly created record set with its records materialized into a list.</returns>
private static CCRecordSet FromReader(TextReader reader, C3Configuration config)
{
    var result = new CCRecordSet(config);

    // Materialize eagerly so the stored records do not depend on the reader afterwards.
    var loaded = result.LoadCCRecordsFromTextReader(reader, config);
    result.records = loaded.ToList();

    return result;
}
/// <summary>
/// Stream-based convenience overload: loads the existing record set from
/// <paramref name="oldFileStream"/>, reads the new records from
/// <paramref name="newFileStream"/>, and delegates to the record-based overload.
/// </summary>
/// <param name="oldFileStream">Stream holding the existing record set.</param>
/// <param name="newFileStream">Stream holding the new records, read via <paramref name="reader"/>.</param>
/// <param name="reader">Reader used to parse <paramref name="newFileStream"/>.</param>
/// <param name="config">Configuration passed to both loading steps and to the classification step.</param>
/// <returns>The result of the record-based <c>ClassifyAndUpdate</c> overload.</returns>
internal static CCRecordSet ClassifyAndUpdate(Stream oldFileStream, Stream newFileStream, IRecordReader reader, C3Configuration config)
{
    // The existing set is loaded first, then the incoming records.
    CCRecordSet existing = CCRecordSet.FromStream(oldFileStream, config);
    List<CCRecord> incoming = reader.ReadFromStream(newFileStream, config);

    return ClassifyAndUpdate(existing, incoming, reader, config);
}
/// <summary>
/// Trains one classifier per configured predicted column using the records in
/// <paramref name="oldRecordSet"/>, then classifies <paramref name="newRecords"/>
/// and appends them to that same set.
/// </summary>
/// <param name="oldRecordSet">Existing records; used as training data and receives the new records.</param>
/// <param name="newRecords">Records to classify and append.</param>
/// <param name="reader">Not used by this overload.</param>
/// <param name="config">Supplies the predicted columns (<c>config.columns</c>) to train for.</param>
/// <returns>The same <paramref name="oldRecordSet"/> instance, after the new records have been appended.</returns>
internal static CCRecordSet ClassifyAndUpdate(CCRecordSet oldRecordSet, List<CCRecord> newRecords, IRecordReader reader, C3Configuration config)
{
    var trainedByColumn = new Dictionary<C3PredictedColumn, IClassifier<string>>();

    foreach (C3PredictedColumn predictedColumn in config.columns)
    {
        // Each training sample pairs a record's description with the value it
        // already holds for the column being trained.
        var trainingData =
            (from record in oldRecordSet
             select new KeyValuePair<string, string>(
                 record.Description,
                 record.PredictedValues[predictedColumn.columnName]))
            .ToList();

        var columnClassifier = ClassifierFactory.GetClassifierByName<string>(predictedColumn.classifierName);

        Utils.Log(LoggingSeverity.DEBUG, $"Training {predictedColumn.classifierName} on column '{predictedColumn.columnName}' with {trainingData.Count} records");

        columnClassifier.Train(trainingData);
        trainedByColumn.Add(predictedColumn, columnClassifier);
    }

    AppendRecords(oldRecordSet, newRecords, trainedByColumn);
    return oldRecordSet;
}
/// <summary>
/// Classifies each record in <paramref name="newRecords"/> with every classifier in
/// <paramref name="columnMap"/>, stores the predicted category on the record, and
/// adds the record to <paramref name="oldRecords"/>.
/// </summary>
/// <param name="oldRecords">Record set that receives the new records.</param>
/// <param name="newRecords">Records to classify and append, processed in list order.</param>
/// <param name="columnMap">Classifier to apply for each predicted column.</param>
/// <exception cref="ApplicationException">
/// A new record is equivalent, per <c>UncategorizedCCRecordComparer</c>, to a record
/// that was already in <paramref name="oldRecords"/> when this method was called.
/// Records appended before the offending one remain in the set.
/// </exception>
internal static void AppendRecords(CCRecordSet oldRecords, List<CCRecord> newRecords, Dictionary<C3PredictedColumn, IClassifier<string>> columnMap)
{
    Utils.Log(LoggingSeverity.DEBUG, $"Classifying {newRecords.Count} new records");

    // Snapshot of the records present on entry; records appended below are not
    // added to it, so new records are only checked against pre-existing ones.
    var preexisting = oldRecords.ToHashSet(new UncategorizedCCRecordComparer());

    foreach (var newRecord in newRecords)
    {
        if (preexisting.Contains(newRecord))
        {
            throw new ApplicationException($"Attempt to add record {newRecord} to recordset, but equivalent record already exists.");
        }

        foreach (KeyValuePair<C3PredictedColumn, IClassifier<string>> entry in columnMap)
        {
            C3PredictedColumn column = entry.Key;
            IClassifier<string> classifier = entry.Value;

            var prediction = classifier.Categorize(newRecord.Description);
            newRecord.PredictedValues[column.columnName] = prediction.Category;
        }

        oldRecords.AddRecord(newRecord);
    }
}