/// <summary>
/// Builds a record set from the records readable through <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Text source handed to <c>LoadCCRecordsFromTextReader</c>.</param>
/// <param name="config">Configuration passed to the record set constructor and to the loader.</param>
/// <returns>A new record set whose record list has been fully materialized.</returns>
private static CCRecordSet FromReader(TextReader reader, C3Configuration config)
{
    CCRecordSet result = new CCRecordSet(config);

    // The loader yields records lazily; ToList() forces the whole read to happen here.
    IEnumerable<CCRecord> loaded = result.LoadCCRecordsFromTextReader(reader, config);
    result.records = loaded.ToList();

    return result;
}
/// <summary>
/// Builds a record set from the text content of <paramref name="stream"/>.
/// </summary>
/// <param name="stream">Stream to read; it is wrapped in a <see cref="StreamReader"/>.</param>
/// <param name="config">Configuration forwarded to <c>FromReader</c>.</param>
/// <returns>The record set produced by <c>FromReader</c>.</returns>
/// <remarks>
/// The reader is disposed before this method returns. A <see cref="StreamReader"/> created with
/// this constructor closes the underlying stream when it is disposed, so
/// <paramref name="stream"/> is not usable afterwards.
/// </remarks>
public static CCRecordSet FromStream(Stream stream, C3Configuration config)
{
    using (StreamReader textReader = new StreamReader(stream))
    {
        CCRecordSet loaded = FromReader(textReader, config);
        return loaded;
    }
}
/// <summary>
/// Builds a record set from the file at <paramref name="filename"/>.
/// </summary>
/// <param name="filename">Path of an existing file; opened for reading only.</param>
/// <param name="config">Configuration forwarded to <c>FromStream</c>.</param>
/// <returns>The record set produced by <c>FromStream</c>.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="filename"/> is null or empty.
/// </exception>
public static CCRecordSet FromFile(string filename, C3Configuration config)
{
    bool nameMissing = string.IsNullOrEmpty(filename);
    if (nameMissing)
    {
        throw new InvalidOperationException($"{nameof(filename)} must be specified.");
    }

    using (var input = new FileStream(filename, FileMode.Open, FileAccess.Read))
    {
        return FromStream(input, config);
    }
}
/// <summary>
/// Builds a record set containing one <c>CCRecord</c> per row of <paramref name="dt"/>.
/// </summary>
/// <param name="dt">Table whose rows are converted, in order, via the <c>CCRecord(DataRow, C3Configuration)</c> constructor.</param>
/// <param name="config">Configuration passed to each record and to the record set.</param>
/// <returns>A new record set wrapping the converted rows.</returns>
/// <exception cref="InvalidOperationException"><paramref name="dt"/> is null.</exception>
public static CCRecordSet FromDataTable(DataTable dt, C3Configuration config)
{
    if (dt == null)
    {
        throw new InvalidOperationException($"{nameof(dt)} may not be null.");
    }

    // DataRowCollection is a non-generic collection, hence the Cast before projecting.
    List<CCRecord> converted = dt.Rows
        .Cast<DataRow>()
        .Select(row => new CCRecord(row, config))
        .ToList();

    return new CCRecordSet(converted, config);
}
/// <summary>
/// Trains one classifier per configured column on the existing records, then hands the new
/// records and the trained classifiers to <c>AppendRecords</c>.
/// </summary>
/// <param name="oldRecordSet">Existing records used as training data; also the set that is passed to <c>AppendRecords</c> and returned.</param>
/// <param name="newRecords">Records passed to <c>AppendRecords</c> together with the classifiers.</param>
/// <param name="reader">Not used by this overload.</param>
/// <param name="config">Supplies the columns to train for, each with a column name and a classifier name.</param>
/// <returns>The same <paramref name="oldRecordSet"/> instance that was passed in.</returns>
internal static CCRecordSet ClassifyAndUpdate(CCRecordSet oldRecordSet, List<CCRecord> newRecords, IRecordReader reader, C3Configuration config)
{
    var trainedByColumn = new Dictionary<C3PredictedColumn, IClassifier<string>>();

    foreach (C3PredictedColumn column in config.columns)
    {
        // Each sample pairs a record's description with its current value for this column.
        var samples = oldRecordSet
            .Select(existing => new KeyValuePair<string, string>(
                existing.Description,
                existing.PredictedValues[column.columnName]))
            .ToList();

        var model = ClassifierFactory.GetClassifierByName<string>(column.classifierName);

        Utils.Log(
            LoggingSeverity.DEBUG,
            $"Training {column.classifierName} on column '{column.columnName}' with {samples.Count} records");

        model.Train(samples);
        trainedByColumn.Add(column, model);
    }

    AppendRecords(oldRecordSet, newRecords, trainedByColumn);

    return oldRecordSet;
}
/// <summary>
/// Creates a record from a data row: the transaction time, description and amount are read
/// from their fixed columns, and one string value is read for every column in
/// <paramref name="config"/>.
/// </summary>
/// <param name="dataRow">Row supplying the field values.</param>
/// <param name="config">Supplies the names of the predicted columns to read from the row.</param>
/// <exception cref="InvalidOperationException">
/// <paramref name="dataRow"/> or <paramref name="config"/> is null.
/// </exception>
/// <remarks>
/// Predicted values are copied as-is; this constructor does not compare them with the
/// columns' valid values.
/// </remarks>
public CCRecord(DataRow dataRow, C3Configuration config)
{
    if (dataRow == null)
    {
        throw new InvalidOperationException($"{nameof(dataRow)} must be provided.");
    }

    if (config == null)
    {
        throw new InvalidOperationException($"{nameof(config)} must be provided.");
    }

    TransactionTime = dataRow.Field<DateTime>(Consts.TRANSACTIONTIME);
    Description = dataRow.Field<string>(Consts.DESCRIPTION);
    Amount = dataRow.Field<decimal>(Consts.AMOUNT);

    // Indexer assignment (not Add) so a repeated column name overwrites instead of throwing.
    Dictionary<string, string> valuesByColumn = new Dictionary<string, string>();
    foreach (var predicted in config.columns)
    {
        valuesByColumn[predicted.columnName] = dataRow.Field<string>(predicted.columnName);
    }

    PredictedValues = valuesByColumn;
}
/// <summary>
/// Lazily reads CSV records from <paramref name="reader"/>, attaches the predicted-column
/// values to each one and validates them against the configuration.
/// </summary>
/// <param name="reader">Text source wrapped in a <c>CsvReader</c>.</param>
/// <param name="config">Supplies the predicted columns and their valid values.</param>
/// <returns>
/// A deferred sequence; nothing is logged or read until the sequence is enumerated.
/// </returns>
/// <exception cref="InvalidDataException">
/// A predicted column holds a value that is not among that column's valid values.
/// </exception>
private IEnumerable<CCRecord> LoadCCRecordsFromTextReader(TextReader reader, C3Configuration config)
{
    Utils.Log(LoggingSeverity.DEBUG, "Deserializing existing records");

    var csvReader = new CsvReader(reader);
    while (csvReader.Read())
    {
        CCRecord parsed = csvReader.GetRecord<CCRecord>();

        // Predicted values are read straight from the current CSV row by column name.
        parsed.PredictedValues = config.columns.ToDictionary(
            keyColumn => keyColumn.columnName,
            valueColumn => csvReader[valueColumn.columnName]);

        foreach (var column in config.columns)
        {
            string cell = csvReader[column.columnName];
            if (column.validValues.Contains(cell))
            {
                continue;
            }

            throw new InvalidDataException(
                $"Specified input ({cell}) is not a valid value for column {column.columnName}.");
        }

        yield return parsed;
    }
}
/// <summary>
/// Creates a record set over an existing list of records.
/// </summary>
/// <param name="records">List stored by reference as the set's records.</param>
/// <param name="config">Configuration forwarded to the single-argument constructor, which runs first.</param>
/// <exception cref="InvalidOperationException">
/// <paramref name="records"/> is null (or <paramref name="config"/> is null, raised by the chained constructor).
/// </exception>
internal CCRecordSet(List<CCRecord> records, C3Configuration config)
    : this(config)
{
    if (records is null)
    {
        throw new InvalidOperationException($"{nameof(records)} may not be null.");
    }

    this.records = records;
}
/// <summary>
/// Creates a record set bound to <paramref name="config"/> and prepares the predicted-header
/// map with one entry per configured column.
/// </summary>
/// <param name="config">Configuration stored on the instance; its columns define the predicted headers.</param>
/// <exception cref="InvalidOperationException"><paramref name="config"/> is null.</exception>
internal CCRecordSet(C3Configuration config)
{
    if (config is null)
    {
        throw new InvalidOperationException($"{nameof(config)} may not be null.");
    }

    this.config = config;

    // Every predicted column is typed as string.
    this.predictedHeaders = this.config.columns.ToDictionary(
        column => column.columnName,
        unused => typeof(string));
}
/// <summary>
/// Stream-based overload: loads the existing records from <paramref name="oldFileStream"/>,
/// reads the new records from <paramref name="newFileStream"/> through
/// <paramref name="reader"/>, then delegates to the record-set overload.
/// </summary>
/// <param name="oldFileStream">Stream passed to <c>CCRecordSet.FromStream</c>.</param>
/// <param name="newFileStream">Stream passed to <c>reader.ReadFromStream</c>.</param>
/// <param name="reader">Reader used to parse the new records.</param>
/// <param name="config">Configuration forwarded to every call.</param>
/// <returns>The record set returned by the record-set overload.</returns>
internal static CCRecordSet ClassifyAndUpdate(Stream oldFileStream, Stream newFileStream, IRecordReader reader, C3Configuration config)
{
    // The existing records are loaded first, then the new ones are parsed.
    var existing = CCRecordSet.FromStream(oldFileStream, config);
    List<CCRecord> incoming = reader.ReadFromStream(newFileStream, config);

    return ClassifyAndUpdate(existing, incoming, reader, config);
}
/// <summary>
/// Deserializes the old records from disk and reads the new records from file, predicts the
/// category of the new records, appends them to the old records and writes the whole set back
/// to disk.
/// </summary>
/// <param name="oldFileName">Path of the existing records file; read first, then passed to <c>SerializeToFile</c>.</param>
/// <param name="newFileName">Path of the file holding the new records.</param>
/// <param name="reader">Reader used to parse the new records.</param>
/// <param name="config">Configuration forwarded to the stream-based overload.</param>
public static void ClassifyAndUpdate(string oldFileName, string newFileName, IRecordReader reader, C3Configuration config)
{
    CCRecordSet merged;

    using (var existingInput = new FileStream(oldFileName, FileMode.Open, FileAccess.Read))
    using (var incomingInput = new FileStream(newFileName, FileMode.Open, FileAccess.Read))
    {
        merged = ClassifyAndUpdate(existingInput, incomingInput, reader, config);
    }

    // Both input streams are closed by now, so the old file can be rewritten.
    merged.SerializeToFile(oldFileName);
}