Exemplo n.º 1
0
        /// <summary>
        /// Creates a record set bound to <paramref name="config"/> and fills it with every
        /// record parsed from <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Text source handed to the record loader.</param>
        /// <param name="config">Configuration passed to both the record set and the loader.</param>
        /// <returns>The newly created, populated record set.</returns>
        private static CCRecordSet FromReader(TextReader reader, C3Configuration config)
        {
            var result = new CCRecordSet(config);
            IEnumerable<CCRecord> parsed = result.LoadCCRecordsFromTextReader(reader, config);

            // ToList() enumerates the whole sequence here, before control returns to the caller.
            result.records = parsed.ToList();
            return result;
        }
Exemplo n.º 2
0
 /// <summary>
 /// Builds a record set from <paramref name="stream"/> by wrapping it in a
 /// <see cref="StreamReader"/> and delegating to <c>FromReader</c>.
 /// </summary>
 /// <param name="stream">Stream to read from; the wrapping reader is disposed before returning.</param>
 /// <param name="config">Configuration forwarded to <c>FromReader</c>.</param>
 /// <returns>The record set produced by <c>FromReader</c>.</returns>
 public static CCRecordSet FromStream(Stream stream, C3Configuration config)
 {
     CCRecordSet loaded;

     using (var textReader = new StreamReader(stream))
     {
         loaded = FromReader(textReader, config);
     }

     return loaded;
 }
Exemplo n.º 3
0
 /// <summary>
 /// Opens <paramref name="filename"/> for reading and builds a record set from its contents
 /// via <c>FromStream</c>.
 /// </summary>
 /// <param name="filename">Path of the file to open; must be neither null nor empty.</param>
 /// <param name="config">Configuration forwarded to <c>FromStream</c>.</param>
 /// <returns>The record set produced by <c>FromStream</c>.</returns>
 /// <exception cref="InvalidOperationException">
 /// <paramref name="filename"/> is null or empty.
 /// </exception>
 public static CCRecordSet FromFile(string filename, C3Configuration config)
 {
     if (string.IsNullOrEmpty(filename))
     {
         throw new InvalidOperationException($"{nameof(filename)} must be specified.");
     }

     CCRecordSet loaded;

     // Existing file only, read-only access.
     using (var input = new FileStream(filename, FileMode.Open, FileAccess.Read))
     {
         loaded = FromStream(input, config);
     }

     return loaded;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Converts every row of <paramref name="dt"/> into a <c>CCRecord</c> and wraps the
        /// results in a new record set.
        /// </summary>
        /// <param name="dt">Table whose rows are converted, in row order.</param>
        /// <param name="config">Configuration passed to each record and to the record set.</param>
        /// <returns>A record set holding one record per table row.</returns>
        /// <exception cref="InvalidOperationException"><paramref name="dt"/> is null.</exception>
        public static CCRecordSet FromDataTable(DataTable dt, C3Configuration config)
        {
            if (dt == null)
            {
                throw new InvalidOperationException($"{nameof(dt)} may not be null.");
            }

            // DataRowCollection is a non-generic collection, so Cast<DataRow>() is needed
            // before the rows can be projected.
            List<CCRecord> converted = dt.Rows
                                         .Cast<DataRow>()
                                         .Select(row => new CCRecord(row, config))
                                         .ToList();

            return new CCRecordSet(converted, config);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Trains one classifier per configured column on the records of
        /// <paramref name="oldRecordSet"/>, then hands the new records and the trained
        /// classifiers to <c>AppendRecords</c>.
        /// </summary>
        /// <param name="oldRecordSet">Existing records; used as training data and as the append target.</param>
        /// <param name="newRecords">Records passed to <c>AppendRecords</c>.</param>
        /// <param name="reader">Not used by this overload.</param>
        /// <param name="config">Supplies the columns and the classifier name for each.</param>
        /// <returns>The same <paramref name="oldRecordSet"/> instance that was passed in.</returns>
        internal static CCRecordSet ClassifyAndUpdate(CCRecordSet oldRecordSet, List<CCRecord> newRecords,
                                                      IRecordReader reader, C3Configuration config)
        {
            var trainedByColumn = new Dictionary<C3PredictedColumn, IClassifier<string>>();

            foreach (C3PredictedColumn column in config.columns)
            {
                // Training samples: each record's description paired with its value for this column.
                var samples =
                    (from record in oldRecordSet
                     select new KeyValuePair<string, string>(
                         record.Description,
                         record.PredictedValues[column.columnName]))
                    .ToList();

                var model = ClassifierFactory.GetClassifierByName<string>(column.classifierName);

                Utils.Log(LoggingSeverity.DEBUG,
                          $"Training {column.classifierName} on column '{column.columnName}' with {samples.Count} records");

                model.Train(samples);
                trainedByColumn.Add(column, model);
            }

            AppendRecords(oldRecordSet, newRecords, trainedByColumn);
            return oldRecordSet;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds a record from a data row: the transaction time, description and amount come
        /// from the columns named in <c>Consts</c>, and one string value is read for every
        /// column listed in <paramref name="config"/>.
        /// </summary>
        /// <param name="dataRow">Row to read the field values from.</param>
        /// <param name="config">Configuration whose columns name the predicted-value fields.</param>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="dataRow"/> or <paramref name="config"/> is null.
        /// </exception>
        public CCRecord(DataRow dataRow, C3Configuration config)
        {
            if (dataRow == null)
            {
                throw new InvalidOperationException($"{nameof(dataRow)} must be provided.");
            }

            if (config == null)
            {
                throw new InvalidOperationException($"{nameof(config)} must be provided.");
            }

            TransactionTime = dataRow.Field<DateTime>(Consts.TRANSACTIONTIME);
            Description = dataRow.Field<string>(Consts.DESCRIPTION);
            Amount = dataRow.Field<decimal>(Consts.AMOUNT);

            var valuesByColumn = new Dictionary<string, string>();

            foreach (var predicted in config.columns)
            {
                var name = predicted.columnName;

                // Indexer assignment: a repeated column name overwrites the earlier entry.
                valuesByColumn[name] = dataRow.Field<string>(name);
            }

            PredictedValues = valuesByColumn;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Lazily reads CSV rows from <paramref name="reader"/>, yielding one <c>CCRecord</c>
        /// per row with its predicted values taken from the configured columns.
        /// </summary>
        /// <remarks>
        /// This is an iterator: nothing runs, including the log call, until the returned
        /// sequence is enumerated.
        /// </remarks>
        /// <param name="reader">Text source wrapped in a <c>CsvReader</c>.</param>
        /// <param name="config">Supplies the column names and each column's valid values.</param>
        /// <returns>A deferred sequence of the parsed records.</returns>
        /// <exception cref="InvalidDataException">
        /// A row holds a value that is not in the matching column's <c>validValues</c>.
        /// </exception>
        private IEnumerable<CCRecord> LoadCCRecordsFromTextReader(TextReader reader, C3Configuration config)
        {
            Utils.Log(LoggingSeverity.DEBUG, "Deserializing existing records");
            var csv = new CsvReader(reader);

            while (csv.Read())
            {
                var record = csv.GetRecord<CCRecord>();

                record.PredictedValues = config.columns.ToDictionary(
                    keyColumn => keyColumn.columnName,
                    valueColumn => csv[valueColumn.columnName]);

                // Reject the row if any configured column holds a value outside its allowed set.
                foreach (var checkedColumn in config.columns)
                {
                    var cell = csv[checkedColumn.columnName];

                    if (checkedColumn.validValues.Contains(cell))
                    {
                        continue;
                    }

                    throw new InvalidDataException(
                              $"Specified input ({cell}) is not a valid value for column {checkedColumn.columnName}.");
                }

                yield return record;
            }
        }
Exemplo n.º 8
0
 /// <summary>
 /// Creates a record set for <paramref name="config"/> that holds the given records.
 /// The configuration-only constructor runs first.
 /// </summary>
 /// <param name="records">List stored as this set's records; not copied.</param>
 /// <param name="config">Configuration forwarded to the chained constructor.</param>
 /// <exception cref="InvalidOperationException"><paramref name="records"/> is null.</exception>
 internal CCRecordSet(List<CCRecord> records, C3Configuration config) : this(config)
 {
     if (records is null)
     {
         throw new InvalidOperationException($"{nameof(records)} may not be null.");
     }

     this.records = records;
 }
Exemplo n.º 9
0
 /// <summary>
 /// Creates a record set bound to <paramref name="config"/> and derives the predicted
 /// header map from its columns, mapping every column name to <c>typeof(string)</c>.
 /// </summary>
 /// <param name="config">Configuration to store; supplies the column list.</param>
 /// <exception cref="InvalidOperationException"><paramref name="config"/> is null.</exception>
 internal CCRecordSet(C3Configuration config)
 {
     if (config is null)
     {
         throw new InvalidOperationException($"{nameof(config)} may not be null.");
     }

     this.config = config;
     this.predictedHeaders = this.config.columns.ToDictionary(
         column => column.columnName,
         column => typeof(string));
 }
Exemplo n.º 10
0
        /// <summary>
        /// Stream-based overload: loads the existing record set from
        /// <paramref name="oldFileStream"/>, reads the new records from
        /// <paramref name="newFileStream"/> with <paramref name="reader"/>, and forwards both
        /// to the record-set overload.
        /// </summary>
        /// <param name="oldFileStream">Stream holding the existing records.</param>
        /// <param name="newFileStream">Stream holding the records to be read by <paramref name="reader"/>.</param>
        /// <param name="reader">Reader used to parse <paramref name="newFileStream"/>.</param>
        /// <param name="config">Configuration passed to every step.</param>
        /// <returns>The record set returned by the record-set overload.</returns>
        internal static CCRecordSet ClassifyAndUpdate(Stream oldFileStream, Stream newFileStream, IRecordReader reader, C3Configuration config)
        {
            // The existing records are loaded before the new stream is read.
            CCRecordSet existingRecords = CCRecordSet.FromStream(oldFileStream, config);
            List<CCRecord> incomingRecords = reader.ReadFromStream(newFileStream, config);

            return ClassifyAndUpdate(existingRecords, incomingRecords, reader, config);
        }
Exemplo n.º 11
0
        /// <summary>
        /// File-based entry point: opens the old and new files read-only, runs the stream
        /// overload to classify the new records and append them to the old ones, then
        /// serializes the combined set back to <paramref name="oldFileName"/>.
        /// </summary>
        /// <param name="oldFileName">Path of the existing records file; also the output path.</param>
        /// <param name="newFileName">Path of the file holding the new records.</param>
        /// <param name="reader">Reader used to parse the new file.</param>
        /// <param name="config">Configuration passed to the stream overload.</param>
        public static void ClassifyAndUpdate(string oldFileName, string newFileName, IRecordReader reader, C3Configuration config)
        {
            CCRecordSet merged;

            using (var existingFile = new FileStream(oldFileName, FileMode.Open, FileAccess.Read))
            using (var incomingFile = new FileStream(newFileName, FileMode.Open, FileAccess.Read))
            {
                merged = ClassifyAndUpdate(existingFile, incomingFile, reader, config);
            }

            // Both input streams are disposed at this point, so the old file is no longer
            // held open when it is written.
            merged.SerializeToFile(oldFileName);
        }