/// <summary>
/// Creates a dataset from a CSV file: loads the schema and CSV data, randomly splits
/// the rows into a training and a testing source, stores both sources through the
/// factory, and finally adds the dataset record and its parameters to the database.
/// </summary>
/// <remarks>
/// Exceptions are not propagated to the caller; they are written to the log and the
/// outcome is reported through <paramref name="progress"/>'s OnCompleted callback.
/// The CSV file name and testing percentage are persisted to the application settings.
/// </remarks>
/// <param name="config">Configuration supplying the dataset name, its ID and the
/// "CSV File" and "Testing Percentage" settings.</param>
/// <param name="progress">Progress sink notified when the creation ends.</param>
public void Create(DatasetConfiguration config, IXDatasetCreatorProgress progress)
{
    string strCsvFile = Properties.Settings.Default.CsvFile;
    string strDsName = config.Name;
    string strTrainingSrc = config.Name + ".training";
    string strTestingSrc = config.Name + ".testing";
    Exception error = null;

    m_bCancel = false;
    m_iprogress = progress;
    m_factory.DeleteSources(strTrainingSrc, strTestingSrc);

    Log log = new Log("CSV Dataset Creator");
    log.OnWriteLine += new EventHandler<LogArg>(log_OnWriteLine);

    try
    {
        //-----------------------------------------
        //  Load the schema that defines the layout
        //  of the CSV file.
        //-----------------------------------------

        m_schema = loadSchema(config.Settings);

        //-----------------------------------------
        //  Load and parse the CSV file.
        //-----------------------------------------

        DataConfigSetting dsCsvFile = config.Settings.Find("CSV File");
        if (dsCsvFile == null || dsCsvFile.Value == null)
        {
            throw new Exception("CSV data file name not specified!");
        }

        strCsvFile = dsCsvFile.Value.ToString();
        if (strCsvFile.Length == 0)
        {
            throw new Exception("CSV data file name not specified!");
        }

        log.WriteLine("Loading the data file...");

        if (m_bCancel)
        {
            return;
        }

        m_parser.Load(strCsvFile, m_schema);

        //-----------------------------------------
        //  Split the data into training and testing
        //  sets.
        //-----------------------------------------

        List<DataItem> rgTraining = new List<DataItem>();
        List<DataItem> rgTesting = new List<DataItem>();

        DataConfigSetting dsPctTesting = config.Settings.Find("Testing Percentage");
        if (dsPctTesting == null || dsPctTesting.Value == null)
        {
            throw new Exception("Testing percentage not specified!");
        }

        double dfVal = (double)dsPctTesting.Value;
        Random random = new Random();

        // Each row lands in the testing set with probability dfVal (a fraction in [0,1]).
        for (int i = 0; i < m_parser.Data.Count; i++)
        {
            if (random.NextDouble() > dfVal)
            {
                rgTraining.Add(m_parser.Data[i]);
            }
            else
            {
                rgTesting.Add(m_parser.Data[i]);
            }
        }

        Properties.Settings.Default.TestingPct = dfVal;

        //-----------------------------------------
        //  Create the training data source.
        //-----------------------------------------

        int nCellHorizCount = 0;
        List<int> rgDim = getImageDim(m_parser, m_schema, out nCellHorizCount);

        int nTrainSrcId = m_factory.AddSource(strTrainingSrc, rgDim[0], rgDim[1], rgDim[2], false, 0);
        m_factory.Open(nTrainSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        if (!loadData(log, m_factory, m_parser, rgTraining, rgDim, true, true))
        {
            return;
        }

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);

        log.WriteLine("Creating the image mean...");

        // The event is never signaled here; it is only owned so that it can be
        // disposed once the mean has been calculated.
        // NOTE(review): m_rgImages is presumably filled by the loadData call above - confirm.
        using (ManualResetEvent evtCancel = new ManualResetEvent(false))
        {
            SimpleDatum dMean = SimpleDatum.CalculateMean(log, m_rgImages.ToArray(), new WaitHandle[] { evtCancel });
            m_factory.PutRawImageMean(dMean, true);
        }

        m_rgImages.Clear();
        m_factory.Close();

        //-----------------------------------------
        //  Create the testing data source.
        //-----------------------------------------

        int nTestSrcId = m_factory.AddSource(strTestingSrc, rgDim[0], rgDim[1], rgDim[2], false, 0);
        m_factory.Open(nTestSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        if (!loadData(log, m_factory, m_parser, rgTesting, rgDim, false, false))
        {
            return;
        }

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);
        m_factory.Close();

        //-----------------------------------------
        //  Create the data set.
        //-----------------------------------------

        log.WriteLine("Done loading training and testing data.");

        int nDatasetID = 0;

        using (DNNEntities entities = EntitiesConnection.CreateEntities())
        {
            List<Source> rgSrcTraining = entities.Sources.Where(p => p.Name == strTrainingSrc).ToList();
            List<Source> rgSrcTesting = entities.Sources.Where(p => p.Name == strTestingSrc).ToList();

            if (rgSrcTraining.Count == 0)
            {
                throw new Exception("Could not find the training source '" + strTrainingSrc + "'.");
            }

            if (rgSrcTesting.Count == 0)
            {
                throw new Exception("Could not find the testing source '" + strTestingSrc + "'.");
            }

            // NOTE(review): the "Output Dataset Name" setting was previously looked up here
            // but never used; the dataset is named after config.Name - confirm this is intended.

            int nSrcTestingCount = rgSrcTesting[0].ImageCount.GetValueOrDefault();
            int nSrcTrainingCount = rgSrcTraining[0].ImageCount.GetValueOrDefault();
            int nSrcTotalCount = nSrcTestingCount + nSrcTrainingCount;

            // Guard on the total so that an all-testing split yields 1.0 rather than 0.0.
            double dfTestingPct = (nSrcTotalCount == 0) ? 0.0 : nSrcTestingCount / (double)nSrcTotalCount;

            Dataset ds = new Dataset();
            ds.ImageHeight = rgSrcTraining[0].ImageHeight;
            ds.ImageWidth = rgSrcTraining[0].ImageWidth;
            ds.Name = strDsName;
            ds.ImageEncoded = rgSrcTesting[0].ImageEncoded;
            ds.ImageChannels = rgSrcTesting[0].ImageChannels;
            ds.TestingPercent = (decimal)dfTestingPct;
            ds.TestingSourceID = rgSrcTesting[0].ID;
            ds.TestingTotal = rgSrcTesting[0].ImageCount;
            ds.TrainingSourceID = rgSrcTraining[0].ID;
            ds.TrainingTotal = rgSrcTraining[0].ImageCount;
            ds.DatasetCreatorID = config.ID;
            ds.DatasetGroupID = 0;
            ds.ModelGroupID = 0;

            entities.Datasets.Add(ds);
            entities.SaveChanges();

            nDatasetID = ds.ID;
        }

        m_factory.SetDatasetParameter(nDatasetID, "PixelSize", m_schema.CellSize.ToString());
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCount", m_parser.DataDescriptions.Count.ToString());
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCountHoriz", nCellHorizCount.ToString());
        // NOTE(review): the vertical count is written with the horizontal value; this is
        // only correct if the cell grid is square - confirm against getImageDim.
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCountVert", nCellHorizCount.ToString());
    }
    catch (Exception excpt)
    {
        // Remember the failure so that the completion report below does not claim success.
        error = excpt;
        log.WriteLine("ERROR: " + excpt.Message);
    }
    finally
    {
        Properties.Settings.Default.CsvFile = strCsvFile;
        Properties.Settings.Default.Save();

        if (m_bCancel)
        {
            log.WriteLine("ABORTED converting CSV data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(1, 1, "ABORTED!", null, true));
        }
        else if (error != null)
        {
            log.WriteLine("FAILED converting CSV data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(1, "ERROR: " + error.Message));
        }
        else
        {
            log.WriteLine("Done converting CSV data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(1, "COMPLETED."));
        }
    }
}