Ejemplo n.º 1
0
        public void Create(DatasetConfiguration config, IXDatasetCreatorProgress progress)
        {
            string strCsvFile     = Properties.Settings.Default.CsvFile;
            string strDsName      = config.Name;
            string strTrainingSrc = config.Name + ".training";
            string strTestingSrc  = config.Name + ".testing";

            m_bCancel   = false;
            m_iprogress = progress;
            m_factory.DeleteSources(strTrainingSrc, strTestingSrc);

            Log log = new Log("CSV Dataset Creator");

            log.OnWriteLine += new EventHandler <LogArg>(log_OnWriteLine);

            try
            {
                //-----------------------------------------
                //  Load the schema that defines the layout
                //  of the CSV file.
                //-----------------------------------------

                m_schema = loadSchema(config.Settings);


                //-----------------------------------------
                // Load and parse the CSV file.
                //-----------------------------------------

                DataConfigSetting dsCsvFile = config.Settings.Find("CSV File");

                strCsvFile = dsCsvFile.Value.ToString();
                if (strCsvFile.Length == 0)
                {
                    throw new Exception("CSV data file name not specified!");
                }

                log.WriteLine("Loading the data file...");

                if (m_bCancel)
                {
                    return;
                }

                m_parser.Load(strCsvFile, m_schema);


                //-----------------------------------------
                // Split the data into training and testing
                // sets.
                //-----------------------------------------

                List <DataItem> rgTraining = new List <DataItem>();
                List <DataItem> rgTesting  = new List <DataItem>();

                DataConfigSetting dsPctTesting = config.Settings.Find("Testing Percentage");
                double            dfVal        = (double)dsPctTesting.Value;
                Random            random       = new Random();

                for (int i = 0; i < m_parser.Data.Count; i++)
                {
                    if (random.NextDouble() > dfVal)
                    {
                        rgTraining.Add(m_parser.Data[i]);
                    }
                    else
                    {
                        rgTesting.Add(m_parser.Data[i]);
                    }
                }

                Properties.Settings.Default.TestingPct = dfVal;


                //-----------------------------------------
                // Create the training data source.
                //-----------------------------------------

                int        nCellHorizCount = 0;
                List <int> rgDim           = getImageDim(m_parser, m_schema, out nCellHorizCount);
                int        nTrainSrcId     = m_factory.AddSource(strTrainingSrc, rgDim[0], rgDim[1], rgDim[2], false, 0);
                m_factory.Open(nTrainSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

                log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
                m_factory.DeleteSourceData();

                if (!loadData(log, m_factory, m_parser, rgTraining, rgDim, true, true))
                {
                    return;
                }

                m_factory.UpdateSourceCounts();
                updateLabels(m_factory);

                log.WriteLine("Creating the image mean...");
                SimpleDatum dMean = SimpleDatum.CalculateMean(log, m_rgImages.ToArray(), new WaitHandle[] { new ManualResetEvent(false) });
                m_factory.PutRawImageMean(dMean, true);
                m_rgImages.Clear();

                m_factory.Close();


                //-----------------------------------------
                // Create the testing data source.
                //-----------------------------------------

                int nTestSrcId = m_factory.AddSource(strTestingSrc, rgDim[0], rgDim[1], rgDim[2], false, 0);
                m_factory.Open(nTestSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

                log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
                m_factory.DeleteSourceData();

                if (!loadData(log, m_factory, m_parser, rgTesting, rgDim, false, false))
                {
                    return;
                }

                m_factory.UpdateSourceCounts();
                updateLabels(m_factory);
                m_factory.Close();


                //-----------------------------------------
                // Crate the data set.
                //-----------------------------------------

                log.WriteLine("Done loading training and testing data.");
                int nDatasetID = 0;

                using (DNNEntities entities = EntitiesConnection.CreateEntities())
                {
                    List <Source> rgSrcTraining = entities.Sources.Where(p => p.Name == strTrainingSrc).ToList();
                    List <Source> rgSrcTesting  = entities.Sources.Where(p => p.Name == strTestingSrc).ToList();

                    if (rgSrcTraining.Count == 0)
                    {
                        throw new Exception("Could not find the training source '" + strTrainingSrc + "'.");
                    }

                    if (rgSrcTesting.Count == 0)
                    {
                        throw new Exception("Could not find the tesing source '" + strTestingSrc + "'.");
                    }

                    DataConfigSetting dsName = config.Settings.Find("Output Dataset Name");
                    int    nSrcTestingCount  = rgSrcTesting[0].ImageCount.GetValueOrDefault();
                    int    nSrcTrainingCount = rgSrcTraining[0].ImageCount.GetValueOrDefault();
                    int    nSrcTotalCount    = nSrcTestingCount + nSrcTrainingCount;
                    double dfTestingPct      = (nSrcTrainingCount == 0) ? 0.0 : nSrcTestingCount / (double)nSrcTotalCount;

                    Dataset ds = new Dataset();
                    ds.ImageHeight      = rgSrcTraining[0].ImageHeight;
                    ds.ImageWidth       = rgSrcTraining[0].ImageWidth;
                    ds.Name             = strDsName;
                    ds.ImageEncoded     = rgSrcTesting[0].ImageEncoded;
                    ds.ImageChannels    = rgSrcTesting[0].ImageChannels;
                    ds.TestingPercent   = (decimal)dfTestingPct;
                    ds.TestingSourceID  = rgSrcTesting[0].ID;
                    ds.TestingTotal     = rgSrcTesting[0].ImageCount;
                    ds.TrainingSourceID = rgSrcTraining[0].ID;
                    ds.TrainingTotal    = rgSrcTraining[0].ImageCount;
                    ds.DatasetCreatorID = config.ID;
                    ds.DatasetGroupID   = 0;
                    ds.ModelGroupID     = 0;

                    entities.Datasets.Add(ds);
                    entities.SaveChanges();

                    nDatasetID = ds.ID;
                }

                m_factory.SetDatasetParameter(nDatasetID, "PixelSize", m_schema.CellSize.ToString());
                m_factory.SetDatasetParameter(nDatasetID, "AttributeCount", m_parser.DataDescriptions.Count.ToString());
                m_factory.SetDatasetParameter(nDatasetID, "AttributeCountHoriz", nCellHorizCount.ToString());
                m_factory.SetDatasetParameter(nDatasetID, "AttributeCountVert", nCellHorizCount.ToString());
            }
            catch (Exception excpt)
            {
                log.WriteLine("ERROR: " + excpt.Message);
            }
            finally
            {
                Properties.Settings.Default.CsvFile = strCsvFile;
                Properties.Settings.Default.Save();

                if (m_bCancel)
                {
                    log.WriteLine("ABORTED converting CSV data files.");
                }
                else
                {
                    log.WriteLine("Done converting CSV data files.");
                }

                if (m_bCancel)
                {
                    m_iprogress.OnCompleted(new CreateProgressArgs(1, 1, "ABORTED!", null, true));
                }
                else
                {
                    m_iprogress.OnCompleted(new CreateProgressArgs(1, "COMPLETED."));
                }
            }
        }