/// <summary>
/// Returns the image mean for the ImageSet, loading it from the database when
/// available or calculating (and persisting) it when missing.
/// </summary>
/// <param name="log">Specifies the Log used to output status (may be null).</param>
/// <param name="rgAbort">Specifies a set of wait handles for aborting the operation.</param>
/// <param name="bQueryOnly">Specifies whether or not to only query for the mean and not calculate if missing.</param>
/// <returns>The SimpleDatum with the image mean is returned, or null when it cannot be produced.</returns>
public SimpleDatum GetImageMean(Log log, WaitHandle[] rgAbort, bool bQueryOnly)
{
    // Return the cached mean, or nothing when only querying.
    if (m_imgMean != null || bQueryOnly)
    {
        return m_imgMean;
    }

    int nLoadedCount = GetLoadedCount();
    int nTotalCount = GetTotalCount();

    // The mean is only meaningful once every image is in memory.
    if (nLoadedCount < nTotalCount)
    {
        double dfPct = (double)nLoadedCount / (double)nTotalCount;

        if (log != null)
        {
            log.WriteLine("WARNING: Cannot create the image mean until all images have loaded - the data is currently " + dfPct.ToString("P") + " loaded.");
        }

        return null;
    }

    // Allow the host application to calculate the mean itself.
    if (OnCalculateImageMean != null)
    {
        CalculateImageMeanArgs args = new CalculateImageMeanArgs(m_rgImages);
        OnCalculateImageMean(this, args);

        if (args.Cancelled)
        {
            return null;
        }

        m_imgMean = args.ImageMean;
        return m_imgMean;
    }

    RawImageMean imgMean = m_factory.GetRawImageMean();

    // BUGFIX: this previously tested 'm_imgMean' (always null at this point), so a
    // mean already stored in the database was never loaded and was always recalculated.
    if (imgMean != null)
    {
        m_imgMean = m_factory.LoadDatum(imgMean);
    }
    else
    {
        // BUGFIX: guard against a null log, consistent with the warning branch above.
        if (log != null)
        {
            log.WriteLine("Calculating mean...");
        }

        m_imgMean = SimpleDatum.CalculateMean(log, m_rgImages, rgAbort);
        m_factory.PutRawImageMean(m_imgMean, true);
    }

    m_imgMean.SetLabel(0);

    return m_imgMean;
}
/// <summary>
/// Returns the image mean for the ImageSet, calculating it from the in-memory
/// images when not already cached.
/// </summary>
/// <param name="log">Specifies the Log used to output status (may be null).</param>
/// <param name="rgAbort">Specifies a set of wait handles for aborting the operation.</param>
/// <returns>The SimpleDatum with the image mean is returned, or null when there are no images or the calculation is cancelled.</returns>
/// <exception cref="InvalidOperationException">Thrown when the load method is not LOAD_ALL or a load limit is set.</exception>
public SimpleDatum GetImageMean(Log log, WaitHandle[] rgAbort)
{
    // Return the cached mean when available.
    if (m_imgMean != null)
    {
        return m_imgMean;
    }

    if (m_rgImages.Length == 0)
    {
        if (log != null)
        {
            log.WriteLine("WARNING: Cannot create image mean with no images!");
        }

        return null;
    }

    // FIX: throw the specific InvalidOperationException (CA2201) rather than the base
    // Exception; it derives from Exception so existing catch blocks still work.
    if (m_loadMethod != IMAGEDB_LOAD_METHOD.LOAD_ALL)
    {
        throw new InvalidOperationException("Can only create image mean when using LOAD_ALL.");
    }

    if (m_nLoadLimit != 0)
    {
        throw new InvalidOperationException("Can only create image mean when LoadLimit = 0.");
    }

    // Allow the host application to calculate the mean itself.
    if (OnCalculateImageMean != null)
    {
        CalculateImageMeanArgs args = new CalculateImageMeanArgs(m_rgImages);
        OnCalculateImageMean(this, args);

        if (args.Cancelled)
        {
            return null;
        }

        m_imgMean = args.ImageMean;
        return m_imgMean;
    }

    m_imgMean = SimpleDatum.CalculateMean(log, m_rgImages, rgAbort);
    m_imgMean.SetLabel(0);

    return m_imgMean;
}
/// <summary>
/// Loads the five CIFAR-10 training batch files and the testing batch file into the
/// database, saving the image mean for each source, then registers the dataset.
/// </summary>
public void LoadDatabase()
{
    int nIdx = 0;
    int nTotal = 50000;

    reportProgress(nIdx, 0, "Loading database...");

    // BUGFIX: the log was named "MNIST" (copy-paste from the MNIST loader) even though
    // this method loads the CIFAR-10 dataset.
    Log log = new Log("CIFAR-10");
    log.OnWriteLine += Log_OnWriteLine;

    DatasetFactory factory = new DatasetFactory();

    // Load all five training batches into the training source.
    loadFile(m_param.DataBatchFile1, "CIFAR-10.training", nTotal, ref nIdx);
    loadFile(m_param.DataBatchFile2, "CIFAR-10.training", nTotal, ref nIdx);
    loadFile(m_param.DataBatchFile3, "CIFAR-10.training", nTotal, ref nIdx);
    loadFile(m_param.DataBatchFile4, "CIFAR-10.training", nTotal, ref nIdx);
    loadFile(m_param.DataBatchFile5, "CIFAR-10.training", nTotal, ref nIdx);

    SourceDescriptor srcTrain = factory.LoadSource("CIFAR-10.training");
    // NOTE(review): 'factory' is used to load the source but 'm_factory' saves the mean —
    // presumably both target the same database; confirm.
    m_factory.SaveImageMean(SimpleDatum.CalculateMean(log, m_rgImg.ToArray(), new WaitHandle[] { new ManualResetEvent(false) }), true, srcTrain.ID);

    // Reset the image collection and counters for the testing source.
    m_rgImg = new List<SimpleDatum>();
    nIdx = 0;
    nTotal = 10000;

    loadFile(m_param.TestBatchFile, "CIFAR-10.testing", nTotal, ref nIdx);

    SourceDescriptor srcTest = factory.LoadSource("CIFAR-10.testing");
    m_factory.SaveImageMean(SimpleDatum.CalculateMean(log, m_rgImg.ToArray(), new WaitHandle[] { new ManualResetEvent(false) }), true, srcTest.ID);

    // Register the dataset combining the training and testing sources.
    DatasetDescriptor ds = new DatasetDescriptor(0, "CIFAR-10", null, null, srcTrain, srcTest, "CIFAR-10", "CiFar-10 Dataset");
    factory.AddDataset(ds);
    factory.UpdateDatasetCounts(ds.ID);

    if (OnCompleted != null)
    {
        OnCompleted(this, new EventArgs());
    }
}
/// <summary>
/// Loads one MNIST-format image/label file pair into the database source
/// 'strSourceName', caching each image locally and saving the image mean.
/// </summary>
/// <param name="strImagesFile">Specifies the MNIST image data file.</param>
/// <param name="strLabelsFile">Specifies the MNIST label data file.</param>
/// <param name="strSourceName">Specifies the name of the data source to create and fill.</param>
private void loadFile(string strImagesFile, string strLabelsFile, string strSourceName)
{
    Stopwatch sw = new Stopwatch();

    reportProgress(0, 0, " Source: " + strSourceName);
    reportProgress(0, 0, " loading " + strImagesFile + "...");

    BinaryFile image_file = new app.BinaryFile(strImagesFile);
    BinaryFile label_file = new app.BinaryFile(strLabelsFile);

    Log log = new Log("MNIST");
    log.OnWriteLine += Log_OnWriteLine;

    try
    {
        // Verify the files (2051/2049 are the MNIST image/label magic numbers).
        uint magicImg = image_file.ReadUInt32();
        uint magicLbl = label_file.ReadUInt32();

        if (magicImg != 2051)
        {
            throw new Exception("Incorrect image file magic.");
        }

        if (magicLbl != 2049)
        {
            throw new Exception("Incorrect label file magic.");
        }

        uint num_items = image_file.ReadUInt32();
        uint num_labels = label_file.ReadUInt32();

        if (num_items != num_labels)
        {
            throw new Exception("The number of items must be equal to the number of labels!");
        }

        // Add the data source to the database.
        uint rows = image_file.ReadUInt32();
        uint cols = image_file.ReadUInt32();
        int nChannels = 1; // black and white

        int nSrcId = m_factory.AddSource(strSourceName, nChannels, (int)cols, (int)rows, false, 0, true);

        m_factory.Open(nSrcId);
        m_factory.DeleteSourceData();

        // Storing to database;
        byte[] rgLabel;
        byte[] rgPixels;

        // One Datum is reused for every item; SetData overwrites it each iteration.
        Datum datum = new Datum(false, nChannels, (int)cols, (int)rows, -1, DateTime.MinValue, null, null, 0, false, -1);

        reportProgress(0, (int)num_items, " loading a total of " + num_items.ToString() + " items.");
        reportProgress(0, (int)num_items, " (with rows: " + rows.ToString() + ", cols: " + cols.ToString() + ")");

        sw.Start();

        List<SimpleDatum> rgImg = new List<SimpleDatum>();

        for (int i = 0; i < num_items; i++)
        {
            rgPixels = image_file.ReadBytes((int)(rows * cols));
            rgLabel = label_file.ReadBytes(1);

            // Throttle progress reporting to roughly once per second.
            if (sw.Elapsed.TotalMilliseconds > 1000)
            {
                reportProgress(i, (int)num_items, " loading data...");
                sw.Restart();
            }

            datum.SetData(rgPixels.ToList(), (int)rgLabel[0]);
            m_factory.PutRawImageCache(i, datum);
            // SimpleDatum copy is kept so the mean can be computed after the loop.
            rgImg.Add(new SimpleDatum(datum));
        }

        // Flush any images still queued in the cache before updating counts.
        // NOTE(review): "Cashe" spelling appears to match the factory API — confirm.
        m_factory.ClearImageCashe(true);
        m_factory.UpdateSourceCounts();
        m_factory.SaveImageMean(SimpleDatum.CalculateMean(log, rgImg.ToArray(), new WaitHandle[] { new ManualResetEvent(false) }), true);

        reportProgress((int)num_items, (int)num_items, " loading completed.");
    }
    finally
    {
        image_file.Dispose();
        label_file.Dispose();
    }
}
/// <summary>
/// Creates the CSV-based dataset: parses the CSV file, randomly splits the rows into
/// training/testing sources, loads both into the database with an image mean for the
/// training source, then registers the dataset and its parameters.
/// </summary>
/// <param name="config">Specifies the dataset configuration (name and settings).</param>
/// <param name="progress">Specifies the progress sink notified on completion/abort.</param>
public void Create(DatasetConfiguration config, IXDatasetCreatorProgress progress)
{
    string strCsvFile = Properties.Settings.Default.CsvFile;
    string strDsName = config.Name;
    string strTrainingSrc = config.Name + ".training";
    string strTestingSrc = config.Name + ".testing";

    m_bCancel = false;
    m_iprogress = progress;
    m_factory.DeleteSources(strTrainingSrc, strTestingSrc);

    Log log = new Log("CSV Dataset Creator");
    log.OnWriteLine += new EventHandler<LogArg>(log_OnWriteLine);

    try
    {
        //-----------------------------------------
        // Load the schema that defines the layout
        // of the CSV file.
        //-----------------------------------------
        m_schema = loadSchema(config.Settings);

        //-----------------------------------------
        // Load and parse the CSV file.
        //-----------------------------------------
        DataConfigSetting dsCsvFile = config.Settings.Find("CSV File");

        strCsvFile = dsCsvFile.Value.ToString();
        if (strCsvFile.Length == 0)
        {
            throw new Exception("CSV data file name not specified!");
        }

        log.WriteLine("Loading the data file...");

        if (m_bCancel)
        {
            return;
        }

        m_parser.Load(strCsvFile, m_schema);

        //-----------------------------------------
        // Split the data into training and testing
        // sets (random split at the testing percentage).
        //-----------------------------------------
        List<DataItem> rgTraining = new List<DataItem>();
        List<DataItem> rgTesting = new List<DataItem>();
        DataConfigSetting dsPctTesting = config.Settings.Find("Testing Percentage");
        double dfVal = (double)dsPctTesting.Value;
        Random random = new Random();

        for (int i = 0; i < m_parser.Data.Count; i++)
        {
            if (random.NextDouble() > dfVal)
            {
                rgTraining.Add(m_parser.Data[i]);
            }
            else
            {
                rgTesting.Add(m_parser.Data[i]);
            }
        }

        Properties.Settings.Default.TestingPct = dfVal;

        //-----------------------------------------
        // Create the training data source.
        //-----------------------------------------
        int nCellHorizCount = 0;
        List<int> rgDim = getImageDim(m_parser, m_schema, out nCellHorizCount);
        int nTrainSrcId = m_factory.AddSource(strTrainingSrc, rgDim[0], rgDim[1], rgDim[2], false, 0);

        m_factory.Open(nTrainSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.
        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        if (!loadData(log, m_factory, m_parser, rgTraining, rgDim, true, true))
        {
            return;
        }

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);

        log.WriteLine("Creating the image mean...");
        SimpleDatum dMean = SimpleDatum.CalculateMean(log, m_rgImages.ToArray(), new WaitHandle[] { new ManualResetEvent(false) });
        m_factory.PutRawImageMean(dMean, true);
        m_rgImages.Clear();
        m_factory.Close();

        //-----------------------------------------
        // Create the testing data source.
        //-----------------------------------------
        int nTestSrcId = m_factory.AddSource(strTestingSrc, rgDim[0], rgDim[1], rgDim[2], false, 0);

        m_factory.Open(nTestSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.
        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        if (!loadData(log, m_factory, m_parser, rgTesting, rgDim, false, false))
        {
            return;
        }

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);
        m_factory.Close();

        //-----------------------------------------
        // Create the data set.
        //-----------------------------------------
        log.WriteLine("Done loading training and testing data.");

        int nDatasetID = 0;

        using (DNNEntities entities = EntitiesConnection.CreateEntities())
        {
            List<Source> rgSrcTraining = entities.Sources.Where(p => p.Name == strTrainingSrc).ToList();
            List<Source> rgSrcTesting = entities.Sources.Where(p => p.Name == strTestingSrc).ToList();

            if (rgSrcTraining.Count == 0)
            {
                throw new Exception("Could not find the training source '" + strTrainingSrc + "'.");
            }

            // BUGFIX: error message previously read "tesing source".
            if (rgSrcTesting.Count == 0)
            {
                throw new Exception("Could not find the testing source '" + strTestingSrc + "'.");
            }

            DataConfigSetting dsName = config.Settings.Find("Output Dataset Name");
            int nSrcTestingCount = rgSrcTesting[0].ImageCount.GetValueOrDefault();
            int nSrcTrainingCount = rgSrcTraining[0].ImageCount.GetValueOrDefault();
            int nSrcTotalCount = nSrcTestingCount + nSrcTrainingCount;
            double dfTestingPct = (nSrcTrainingCount == 0) ? 0.0 : nSrcTestingCount / (double)nSrcTotalCount;

            Dataset ds = new Dataset();
            ds.ImageHeight = rgSrcTraining[0].ImageHeight;
            ds.ImageWidth = rgSrcTraining[0].ImageWidth;
            ds.Name = strDsName;
            ds.ImageEncoded = rgSrcTesting[0].ImageEncoded;
            ds.ImageChannels = rgSrcTesting[0].ImageChannels;
            ds.TestingPercent = (decimal)dfTestingPct;
            ds.TestingSourceID = rgSrcTesting[0].ID;
            ds.TestingTotal = rgSrcTesting[0].ImageCount;
            ds.TrainingSourceID = rgSrcTraining[0].ID;
            ds.TrainingTotal = rgSrcTraining[0].ImageCount;
            ds.DatasetCreatorID = config.ID;
            ds.DatasetGroupID = 0;
            ds.ModelGroupID = 0;

            entities.Datasets.Add(ds);
            entities.SaveChanges();

            nDatasetID = ds.ID;
        }

        m_factory.SetDatasetParameter(nDatasetID, "PixelSize", m_schema.CellSize.ToString());
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCount", m_parser.DataDescriptions.Count.ToString());
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCountHoriz", nCellHorizCount.ToString());
        // NOTE(review): the vertical count also uses nCellHorizCount — looks suspicious; confirm
        // whether a separate vertical count was intended.
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCountVert", nCellHorizCount.ToString());
    }
    catch (Exception excpt)
    {
        log.WriteLine("ERROR: " + excpt.Message);
    }
    finally
    {
        Properties.Settings.Default.CsvFile = strCsvFile;
        Properties.Settings.Default.Save();

        if (m_bCancel)
        {
            log.WriteLine("ABORTED converting CSV data files.");
        }
        else
        {
            log.WriteLine("Done converting CSV data files.");
        }

        if (m_bCancel)
        {
            m_iprogress.OnCompleted(new CreateProgressArgs(1, 1, "ABORTED!", null, true));
        }
        else
        {
            m_iprogress.OnCompleted(new CreateProgressArgs(1, "COMPLETED."));
        }
    }
}
/// <summary>
/// Create the dataset and load it into the database.
/// </summary>
/// <param name="nCreatorID">Specifies the creator ID.</param>
/// <returns>On successful creation, <i>true</i> is returned, otherwise <i>false</i> is returned on abort.</returns>
public bool LoadDatabase(int nCreatorID = 0)
{
    try
    {
        int nIdx = 0;
        int nTotal = 50000;

        reportProgress(nIdx, 0, "Loading database " + dataset_name + "...");

        DatasetFactory factory = new DatasetFactory();

        // Clear any previous training data before reloading.
        string strTrainSrc = dataset_name + ".training";
        int nSrcId = factory.GetSourceID(strTrainSrc);
        if (nSrcId != 0)
        {
            factory.DeleteSourceData(nSrcId);
        }

        // Load all five training batches; each returns false on abort.
        if (!loadFile(m_param.DataBatchFile1, strTrainSrc, nTotal, ref nIdx, m_log))
        {
            return false;
        }

        if (!loadFile(m_param.DataBatchFile2, strTrainSrc, nTotal, ref nIdx, m_log))
        {
            return false;
        }

        if (!loadFile(m_param.DataBatchFile3, strTrainSrc, nTotal, ref nIdx, m_log))
        {
            return false;
        }

        if (!loadFile(m_param.DataBatchFile4, strTrainSrc, nTotal, ref nIdx, m_log))
        {
            return false;
        }

        if (!loadFile(m_param.DataBatchFile5, strTrainSrc, nTotal, ref nIdx, m_log))
        {
            return false;
        }

        SourceDescriptor srcTrain = factory.LoadSource(strTrainSrc);
        m_factory.SaveImageMean(SimpleDatum.CalculateMean(m_log, m_rgImg.ToArray(), new WaitHandle[] { new ManualResetEvent(false) }), true, srcTrain.ID);

        // Reset the image collection and counters for the testing source.
        m_rgImg = new List<SimpleDatum>();
        nIdx = 0;
        nTotal = 10000;

        string strTestSrc = dataset_name + ".testing";
        nSrcId = factory.GetSourceID(strTestSrc);
        if (nSrcId != 0)
        {
            factory.DeleteSourceData(nSrcId);
        }

        if (!loadFile(m_param.TestBatchFile, strTestSrc, nTotal, ref nIdx, m_log))
        {
            return false;
        }

        SourceDescriptor srcTest = factory.LoadSource(strTestSrc);
        m_factory.SaveImageMean(SimpleDatum.CalculateMean(m_log, m_rgImg.ToArray(), new WaitHandle[] { new ManualResetEvent(false) }), true, srcTest.ID);

        // Register the dataset combining the training and testing sources.
        DatasetDescriptor ds = new DatasetDescriptor(nCreatorID, dataset_name, null, null, srcTrain, srcTest, dataset_name, dataset_name + " Dataset");
        factory.AddDataset(ds);
        factory.UpdateDatasetCounts(ds.ID);

        return true;
    }
    // BUGFIX: removed 'catch (Exception excpt) { throw excpt; }' which reset the stack
    // trace (CA2200) and served no other purpose; exceptions now propagate unmodified
    // and the finally block still runs.
    finally
    {
        if (OnCompleted != null)
        {
            OnCompleted(this, new EventArgs());
        }
    }
}
/// <summary>
/// Creates the CIFAR-10 dataset: loads the five training batch files and the testing
/// batch file into their database sources, computes the training image mean (copied to
/// the testing source), then registers the dataset.
/// </summary>
/// <param name="config">Specifies the dataset configuration (name and data file settings).</param>
/// <param name="progress">Specifies the progress sink notified on completion/abort.</param>
public void Create(DatasetConfiguration config, IXDatasetCreatorProgress progress)
{
    string strTrainingBatchFile1 = Properties.Settings.Default.TrainingDataFile1;
    string strTrainingBatchFile2 = Properties.Settings.Default.TrainingDataFile2;
    string strTrainingBatchFile3 = Properties.Settings.Default.TrainingDataFile3;
    string strTrainingBatchFile4 = Properties.Settings.Default.TrainingDataFile4;
    string strTrainingBatchFile5 = Properties.Settings.Default.TrainingDataFile5;
    string strTestingBatchFile = Properties.Settings.Default.TestingDataFile;
    string strDsName = config.Name;
    string strTrainingSrc = config.Name + ".training";
    string strTestingSrc = config.Name + ".testing";
    int nIdx = 0;
    int nTotal = 50000;

    m_bCancel = false;
    m_iprogress = progress;
    m_factory.DeleteSources(strTrainingSrc, strTestingSrc);

    Log log = new Log("CIFAR Dataset Creator");
    log.OnWriteLine += new EventHandler<LogArg>(log_OnWriteLine);

    try
    {
        // Resolve and validate all six batch file settings.
        DataConfigSetting dsTrainingDataFile1 = config.Settings.Find("Training Data File 1");
        DataConfigSetting dsTrainingDataFile2 = config.Settings.Find("Training Data File 2");
        DataConfigSetting dsTrainingDataFile3 = config.Settings.Find("Training Data File 3");
        DataConfigSetting dsTrainingDataFile4 = config.Settings.Find("Training Data File 4");
        DataConfigSetting dsTrainingDataFile5 = config.Settings.Find("Training Data File 5");
        DataConfigSetting dsTestingDataFile = config.Settings.Find("Testing Data File");

        strTrainingBatchFile1 = dsTrainingDataFile1.Value.ToString();
        if (strTrainingBatchFile1.Length == 0)
        {
            throw new Exception("Training data file #1 name not specified!");
        }

        strTrainingBatchFile2 = dsTrainingDataFile2.Value.ToString();
        if (strTrainingBatchFile2.Length == 0)
        {
            throw new Exception("Training data file #2 name not specified!");
        }

        strTrainingBatchFile3 = dsTrainingDataFile3.Value.ToString();
        if (strTrainingBatchFile3.Length == 0)
        {
            throw new Exception("Training data file #3 name not specified!");
        }

        strTrainingBatchFile4 = dsTrainingDataFile4.Value.ToString();
        if (strTrainingBatchFile4.Length == 0)
        {
            throw new Exception("Training data file #4 name not specified!");
        }

        strTrainingBatchFile5 = dsTrainingDataFile5.Value.ToString();
        if (strTrainingBatchFile5.Length == 0)
        {
            throw new Exception("Training data file #5 name not specified!");
        }

        strTestingBatchFile = dsTestingDataFile.Value.ToString();
        if (strTestingBatchFile.Length == 0)
        {
            throw new Exception("Testing data file name not specified!");
        }

        log.WriteLine("Loading the data files...");

        if (m_bCancel)
        {
            return;
        }

        // Create and fill the training source (CIFAR images are 3x32x32).
        int nTrainSrcId = m_factory.AddSource(strTrainingSrc, 3, 32, 32, false, 0);

        m_factory.Open(nTrainSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.
        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        if (!loadFile(log, dsTrainingDataFile1.Name, strTrainingBatchFile1, m_factory, nTotal, true, ref nIdx))
        {
            return;
        }

        if (!loadFile(log, dsTrainingDataFile2.Name, strTrainingBatchFile2, m_factory, nTotal, true, ref nIdx))
        {
            return;
        }

        if (!loadFile(log, dsTrainingDataFile3.Name, strTrainingBatchFile3, m_factory, nTotal, true, ref nIdx))
        {
            return;
        }

        if (!loadFile(log, dsTrainingDataFile4.Name, strTrainingBatchFile4, m_factory, nTotal, true, ref nIdx))
        {
            return;
        }

        if (!loadFile(log, dsTrainingDataFile5.Name, strTrainingBatchFile5, m_factory, nTotal, true, ref nIdx))
        {
            return;
        }

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);

        log.WriteLine("Creating the image mean...");
        SimpleDatum dMean = SimpleDatum.CalculateMean(log, m_rgImages.ToArray(), new WaitHandle[] { new ManualResetEvent(false) });
        m_factory.PutRawImageMean(dMean, true);
        m_rgImages.Clear();
        m_factory.Close();

        // Create and fill the testing source.
        int nTestSrcId = m_factory.AddSource(strTestingSrc, 3, 32, 32, false, 0);

        m_factory.Open(nTestSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.
        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        nIdx = 0;
        nTotal = 10000;

        if (!loadFile(log, dsTestingDataFile.Name, strTestingBatchFile, m_factory, nTotal, false, ref nIdx))
        {
            return;
        }

        // The testing source reuses the training mean.
        m_factory.CopyImageMean(strTrainingSrc, strTestingSrc);
        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);
        m_factory.Close();

        log.WriteLine("Done loading training and testing data.");

        // Register the dataset in the database.
        using (DNNEntities entities = EntitiesConnection.CreateEntities())
        {
            List<Source> rgSrcTraining = entities.Sources.Where(p => p.Name == strTrainingSrc).ToList();
            List<Source> rgSrcTesting = entities.Sources.Where(p => p.Name == strTestingSrc).ToList();

            if (rgSrcTraining.Count == 0)
            {
                throw new Exception("Could not find the training source '" + strTrainingSrc + "'.");
            }

            // BUGFIX: error message previously read "tesing source".
            if (rgSrcTesting.Count == 0)
            {
                throw new Exception("Could not find the testing source '" + strTestingSrc + "'.");
            }

            DataConfigSetting dsName = config.Settings.Find("Output Dataset Name");
            int nSrcTestingCount = rgSrcTesting[0].ImageCount.GetValueOrDefault();
            int nSrcTrainingCount = rgSrcTraining[0].ImageCount.GetValueOrDefault();
            int nSrcTotalCount = nSrcTestingCount + nSrcTrainingCount;
            double dfTestingPct = (nSrcTrainingCount == 0) ? 0.0 : nSrcTestingCount / (double)nSrcTotalCount;

            Dataset ds = new Dataset();
            ds.ImageHeight = rgSrcTraining[0].ImageHeight;
            ds.ImageWidth = rgSrcTraining[0].ImageWidth;
            ds.Name = strDsName;
            ds.ImageEncoded = rgSrcTesting[0].ImageEncoded;
            ds.ImageChannels = rgSrcTesting[0].ImageChannels;
            ds.TestingPercent = (decimal)dfTestingPct;
            ds.TestingSourceID = rgSrcTesting[0].ID;
            ds.TestingTotal = rgSrcTesting[0].ImageCount;
            ds.TrainingSourceID = rgSrcTraining[0].ID;
            ds.TrainingTotal = rgSrcTraining[0].ImageCount;
            ds.DatasetCreatorID = config.ID;
            ds.DatasetGroupID = 0;
            ds.ModelGroupID = 0;

            entities.Datasets.Add(ds);
            entities.SaveChanges();
        }
    }
    catch (Exception excpt)
    {
        log.WriteLine("ERROR: " + excpt.Message);
    }
    finally
    {
        // Persist the (possibly updated) file paths for the next run.
        Properties.Settings.Default.TrainingDataFile1 = strTrainingBatchFile1;
        Properties.Settings.Default.TrainingDataFile2 = strTrainingBatchFile2;
        Properties.Settings.Default.TrainingDataFile3 = strTrainingBatchFile3;
        Properties.Settings.Default.TrainingDataFile4 = strTrainingBatchFile4;
        Properties.Settings.Default.TrainingDataFile5 = strTrainingBatchFile5;
        Properties.Settings.Default.TestingDataFile = strTestingBatchFile;
        Properties.Settings.Default.Save();

        if (m_bCancel)
        {
            log.WriteLine("ABORTED converting CIFAR data files.");
        }
        else
        {
            log.WriteLine("Done converting CIFAR data files.");
        }

        if (m_bCancel)
        {
            m_iprogress.OnCompleted(new CreateProgressArgs(nIdx, nTotal, "ABORTED!", null, true));
        }
        else
        {
            m_iprogress.OnCompleted(new CreateProgressArgs(1, "COMPLETED."));
        }
    }
}
/// <summary>
/// Loads a list of pre-parsed (pixels, label) tuples into the database source
/// 'strSourceName' (when 'factory' is non-null) and/or exports each image to
/// 'strExportPath' as files.
/// </summary>
/// <param name="factory">Specifies the dataset factory, or null when only exporting to file.</param>
/// <param name="rgData">Specifies the data items as (pixel bytes, label) tuples.</param>
/// <param name="nC">Specifies the image channel count.</param>
/// <param name="nH">Specifies the image height (rows).</param>
/// <param name="nW">Specifies the image width (cols).</param>
/// <param name="strSourceName">Specifies the data source name (also appended to the export path).</param>
/// <param name="strExportPath">Specifies the file export root, or null when not exporting.</param>
/// <returns>Returns true on success, false when cancelled via m_evtCancel.</returns>
private bool loadFile(DatasetFactory factory, List<Tuple<byte[], int>> rgData, int nC, int nH, int nW, string strSourceName, string strExportPath)
{
    if (strExportPath != null)
    {
        strExportPath += strSourceName;

        if (!Directory.Exists(strExportPath))
        {
            Directory.CreateDirectory(strExportPath);
        }
    }

    Stopwatch sw = new Stopwatch();

    reportProgress(0, 0, " Source: " + strSourceName);

    // BUGFIX: the export streams were previously only disposed on the normal path
    // (the 'finally' was empty), leaking them on the cancel early-return and on
    // exceptions. They are now declared here and disposed in 'finally'.
    FileStream fsFileDesc = null;
    StreamWriter swFileDesc = null;

    try
    {
        if (factory != null)
        {
            int nSrcId = factory.AddSource(strSourceName, nC, nW, nH, false, 0, true);
            factory.Open(nSrcId, 500, Database.FORCE_LOAD.NONE, m_log);
            factory.DeleteSourceData();
        }

        // Storing to database;
        int nLabel;
        byte[] rgPixels;

        // One Datum is reused for every item; SetData overwrites it each iteration.
        Datum datum = new Datum(false, nC, nW, nH, -1, DateTime.MinValue, new List<byte>(), 0, false, -1);
        // BUGFIX: status text was misspelled "exporing".
        string strAction = (m_param.ExportToFile) ? "exporting" : "loading";

        reportProgress(0, rgData.Count, " " + strAction + " a total of " + rgData.Count.ToString() + " items.");
        reportProgress(0, rgData.Count, " (with rows: " + nH.ToString() + ", cols: " + nW.ToString() + ")");

        sw.Start();

        List<SimpleDatum> rgImg = new List<SimpleDatum>();

        if (m_param.ExportToFile)
        {
            string strFile = strExportPath + "\\file_list.txt";
            fsFileDesc = File.OpenWrite(strFile);
            swFileDesc = new StreamWriter(fsFileDesc);
        }

        for (int i = 0; i < rgData.Count; i++)
        {
            rgPixels = rgData[i].Item1;
            nLabel = rgData[i].Item2;

            // Throttle progress reporting to roughly once per second.
            if (sw.Elapsed.TotalMilliseconds > 1000)
            {
                reportProgress(i, rgData.Count, " " + strAction + " data...");
                sw.Restart();
            }

            datum.SetData(rgPixels, nLabel);

            if (factory != null)
            {
                factory.PutRawImageCache(i, datum, 5);
            }
            else if (strExportPath != null)
            {
                saveToFile(strExportPath, i, datum, swFileDesc);
            }

            rgImg.Add(new SimpleDatum(datum));

            if (m_evtCancel.WaitOne(0))
            {
                return false;
            }
        }

        if (factory != null)
        {
            factory.ClearImageCache(true);
            factory.UpdateSourceCounts();
            factory.SaveImageMean(SimpleDatum.CalculateMean(m_log, rgImg.ToArray(), new WaitHandle[] { new ManualResetEvent(false) }), true);
        }

        reportProgress(rgData.Count, rgData.Count, " " + strAction + " completed.");
    }
    finally
    {
        if (swFileDesc != null)
        {
            swFileDesc.Flush();
            swFileDesc.Dispose();
        }

        if (fsFileDesc != null)
        {
            fsFileDesc.Dispose();
        }
    }

    return true;
}
/// <summary>
/// Converts one MNIST-format image/label file pair (optionally gzip-compressed) into
/// the database source 'strDBPath', optionally creating or copying the image mean.
/// </summary>
/// <param name="strImageFile">Specifies the MNIST image data file (".gz" files are expanded first).</param>
/// <param name="strLabelFile">Specifies the MNIST label data file (".gz" files are expanded first).</param>
/// <param name="strDBPath">Specifies the data source name to create and fill.</param>
/// <param name="strDBPathMean">Specifies the source whose mean is copied when it differs from 'strDBPath'.</param>
/// <param name="bCreateImgMean">Specifies whether to create (or copy) the image mean.</param>
/// <param name="bGetItemCountOnly">When true, only the item count is read and returned; nothing is loaded.</param>
/// <param name="nChannels">Specifies the channel count; when 3, the single gray channel is replicated three times.</param>
/// <returns>The number of items in the file pair is returned.</returns>
public uint ConvertData(string strImageFile, string strLabelFile, string strDBPath, string strDBPathMean, bool bCreateImgMean, bool bGetItemCountOnly = false, int nChannels = 1)
{
    string strExt;
    List<SimpleDatum> rgImg = new List<SimpleDatum>();

    // Expand gzip-compressed inputs to plain files first.
    strExt = Path.GetExtension(strImageFile).ToLower();
    if (strExt == ".gz")
    {
        m_log.WriteLine("Unpacking '" + strImageFile + "'...");
        strImageFile = expandFile(strImageFile);
    }

    strExt = Path.GetExtension(strLabelFile).ToLower();
    if (strExt == ".gz")
    {
        m_log.WriteLine("Unpacking '" + strLabelFile + "'...");
        strLabelFile = expandFile(strLabelFile);
    }

    BinaryFile image_file = new BinaryFile(strImageFile);
    BinaryFile label_file = new BinaryFile(strLabelFile);

    try
    {
        // Verify the file headers (2051/2049 are the MNIST image/label magic numbers).
        uint magicImg = image_file.ReadUInt32();
        uint magicLbl = label_file.ReadUInt32();

        if (magicImg != 2051)
        {
            // NOTE(review): execution continues past this point unless FAIL throws — confirm
            // that Log.FAIL raises; otherwise a bad magic only reports and keeps going.
            if (m_log != null)
            {
                m_log.FAIL("Incorrect image file magic.");
            }

            if (OnLoadError != null)
            {
                OnLoadError(this, new LoadErrorArgs("Incorrect image file magic."));
            }
        }

        if (magicLbl != 2049)
        {
            if (m_log != null)
            {
                m_log.FAIL("Incorrect label file magic.");
            }

            if (OnLoadError != null)
            {
                OnLoadError(this, new LoadErrorArgs("Incorrect label file magic."));
            }
        }

        uint num_items = image_file.ReadUInt32();
        uint num_labels = label_file.ReadUInt32();

        if (num_items != num_labels)
        {
            if (m_log != null)
            {
                m_log.FAIL("The number of items must equal the number of labels.");
            }

            throw new Exception("The number of items must equal the number of labels." + Environment.NewLine + " Label File: '" + strLabelFile + Environment.NewLine + " Image File: '" + strImageFile + "'.");
        }

        // When only the count is wanted, stop before touching the database.
        if (bGetItemCountOnly)
        {
            return(num_items);
        }

        uint rows = image_file.ReadUInt32();
        uint cols = image_file.ReadUInt32();

        int nSrcId = m_factory.AddSource(strDBPath, nChannels, (int)cols, (int)rows, false, 0, true);

        m_factory.Open(nSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.
        m_factory.DeleteSourceData();

        // Storing to db
        byte[] rgLabel;
        byte[] rgPixels;

        // One Datum is reused for every item; SetData overwrites it each iteration.
        Datum datum = new Datum(false, nChannels, (int)cols, (int)rows);

        if (m_log != null)
        {
            m_log.WriteHeader("LOADING " + strDBPath + " items.");
            m_log.WriteLine("A total of " + num_items.ToString() + " items.");
            m_log.WriteLine("Rows: " + rows.ToString() + " Cols: " + cols.ToString());
        }

        if (OnLoadStart != null)
        {
            OnLoadStart(this, new LoadStartArgs((int)num_items));
        }

        for (int item_id = 0; item_id < num_items; item_id++)
        {
            rgPixels = image_file.ReadBytes((int)(rows * cols));
            rgLabel = label_file.ReadBytes(1);

            List<byte> rgData = new List<byte>(rgPixels);

            // Replicate the single gray channel across three channels when requested.
            if (nChannels == 3)
            {
                rgData.AddRange(new List<byte>(rgPixels));
                rgData.AddRange(new List<byte>(rgPixels));
            }

            datum.SetData(rgData, (int)rgLabel[0]);

            if (m_bmpTargetOverlay != null)
            {
                datum = createTargetOverlay(datum);
            }

            m_factory.PutRawImageCache(item_id, datum);

            // Keep a copy only when a mean will be computed later.
            if (bCreateImgMean)
            {
                rgImg.Add(new SimpleDatum(datum));
            }

            // Report progress and poll for cancellation every 1000 items.
            if ((item_id % 1000) == 0)
            {
                if (m_log != null)
                {
                    m_log.WriteLine("Loaded " + item_id.ToString("N") + " items...");
                    m_log.Progress = (double)item_id / (double)num_items;
                }

                if (OnLoadProgress != null)
                {
                    LoadArgs args = new LoadArgs(item_id);
                    OnLoadProgress(this, args);

                    if (args.Cancel)
                    {
                        break;
                    }
                }
            }
        }

        // Flush any images still queued in the cache before updating counts.
        m_factory.ClearImageCache(true);
        m_factory.UpdateSourceCounts();

        if (bCreateImgMean)
        {
            if (strDBPath != strDBPathMean)
            {
                // NOTE(review): argument order is (strDBPathMean, strDBPath) here but the CIFAR
                // caller passes (trainingSrc, testingSrc) — confirm which parameter is the copy source.
                m_factory.CopyImageMean(strDBPathMean, strDBPath);
            }
            else
            {
                m_log.WriteLine("Creating image mean...");
                SimpleDatum dMean = SimpleDatum.CalculateMean(m_log, rgImg.ToArray(), new WaitHandle[] { new ManualResetEvent(false) });
                m_factory.PutRawImageMean(dMean, true);
            }
        }

        if (OnLoadProgress != null)
        {
            LoadArgs args = new LoadArgs((int)num_items);
            OnLoadProgress(this, args);
        }

        return(num_items);
    }
    finally
    {
        image_file.Dispose();
        label_file.Dispose();
    }
}
/// <summary>
/// Loads one MNIST-format image/label file pair into the database source
/// 'strSourceName' (when 'factory' is non-null) and/or exports each image to
/// 'strExportPath' as files.
/// </summary>
/// <param name="factory">Specifies the dataset factory, or null when only exporting to file.</param>
/// <param name="strImagesFile">Specifies the MNIST image data file.</param>
/// <param name="strLabelsFile">Specifies the MNIST label data file.</param>
/// <param name="strSourceName">Specifies the data source name (also appended to the export path).</param>
/// <param name="strExportPath">Specifies the file export root, or null when not exporting.</param>
/// <returns>Returns true on success, false when cancelled via m_evtCancel.</returns>
private bool loadFile(DatasetFactory factory, string strImagesFile, string strLabelsFile, string strSourceName, string strExportPath)
{
    if (strExportPath != null)
    {
        strExportPath += strSourceName;

        if (!Directory.Exists(strExportPath))
        {
            Directory.CreateDirectory(strExportPath);
        }
    }

    Stopwatch sw = new Stopwatch();

    reportProgress(0, 0, " Source: " + strSourceName);
    reportProgress(0, 0, " loading " + strImagesFile + "...");

    BinaryFile image_file = new BinaryFile(strImagesFile);
    BinaryFile label_file = new BinaryFile(strLabelsFile);

    // BUGFIX: the export streams were previously only disposed on the normal path,
    // leaking them on the cancel early-return and on exceptions; they are now declared
    // here and disposed in 'finally' along with the binary files.
    FileStream fsFileDesc = null;
    StreamWriter swFileDesc = null;

    try
    {
        // Verify the files (2051/2049 are the MNIST image/label magic numbers).
        uint magicImg = image_file.ReadUInt32();
        uint magicLbl = label_file.ReadUInt32();

        if (magicImg != 2051)
        {
            throw new Exception("Incorrect image file magic.");
        }

        if (magicLbl != 2049)
        {
            throw new Exception("Incorrect label file magic.");
        }

        uint num_items = image_file.ReadUInt32();
        uint num_labels = label_file.ReadUInt32();

        if (num_items != num_labels)
        {
            throw new Exception("The number of items must be equal to the number of labels!");
        }

        // Add the data source to the database.
        uint rows = image_file.ReadUInt32();
        uint cols = image_file.ReadUInt32();
        int nChannels = 1; // black and white

        if (factory != null)
        {
            int nSrcId = factory.AddSource(strSourceName, nChannels, (int)cols, (int)rows, false, 0, true);
            factory.Open(nSrcId, 500, Database.FORCE_LOAD.NONE, m_log);
            factory.DeleteSourceData();
        }

        // Storing to database;
        byte[] rgLabel;
        byte[] rgPixels;

        // One Datum is reused for every item; SetData overwrites it each iteration.
        Datum datum = new Datum(false, nChannels, (int)cols, (int)rows, -1, DateTime.MinValue, new List<byte>(), 0, false, -1);
        // BUGFIX: status text was misspelled "exporing".
        string strAction = (m_param.ExportToFile) ? "exporting" : "loading";

        reportProgress(0, (int)num_items, " " + strAction + " a total of " + num_items.ToString() + " items.");
        reportProgress(0, (int)num_items, " (with rows: " + rows.ToString() + ", cols: " + cols.ToString() + ")");

        sw.Start();

        List<SimpleDatum> rgImg = new List<SimpleDatum>();

        if (m_param.ExportToFile)
        {
            string strFile = strExportPath + "\\file_list.txt";
            fsFileDesc = File.OpenWrite(strFile);
            swFileDesc = new StreamWriter(fsFileDesc);
        }

        for (int i = 0; i < num_items; i++)
        {
            rgPixels = image_file.ReadBytes((int)(rows * cols));
            rgLabel = label_file.ReadBytes(1);

            // Throttle progress reporting to roughly once per second.
            if (sw.Elapsed.TotalMilliseconds > 1000)
            {
                reportProgress(i, (int)num_items, " " + strAction + " data...");
                sw.Restart();
            }

            datum.SetData(rgPixels.ToList(), (int)rgLabel[0]);

            if (factory != null)
            {
                factory.PutRawImageCache(i, datum);
            }
            else if (strExportPath != null)
            {
                saveToFile(strExportPath, i, datum, swFileDesc);
            }

            rgImg.Add(new SimpleDatum(datum));

            if (m_evtCancel.WaitOne(0))
            {
                return false;
            }
        }

        if (factory != null)
        {
            // NOTE(review): "Cashe" spelling appears to match this factory API — confirm.
            factory.ClearImageCashe(true);
            factory.UpdateSourceCounts();
            factory.SaveImageMean(SimpleDatum.CalculateMean(m_log, rgImg.ToArray(), new WaitHandle[] { new ManualResetEvent(false) }), true);
        }

        reportProgress((int)num_items, (int)num_items, " " + strAction + " completed.");
    }
    finally
    {
        if (swFileDesc != null)
        {
            swFileDesc.Flush();
            swFileDesc.Dispose();
        }

        if (fsFileDesc != null)
        {
            fsFileDesc.Dispose();
        }

        image_file.Dispose();
        label_file.Dispose();
    }

    return true;
}