/// <summary>
/// Returns the image mean for the ImageSet.
/// </summary>
/// <param name="log">Specifies the Log used to output status (optional, may be <i>null</i>).</param>
/// <param name="rgAbort">Specifies a set of wait handles for aborting the operation.</param>
/// <param name="bQueryOnly">Specifies whether or not to only query for the mean and not calculate if missing.</param>
/// <returns>The SimpleDatum with the image mean is returned, or <i>null</i> when the mean is unavailable (not all images loaded, or the calculation was cancelled/aborted).</returns>
public SimpleDatum GetImageMean(Log log, WaitHandle[] rgAbort, bool bQueryOnly)
{
    // Return the cached mean (or null when only querying).
    if (m_imgMean != null || bQueryOnly)
        return m_imgMean;

    // The mean can only be computed once every image is resident in memory.
    int nLoadedCount = GetLoadedCount();
    int nTotalCount = GetTotalCount();

    if (nLoadedCount < nTotalCount)
    {
        double dfPct = (double)nLoadedCount / (double)nTotalCount;

        if (log != null)
            log.WriteLine("WARNING: Cannot create the image mean until all images have loaded - the data is currently " + dfPct.ToString("P") + " loaded.");

        return null;
    }

    // Allow an external handler to perform (or cancel) the calculation.
    if (OnCalculateImageMean != null)
    {
        CalculateImageMeanArgs args = new CalculateImageMeanArgs(m_rgImages);
        OnCalculateImageMean(this, args);

        if (args.Cancelled)
            return null;

        m_imgMean = args.ImageMean;
        return m_imgMean;
    }

    RawImageMean imgMean = m_factory.GetRawImageMean();

    // BUGFIX: previously tested 'm_imgMean != null', which is always false here
    // (the method returns early above when m_imgMean is set), so a mean already
    // stored in the database was never loaded and was always recalculated.
    if (imgMean != null)
    {
        m_imgMean = m_factory.LoadDatum(imgMean);
    }
    else
    {
        if (log != null)
            log.WriteLine("Calculating mean...");

        m_imgMean = SimpleDatum.CalculateMean(log, m_rgImages, rgAbort);
        m_factory.PutRawImageMean(m_imgMean, true);
    }

    // The mean carries no meaningful label.
    m_imgMean.SetLabel(0);

    return m_imgMean;
}
/// <summary>
/// Creates the CSV dataset: loads and parses the CSV file, randomly splits it into
/// training and testing sources, writes both sources (and the image mean) to the
/// database, and registers the resulting Dataset entity with its parameters.
/// </summary>
/// <param name="config">Specifies the dataset configuration (file name, testing percentage, etc.).</param>
/// <param name="progress">Specifies the progress sink notified on completion/abort.</param>
public void Create(DatasetConfiguration config, IXDatasetCreatorProgress progress)
{
    string strCsvFile = Properties.Settings.Default.CsvFile;
    string strDsName = config.Name;
    string strTrainingSrc = config.Name + ".training";
    string strTestingSrc = config.Name + ".testing";

    m_bCancel = false;
    m_iprogress = progress;
    m_factory.DeleteSources(strTrainingSrc, strTestingSrc);

    Log log = new Log("CSV Dataset Creator");
    log.OnWriteLine += new EventHandler<LogArg>(log_OnWriteLine);

    try
    {
        //-----------------------------------------
        //  Load the schema that defines the layout
        //  of the CSV file.
        //-----------------------------------------
        m_schema = loadSchema(config.Settings);

        //-----------------------------------------
        //  Load and parse the CSV file.
        //-----------------------------------------
        DataConfigSetting dsCsvFile = config.Settings.Find("CSV File");
        strCsvFile = dsCsvFile.Value.ToString();

        if (strCsvFile.Length == 0)
            throw new Exception("CSV data file name not specified!");

        log.WriteLine("Loading the data file...");

        if (m_bCancel)
            return;

        m_parser.Load(strCsvFile, m_schema);

        //-----------------------------------------
        //  Split the data into training and testing
        //  sets.
        //-----------------------------------------
        List<DataItem> rgTraining = new List<DataItem>();
        List<DataItem> rgTesting = new List<DataItem>();
        DataConfigSetting dsPctTesting = config.Settings.Find("Testing Percentage");
        double dfVal = (double)dsPctTesting.Value;
        Random random = new Random();

        // Each row lands in the testing set with probability dfVal.
        for (int i = 0; i < m_parser.Data.Count; i++)
        {
            if (random.NextDouble() > dfVal)
                rgTraining.Add(m_parser.Data[i]);
            else
                rgTesting.Add(m_parser.Data[i]);
        }

        Properties.Settings.Default.TestingPct = dfVal;

        //-----------------------------------------
        //  Create the training data source.
        //-----------------------------------------
        int nCellHorizCount = 0;
        List<int> rgDim = getImageDim(m_parser, m_schema, out nCellHorizCount);
        int nTrainSrcId = m_factory.AddSource(strTrainingSrc, rgDim[0], rgDim[1], rgDim[2], false, 0);
        m_factory.Open(nTrainSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        if (!loadData(log, m_factory, m_parser, rgTraining, rgDim, true, true))
            return;

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);

        // The image mean is computed from the training images only.
        log.WriteLine("Creating the image mean...");
        SimpleDatum dMean = SimpleDatum.CalculateMean(log, m_rgImages.ToArray(), new WaitHandle[] { new ManualResetEvent(false) });
        m_factory.PutRawImageMean(dMean, true);
        m_rgImages.Clear();

        m_factory.Close();

        //-----------------------------------------
        //  Create the testing data source.
        //-----------------------------------------
        int nTestSrcId = m_factory.AddSource(strTestingSrc, rgDim[0], rgDim[1], rgDim[2], false, 0);
        m_factory.Open(nTestSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        if (!loadData(log, m_factory, m_parser, rgTesting, rgDim, false, false))
            return;

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);
        m_factory.Close();

        //-----------------------------------------
        //  Create the data set.
        //-----------------------------------------
        log.WriteLine("Done loading training and testing data.");

        int nDatasetID = 0;

        using (DNNEntities entities = EntitiesConnection.CreateEntities())
        {
            List<Source> rgSrcTraining = entities.Sources.Where(p => p.Name == strTrainingSrc).ToList();
            List<Source> rgSrcTesting = entities.Sources.Where(p => p.Name == strTestingSrc).ToList();

            if (rgSrcTraining.Count == 0)
                throw new Exception("Could not find the training source '" + strTrainingSrc + "'.");

            if (rgSrcTesting.Count == 0)
                throw new Exception("Could not find the testing source '" + strTestingSrc + "'.");

            int nSrcTestingCount = rgSrcTesting[0].ImageCount.GetValueOrDefault();
            int nSrcTrainingCount = rgSrcTraining[0].ImageCount.GetValueOrDefault();
            int nSrcTotalCount = nSrcTestingCount + nSrcTrainingCount;
            double dfTestingPct = (nSrcTrainingCount == 0) ? 0.0 : nSrcTestingCount / (double)nSrcTotalCount;

            Dataset ds = new Dataset();
            ds.ImageHeight = rgSrcTraining[0].ImageHeight;
            ds.ImageWidth = rgSrcTraining[0].ImageWidth;
            ds.Name = strDsName;
            ds.ImageEncoded = rgSrcTesting[0].ImageEncoded;
            ds.ImageChannels = rgSrcTesting[0].ImageChannels;
            ds.TestingPercent = (decimal)dfTestingPct;
            ds.TestingSourceID = rgSrcTesting[0].ID;
            ds.TestingTotal = rgSrcTesting[0].ImageCount;
            ds.TrainingSourceID = rgSrcTraining[0].ID;
            ds.TrainingTotal = rgSrcTraining[0].ImageCount;
            ds.DatasetCreatorID = config.ID;
            ds.DatasetGroupID = 0;
            ds.ModelGroupID = 0;

            entities.Datasets.Add(ds);
            entities.SaveChanges();

            nDatasetID = ds.ID;
        }

        m_factory.SetDatasetParameter(nDatasetID, "PixelSize", m_schema.CellSize.ToString());
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCount", m_parser.DataDescriptions.Count.ToString());
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCountHoriz", nCellHorizCount.ToString());
        // NOTE(review): 'AttributeCountVert' is set from the horizontal cell count —
        // looks intentional (square layout?) but verify against getImageDim.
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCountVert", nCellHorizCount.ToString());
    }
    catch (Exception excpt)
    {
        log.WriteLine("ERROR: " + excpt.Message);
    }
    finally
    {
        // Persist the last-used file name regardless of success.
        Properties.Settings.Default.CsvFile = strCsvFile;
        Properties.Settings.Default.Save();

        if (m_bCancel)
        {
            log.WriteLine("ABORTED converting CSV data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(1, 1, "ABORTED!", null, true));
        }
        else
        {
            log.WriteLine("Done converting CSV data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(1, "COMPLETED."));
        }
    }
}
/// <summary>
/// Loads an ImageSet for the given data source, optionally loading (or computing and
/// persisting) the image mean.
/// </summary>
/// <param name="strType">Specifies the source type name used in error messages (e.g. "Training").</param>
/// <param name="src">Specifies the source descriptor to load.</param>
/// <param name="rgAbort">Specifies a set of wait handles used to abort the load.</param>
/// <param name="imgMean">Specifies the image mean; when <i>null</i> it is loaded from the database or calculated and stored.</param>
/// <param name="nLastImageIdx">Returns the index one past the last image loaded.</param>
/// <param name="nPadW">Optionally, specifies horizontal padding applied to each image.</param>
/// <param name="nPadH">Optionally, specifies vertical padding applied to each image.</param>
/// <param name="log">Optionally, specifies the output log (may be <i>null</i>).</param>
/// <param name="loadMethod">Optionally, specifies how the image database loads the images.</param>
/// <param name="nImageDbLoadLimit">Optionally, specifies the maximum number of images to load (0 = all).</param>
/// <param name="nImageDbLoadLimitStartIdx">Optionally, specifies the image index at which to start loading.</param>
/// <param name="bLoadNext">Optionally, specifies to advance the last-image index by the load limit when loading on demand.</param>
/// <returns>The loaded ImageSet is returned, or <i>null</i> when the load is aborted.</returns>
private ImageSet loadImageset(string strType, SourceDescriptor src, WaitHandle[] rgAbort, ref SimpleDatum imgMean, out int nLastImageIdx, int nPadW = 0, int nPadH = 0, Log log = null, IMAGEDB_LOAD_METHOD loadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL, int nImageDbLoadLimit = 0, int nImageDbLoadLimitStartIdx = 0, bool bLoadNext = false)
{
    try
    {
        RawImageMean imgMeanRaw = null;
        m_factory.Open(src);
        nLastImageIdx = nImageDbLoadLimitStartIdx;

        // Partial load methods require a pre-existing mean in the database;
        // fall back to LOAD_ALL when none exists.
        if (loadMethod != IMAGEDB_LOAD_METHOD.LOAD_ALL)
        {
            if (imgMean == null)
            {
                imgMeanRaw = m_factory.GetRawImageMean();
                if (imgMeanRaw == null)
                {
                    if (log != null)
                        log.WriteLine("WARNING: No image mean exists in the database, changing image database load from " + loadMethod.ToString() + " to " + IMAGEDB_LOAD_METHOD.LOAD_ALL.ToString());

                    loadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL;
                }
            }
        }

        int nCount = src.ImageCount;
        if (nCount == 0)
            throw new Exception("Could not find any images with " + strType + " Source = '" + src.Name + "'.");

        if (log != null)
            log.WriteLine("Loading '" + src.Name + "' - " + nCount.ToString("N0") + " images.");

        ImageSet imgset = new ImageSet(m_factory, src, loadMethod, nImageDbLoadLimit);

        if (OnCalculateImageMean != null)
            imgset.OnCalculateImageMean += OnCalculateImageMean;

        if (loadMethod != IMAGEDB_LOAD_METHOD.LOAD_ON_DEMAND)
        {
            int nBatchSize = 20000;
            Stopwatch sw = new Stopwatch();
            int nImageSize = src.ImageHeight * src.ImageWidth;

            // Larger images are fetched in smaller database batches.
            // BUGFIX: the middle tier previously tested 'nBatchSize > 20000',
            // which is always false (nBatchSize is 20000 here) — the threshold
            // ladder (60000/20000/3000) shows it must test nImageSize.
            if (nImageSize > 60000)
                nBatchSize = 5000;
            else if (nImageSize > 20000)
                nBatchSize = 7500;
            else if (nImageSize > 3000)
                nBatchSize = 10000;

            if (nImageDbLoadLimit <= 0)
                nImageDbLoadLimit = nCount;

            List<int> rgIdx = getIndexList(nImageDbLoadLimitStartIdx, nImageDbLoadLimit);
            int nIdx = 0;

            sw.Start();

            while (nIdx < rgIdx.Count)
            {
                int nImageIdx = rgIdx[nIdx];
                int nImageCount = Math.Min(rgIdx.Count - nIdx, nBatchSize);
                List<RawImage> rgImg = m_factory.GetRawImagesAt(nImageIdx, nImageCount);

                for (int j = 0; j < rgImg.Count; j++)
                {
                    SimpleDatum sd1 = m_factory.LoadDatum(rgImg[j], nPadW, nPadH);
                    imgset.Add(nIdx + j, sd1);

                    // Report progress (and poll the abort handles) about once per second.
                    if (sw.Elapsed.TotalMilliseconds > 1000)
                    {
                        if (log != null)
                        {
                            double dfPct = (double)(nIdx + j) / (double)nCount;
                            log.Progress = dfPct;
                            log.WriteLine("image loading at " + dfPct.ToString("P") + "...");
                        }

                        sw.Restart();

                        if (EventWaitHandle.WaitAny(rgAbort, 0) != EventWaitHandle.WaitTimeout)
                            return null;
                    }
                }

                nIdx += rgImg.Count;

                // An empty batch with images remaining means the index is stale;
                // break to avoid spinning forever.
                if (loadMethod == IMAGEDB_LOAD_METHOD.LOAD_ALL && rgImg.Count == 0 && nIdx < nCount)
                {
                    // BUGFIX: guard the log, which is an optional (default null) parameter.
                    if (log != null)
                        log.WriteLine("WARNING: Loaded " + nIdx.ToString("N0") + " images, yet " + (nCount - nIdx).ToString("N0") + " images are unaccounted for.  You may need to reindex the dataset.");

                    break;
                }
            }

            if (log != null)
                log.Progress = 0;

            if (rgIdx.Count > 0)
                nLastImageIdx = rgIdx[rgIdx.Count - 1] + 1;
        }
        else if (bLoadNext)
        {
            nLastImageIdx += nImageDbLoadLimit;
        }

        // Resolve the image mean: prefer the database copy, otherwise calculate and persist it.
        if (imgMean == null)
        {
            if (imgMeanRaw == null)
                imgMeanRaw = m_factory.GetRawImageMean();

            if (imgMeanRaw != null)
            {
                imgMean = m_factory.LoadDatum(imgMeanRaw, nPadW, nPadH);
            }
            else
            {
                if (log != null)
                    log.WriteLine("Calculating mean...");

                imgMean = imgset.GetImageMean(log, rgAbort);
                m_factory.PutRawImageMean(imgMean, true);
            }
        }

        if (imgMean != null)
            imgset.SetImageMean(imgMean);

        imgset.CompleteLoad(nLastImageIdx);

        return imgset;
    }
    finally
    {
        m_factory.Close();
    }
}
/// <summary>
/// Creates the CIFAR dataset: loads the five training batch files and the testing batch
/// file, writes the training and testing sources (and the image mean) to the database,
/// and registers the resulting Dataset entity.
/// </summary>
/// <param name="config">Specifies the dataset configuration (batch file names, output name, etc.).</param>
/// <param name="progress">Specifies the progress sink notified on completion/abort.</param>
public void Create(DatasetConfiguration config, IXDatasetCreatorProgress progress)
{
    string strTrainingBatchFile1 = Properties.Settings.Default.TrainingDataFile1;
    string strTrainingBatchFile2 = Properties.Settings.Default.TrainingDataFile2;
    string strTrainingBatchFile3 = Properties.Settings.Default.TrainingDataFile3;
    string strTrainingBatchFile4 = Properties.Settings.Default.TrainingDataFile4;
    string strTrainingBatchFile5 = Properties.Settings.Default.TrainingDataFile5;
    string strTestingBatchFile = Properties.Settings.Default.TestingDataFile;
    string strDsName = config.Name;
    string strTrainingSrc = config.Name + ".training";
    string strTestingSrc = config.Name + ".testing";
    int nIdx = 0;
    int nTotal = 50000; // CIFAR training set size (5 batches x 10,000).

    m_bCancel = false;
    m_iprogress = progress;
    m_factory.DeleteSources(strTrainingSrc, strTestingSrc);

    Log log = new Log("CIFAR Dataset Creator");
    log.OnWriteLine += new EventHandler<LogArg>(log_OnWriteLine);

    try
    {
        DataConfigSetting dsTrainingDataFile1 = config.Settings.Find("Training Data File 1");
        DataConfigSetting dsTrainingDataFile2 = config.Settings.Find("Training Data File 2");
        DataConfigSetting dsTrainingDataFile3 = config.Settings.Find("Training Data File 3");
        DataConfigSetting dsTrainingDataFile4 = config.Settings.Find("Training Data File 4");
        DataConfigSetting dsTrainingDataFile5 = config.Settings.Find("Training Data File 5");
        DataConfigSetting dsTestingDataFile = config.Settings.Find("Testing Data File");

        strTrainingBatchFile1 = dsTrainingDataFile1.Value.ToString();
        if (strTrainingBatchFile1.Length == 0)
            throw new Exception("Training data file #1 name not specified!");

        strTrainingBatchFile2 = dsTrainingDataFile2.Value.ToString();
        if (strTrainingBatchFile2.Length == 0)
            throw new Exception("Training data file #2 name not specified!");

        strTrainingBatchFile3 = dsTrainingDataFile3.Value.ToString();
        if (strTrainingBatchFile3.Length == 0)
            throw new Exception("Training data file #3 name not specified!");

        strTrainingBatchFile4 = dsTrainingDataFile4.Value.ToString();
        if (strTrainingBatchFile4.Length == 0)
            throw new Exception("Training data file #4 name not specified!");

        strTrainingBatchFile5 = dsTrainingDataFile5.Value.ToString();
        if (strTrainingBatchFile5.Length == 0)
            throw new Exception("Training data file #5 name not specified!");

        strTestingBatchFile = dsTestingDataFile.Value.ToString();
        if (strTestingBatchFile.Length == 0)
            throw new Exception("Testing data file name not specified!");

        log.WriteLine("Loading the data files...");

        if (m_bCancel)
            return;

        //-----------------------------------------
        //  Create the training data source (3x32x32 color images).
        //-----------------------------------------
        int nTrainSrcId = m_factory.AddSource(strTrainingSrc, 3, 32, 32, false, 0);
        m_factory.Open(nTrainSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        if (!loadFile(log, dsTrainingDataFile1.Name, strTrainingBatchFile1, m_factory, nTotal, true, ref nIdx))
            return;

        if (!loadFile(log, dsTrainingDataFile2.Name, strTrainingBatchFile2, m_factory, nTotal, true, ref nIdx))
            return;

        if (!loadFile(log, dsTrainingDataFile3.Name, strTrainingBatchFile3, m_factory, nTotal, true, ref nIdx))
            return;

        if (!loadFile(log, dsTrainingDataFile4.Name, strTrainingBatchFile4, m_factory, nTotal, true, ref nIdx))
            return;

        if (!loadFile(log, dsTrainingDataFile5.Name, strTrainingBatchFile5, m_factory, nTotal, true, ref nIdx))
            return;

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);

        // The image mean is computed from the training images only.
        log.WriteLine("Creating the image mean...");
        SimpleDatum dMean = SimpleDatum.CalculateMean(log, m_rgImages.ToArray(), new WaitHandle[] { new ManualResetEvent(false) });
        m_factory.PutRawImageMean(dMean, true);
        m_rgImages.Clear();

        m_factory.Close();

        //-----------------------------------------
        //  Create the testing data source.
        //-----------------------------------------
        int nTestSrcId = m_factory.AddSource(strTestingSrc, 3, 32, 32, false, 0);
        m_factory.Open(nTestSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        nIdx = 0;
        nTotal = 10000; // CIFAR testing set size.

        if (!loadFile(log, dsTestingDataFile.Name, strTestingBatchFile, m_factory, nTotal, false, ref nIdx))
            return;

        // The testing source reuses the training image mean.
        m_factory.CopyImageMean(strTrainingSrc, strTestingSrc);
        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);
        m_factory.Close();

        //-----------------------------------------
        //  Create the data set.
        //-----------------------------------------
        log.WriteLine("Done loading training and testing data.");

        using (DNNEntities entities = EntitiesConnection.CreateEntities())
        {
            List<Source> rgSrcTraining = entities.Sources.Where(p => p.Name == strTrainingSrc).ToList();
            List<Source> rgSrcTesting = entities.Sources.Where(p => p.Name == strTestingSrc).ToList();

            if (rgSrcTraining.Count == 0)
                throw new Exception("Could not find the training source '" + strTrainingSrc + "'.");

            if (rgSrcTesting.Count == 0)
                throw new Exception("Could not find the testing source '" + strTestingSrc + "'.");

            int nSrcTestingCount = rgSrcTesting[0].ImageCount.GetValueOrDefault();
            int nSrcTrainingCount = rgSrcTraining[0].ImageCount.GetValueOrDefault();
            int nSrcTotalCount = nSrcTestingCount + nSrcTrainingCount;
            double dfTestingPct = (nSrcTrainingCount == 0) ? 0.0 : nSrcTestingCount / (double)nSrcTotalCount;

            Dataset ds = new Dataset();
            ds.ImageHeight = rgSrcTraining[0].ImageHeight;
            ds.ImageWidth = rgSrcTraining[0].ImageWidth;
            ds.Name = strDsName;
            ds.ImageEncoded = rgSrcTesting[0].ImageEncoded;
            ds.ImageChannels = rgSrcTesting[0].ImageChannels;
            ds.TestingPercent = (decimal)dfTestingPct;
            ds.TestingSourceID = rgSrcTesting[0].ID;
            ds.TestingTotal = rgSrcTesting[0].ImageCount;
            ds.TrainingSourceID = rgSrcTraining[0].ID;
            ds.TrainingTotal = rgSrcTraining[0].ImageCount;
            ds.DatasetCreatorID = config.ID;
            ds.DatasetGroupID = 0;
            ds.ModelGroupID = 0;

            entities.Datasets.Add(ds);
            entities.SaveChanges();
        }
    }
    catch (Exception excpt)
    {
        log.WriteLine("ERROR: " + excpt.Message);
    }
    finally
    {
        // Persist the last-used file names regardless of success.
        Properties.Settings.Default.TrainingDataFile1 = strTrainingBatchFile1;
        Properties.Settings.Default.TrainingDataFile2 = strTrainingBatchFile2;
        Properties.Settings.Default.TrainingDataFile3 = strTrainingBatchFile3;
        Properties.Settings.Default.TrainingDataFile4 = strTrainingBatchFile4;
        Properties.Settings.Default.TrainingDataFile5 = strTrainingBatchFile5;
        Properties.Settings.Default.TestingDataFile = strTestingBatchFile;
        Properties.Settings.Default.Save();

        if (m_bCancel)
        {
            log.WriteLine("ABORTED converting CIFAR data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(nIdx, nTotal, "ABORTED!", null, true));
        }
        else
        {
            log.WriteLine("Done converting CIFAR data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(1, "COMPLETED."));
        }
    }
}
/// <summary>
/// Converts an MNIST-format image/label file pair into a database source, optionally
/// creating (or copying) the image mean.
/// </summary>
/// <param name="strImageFile">Specifies the image data file (a '.gz' file is unpacked first).</param>
/// <param name="strLabelFile">Specifies the label data file (a '.gz' file is unpacked first).</param>
/// <param name="strDBPath">Specifies the name of the destination data source.</param>
/// <param name="strDBPathMean">Specifies the data source holding the image mean to copy when different from <paramref name="strDBPath"/>.</param>
/// <param name="bCreateImgMean">Specifies whether to create (or copy) the image mean.</param>
/// <param name="bGetItemCountOnly">Optionally, specifies to only return the item count without loading any data.</param>
/// <param name="nChannels">Optionally, specifies the channel count; 3 replicates the single gray channel into RGB.</param>
/// <returns>The number of items converted (or counted) is returned.</returns>
public uint ConvertData(string strImageFile, string strLabelFile, string strDBPath, string strDBPathMean, bool bCreateImgMean, bool bGetItemCountOnly = false, int nChannels = 1)
{
    string strExt;
    List<SimpleDatum> rgImg = new List<SimpleDatum>();

    // Unpack gzip'd inputs before reading.
    strExt = Path.GetExtension(strImageFile).ToLower();
    if (strExt == ".gz")
    {
        // BUGFIX: m_log is null-guarded everywhere else in this method.
        if (m_log != null)
            m_log.WriteLine("Unpacking '" + strImageFile + "'...");
        strImageFile = expandFile(strImageFile);
    }

    strExt = Path.GetExtension(strLabelFile).ToLower();
    if (strExt == ".gz")
    {
        if (m_log != null)
            m_log.WriteLine("Unpacking '" + strLabelFile + "'...");
        strLabelFile = expandFile(strLabelFile);
    }

    BinaryFile image_file = new BinaryFile(strImageFile);
    BinaryFile label_file = new BinaryFile(strLabelFile);

    try
    {
        // Verify the MNIST magic numbers (2051 = images, 2049 = labels).
        uint magicImg = image_file.ReadUInt32();
        uint magicLbl = label_file.ReadUInt32();

        if (magicImg != 2051)
        {
            if (m_log != null)
                m_log.FAIL("Incorrect image file magic.");

            if (OnLoadError != null)
                OnLoadError(this, new LoadErrorArgs("Incorrect image file magic."));
        }

        if (magicLbl != 2049)
        {
            if (m_log != null)
                m_log.FAIL("Incorrect label file magic.");

            if (OnLoadError != null)
                OnLoadError(this, new LoadErrorArgs("Incorrect label file magic."));
        }

        uint num_items = image_file.ReadUInt32();
        uint num_labels = label_file.ReadUInt32();

        if (num_items != num_labels)
        {
            if (m_log != null)
                m_log.FAIL("The number of items must equal the number of labels.");

            throw new Exception("The number of items must equal the number of labels." + Environment.NewLine + "  Label File: '" + strLabelFile + Environment.NewLine + "  Image File: '" + strImageFile + "'.");
        }

        if (bGetItemCountOnly)
            return num_items;

        uint rows = image_file.ReadUInt32();
        uint cols = image_file.ReadUInt32();

        int nSrcId = m_factory.AddSource(strDBPath, nChannels, (int)cols, (int)rows, false, 0, true);
        m_factory.Open(nSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.
        m_factory.DeleteSourceData();

        // Storing to db
        byte[] rgLabel;
        byte[] rgPixels;

        Datum datum = new Datum(false, nChannels, (int)cols, (int)rows);

        if (m_log != null)
        {
            m_log.WriteHeader("LOADING " + strDBPath + " items.");
            m_log.WriteLine("A total of " + num_items.ToString() + " items.");
            m_log.WriteLine("Rows: " + rows.ToString() + " Cols: " + cols.ToString());
        }

        if (OnLoadStart != null)
            OnLoadStart(this, new LoadStartArgs((int)num_items));

        for (int item_id = 0; item_id < num_items; item_id++)
        {
            rgPixels = image_file.ReadBytes((int)(rows * cols));
            rgLabel = label_file.ReadBytes(1);

            List<byte> rgData = new List<byte>(rgPixels);

            // Replicate the single gray channel into R, G and B when 3 channels are requested.
            if (nChannels == 3)
            {
                rgData.AddRange(new List<byte>(rgPixels));
                rgData.AddRange(new List<byte>(rgPixels));
            }

            datum.SetData(rgData, (int)rgLabel[0]);

            if (m_bmpTargetOverlay != null)
                datum = createTargetOverlay(datum);

            m_factory.PutRawImageCache(item_id, datum);

            if (bCreateImgMean)
                rgImg.Add(new SimpleDatum(datum));

            if ((item_id % 1000) == 0)
            {
                if (m_log != null)
                {
                    // BUGFIX: use "N0" (no decimals) for item counts, consistent
                    // with the rest of the codebase ("N" prints e.g. 1,000.00).
                    m_log.WriteLine("Loaded " + item_id.ToString("N0") + " items...");
                    m_log.Progress = (double)item_id / (double)num_items;
                }

                if (OnLoadProgress != null)
                {
                    LoadArgs args = new LoadArgs(item_id);
                    OnLoadProgress(this, args);

                    if (args.Cancel)
                        break;
                }
            }
        }

        m_factory.ClearImageCache(true);
        m_factory.UpdateSourceCounts();

        if (bCreateImgMean)
        {
            if (strDBPath != strDBPathMean)
            {
                // NOTE(review): copies the mean from strDBPathMean into strDBPath —
                // verify the CopyImageMean(src, dst) argument order against its definition.
                m_factory.CopyImageMean(strDBPathMean, strDBPath);
            }
            else
            {
                if (m_log != null)
                    m_log.WriteLine("Creating image mean...");

                SimpleDatum dMean = SimpleDatum.CalculateMean(m_log, rgImg.ToArray(), new WaitHandle[] { new ManualResetEvent(false) });
                m_factory.PutRawImageMean(dMean, true);
            }
        }

        if (OnLoadProgress != null)
        {
            LoadArgs args = new LoadArgs((int)num_items);
            OnLoadProgress(this, args);
        }

        return num_items;
    }
    finally
    {
        image_file.Dispose();
        label_file.Dispose();
    }
}