/// <summary>
/// Times loading the image indexes of the MNIST training source, then times a set of
/// in-memory LINQ queries (boosted items and per-label items) run over the loaded indexes.
/// </summary>
public void TestIndexQuery()
{
    PreTest.Init();

    Log log = new Log("Test Dataset Factory");
    log.EnableTrace = true;

    string strDs = "MNIST";
    DatasetFactory factory = new DatasetFactory();
    Stopwatch timer = new Stopwatch();

    try
    {
        DatasetDescriptor dataset = factory.LoadDataset(strDs);
        factory.Open(dataset.TrainingSource.ID);

        // Time the index query itself.
        timer.Start();
        List<DbItem> rgIndexes = factory.LoadImageIndexes(false);
        timer.Stop();

        log.CHECK_EQ(rgIndexes.Count, dataset.TrainingSource.ImageCount, "The query count should match the image count!");
        factory.Close();

        string strQueryTime = timer.Elapsed.TotalMilliseconds.ToString("N5");
        log.WriteLine("Query time = " + strQueryTime + " ms.");

        // Time the in-memory profile queries.
        timer.Restart();

        int nLabelMin = int.MaxValue;
        int nLabelMax = -int.MaxValue;

        foreach (DbItem item in rgIndexes)
        {
            nLabelMin = Math.Min(item.Label, nLabelMin);
            nLabelMax = Math.Max(item.Label, nLabelMax);
        }

        // The query results are intentionally discarded - only the query time is of interest.
        List<DbItem> rgBoosted = rgIndexes.Where(p => p.Boost > 0).ToList();

        for (int nLabel = nLabelMin; nLabel <= nLabelMax; nLabel++)
        {
            List<DbItem> rgLabel = rgIndexes.Where(p => p.Label == nLabel).ToList();
        }

        timer.Stop();

        string strProfileTime = timer.Elapsed.TotalMilliseconds.ToString("N5");
        log.WriteLine("Query time (profile) = " + strProfileTime + " ms.");
    }
    finally
    {
        factory.Dispose();
    }
}
/// <summary>
/// The constructor, which opens the data source and loads its image indexes.
/// </summary>
/// <param name="random">Specifies the random number generator.</param>
/// <param name="src">Specifies the data source.</param>
/// <param name="nLoadLimit">Optionally, specifies the load limit which, when set to a value > 0, limits queries to RANDOM image selection within the load limit count (default = 0).  The limit is clamped to the image count of the source.</param>
public MasterIndexes(CryptoRandom random, SourceDescriptor src, int nLoadLimit = 0)
{
    m_random = random;
    m_src = src;
    m_factory.Open(src);

    // The load limit may never exceed the number of images in the source.
    m_nLoadLimit = (nLoadLimit > src.ImageCount) ? src.ImageCount : nLoadLimit;

    m_rgImageIdx = m_factory.LoadImageIndexes(false);

    // Only the non-null index items are handed to load().
    load(m_rgImageIdx.Where(item => item != null).ToList());
}
/// <summary>
/// Verifies that images written with PutRawImageCache can be read back with
/// GetRawImagesAt and LoadDatum.
/// </summary>
/// <remarks>
/// Twenty 10x10 single-channel images are written to the 'Test123' source.  Image i is
/// written from a shared buffer in which bytes 0..i hold their own index and all
/// remaining bytes are zero, which is what the read-back loop verifies.
/// </remarks>
/// <param name="bSaveImagesToFile">Specifies the save-images-to-file flag passed to AddSource.</param>
public void TestPutRawImage(bool bSaveImagesToFile)
{
    DatasetFactory factory = new DatasetFactory();

    try
    {
        factory.DeleteSources("Test123");
        int nSrcId = factory.AddSource("Test123", 1, 10, 10, false, 0, bSaveImagesToFile);
        factory.Open(nSrcId, 10);

        byte[] rgBytes = new byte[10 * 10];

        for (int i = 0; i < 20; i++)
        {
            // The buffer is filled progressively; ToList() snapshots it for image i.
            rgBytes[i] = (byte)i;
            SimpleDatum sd = new SimpleDatum(false, 1, 10, 10, i, DateTime.MinValue, rgBytes.ToList(), null, 0, false, i);

            factory.PutRawImageCache(i, sd);
        }

        factory.ClearImageCash(true);

        List<RawImage> rgImg = factory.GetRawImagesAt(0, 20);

        // Without this check the loop below passes vacuously when nothing is returned.
        Assert.AreEqual(20, rgImg.Count);

        for (int i = 0; i < rgImg.Count; i++)
        {
            SimpleDatum sd = factory.LoadDatum(rgImg[i]);
            bool bEncoded = false;
            byte[] rgData = sd.GetByteData(out bEncoded);

            for (int j = 0; j < 100; j++)
            {
                // Expected value first, so that failure messages read correctly.
                int nExpected = (j <= i) ? j : 0;
                Assert.AreEqual(nExpected, (int)rgData[j]);
            }
        }

        factory.DeleteSources("Test123");
    }
    finally
    {
        // Release the factory even when an assertion fails.
        factory.Close();
        factory.Dispose();
    }
}
/// <summary>
/// The ImageSet constructor.
/// </summary>
/// <remarks>
/// A new DatasetFactory is constructed from <paramref name="factory"/> and opened on the ID of
/// <paramref name="src"/>.  The image array is sized to the image count of the source, and a
/// LabelSet is added for each label of the source that has at least one image.
/// </remarks>
/// <param name="factory">Specifies the DatasetFactory.</param>
/// <param name="src">Specifies the data source.</param>
/// <param name="loadMethod">Specifies the method to use when loading the images.</param>
/// <param name="nLoadLimit">Specifies the image load limit.</param>
public ImageSet(DatasetFactory factory, SourceDescriptor src, IMAGEDB_LOAD_METHOD loadMethod, int nLoadLimit)
{
    m_factory = new DatasetFactory(factory);
    m_factory.Open(src.ID);

    m_loadMethod = loadMethod;
    m_nLoadLimit = nLoadLimit;
    m_src = new SourceDescriptor(src);
    m_imgMean = null;
    m_rgImages = new SimpleDatum[m_src.ImageCount];

    foreach (LabelDescriptor desc in src.Labels)
    {
        // Labels without any images get no label set.
        if (desc.ImageCount <= 0)
        {
            continue;
        }

        m_rgLabelSet.Add(new LabelSet(desc));
    }
}
/// <summary>
/// Loads an MNIST image file and its matching label file into the data source named
/// <paramref name="strSourceName"/>, then saves the image mean of the loaded items.
/// </summary>
/// <param name="strImagesFile">Specifies the image file (expected magic number 2051).</param>
/// <param name="strLabelsFile">Specifies the label file (expected magic number 2049).</param>
/// <param name="strSourceName">Specifies the name of the data source to add and fill.</param>
/// <exception cref="Exception">Thrown when a magic number is wrong or the item and label counts differ.</exception>
private void loadFile(string strImagesFile, string strLabelsFile, string strSourceName)
{
    Stopwatch sw = new Stopwatch();

    reportProgress(0, 0, " Source: " + strSourceName);
    reportProgress(0, 0, " loading " + strImagesFile + "...");

    // Both files are created inside the try so that the image file is still
    // released when opening the label file fails.
    BinaryFile image_file = null;
    BinaryFile label_file = null;

    try
    {
        image_file = new app.BinaryFile(strImagesFile);
        label_file = new app.BinaryFile(strLabelsFile);

        Log log = new Log("MNIST");
        log.OnWriteLine += Log_OnWriteLine;

        // Verify the files
        uint magicImg = image_file.ReadUInt32();
        uint magicLbl = label_file.ReadUInt32();

        if (magicImg != 2051)
        {
            throw new Exception("Incorrect image file magic.");
        }

        if (magicLbl != 2049)
        {
            throw new Exception("Incorrect label file magic.");
        }

        uint num_items = image_file.ReadUInt32();
        uint num_labels = label_file.ReadUInt32();

        if (num_items != num_labels)
        {
            throw new Exception("The number of items must be equal to the number of labels!");
        }

        // Add the data source to the database.
        uint rows = image_file.ReadUInt32();
        uint cols = image_file.ReadUInt32();
        int nChannels = 1;  // black and white
        int nImageSize = (int)(rows * cols);

        int nSrcId = m_factory.AddSource(strSourceName, nChannels, (int)cols, (int)rows, false, 0, true);
        m_factory.Open(nSrcId);
        m_factory.DeleteSourceData();

        // Storing to database;
        byte[] rgLabel;
        byte[] rgPixels;

        // A single datum is re-used for every item; a copy is kept for the mean calculation.
        Datum datum = new Datum(false, nChannels, (int)cols, (int)rows, -1, DateTime.MinValue, null, null, 0, false, -1);

        reportProgress(0, (int)num_items, " loading a total of " + num_items.ToString() + " items.");
        reportProgress(0, (int)num_items, " (with rows: " + rows.ToString() + ", cols: " + cols.ToString() + ")");

        sw.Start();

        List<SimpleDatum> rgImg = new List<SimpleDatum>();

        for (int i = 0; i < num_items; i++)
        {
            rgPixels = image_file.ReadBytes(nImageSize);
            rgLabel = label_file.ReadBytes(1);

            // Report progress at most once per second.
            if (sw.Elapsed.TotalMilliseconds > 1000)
            {
                reportProgress(i, (int)num_items, " loading data...");
                sw.Restart();
            }

            datum.SetData(rgPixels.ToList(), (int)rgLabel[0]);
            m_factory.PutRawImageCache(i, datum);
            rgImg.Add(new SimpleDatum(datum));
        }

        m_factory.ClearImageCashe(true);
        m_factory.UpdateSourceCounts();

        // The cancel event is never set; it only satisfies the CalculateMean signature.
        using (ManualResetEvent evtCancel = new ManualResetEvent(false))
        {
            SimpleDatum mean = SimpleDatum.CalculateMean(log, rgImg.ToArray(), new WaitHandle[] { evtCancel });
            m_factory.SaveImageMean(mean, true);
        }

        reportProgress((int)num_items, (int)num_items, " loading completed.");
    }
    finally
    {
        if (image_file != null)
        {
            image_file.Dispose();
        }

        if (label_file != null)
        {
            label_file.Dispose();
        }
    }
}
/// <summary>
/// Optionally extracts a '.tar' image archive, then loads every file listed by createFileList
/// for the extraction directory into the data source named <paramref name="strSourceName"/>.
/// </summary>
/// <param name="strImagesFile">Specifies the '.tar' file; the extraction directory is this path truncated at the last '.tar'.</param>
/// <param name="strSourceName">Specifies the name of the data source to add and fill.</param>
/// <param name="nExtractTotal">Specifies the extraction total passed to TarFile.ExtractTar.</param>
/// <param name="nExtractIdx">Specifies the running extraction index; replaced with the value returned by TarFile.ExtractTar.</param>
/// <param name="nTotal">Specifies the total used when reporting load progress.</param>
/// <param name="nIdx">Specifies the running image index; incremented once per image stored.</param>
/// <param name="log">Specifies the output log.</param>
/// <param name="bExtractFiles">Specifies whether to extract the tar file before loading.</param>
/// <param name="rgNameToLabel">Specifies the name-to-label mapping passed to addLabels and loadDatum.</param>
/// <returns>Returns <c>true</c> when loading completes and <c>false</c> when it is aborted.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="strImagesFile"/> does not contain '.tar'.</exception>
private bool loadFile(string strImagesFile, string strSourceName, int nExtractTotal, ref int nExtractIdx, int nTotal, ref int nIdx, Log log, bool bExtractFiles, Dictionary<string, int> rgNameToLabel)
{
    Stopwatch sw = new Stopwatch();

    reportProgress(nIdx, nTotal, " Source: " + strSourceName);
    reportProgress(nIdx, nTotal, " loading " + strImagesFile + "...");

    // Locate the extension with an ordinal, case-insensitive search (ToLower() is culture
    // sensitive) and fail with a clear message before touching the database.
    int nPos = strImagesFile.LastIndexOf(".tar", StringComparison.OrdinalIgnoreCase);
    if (nPos < 0)
    {
        throw new ArgumentOutOfRangeException("strImagesFile", "The images file '" + strImagesFile + "' does not have a '.tar' extension.");
    }

    bool bOpened = false;

    try
    {
        int nSrcId = m_factory.AddSource(strSourceName, 3, -1, -1, false);
        addLabels(nSrcId, rgNameToLabel);
        m_factory.Open(nSrcId, 500, Database.FORCE_LOAD.NONE, log);
        bOpened = true;

        string strPath = strImagesFile.Substring(0, nPos);

        if (!Directory.Exists(strPath))
        {
            Directory.CreateDirectory(strPath);
        }

        if (bExtractFiles)
        {
            // Extraction progress is measured in extraction units, not in loaded images.
            log.Progress = (double)nExtractIdx / nExtractTotal;
            log.WriteLine("Extracting files from '" + strImagesFile + "'...");

            // A return value of 0 is treated as an abort.
            if ((nExtractIdx = TarFile.ExtractTar(strImagesFile, strPath, m_evtCancel, log, nExtractTotal, nExtractIdx)) == 0)
            {
                log.WriteLine("Aborted.");
                return false;
            }
        }

        // Load the annotations.
        SimpleDatum.ANNOTATION_TYPE type = SimpleDatum.ANNOTATION_TYPE.BBOX;
        int nResizeHeight = 0;
        int nResizeWidth = 0;

        // Create the training database images.
        // Create the master list file.
        List<Tuple<string, string>> rgFiles = createFileList(log, strPath);

        sw.Start();

        for (int i = 0; i < rgFiles.Count; i++)
        {
            SimpleDatum datum = loadDatum(log, rgFiles[i].Item1, rgFiles[i].Item2, nResizeHeight, nResizeWidth, type, rgNameToLabel);
            m_factory.PutRawImageCache(nIdx, datum);
            nIdx++;

            if (m_evtCancel.WaitOne(0))
            {
                log.WriteLine("Aborted.");
                return false;
            }

            // Report progress at most once per second.
            if (sw.Elapsed.TotalMilliseconds > 1000)
            {
                log.Progress = (double)nIdx / nTotal;
                log.WriteLine("Loading file " + i.ToString() + " of " + rgFiles.Count.ToString() + "...");
                sw.Restart();
            }
        }

        m_factory.ClearImageCashe(true);
    }
    finally
    {
        // Close the source on every exit path (completed, aborted or failed).
        if (bOpened)
        {
            m_factory.Close();
        }
    }

    return true;
}
/// <summary>
/// Creates a dataset from a CSV file: the parsed rows are randomly split into a training and
/// a testing source, both sources are filled, and a Dataset record linking them is saved.
/// </summary>
/// <remarks>
/// Exceptions raised while creating the dataset are written to the log as 'ERROR: ...' and are
/// not re-thrown.  The CSV file setting is always saved and the progress interface is always
/// notified through OnCompleted.
/// </remarks>
/// <param name="config">Specifies the dataset configuration; the 'CSV File' and 'Testing Percentage' settings are read from it.</param>
/// <param name="progress">Specifies the progress interface notified on completion.</param>
public void Create(DatasetConfiguration config, IXDatasetCreatorProgress progress)
{
    string strCsvFile = Properties.Settings.Default.CsvFile;
    string strDsName = config.Name;
    string strTrainingSrc = config.Name + ".training";
    string strTestingSrc = config.Name + ".testing";

    m_bCancel = false;
    m_iprogress = progress;
    m_factory.DeleteSources(strTrainingSrc, strTestingSrc);

    Log log = new Log("CSV Dataset Creator");
    log.OnWriteLine += new EventHandler<LogArg>(log_OnWriteLine);

    try
    {
        //-----------------------------------------
        //  Load the schema that defines the layout
        //  of the CSV file.
        //-----------------------------------------

        m_schema = loadSchema(config.Settings);

        //-----------------------------------------
        //  Load and parse the CSV file.
        //-----------------------------------------

        DataConfigSetting dsCsvFile = config.Settings.Find("CSV File");
        if (dsCsvFile == null)
        {
            throw new Exception("The 'CSV File' setting is missing from the dataset configuration!");
        }

        strCsvFile = dsCsvFile.Value.ToString();
        if (strCsvFile.Length == 0)
        {
            throw new Exception("CSV data file name not specified!");
        }

        log.WriteLine("Loading the data file...");

        if (m_bCancel)
        {
            return;
        }

        m_parser.Load(strCsvFile, m_schema);

        //-----------------------------------------
        //  Split the data into training and testing
        //  sets.
        //-----------------------------------------

        List<DataItem> rgTraining = new List<DataItem>();
        List<DataItem> rgTesting = new List<DataItem>();

        DataConfigSetting dsPctTesting = config.Settings.Find("Testing Percentage");
        if (dsPctTesting == null)
        {
            throw new Exception("The 'Testing Percentage' setting is missing from the dataset configuration!");
        }

        double dfVal = (double)dsPctTesting.Value;
        Random random = new Random();

        // Each row goes to the testing set with probability dfVal.
        for (int i = 0; i < m_parser.Data.Count; i++)
        {
            if (random.NextDouble() > dfVal)
            {
                rgTraining.Add(m_parser.Data[i]);
            }
            else
            {
                rgTesting.Add(m_parser.Data[i]);
            }
        }

        Properties.Settings.Default.TestingPct = dfVal;

        //-----------------------------------------
        //  Create the training data source.
        //-----------------------------------------

        int nCellHorizCount = 0;
        List<int> rgDim = getImageDim(m_parser, m_schema, out nCellHorizCount);
        int nTrainSrcId = m_factory.AddSource(strTrainingSrc, rgDim[0], rgDim[1], rgDim[2], false, 0);
        m_factory.Open(nTrainSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        if (!loadData(log, m_factory, m_parser, rgTraining, rgDim, true, true))
        {
            // Do not leave the source open when the load does not complete.
            m_factory.Close();
            return;
        }

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);

        log.WriteLine("Creating the image mean...");

        // The cancel event is never set; it only satisfies the CalculateMean signature.
        using (ManualResetEvent evtCancel = new ManualResetEvent(false))
        {
            SimpleDatum dMean = SimpleDatum.CalculateMean(log, m_rgImages.ToArray(), new WaitHandle[] { evtCancel });
            m_factory.PutRawImageMean(dMean, true);
        }

        m_rgImages.Clear();
        m_factory.Close();

        //-----------------------------------------
        //  Create the testing data source.
        //-----------------------------------------

        int nTestSrcId = m_factory.AddSource(strTestingSrc, rgDim[0], rgDim[1], rgDim[2], false, 0);
        m_factory.Open(nTestSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        if (!loadData(log, m_factory, m_parser, rgTesting, rgDim, false, false))
        {
            // Do not leave the source open when the load does not complete.
            m_factory.Close();
            return;
        }

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);
        m_factory.Close();

        //-----------------------------------------
        //  Create the data set.
        //-----------------------------------------

        log.WriteLine("Done loading training and testing data.");
        int nDatasetID = 0;

        using (DNNEntities entities = EntitiesConnection.CreateEntities())
        {
            List<Source> rgSrcTraining = entities.Sources.Where(p => p.Name == strTrainingSrc).ToList();
            List<Source> rgSrcTesting = entities.Sources.Where(p => p.Name == strTestingSrc).ToList();

            if (rgSrcTraining.Count == 0)
            {
                throw new Exception("Could not find the training source '" + strTrainingSrc + "'.");
            }

            if (rgSrcTesting.Count == 0)
            {
                throw new Exception("Could not find the tesing source '" + strTestingSrc + "'.");
            }

            int nSrcTestingCount = rgSrcTesting[0].ImageCount.GetValueOrDefault();
            int nSrcTrainingCount = rgSrcTraining[0].ImageCount.GetValueOrDefault();
            int nSrcTotalCount = nSrcTestingCount + nSrcTrainingCount;

            // Guard on the divisor (the total); guarding on the training count reported 0%
            // testing data for a dataset that contains only testing data.
            double dfTestingPct = (nSrcTotalCount == 0) ? 0.0 : nSrcTestingCount / (double)nSrcTotalCount;

            Dataset ds = new Dataset();
            ds.ImageHeight = rgSrcTraining[0].ImageHeight;
            ds.ImageWidth = rgSrcTraining[0].ImageWidth;
            ds.Name = strDsName;
            ds.ImageEncoded = rgSrcTesting[0].ImageEncoded;
            ds.ImageChannels = rgSrcTesting[0].ImageChannels;
            ds.TestingPercent = (decimal)dfTestingPct;
            ds.TestingSourceID = rgSrcTesting[0].ID;
            ds.TestingTotal = rgSrcTesting[0].ImageCount;
            ds.TrainingSourceID = rgSrcTraining[0].ID;
            ds.TrainingTotal = rgSrcTraining[0].ImageCount;
            ds.DatasetCreatorID = config.ID;
            ds.DatasetGroupID = 0;
            ds.ModelGroupID = 0;

            entities.Datasets.Add(ds);
            entities.SaveChanges();

            nDatasetID = ds.ID;
        }

        m_factory.SetDatasetParameter(nDatasetID, "PixelSize", m_schema.CellSize.ToString());
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCount", m_parser.DataDescriptions.Count.ToString());
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCountHoriz", nCellHorizCount.ToString());
        // NOTE(review): the vertical count is written with the horizontal count - confirm this is intended.
        m_factory.SetDatasetParameter(nDatasetID, "AttributeCountVert", nCellHorizCount.ToString());
    }
    catch (Exception excpt)
    {
        log.WriteLine("ERROR: " + excpt.Message);
    }
    finally
    {
        Properties.Settings.Default.CsvFile = strCsvFile;
        Properties.Settings.Default.Save();

        if (m_bCancel)
        {
            log.WriteLine("ABORTED converting CSV data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(1, 1, "ABORTED!", null, true));
        }
        else
        {
            log.WriteLine("Done converting CSV data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(1, "COMPLETED."));
        }
    }
}
/// <summary>
/// Opens a data source, creates an ImageSet for it and, unless loading on demand, loads the
/// images in batches.  The image mean is loaded from the database or calculated and saved
/// when none is supplied.
/// </summary>
/// <param name="strType">Specifies the source type name used in the 'no images' error message.</param>
/// <param name="src">Specifies the data source.</param>
/// <param name="rgAbort">Specifies the handles that abort the load when signalled (checked at most once per second).</param>
/// <param name="imgMean">Specifies the image mean; when null it is loaded or calculated and returned here.</param>
/// <param name="nLastImageIdx">Returns the index following the last image index requested.</param>
/// <param name="nPadW">Optionally, specifies the width padding passed to LoadDatum (default = 0).</param>
/// <param name="nPadH">Optionally, specifies the height padding passed to LoadDatum (default = 0).</param>
/// <param name="log">Optionally, specifies the output log (default = null).</param>
/// <param name="loadMethod">Optionally, specifies the load method (default = LOAD_ALL); changed to LOAD_ALL when no image mean exists.</param>
/// <param name="nImageDbLoadLimit">Optionally, specifies the load limit, where a value less than or equal to 0 loads the full image count (default = 0).</param>
/// <param name="nImageDbLoadLimitStartIdx">Optionally, specifies the start index of the load (default = 0).</param>
/// <param name="bLoadNext">Optionally, specifies to advance the last image index by the load limit when loading on demand (default = false).</param>
/// <returns>Returns the loaded ImageSet, or null when the load is aborted.</returns>
/// <exception cref="Exception">Thrown when the source has no images.</exception>
private ImageSet loadImageset(string strType, SourceDescriptor src, WaitHandle[] rgAbort, ref SimpleDatum imgMean, out int nLastImageIdx, int nPadW = 0, int nPadH = 0, Log log = null, IMAGEDB_LOAD_METHOD loadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL, int nImageDbLoadLimit = 0, int nImageDbLoadLimitStartIdx = 0, bool bLoadNext = false)
{
    try
    {
        RawImageMean imgMeanRaw = null;

        m_factory.Open(src);
        nLastImageIdx = nImageDbLoadLimitStartIdx;

        // Anything other than LOAD_ALL needs an existing image mean; fall back when none exists.
        if (loadMethod != IMAGEDB_LOAD_METHOD.LOAD_ALL)
        {
            if (imgMean == null)
            {
                imgMeanRaw = m_factory.GetRawImageMean();

                if (imgMeanRaw == null)
                {
                    if (log != null)
                    {
                        log.WriteLine("WARNING: No image mean exists in the database, changing image database load from " + loadMethod.ToString() + " to " + IMAGEDB_LOAD_METHOD.LOAD_ALL.ToString());
                    }

                    loadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL;
                }
            }
        }

        int nCount = src.ImageCount;
        if (nCount == 0)
        {
            throw new Exception("Could not find any images with " + strType + " Source = '" + src.Name + "'.");
        }

        if (log != null)
        {
            log.WriteLine("Loading '" + src.Name + "' - " + nCount.ToString("N0") + " images.");
        }

        ImageSet imgset = new ImageSet(m_factory, src, loadMethod, nImageDbLoadLimit);

        if (OnCalculateImageMean != null)
        {
            imgset.OnCalculateImageMean += OnCalculateImageMean;
        }

        if (loadMethod != IMAGEDB_LOAD_METHOD.LOAD_ON_DEMAND)
        {
            int nBatchSize = 20000;
            Stopwatch sw = new Stopwatch();

            // Larger images are loaded in smaller batches.
            int nImageSize = src.ImageHeight * src.ImageWidth;
            if (nImageSize > 60000)
            {
                nBatchSize = 5000;
            }
            else if (nImageSize > 20000)
            {
                nBatchSize = 7500;
            }
            else if (nImageSize > 3000)
            {
                nBatchSize = 10000;
            }

            if (nImageDbLoadLimit <= 0)
            {
                nImageDbLoadLimit = nCount;
            }

            List<int> rgIdx = getIndexList(nImageDbLoadLimitStartIdx, nImageDbLoadLimit);
            int nIdx = 0;

            sw.Start();

            while (nIdx < rgIdx.Count)
            {
                int nImageIdx = rgIdx[nIdx];
                int nImageCount = Math.Min(rgIdx.Count - nIdx, nBatchSize);

                List<RawImage> rgImg = m_factory.GetRawImagesAt(nImageIdx, nImageCount);

                for (int j = 0; j < rgImg.Count; j++)
                {
                    SimpleDatum sd1 = m_factory.LoadDatum(rgImg[j], nPadW, nPadH);
                    imgset.Add(nIdx + j, sd1);

                    // Report progress and check for an abort at most once per second.
                    if (sw.Elapsed.TotalMilliseconds > 1000)
                    {
                        if (log != null)
                        {
                            double dfPct = (double)(nIdx + j) / (double)nCount;
                            log.Progress = dfPct;
                            log.WriteLine("image loading at " + dfPct.ToString("P") + "...");
                        }

                        sw.Restart();

                        if (EventWaitHandle.WaitAny(rgAbort, 0) != EventWaitHandle.WaitTimeout)
                        {
                            return null;
                        }
                    }
                }

                nIdx += rgImg.Count;

                // An empty batch makes no progress, so always stop here; continuing would
                // request the same index forever.
                if (rgImg.Count == 0)
                {
                    if (loadMethod == IMAGEDB_LOAD_METHOD.LOAD_ALL && nIdx < nCount && log != null)
                    {
                        log.WriteLine("WARNING: Loaded " + nIdx.ToString("N0") + " images, yet " + (nCount - nIdx).ToString("N0") + " images are unaccounted for.  You may need to reindex the dataset.");
                    }

                    break;
                }
            }

            if (log != null)
            {
                log.Progress = 0;
            }

            if (rgIdx.Count > 0)
            {
                nLastImageIdx = rgIdx[rgIdx.Count - 1] + 1;
            }
        }
        else if (bLoadNext)
        {
            nLastImageIdx += nImageDbLoadLimit;
        }

        if (imgMean == null)
        {
            if (imgMeanRaw == null)
            {
                imgMeanRaw = m_factory.GetRawImageMean();
            }

            if (imgMeanRaw != null)
            {
                imgMean = m_factory.LoadDatum(imgMeanRaw, nPadW, nPadH);
            }
            else
            {
                if (log != null)
                {
                    log.WriteLine("Calculating mean...");
                }

                imgMean = imgset.GetImageMean(log, rgAbort);
                m_factory.PutRawImageMean(imgMean, true);
            }
        }

        if (imgMean != null)
        {
            imgset.SetImageMean(imgMean);
        }

        imgset.CompleteLoad(nLastImageIdx);

        return imgset;
    }
    finally
    {
        m_factory.Close();
    }
}
/// <summary>
/// Loads one CIFAR batch file of 10,000 records, each a 1-byte label followed by 3072 image
/// bytes, into the data source named <paramref name="strSourceName"/>.
/// </summary>
/// <param name="strImagesFile">Specifies the batch file to load.</param>
/// <param name="strSourceName">Specifies the name of the data source to add and fill.</param>
/// <param name="nTotal">Specifies the total image count; the source counts are updated once nIdx reaches it.</param>
/// <param name="nIdx">Specifies the running image index; incremented once per image stored.  Existing source data is deleted when it is 0 on entry.</param>
/// <exception cref="EndOfStreamException">Thrown when the file ends before all records are read.</exception>
private void loadFile(string strImagesFile, string strSourceName, int nTotal, ref int nIdx)
{
    const int nRecordsPerFile = 10000;
    const int nImageByteCount = 3072;

    Stopwatch sw = new Stopwatch();
    int nStart = nIdx;

    reportProgress(nIdx, nTotal, " Source: " + strSourceName);
    reportProgress(nIdx, nTotal, " loading " + strImagesFile + "...");

    FileStream fs = null;

    try
    {
        fs = new FileStream(strImagesFile, FileMode.Open, FileAccess.Read);
        using (BinaryReader br = new BinaryReader(fs))
        {
            // The reader now owns the stream; clearing fs avoids a second dispose in the finally.
            fs = null;

            int nSrcId = m_factory.AddSource(strSourceName, 3, 32, 32, false, 0, true);

            m_factory.Open(nSrcId);

            if (nIdx == 0)
            {
                m_factory.DeleteSourceData();
            }

            sw.Start();

            for (int i = 0; i < nRecordsPerFile; i++)
            {
                int nLabel = (int)br.ReadByte();
                byte[] rgImgBytes = br.ReadBytes(nImageByteCount);

                // BinaryReader.ReadBytes returns a short array at the end of the stream.
                if (rgImgBytes.Length != nImageByteCount)
                {
                    throw new EndOfStreamException("The file '" + strImagesFile + "' ended unexpectedly while reading image " + i.ToString() + ".");
                }

                Datum d;

                // The bitmap is only needed to build the datum, so release it right away
                // instead of leaking one GDI+ bitmap per image.
                using (Bitmap img = createImage(rgImgBytes))
                {
                    d = ImageData.GetImageData(img, 3, false, nLabel);
                }

                m_factory.PutRawImageCache(nIdx, d);
                m_rgImg.Add(new SimpleDatum(d));

                nIdx++;

                // Report progress at most once per second.
                if (sw.ElapsedMilliseconds > 1000)
                {
                    reportProgress(nStart + i, nTotal, "loading " + strImagesFile + "   " + i.ToString("N0") + " of 10,000...");
                    sw.Restart();
                }
            }

            m_factory.ClearImageCash(true);

            if (nIdx == nTotal)
            {
                m_factory.UpdateSourceCounts();
            }
        }
    }
    finally
    {
        if (fs != null)
        {
            fs.Dispose();
        }
    }
}
/// <summary>
/// The dataLoadThread is responsible for loading the data source images in the background.
/// </summary>
/// <remarks>
/// Indexes are taken from the load sequence and collected into batches.  A batch is loaded
/// once it reaches the batch size or the sequence is exhausted.  The running event is set on
/// entry; on exit the factory is closed and disposed, the running event is reset and the
/// done event is set.
/// </remarks>
private void dataLoadThread()
{
    m_evtRunning.Set();
    DatasetFactory factory = new DatasetFactory(m_factory);
    int? nNextIdx = m_loadSequence.GetNext();
    Stopwatch sw = new Stopwatch();

    if (m_refreshManager != null)
    {
        m_refreshManager.Reset();
    }

    try
    {
        sw.Start();

        List<int> rgIdxBatch = new List<int>();
        int nBatchSize = getBatchSize(m_src);

        if (m_nLoadedCount > 0)
        {
            throw new Exception("The loaded count is > 0!");
        }

        factory.Open(m_src);

        // The log is optional (it is null-checked below), so guard every use of it.
        if (m_log != null)
        {
            m_log.WriteLine(m_src.Name + " loading " + m_loadSequence.Count.ToString("N0") + " items...");
        }

        while (nNextIdx.HasValue || rgIdxBatch.Count > 0)
        {
            if (nNextIdx.HasValue)
            {
                rgIdxBatch.Add(nNextIdx.Value);
            }

            // Load when the batch is full, or when the sequence has no more indexes.
            if (rgIdxBatch.Count >= nBatchSize || !nNextIdx.HasValue)
            {
                List<RawImage> rgImg;

                if (m_refreshManager == null)
                {
                    rgImg = factory.GetRawImagesAt(rgIdxBatch[0], rgIdxBatch.Count);
                }
                else
                {
                    rgImg = factory.GetRawImagesAt(rgIdxBatch, m_evtCancel);
                }

                if (rgImg == null)
                {
                    break;
                }

                for (int j = 0; j < rgImg.Count; j++)
                {
                    SimpleDatum sd = factory.LoadDatum(rgImg[j]);

                    if (m_refreshManager != null)
                    {
                        m_refreshManager.AddLoaded(sd);
                    }

                    m_rgImages[m_nLoadedCount] = sd;
                    m_nLoadedCount++;

                    // Report progress and check for an abort at most once per second.
                    if (sw.Elapsed.TotalMilliseconds > 1000)
                    {
                        if (m_log != null && !m_bSilent)
                        {
                            double dfPct = m_nLoadedCount / (double)m_rgImages.Length;
                            m_log.Progress = dfPct;
                            m_log.WriteLine("Loading '" + m_src.Name + "' at " + dfPct.ToString("P") + " (" + m_nLoadedCount.ToString("N0") + " of " + m_rgImages.Length.ToString("N0") + ")...");
                        }

                        int nWait = WaitHandle.WaitAny(m_rgAbort.ToArray(), 0);
                        if (nWait != WaitHandle.WaitTimeout)
                        {
                            return;
                        }

                        sw.Restart();
                    }
                }

                rgIdxBatch = new List<int>();
            }

            nNextIdx = m_loadSequence.GetNext();
        }

        // A non-empty batch can only remain when the loop was left early.
        if (rgIdxBatch.Count > 0)
        {
            if (m_log != null)
            {
                m_log.FAIL("Not all images were loaded!");
            }
            else
            {
                throw new Exception("Not all images were loaded!");
            }
        }
    }
    finally
    {
        factory.Close();
        factory.Dispose();
        m_evtRunning.Reset();
        m_evtDone.Set();
    }
}
/// <summary>
/// The ImageSetBase constructor.
/// </summary>
/// <remarks>
/// A new DatasetFactory is constructed from <paramref name="factory"/> and opened on the ID of
/// <paramref name="src"/>; a new SourceDescriptor is constructed from <paramref name="src"/>.
/// </remarks>
/// <param name="factory">Specifies the DatasetFactory.</param>
/// <param name="src">Specifies the data source.</param>
public ImageSetBase(DatasetFactory factory, SourceDescriptor src)
{
    m_factory = new DatasetFactory(factory);
    m_src = new SourceDescriptor(src);

    m_factory.Open(src.ID);
}
/// <summary>
/// Creates the CIFAR dataset: the five training batch files are loaded into the training
/// source, the testing batch file into the testing source, and a Dataset record linking the
/// two sources is saved.
/// </summary>
/// <remarks>
/// Exceptions raised while creating the dataset are written to the log as 'ERROR: ...' and are
/// not re-thrown.  The data file settings are always saved and the progress interface is
/// always notified through OnCompleted.
/// </remarks>
/// <param name="config">Specifies the dataset configuration; the 'Training Data File 1'..'Training Data File 5' and 'Testing Data File' settings are read from it.</param>
/// <param name="progress">Specifies the progress interface notified on completion.</param>
public void Create(DatasetConfiguration config, IXDatasetCreatorProgress progress)
{
    // Index i holds training data file #(i + 1).
    string[] rgstrTrainingBatchFile = new string[]
    {
        Properties.Settings.Default.TrainingDataFile1,
        Properties.Settings.Default.TrainingDataFile2,
        Properties.Settings.Default.TrainingDataFile3,
        Properties.Settings.Default.TrainingDataFile4,
        Properties.Settings.Default.TrainingDataFile5
    };
    string strTestingBatchFile = Properties.Settings.Default.TestingDataFile;
    string strDsName = config.Name;
    string strTrainingSrc = config.Name + ".training";
    string strTestingSrc = config.Name + ".testing";
    int nIdx = 0;
    int nTotal = 50000;

    m_bCancel = false;
    m_iprogress = progress;
    m_factory.DeleteSources(strTrainingSrc, strTestingSrc);

    Log log = new Log("CIFAR Dataset Creator");
    log.OnWriteLine += new EventHandler<LogArg>(log_OnWriteLine);

    try
    {
        DataConfigSetting[] rgdsTrainingDataFile = new DataConfigSetting[rgstrTrainingBatchFile.Length];

        for (int i = 0; i < rgdsTrainingDataFile.Length; i++)
        {
            rgdsTrainingDataFile[i] = config.Settings.Find("Training Data File " + (i + 1).ToString());
        }

        DataConfigSetting dsTestingDataFile = config.Settings.Find("Testing Data File");

        // Validate the file names in order so that the first missing file is reported.
        for (int i = 0; i < rgdsTrainingDataFile.Length; i++)
        {
            string strFileNum = (i + 1).ToString();

            if (rgdsTrainingDataFile[i] == null)
            {
                throw new Exception("The 'Training Data File " + strFileNum + "' setting is missing from the dataset configuration!");
            }

            rgstrTrainingBatchFile[i] = rgdsTrainingDataFile[i].Value.ToString();
            if (rgstrTrainingBatchFile[i].Length == 0)
            {
                throw new Exception("Training data file #" + strFileNum + " name not specified!");
            }
        }

        if (dsTestingDataFile == null)
        {
            throw new Exception("The 'Testing Data File' setting is missing from the dataset configuration!");
        }

        strTestingBatchFile = dsTestingDataFile.Value.ToString();
        if (strTestingBatchFile.Length == 0)
        {
            throw new Exception("Testing data file name not specified!");
        }

        log.WriteLine("Loading the data files...");

        if (m_bCancel)
        {
            return;
        }

        //-----------------------------------------
        //  Create the training data source.
        //-----------------------------------------

        int nTrainSrcId = m_factory.AddSource(strTrainingSrc, 3, 32, 32, false, 0);
        m_factory.Open(nTrainSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        for (int i = 0; i < rgstrTrainingBatchFile.Length; i++)
        {
            if (!loadFile(log, rgdsTrainingDataFile[i].Name, rgstrTrainingBatchFile[i], m_factory, nTotal, true, ref nIdx))
            {
                // Do not leave the source open when the load does not complete.
                m_factory.Close();
                return;
            }
        }

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);

        log.WriteLine("Creating the image mean...");

        // The cancel event is never set; it only satisfies the CalculateMean signature.
        using (ManualResetEvent evtCancel = new ManualResetEvent(false))
        {
            SimpleDatum dMean = SimpleDatum.CalculateMean(log, m_rgImages.ToArray(), new WaitHandle[] { evtCancel });
            m_factory.PutRawImageMean(dMean, true);
        }

        m_rgImages.Clear();
        m_factory.Close();

        //-----------------------------------------
        //  Create the testing data source.
        //-----------------------------------------

        int nTestSrcId = m_factory.AddSource(strTestingSrc, 3, 32, 32, false, 0);
        m_factory.Open(nTestSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.

        log.WriteLine("Deleting existing data from '" + m_factory.OpenSource.Name + "'.");
        m_factory.DeleteSourceData();

        nIdx = 0;
        nTotal = 10000;

        if (!loadFile(log, dsTestingDataFile.Name, strTestingBatchFile, m_factory, nTotal, false, ref nIdx))
        {
            // Do not leave the source open when the load does not complete.
            m_factory.Close();
            return;
        }

        // The testing source uses the image mean of the training source.
        m_factory.CopyImageMean(strTrainingSrc, strTestingSrc);

        m_factory.UpdateSourceCounts();
        updateLabels(m_factory);
        m_factory.Close();

        log.WriteLine("Done loading training and testing data.");

        //-----------------------------------------
        //  Create the data set.
        //-----------------------------------------

        using (DNNEntities entities = EntitiesConnection.CreateEntities())
        {
            List<Source> rgSrcTraining = entities.Sources.Where(p => p.Name == strTrainingSrc).ToList();
            List<Source> rgSrcTesting = entities.Sources.Where(p => p.Name == strTestingSrc).ToList();

            if (rgSrcTraining.Count == 0)
            {
                throw new Exception("Could not find the training source '" + strTrainingSrc + "'.");
            }

            if (rgSrcTesting.Count == 0)
            {
                throw new Exception("Could not find the tesing source '" + strTestingSrc + "'.");
            }

            int nSrcTestingCount = rgSrcTesting[0].ImageCount.GetValueOrDefault();
            int nSrcTrainingCount = rgSrcTraining[0].ImageCount.GetValueOrDefault();
            int nSrcTotalCount = nSrcTestingCount + nSrcTrainingCount;

            // Guard on the divisor (the total); guarding on the training count reported 0%
            // testing data for a dataset that contains only testing data.
            double dfTestingPct = (nSrcTotalCount == 0) ? 0.0 : nSrcTestingCount / (double)nSrcTotalCount;

            Dataset ds = new Dataset();
            ds.ImageHeight = rgSrcTraining[0].ImageHeight;
            ds.ImageWidth = rgSrcTraining[0].ImageWidth;
            ds.Name = strDsName;
            ds.ImageEncoded = rgSrcTesting[0].ImageEncoded;
            ds.ImageChannels = rgSrcTesting[0].ImageChannels;
            ds.TestingPercent = (decimal)dfTestingPct;
            ds.TestingSourceID = rgSrcTesting[0].ID;
            ds.TestingTotal = rgSrcTesting[0].ImageCount;
            ds.TrainingSourceID = rgSrcTraining[0].ID;
            ds.TrainingTotal = rgSrcTraining[0].ImageCount;
            ds.DatasetCreatorID = config.ID;
            ds.DatasetGroupID = 0;
            ds.ModelGroupID = 0;

            entities.Datasets.Add(ds);
            entities.SaveChanges();
        }
    }
    catch (Exception excpt)
    {
        log.WriteLine("ERROR: " + excpt.Message);
    }
    finally
    {
        Properties.Settings.Default.TrainingDataFile1 = rgstrTrainingBatchFile[0];
        Properties.Settings.Default.TrainingDataFile2 = rgstrTrainingBatchFile[1];
        Properties.Settings.Default.TrainingDataFile3 = rgstrTrainingBatchFile[2];
        Properties.Settings.Default.TrainingDataFile4 = rgstrTrainingBatchFile[3];
        Properties.Settings.Default.TrainingDataFile5 = rgstrTrainingBatchFile[4];
        Properties.Settings.Default.TestingDataFile = strTestingBatchFile;
        Properties.Settings.Default.Save();

        if (m_bCancel)
        {
            log.WriteLine("ABORTED converting CIFAR data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(nIdx, nTotal, "ABORTED!", null, true));
        }
        else
        {
            log.WriteLine("Done converting CIFAR data files.");
            m_iprogress.OnCompleted(new CreateProgressArgs(1, "COMPLETED."));
        }
    }
}
/// <summary>
/// Stores a list of in-memory (pixels, label) items either in the data source named
/// <paramref name="strSourceName"/> (when a factory is given) or as exported files (when
/// only an export path is given).
/// </summary>
/// <param name="factory">Specifies the DatasetFactory, or null to export to file instead.</param>
/// <param name="rgData">Specifies the items, each holding the pixel bytes and the label.</param>
/// <param name="nC">Specifies the channel count.</param>
/// <param name="nH">Specifies the height (reported as rows).</param>
/// <param name="nW">Specifies the width (reported as cols).</param>
/// <param name="strSourceName">Specifies the source name, which is also appended to the export path.</param>
/// <param name="strExportPath">Specifies the export base path, or null when not exporting.</param>
/// <returns>Returns <c>true</c> when all items are processed and <c>false</c> when cancelled.</returns>
private bool loadFile(DatasetFactory factory, List<Tuple<byte[], int>> rgData, int nC, int nH, int nW, string strSourceName, string strExportPath)
{
    if (strExportPath != null)
    {
        strExportPath += strSourceName;

        if (!Directory.Exists(strExportPath))
        {
            Directory.CreateDirectory(strExportPath);
        }
    }

    Stopwatch sw = new Stopwatch();

    reportProgress(0, 0, " Source: " + strSourceName);

    StreamWriter swFileDesc = null;

    try
    {
        if (factory != null)
        {
            int nSrcId = factory.AddSource(strSourceName, nC, nW, nH, false, 0, true);
            factory.Open(nSrcId, 500, Database.FORCE_LOAD.NONE, m_log);
            factory.DeleteSourceData();
        }

        // Storing to database;
        int nLabel;
        byte[] rgPixels;

        // A single datum is re-used for every item.
        Datum datum = new Datum(false, nC, nW, nH, -1, DateTime.MinValue, new List<byte>(), 0, false, -1);
        string strAction = (m_param.ExportToFile) ? "exporing" : "loading";

        reportProgress(0, rgData.Count, " " + strAction + " a total of " + rgData.Count.ToString() + " items.");
        reportProgress(0, rgData.Count, " (with rows: " + nH.ToString() + ", cols: " + nW.ToString() + ")");

        sw.Start();

        List<SimpleDatum> rgImg = new List<SimpleDatum>();

        if (m_param.ExportToFile && strExportPath != null)
        {
            // Create (or truncate) the file list; File.OpenWrite would leave stale text
            // behind when an older, longer list already exists.
            swFileDesc = new StreamWriter(strExportPath + "\\file_list.txt", false);
        }

        for (int i = 0; i < rgData.Count; i++)
        {
            rgPixels = rgData[i].Item1;
            nLabel = rgData[i].Item2;

            // Report progress at most once per second.
            if (sw.Elapsed.TotalMilliseconds > 1000)
            {
                reportProgress(i, rgData.Count, " " + strAction + " data...");
                sw.Restart();
            }

            datum.SetData(rgPixels, nLabel);

            if (factory != null)
            {
                factory.PutRawImageCache(i, datum, 5);

                // Copies are only needed for the image mean, which is only saved to a factory.
                rgImg.Add(new SimpleDatum(datum));
            }
            else if (strExportPath != null)
            {
                saveToFile(strExportPath, i, datum, swFileDesc);
            }

            if (m_evtCancel.WaitOne(0))
            {
                return false;
            }
        }

        if (factory != null)
        {
            factory.ClearImageCache(true);
            factory.UpdateSourceCounts();

            // The cancel event is never set; it only satisfies the CalculateMean signature.
            using (ManualResetEvent evtCancel = new ManualResetEvent(false))
            {
                SimpleDatum mean = SimpleDatum.CalculateMean(m_log, rgImg.ToArray(), new WaitHandle[] { evtCancel });
                factory.SaveImageMean(mean, true);
            }
        }

        reportProgress(rgData.Count, rgData.Count, " " + strAction + " completed.");
    }
    finally
    {
        // Flushes and closes the file list on every exit path, including cancel and failure.
        if (swFileDesc != null)
        {
            swFileDesc.Dispose();
        }
    }

    return true;
}
/// <summary>
/// Converts an MNIST image/label file pair into the data source named
/// <paramref name="strDBPath"/> and optionally creates or copies the image mean.
/// </summary>
/// <param name="strImageFile">Specifies the image file (expected magic number 2051); '.gz' files are expanded first.</param>
/// <param name="strLabelFile">Specifies the label file (expected magic number 2049); '.gz' files are expanded first.</param>
/// <param name="strDBPath">Specifies the name of the data source to add and fill.</param>
/// <param name="strDBPathMean">Specifies the source whose image mean is used; when it differs from strDBPath its mean is copied, otherwise the mean is calculated.</param>
/// <param name="bCreateImgMean">Specifies whether to create (or copy) the image mean.</param>
/// <param name="bGetItemCountOnly">Optionally, specifies to return the item count without loading any data (default = false).</param>
/// <param name="nChannels">Optionally, specifies the channel count; with 3 channels the pixel data is repeated three times (default = 1).</param>
/// <returns>Returns the number of items declared in the image file header.</returns>
/// <exception cref="Exception">Thrown when a magic number is wrong or the item and label counts differ.</exception>
public uint ConvertData(string strImageFile, string strLabelFile, string strDBPath, string strDBPathMean, bool bCreateImgMean, bool bGetItemCountOnly = false, int nChannels = 1)
{
    List<SimpleDatum> rgImg = new List<SimpleDatum>();

    // Compare the extension ordinally; ToLower() is culture sensitive.
    if (string.Equals(Path.GetExtension(strImageFile), ".gz", StringComparison.OrdinalIgnoreCase))
    {
        if (m_log != null)
        {
            m_log.WriteLine("Unpacking '" + strImageFile + "'...");
        }

        strImageFile = expandFile(strImageFile);
    }

    if (string.Equals(Path.GetExtension(strLabelFile), ".gz", StringComparison.OrdinalIgnoreCase))
    {
        if (m_log != null)
        {
            m_log.WriteLine("Unpacking '" + strLabelFile + "'...");
        }

        strLabelFile = expandFile(strLabelFile);
    }

    // Both files are created inside the try so that the image file is still
    // released when opening the label file fails.
    BinaryFile image_file = null;
    BinaryFile label_file = null;

    try
    {
        image_file = new BinaryFile(strImageFile);
        label_file = new BinaryFile(strLabelFile);

        uint magicImg = image_file.ReadUInt32();
        uint magicLbl = label_file.ReadUInt32();

        if (magicImg != 2051)
        {
            if (m_log != null)
            {
                m_log.FAIL("Incorrect image file magic.");
            }

            if (OnLoadError != null)
            {
                OnLoadError(this, new LoadErrorArgs("Incorrect image file magic."));
            }

            // Never continue reading a file of the wrong format, even without a log.
            throw new Exception("Incorrect image file magic.");
        }

        if (magicLbl != 2049)
        {
            if (m_log != null)
            {
                m_log.FAIL("Incorrect label file magic.");
            }

            if (OnLoadError != null)
            {
                OnLoadError(this, new LoadErrorArgs("Incorrect label file magic."));
            }

            // Never continue reading a file of the wrong format, even without a log.
            throw new Exception("Incorrect label file magic.");
        }

        uint num_items = image_file.ReadUInt32();
        uint num_labels = label_file.ReadUInt32();

        if (num_items != num_labels)
        {
            if (m_log != null)
            {
                m_log.FAIL("The number of items must equal the number of labels.");
            }

            throw new Exception("The number of items must equal the number of labels." + Environment.NewLine + " Label File: '" + strLabelFile + "'" + Environment.NewLine + " Image File: '" + strImageFile + "'.");
        }

        if (bGetItemCountOnly)
        {
            return num_items;
        }

        uint rows = image_file.ReadUInt32();
        uint cols = image_file.ReadUInt32();
        int nImageSize = (int)(rows * cols);

        int nSrcId = m_factory.AddSource(strDBPath, nChannels, (int)cols, (int)rows, false, 0, true);
        m_factory.Open(nSrcId, 500, Database.FORCE_LOAD.FROM_FILE); // use file based data.
        m_factory.DeleteSourceData();

        // Storing to db
        byte[] rgLabel;
        byte[] rgPixels;

        Datum datum = new Datum(false, nChannels, (int)cols, (int)rows);

        if (m_log != null)
        {
            m_log.WriteHeader("LOADING " + strDBPath + " items.");
            m_log.WriteLine("A total of " + num_items.ToString() + " items.");
            m_log.WriteLine("Rows: " + rows.ToString() + " Cols: " + cols.ToString());
        }

        if (OnLoadStart != null)
        {
            OnLoadStart(this, new LoadStartArgs((int)num_items));
        }

        for (int item_id = 0; item_id < num_items; item_id++)
        {
            rgPixels = image_file.ReadBytes(nImageSize);
            rgLabel = label_file.ReadBytes(1);

            List<byte> rgData = new List<byte>(rgPixels);

            // For three channels the single-channel pixels are repeated in each channel.
            if (nChannels == 3)
            {
                rgData.AddRange(new List<byte>(rgPixels));
                rgData.AddRange(new List<byte>(rgPixels));
            }

            datum.SetData(rgData, (int)rgLabel[0]);

            if (m_bmpTargetOverlay != null)
            {
                datum = createTargetOverlay(datum);
            }

            m_factory.PutRawImageCache(item_id, datum);

            if (bCreateImgMean)
            {
                rgImg.Add(new SimpleDatum(datum));
            }

            if ((item_id % 1000) == 0)
            {
                if (m_log != null)
                {
                    // "N0": an item count has no decimal places.
                    m_log.WriteLine("Loaded " + item_id.ToString("N0") + " items...");
                    m_log.Progress = (double)item_id / (double)num_items;
                }

                if (OnLoadProgress != null)
                {
                    LoadArgs args = new LoadArgs(item_id);
                    OnLoadProgress(this, args);

                    // NOTE(review): a cancel still flushes the partial data below and returns
                    // the full item count - confirm this is intended.
                    if (args.Cancel)
                    {
                        break;
                    }
                }
            }
        }

        m_factory.ClearImageCache(true);
        m_factory.UpdateSourceCounts();

        if (bCreateImgMean)
        {
            if (strDBPath != strDBPathMean)
            {
                m_factory.CopyImageMean(strDBPathMean, strDBPath);
            }
            else
            {
                if (m_log != null)
                {
                    m_log.WriteLine("Creating image mean...");
                }

                // The cancel event is never set; it only satisfies the CalculateMean signature.
                using (ManualResetEvent evtCancel = new ManualResetEvent(false))
                {
                    SimpleDatum dMean = SimpleDatum.CalculateMean(m_log, rgImg.ToArray(), new WaitHandle[] { evtCancel });
                    m_factory.PutRawImageMean(dMean, true);
                }
            }
        }

        if (OnLoadProgress != null)
        {
            LoadArgs args = new LoadArgs((int)num_items);
            OnLoadProgress(this, args);
        }

        return num_items;
    }
    finally
    {
        if (image_file != null)
        {
            image_file.Dispose();
        }

        if (label_file != null)
        {
            label_file.Dispose();
        }
    }
}
/// <summary>
/// Loads an MNIST image file and its matching label file, storing the items either in the
/// data source named <paramref name="strSourceName"/> (when a factory is given) or as
/// exported files (when only an export path is given).
/// </summary>
/// <param name="factory">Specifies the DatasetFactory, or null to export to file instead.</param>
/// <param name="strImagesFile">Specifies the image file (expected magic number 2051).</param>
/// <param name="strLabelsFile">Specifies the label file (expected magic number 2049).</param>
/// <param name="strSourceName">Specifies the source name, which is also appended to the export path.</param>
/// <param name="strExportPath">Specifies the export base path, or null when not exporting.</param>
/// <returns>Returns <c>true</c> when all items are processed and <c>false</c> when cancelled.</returns>
/// <exception cref="Exception">Thrown when a magic number is wrong or the item and label counts differ.</exception>
private bool loadFile(DatasetFactory factory, string strImagesFile, string strLabelsFile, string strSourceName, string strExportPath)
{
    if (strExportPath != null)
    {
        strExportPath += strSourceName;

        if (!Directory.Exists(strExportPath))
        {
            Directory.CreateDirectory(strExportPath);
        }
    }

    Stopwatch sw = new Stopwatch();

    reportProgress(0, 0, " Source: " + strSourceName);
    reportProgress(0, 0, " loading " + strImagesFile + "...");

    // All disposables are created inside the try so that each one is released on every
    // exit path (completed, cancelled or failed).
    BinaryFile image_file = null;
    BinaryFile label_file = null;
    StreamWriter swFileDesc = null;

    try
    {
        image_file = new BinaryFile(strImagesFile);
        label_file = new BinaryFile(strLabelsFile);

        // Verify the files
        uint magicImg = image_file.ReadUInt32();
        uint magicLbl = label_file.ReadUInt32();

        if (magicImg != 2051)
        {
            throw new Exception("Incorrect image file magic.");
        }

        if (magicLbl != 2049)
        {
            throw new Exception("Incorrect label file magic.");
        }

        uint num_items = image_file.ReadUInt32();
        uint num_labels = label_file.ReadUInt32();

        if (num_items != num_labels)
        {
            throw new Exception("The number of items must be equal to the number of labels!");
        }

        // Add the data source to the database.
        uint rows = image_file.ReadUInt32();
        uint cols = image_file.ReadUInt32();
        int nChannels = 1;  // black and white
        int nImageSize = (int)(rows * cols);

        if (factory != null)
        {
            int nSrcId = factory.AddSource(strSourceName, nChannels, (int)cols, (int)rows, false, 0, true);
            factory.Open(nSrcId, 500, Database.FORCE_LOAD.NONE, m_log);
            factory.DeleteSourceData();
        }

        // Storing to database;
        byte[] rgLabel;
        byte[] rgPixels;

        // A single datum is re-used for every item.
        Datum datum = new Datum(false, nChannels, (int)cols, (int)rows, -1, DateTime.MinValue, new List<byte>(), 0, false, -1);
        string strAction = (m_param.ExportToFile) ? "exporing" : "loading";

        reportProgress(0, (int)num_items, " " + strAction + " a total of " + num_items.ToString() + " items.");
        reportProgress(0, (int)num_items, " (with rows: " + rows.ToString() + ", cols: " + cols.ToString() + ")");

        sw.Start();

        List<SimpleDatum> rgImg = new List<SimpleDatum>();

        if (m_param.ExportToFile && strExportPath != null)
        {
            // Create (or truncate) the file list; File.OpenWrite would leave stale text
            // behind when an older, longer list already exists.
            swFileDesc = new StreamWriter(strExportPath + "\\file_list.txt", false);
        }

        for (int i = 0; i < num_items; i++)
        {
            rgPixels = image_file.ReadBytes(nImageSize);
            rgLabel = label_file.ReadBytes(1);

            // Report progress at most once per second.
            if (sw.Elapsed.TotalMilliseconds > 1000)
            {
                reportProgress(i, (int)num_items, " " + strAction + " data...");
                sw.Restart();
            }

            datum.SetData(rgPixels.ToList(), (int)rgLabel[0]);

            if (factory != null)
            {
                factory.PutRawImageCache(i, datum);

                // Copies are only needed for the image mean, which is only saved to a factory.
                rgImg.Add(new SimpleDatum(datum));
            }
            else if (strExportPath != null)
            {
                saveToFile(strExportPath, i, datum, swFileDesc);
            }

            if (m_evtCancel.WaitOne(0))
            {
                return false;
            }
        }

        if (factory != null)
        {
            factory.ClearImageCashe(true);
            factory.UpdateSourceCounts();

            // The cancel event is never set; it only satisfies the CalculateMean signature.
            using (ManualResetEvent evtCancel = new ManualResetEvent(false))
            {
                SimpleDatum mean = SimpleDatum.CalculateMean(m_log, rgImg.ToArray(), new WaitHandle[] { evtCancel });
                factory.SaveImageMean(mean, true);
            }
        }

        reportProgress((int)num_items, (int)num_items, " " + strAction + " completed.");
    }
    finally
    {
        if (swFileDesc != null)
        {
            swFileDesc.Dispose();
        }

        if (image_file != null)
        {
            image_file.Dispose();
        }

        if (label_file != null)
        {
            label_file.Dispose();
        }
    }

    return true;
}