/// <summary>
        /// Clears the temp folder, converts the document at <c>PdfPath</c> to images,
        /// regenerates the criteria files for the currently selected document types and
        /// then hands every file found in the temp folder to
        /// <c>ProcessSelectedDocumentsAsync</c>.
        /// </summary>
        /// <remarks>
        /// <c>ClassifyEnabled</c> is switched off for the duration of the run and is always
        /// switched back on afterwards, including when one of the steps throws. Exceptions
        /// are not swallowed; they propagate to the caller through the returned task.
        /// </remarks>
        /// <returns>A task that completes once the selected documents have been processed.</returns>
        public async Task ProcessDocumentsAsync()
        {
            _criteriaFilePaths = new List <string>();
            ClassifyEnabled    = false;

            try
            {
                var tempDirectoryInfo = new DirectoryInfo(Common.TempStorage);

                // Start from an empty temp folder so files from an earlier run are not processed again.
                foreach (var staleFile in tempDirectoryInfo.GetFiles())
                {
                    File.Delete(staleFile.FullName);
                }

                var pdfFiles = new List <string> {
                    PdfPath
                };
                var pdfImages = await Common.ConvertPdfsToImagesAsync(pdfFiles);

                // The returned mapping is not used by this method; the call itself is kept.
                // NOTE(review): presumably this is what places image inputs in the temp folder - confirm.
                await Common.CopyImagesToTempFolderAsync(pdfFiles);

                PdfImages = new Dictionary <string, string>(pdfImages);

                var types = new List <DocumentTypes>();
                List <DocumentCriteria> documentCriteria = null;

                using (var context = new ClassifierContext())
                {
                    foreach (var documentType in context.DocumentTypes.ToList())
                    {
                        // A type without a matching selection entry is treated as "not selected"
                        // instead of aborting the whole run with InvalidOperationException.
                        var selection = DocumentSelectionList.FirstOrDefault(c => c.DocumentTypeId == documentType.Id);
                        if (selection != null && selection.Selected)
                        {
                            types.Add(documentType);
                        }
                    }
                    documentCriteria = context.DocumentCriteria.ToList();
                }
                await Common.CreateCriteriaFilesAsync(documentCriteria, types);

                // Publish the criteria file paths as one fully built list.
                var criteriaFolder = new DirectoryInfo(Common.CriteriaStorage);
                _criteriaFilePaths = criteriaFolder.GetFiles().Select(f => f.FullName).ToList();

                // GetFiles() queries the directory again, so this sees the files created above.
                var files = tempDirectoryInfo.GetFiles();

                await ProcessSelectedDocumentsAsync(files.ToList());
            }
            finally
            {
                // Re-enable classification even when a step above failed.
                ClassifyEnabled = true;
            }
        }
        // Example no. 2
        // 0
        /// <summary>
        /// Scores every file in <paramref name="files"/> against all images in
        /// <c>_criteriaImages</c> on a thread-pool thread. When the best accumulated score
        /// reaches that document type's <c>AverageScore</c>, the match counter is bumped and
        /// the page is exported via <c>ExtractPageFromPdf</c> or <c>CreatePdfFromImage</c>.
        /// </summary>
        /// <param name="prog">Receives one progress report per processed file.</param>
        /// <param name="token">Checked before and after each file is scored; when cancellation is requested the loop returns early and the task completes normally.</param>
        /// <param name="types">Document types to score against; each criteria image is mapped to one of them by the part of its file name before the first '-'.</param>
        /// <param name="pc">Updated with the fraction of files completed and queried for the remaining-time text.</param>
        /// <param name="files">Image files to classify.</param>
        /// <returns>The task running the classification loop.</returns>
        public Task ProcessSelectedDocumentsAsync(IProgress <TaskProgress> prog, CancellationToken token, List <DocumentTypes> types, EtaCalculator pc, List <FileInfo> files)
        {
            var currentFile = 0.0;
            var fileCount   = Convert.ToDouble(files.Count);
            // Guards the per-type score models, which are updated from several Parallel.ForEach workers.
            var scoreGate   = new object();

            return(Task.Run(() =>
            {
                foreach (var file in files)
                {
                    // Skip the classification pass entirely when cancellation was already requested.
                    if (token.IsCancellationRequested)
                    {
                        return;
                    }
                    var criteriaMatches = types.Select(o => new CriteriaMatchModel {
                        DocumentType = o
                    }).ToList();
                    using (var observedImage = CvInvoke.Imread(file.FullName))
                    {
                        Parallel.ForEach(_criteriaImages, (criteriaImage) =>
                        {
                            var criteriaFile = criteriaImage.Info;
                            var criteriaFileSplit = criteriaFile.Name.Split('-');
                            var type = types.First(c => c.DocumentType == criteriaFileSplit[0]);
                            var score = Classify(criteriaImage, observedImage);
                            var existingModel = criteriaMatches.First(c => c.DocumentType == type);
                            // Several criteria images can map to the same type, so the
                            // read-modify-write on Score must not interleave between workers.
                            lock (scoreGate)
                            {
                                existingModel.Score += score;
                                existingModel.PdfFile = file.FullName;
                            }
                        });
                    }
                    if (token.IsCancellationRequested)
                    {
                        return;
                    }
                    // Compute the maximum once instead of once per candidate.
                    var bestScore = criteriaMatches.Max(p => p.Score);
                    var matchedCriteria = criteriaMatches.First(c => c.Score == bestScore);
                    Console.WriteLine($"Score: {matchedCriteria.Score}");
                    if (matchedCriteria.Score >= matchedCriteria.DocumentType.AverageScore)
                    {
                        DocumentSelectionList.First(c => c.DocumentType == matchedCriteria.DocumentType.DocumentType).Matches += 1;
                        System.Windows.Application.Current.Dispatcher.Invoke(() =>
                        {
                            SelectionViewSource.Refresh();
                        });
                        // Drop the extension (whatever its length), then keep the part before the first '.'.
                        var matchedFileName = Path.GetFileNameWithoutExtension(file.Name).Split('.')[0];
                        var matchedFile = PdfFiles.First(c => c.Contains(matchedFileName));
                        // EndsWith does not throw for paths shorter than three characters.
                        if (matchedFile.EndsWith("pdf", StringComparison.OrdinalIgnoreCase))
                        {
                            ExtractPageFromPdf(file, matchedCriteria.DocumentType.DocumentType, NamingModels);
                        }
                        else
                        {
                            CreatePdfFromImage(file, matchedCriteria.DocumentType.DocumentType, NamingModels);
                        }
                    }
                    currentFile++;
                    var rawProgress = (currentFile / fileCount);
                    pc.Update((float)rawProgress);
                    prog.Report(new TaskProgress
                    {
                        ProgressText = file.Name,
                        ProgressPercentage = rawProgress * 100,
                        ProgressText2 = pc.ETAIsAvailable ? pc.ETR.ToString(@"dd\.hh\:mm\:ss") : "Calculating..."
                    });
                }
            }));
        }
        // Example no. 3
        // 0
        /// <summary>
        /// Runs a full classification pass over <c>PdfFiles</c>: clears the temp folder,
        /// converts the inputs to images, regenerates the criteria files for the selected
        /// document types, loads the naming models and criteria images from
        /// <c>NamingSpreadsheetPath</c>, classifies every file found in the temp folder and
        /// finally shows the completion dialog.
        /// </summary>
        /// <remarks>
        /// A fresh <c>CancellationTokenSource</c> is stored in <c>CancelTokenSource</c> for each
        /// run, and the one left over from an earlier run is disposed. Progress reports are
        /// forwarded to <c>ProgressPercentage</c>, <c>ProgressText</c> and <c>ProgressText2</c>.
        /// </remarks>
        /// <returns>A task that completes once the dialog state has been set.</returns>
        public async Task ProcessDocumentsAsync()
        {
            CancelEnabled   = true;
            ClassifyEnabled = false;
            var tempDirectory = new DirectoryInfo(Common.TempStorage);

            // Start from an empty temp folder so files from an earlier run are not processed again.
            foreach (var staleFile in tempDirectory.GetFiles())
            {
                File.Delete(staleFile.FullName);
            }

            // Swap in the new source first so the property never exposes a disposed instance,
            // then release the one from the previous run.
            var previousSource = CancelTokenSource;
            CancelTokenSource = new CancellationTokenSource();
            previousSource?.Dispose();
            var token = CancelTokenSource.Token;
            var prog  = new Progress <TaskProgress>();

            prog.ProgressChanged += (sender, exportProgress) =>
            {
                ProgressPercentage = Math.Round(exportProgress.ProgressPercentage, 2);
                ProgressText       = exportProgress.ProgressText;
                ProgressText2      = exportProgress.ProgressText2;
            };
            ProgressText = "Creating PDF Files.";
            var eta       = new EtaCalculator(1, 30);
            var pdfImages = await Common.ConvertPdfsToImagesAsync(PdfFiles, prog, eta);

            var pngImages = await Common.CopyImagesToTempFolderAsync(PdfFiles);

            // Merge both mappings into one lookup.
            PdfImages = new Dictionary <string, string>(pdfImages);
            foreach (var png in pngImages)
            {
                PdfImages.Add(png.Key, png.Value);
            }
            var types = new List <DocumentTypes>();
            List <DocumentCriteria> documentCriteria = null;

            using (var context = new ClassifierContext())
            {
                foreach (var documentType in context.DocumentTypes.ToList())
                {
                    // A type without a matching selection entry is treated as "not selected"
                    // instead of aborting the whole run with InvalidOperationException.
                    var selection = DocumentSelectionList.FirstOrDefault(c => c.DocumentTypeId == documentType.Id);
                    if (selection != null && selection.Selected)
                    {
                        types.Add(documentType);
                    }
                }
                documentCriteria = context.DocumentCriteria.ToList();
            }
            ProgressText       = "Finding Matches";
            ProgressPercentage = 0.0;
            await Common.CreateCriteriaFilesAsync(documentCriteria, types);

            var criteriaAndNaming = Common.SetNamingAndCriteria(NamingSpreadsheetPath);

            NamingModels    = new List <FileNamingModel>(criteriaAndNaming.Item1);
            _criteriaImages = new List <CriteriaImageModel>(criteriaAndNaming.Item2);

            // GetFiles() queries the directory again, so this sees the images created above.
            var files = tempDirectory.GetFiles();

            eta = new EtaCalculator(3, 30);
            await ProcessSelectedDocumentsAsync(prog, token, types, eta, files.ToList());

            // NOTE(review): this dialog is also shown when the run was cancelled through the
            // token - confirm whether a cancelled run should report "Complete".
            DialogTitle   = "Complete";
            DialogText    = "The documents you selected have been classified.";
            DialogVisible = true;
        }