/// <summary>
/// Test fixture setup: creates fakes for both collaborators via <c>A.Fake</c>
/// and builds the <c>ExtractionManager</c> under test from them.
/// </summary>
public ExtractionManagerTest()
        {
            // Build the two fakes first so the same instances are stored in the
            // fixture fields and handed to the manager.
            var fakePdfToText = A.Fake<PdfToText>();
            var fakeCognitive = A.Fake<Cognitive>();

            _pdfToText = fakePdfToText;
            _cognitive = fakeCognitive;
            _manager   = new ExtractionManager(_pdfToText, _cognitive);
        }
Esempio n. 2
0
        /// <summary>
        /// Searches every PDF listed in <c>FileList</c> for the text typed into
        /// <c>SearchQueryTextBox</c> and shows, in a message box, each file and
        /// page on which the text was found.
        /// </summary>
        /// <param name="sender">The control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void SearchInPDFButton_Click(object sender, EventArgs e)
        {
            // Read the text the user typed. Calling ToString() on the text box
            // yields a description of the control, not the query, so it must not
            // be used as the search term.
            string query = SearchQueryTextBox.Text;
            if (string.IsNullOrEmpty(query))
            {
                // An empty term is contained in every string and would report every page.
                MessageBox.Show("Please enter a search term.");
                return;
            }

            List<string> filesIncludingSearch = new List<string>();

            try
            {
                for (int i = 0; i < FileList.Items.Count; i++)
                {
                    var filePath = FileList.Items[i].Text;

                    // The document object is only needed for its page count;
                    // close it right away so the file is not kept open.
                    int pageCount;
                    PdfDocument pdfDocument = new PdfDocument(filePath);
                    try
                    {
                        pageCount = pdfDocument.Pages.Count;
                    }
                    finally
                    {
                        pdfDocument.Close();
                    }

                    // Load the file once per document instead of once per page.
                    var pdfToText = new PdfToText();
                    pdfToText.Load(filePath);

                    // Page numbers are 1-based for the converter, and that is also
                    // what the user expects to see in the result list.
                    for (int page = 1; page <= pageCount; page++)
                    {
                        // Restrict extraction to the current page so a hit is
                        // attributed to the page that actually contains it.
                        pdfToText.StartPageNumber = page;
                        pdfToText.EndPageNumber   = page;

                        var content = pdfToText.GetText();
                        if (content != null && content.Contains(query))
                        {
                            filesIncludingSearch.Add($"Found in {filePath} on page {page}");
                        }
                    }
                }

                MessageBox.Show($"Sucessfully searched. Found on {String.Join(",\n", filesIncludingSearch)}");
            }
            catch (Exception ex)
            {
                // Surface the reason instead of hiding it behind a generic failure.
                MessageBox.Show($"Failed To Search: {ex.Message}");
            }
        }
Esempio n. 3
0
 /// <summary>
 /// Creates the document processor and stores each supplied collaborator
 /// in its corresponding private field.
 /// </summary>
 /// <param name="fileManager">Stored in <c>_fileManager</c>.</param>
 /// <param name="pdfToText">Stored in <c>_pdfToText</c>.</param>
 /// <param name="cognitive">Stored in <c>_cognitive</c>.</param>
 /// <param name="callback">Stored in <c>_callback</c>.</param>
 /// <param name="backgroundJobs">Stored in <c>_backgroundJobs</c>.</param>
 public ProcessDocument(
     FileManager fileManager,
     PdfToText pdfToText,
     Cognitive cognitive,
     Callback callback,
     IBackgroundJobClient backgroundJobs)
 {
     this._fileManager = fileManager;
     this._pdfToText = pdfToText;
     this._cognitive = cognitive;
     this._callback = callback;
     this._backgroundJobs = backgroundJobs;
 }
        /// <summary>
        /// Writes the text extracted from <paramref name="filename"/> to a sibling
        /// file named <c>filename + ".txt"</c>. Nothing is written when the source
        /// file is missing, the target already exists, or no text was extracted.
        /// </summary>
        /// <param name="filename">Path of the file passed to <c>PdfToText.GetPdfText</c>.</param>
        private static void ConvertToTxt(string filename)
        {
            // Nothing to convert.
            if (!File.Exists(filename))
            {
                return;
            }

            // Never overwrite an existing text file.
            string targetPath = filename + ".txt";
            if (File.Exists(targetPath))
            {
                return;
            }

            string extracted = PdfToText.GetPdfText(filename);
            if (string.IsNullOrEmpty(extracted))
            {
                return;
            }

            File.WriteAllText(targetPath, extracted);
        }
Esempio n. 5
0
        /// <summary>
        /// Lets the user pick a PDF, stores its extracted text in <c>LoadedText</c>
        /// and calls <c>DisplayResults</c>. Does nothing if the dialog is not confirmed.
        /// </summary>
        /// <param name="sender">The menu item that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void MenuItem_Click(object sender, RoutedEventArgs e)
        {
            var dialog = new OpenFileDialog();
            dialog.Filter           = "PDF (*.pdf)|*.Pdf";
            dialog.Multiselect      = true;
            dialog.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);

            // Anything other than an explicit confirmation means there is nothing to load.
            if (dialog.ShowDialog() != true)
            {
                return;
            }

            // Only the first selected file (FileName) is read here.
            LoadedText = PdfToText.GetText(dialog.FileName);
            DisplayResults();
        }
Esempio n. 6
0
        /// <summary>
        /// Lets the user pick a PDF, records its folder in <c>DocumentPath</c>,
        /// stores the result of <c>PdfToText.GetPagesText</c> as an array in
        /// <c>PdfDocumentText</c>, then calls <c>RecalculateData</c>.
        /// Does nothing if the dialog is not confirmed.
        /// </summary>
        /// <param name="sender">The menu item that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void MenuItem_Click_1(object sender, RoutedEventArgs e)
        {
            var dialog = new OpenFileDialog();
            dialog.Filter           = "PDF (*.pdf)|*.pdf";
            dialog.Multiselect      = true;
            dialog.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);

            if (dialog.ShowDialog() != true)
            {
                return;
            }

            // Only the first selected file (FileName) is used.
            DocumentPath = Path.GetDirectoryName(dialog.FileName);

            var pagesText = PdfToText.GetPagesText(dialog.FileName);
            PdfDocumentText = pagesText.ToArray();

            RecalculateData();
        }
Esempio n. 7
0
        /// <summary>
        /// Shows a multi-select open-file dialog and lazily yields one
        /// <c>PdfFileModel</c> per selected file, pairing the file name with the
        /// text returned by <c>PdfToText.GetText</c>.
        /// </summary>
        /// <remarks>
        /// This is an iterator: the dialog is shown when enumeration starts, and
        /// each file is read (after a 500 ms pause) only when its element is requested.
        /// </remarks>
        /// <returns>A sequence of models for the selected files; empty if the dialog is not confirmed.</returns>
        public static IEnumerable <PdfFileModel> LoadPdfFiles()
        {
            var dialog = new OpenFileDialog();
            dialog.Filter           = "PDF (*.pdf)|*.Pdf";
            dialog.Multiselect      = true;
            dialog.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);

            if (dialog.ShowDialog() != true)
            {
                yield break;
            }

            foreach (var selectedPath in dialog.FileNames)
            {
                // NOTE(review): the reason for this pause is not visible here — confirm it is still needed.
                Thread.Sleep(500);

                yield return new PdfFileModel
                {
                    FileName    = selectedPath,
                    FileContent = PdfToText.GetText(selectedPath)
                };
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Lets the user pick PDFs, merges them with the entries already in
        /// <c>Results</c> (one entry per document name, newly picked files first),
        /// and then extracts the text of entries that have none on a background task.
        /// </summary>
        /// <param name="sender">The menu item that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void MenuItem_Click_1(object sender, RoutedEventArgs e)
        {
            var openFileDialog = new OpenFileDialog
            {
                Filter           = "PDF (*.pdf)|*.pdf",
                Multiselect      = true,
                InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments),
                RestoreDirectory = true,
            };

            if (openFileDialog.ShowDialog() != true)
            {
                return;
            }

            // Tolerate a Results that has not been initialised yet; Concat would
            // otherwise throw on a null sequence.
            var existing = Results ?? Enumerable.Empty<SearchFileViewModel>();

            // New selections come first so that, for a duplicated document name,
            // GroupBy/First keeps the freshly selected entry.
            var merged = openFileDialog
                         .FileNames
                         .Select(f => new SearchFileViewModel { DocumentName = f })
                         .Concat(existing)
                         .GroupBy(f => f.DocumentName)
                         .Select(g => g.First())
                         .ToArray();

            Results = merged;

            // The work is synchronous, so a plain (non-async) delegate is used.
            // The worker iterates the local snapshot rather than re-reading the
            // Results property, which may be reassigned while the task runs.
            Task.Run(() =>
            {
                var resultList = new List<SearchFileViewModel>(merged.Length);
                foreach (var result in merged)
                {
                    // Only extract text for entries that do not have it yet.
                    if (string.IsNullOrEmpty(result.DocumentContent))
                    {
                        result.DocumentContent = PdfToText.GetText(result.DocumentName);
                    }
                    resultList.Add(result);
                }
                Results = resultList;
            });
        }
        /// <summary>
        /// Extracts the text of every file in <paramref name="arquivos"/> and, for each
        /// file that yields text, sets <c>Pdf = true</c> and stores the text in <c>Texto</c>.
        /// </summary>
        /// <param name="arquivos">Files to inspect; each item's <c>CaminhoFisico</c> is loaded into the converter.</param>
        /// <returns>
        /// The same list instance with the matching items updated, or a new empty list
        /// if any exception is thrown while processing.
        /// </returns>
        public List <GerenciadorArquivoModel> CheckPdf(List <GerenciadorArquivoModel> arquivos)
        {
            // Banner returned by the trial version of the converter in place of real
            // content; a result equal to it is treated as "no text".
            const string freetrial = "===========================================================================================================\r\n\r\nYou are currently using Demo Version - Select.Pdf SDK. With the free trial version,\r\nonly the first 3 pages of the PDF document are converted to text.\r\n\r\n===========================================================================================================\r\n\r\n\r\n\r\nDemo Version - Select.Pdf SDK - http://selectpdf.com\r\n";

            try
            {
                foreach (var item in arquivos)
                {
                    // Instantiate a PDF-to-text converter and load the file.
                    PdfToText pdfToText = new PdfToText();
                    pdfToText.Load(item.CaminhoFisico);

                    // Extract from the first page onwards, keeping the original layout.
                    // NOTE(review): EndPageNumber = 0 presumably means "through the last page" — confirm against the SelectPdf docs.
                    pdfToText.Layout          = TextLayout.Original;
                    pdfToText.StartPageNumber = 1;
                    pdfToText.EndPageNumber   = 0;

                    string text = pdfToText.GetText();

                    // No usable text (null, empty, or only the trial banner): leave the
                    // item unflagged. Checking for null here keeps one text-less file
                    // from throwing and discarding the whole batch.
                    if (string.IsNullOrEmpty(text) || text == freetrial)
                    {
                        continue;
                    }

                    // Mark the item as a readable PDF and attach its text.
                    item.Pdf   = true;
                    item.Texto = text;
                }
                return(arquivos);
            }
            catch (Exception)
            {
                // Existing contract: any failure results in an empty list.
                return(new List <GerenciadorArquivoModel>());
            }
        }
Esempio n. 10
0
 /// <summary>
 /// Builds the manager with its list of text-extraction strategies:
 /// <paramref name="pdfToText"/> first, then <paramref name="cognitive"/>.
 /// </summary>
 /// <param name="pdfToText">First strategy in the list.</param>
 /// <param name="cognitive">Second strategy in the list.</param>
 public ExtractionManager(PdfToText pdfToText, Cognitive cognitive)
 {
     var strategies = new List<IExtractTextStrategy>();
     strategies.Add(pdfToText);
     strategies.Add(cognitive);

     _extractionStrategies = strategies;
 }