/// <summary>
/// Example: requests text from "cells/two-worksheets.xlsx" page by page
/// (StartPageNumber = 1, CountPagesToExtract = 1) and writes every returned
/// page's index and text to the console.
/// </summary>
/// <remarks>
/// Any exception raised while building or sending the request is caught and
/// only its message is printed.
/// </remarks>
public static void Run()
{
    var config = new Configuration(Common.MyAppSid, Common.MyAppKey);
    var parseApi = new ParseApi(config);

    try
    {
        var textOptions = new TextOptions
        {
            FileInfo = new FileInfo
            {
                FilePath = "cells/two-worksheets.xlsx",
                StorageName = Common.MyStorage
            },
            StartPageNumber = 1,
            CountPagesToExtract = 1
        };

        var textResult = parseApi.Text(new TextRequest(textOptions));

        foreach (var extractedPage in textResult.Pages)
        {
            Console.WriteLine($"PageIndex: {extractedPage.PageIndex}. Text: {extractedPage.Text}");
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while calling ParseApi: " + ex.Message);
    }
}
/// <summary>
/// Parses the template DOCX test file using the template returned by
/// <c>GetTemplate()</c> and verifies the result: four fields with the
/// expected names, and a "TABLECELLS" table of 4 columns by 3 rows whose
/// cell at column 2, row 1 contains "Cell 12".
/// </summary>
public void TestParse_RawTemplate()
{
    var testFile = TestFiles.TemplateDocumentDocx;
    var options = new ParseOptions
    {
        FileInfo = testFile.ToFileInfo(),
        Template = GetTemplate()
    };
    var request = new ParseRequest(options);

    var result = ParseApi.Parse(request);

    Assert.IsNotNull(result);
    Assert.IsNotEmpty(result.FieldsData);
    Assert.AreEqual(4, result.Count);

    var dataFieldNames = new[] { "FIELD1", "RELATEDFIELD2", "REGEX", "TABLECELLS" };
    foreach (var field in result.FieldsData)
    {
        Assert.IsTrue(dataFieldNames.Contains(field.Name));
    }

    var table = result.FieldsData
        .First(x => string.Equals(x.Name, "TABLECELLS"))
        .PageArea
        .PageTableArea;

    // The table checks used to be wrapped in `if (table != null)`, which let
    // the test pass without verifying anything when the table area was
    // missing. Assert its presence so that case fails visibly.
    Assert.IsNotNull(table, "TABLECELLS field has no table area");

    Assert.AreEqual(4, table.ColumnCount);
    Assert.AreEqual(3, table.RowCount);
    Assert.AreEqual(
        "Cell 12",
        table.PageTableAreaCells
            .First(x => x.ColumnIndex == 2 && x.RowIndex == 1)
            .PageArea
            .PageTextArea
            .Text);
}
/// <summary>
/// Fetches the results for the track id reported by the interaction object
/// and hands them to <paramref name="outputType"/> for printing.
/// </summary>
/// <param name="outputType">Output sink whose <c>Print</c> receives the results.</param>
public void Analyse(IOutputType outputType)
{
    var results = ParseApi.GetResultsFor(_interaction.GetTrackId());
    outputType.Print(results);
}
/// <summary>
/// Requests plain text for four pages of the four-page test document
/// (StartPageNumber = 0) and checks the page index and exact text of the
/// first and the fourth returned page.
/// </summary>
public void TestExtractPages()
{
    var source = TestFiles.FourPages;
    var textOptions = new TextOptions
    {
        FileInfo = source.ToFileInfo(),
        StartPageNumber = 0,
        CountPagesToExtract = 4,
        FormattedTextOptions = new FormattedTextOptions { Mode = "PlainText" }
    };

    var result = ParseApi.Text(new TextRequest(textOptions));

    Assert.IsNotNull(result.Pages);

    // First returned page.
    Assert.AreEqual(0, result.Pages[0].PageIndex);
    Assert.AreEqual(
        "Text inside bookmark 0\r\n\r\nPage 0 heading\r\n\r\nPage Text - Page 0\r\n\r\n\fText inside bookmark 1\r\n\r\n",
        result.Pages[0].Text);

    // Fourth returned page.
    Assert.AreEqual(3, result.Pages[3].PageIndex);
    Assert.AreEqual(
        "\fText inside bookmark 3\r\n\r\nPage 3 heading\r\n\r\nPage Text - Page 3\r\n\r\n",
        result.Pages[3].Text);
}
/// <summary>
/// Example: requests one page of text (StartPageNumber = 2) from the
/// container item "template-document.pdf" inside
/// "pdf/PDF with attachements.pdf", supplying a password for the outer file,
/// and prints the text of the first returned page.
/// </summary>
/// <remarks>
/// Any exception raised inside the try block is caught and only its message
/// is printed.
/// </remarks>
public static void Run()
{
    var config = new Configuration(Common.MyAppSid, Common.MyAppKey);
    var parseApi = new ParseApi(config);

    try
    {
        var textOptions = new TextOptions
        {
            FileInfo = new FileInfo
            {
                FilePath = "pdf/PDF with attachements.pdf",
                Password = "******",
                StorageName = Common.MyStorage
            },
            ContainerItemInfo = new ContainerItemInfo
            {
                RelativePath = "template-document.pdf"
            },
            StartPageNumber = 2,
            CountPagesToExtract = 1
        };

        var textResult = parseApi.Text(new TextRequest(textOptions));

        Console.WriteLine($"Text: {textResult.Pages[0].Text}");
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while calling ParseApi: " + ex.Message);
    }
}
/// <summary>
/// Example: requests images for a page range of "slides/three-slides.pptx"
/// (StartPageNumber = 1, CountPagesToExtract = 2) and prints, for every
/// returned page, the storage path, download URL, file format and page index
/// of each image.
/// </summary>
/// <remarks>
/// Any exception raised inside the try block is caught and only its message
/// is printed.
/// </remarks>
public static void Run()
{
    var config = new Configuration(Common.MyAppSid, Common.MyAppKey);
    var parseApi = new ParseApi(config);

    try
    {
        var imagesOptions = new ImagesOptions()
        {
            FileInfo = new FileInfo
            {
                FilePath = "slides/three-slides.pptx",
                StorageName = Common.MyStorage
            },
            StartPageNumber = 1,
            CountPagesToExtract = 2
        };

        var imagesResult = parseApi.Images(new ImagesRequest(imagesOptions));

        foreach (var slide in imagesResult.Pages)
        {
            Console.WriteLine($"Images from {slide.PageIndex} page.");

            foreach (var picture in slide.Images)
            {
                Console.WriteLine($"Image path in storage: {picture.Path}. Download url: {picture.DownloadUrl}");
                Console.WriteLine($"File format: {picture.FileFormat}. Page index: {picture.PageIndex}");
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while calling ParseApi: " + ex.Message);
    }
}
/// <summary>
/// Example: requests the text of
/// "words-processing/docx/formatted-document.docx" with the formatted-text
/// mode set to "Markdown" and prints the returned text.
/// </summary>
/// <remarks>
/// Any exception raised inside the try block is caught and only its message
/// is printed.
/// </remarks>
public static void Run()
{
    var config = new Configuration(Common.MyAppSid, Common.MyAppKey);
    var parseApi = new ParseApi(config);

    try
    {
        var textOptions = new TextOptions
        {
            FileInfo = new FileInfo
            {
                FilePath = "words-processing/docx/formatted-document.docx",
                StorageName = Common.MyStorage
            },
            FormattedTextOptions = new FormattedTextOptions
            {
                Mode = "Markdown"
            }
        };

        var textResult = parseApi.Text(new TextRequest(textOptions));

        Console.WriteLine($"Text: {textResult.Text}");
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while calling ParseApi: " + ex.Message);
    }
}
/// <summary>
/// Example: requests the text of
/// "email/eml/embedded-image-and-attachment.eml" and prints the returned text.
/// </summary>
/// <remarks>
/// Any exception raised inside the try block is caught and only its message
/// is printed.
/// </remarks>
public static void Run()
{
    var config = new Configuration(Common.MyAppSid, Common.MyAppKey);
    var parseApi = new ParseApi(config);

    try
    {
        var textOptions = new TextOptions
        {
            FileInfo = new FileInfo
            {
                FilePath = "email/eml/embedded-image-and-attachment.eml",
                StorageName = Common.MyStorage
            }
        };

        var textResult = parseApi.Text(new TextRequest(textOptions));

        Console.WriteLine($"Text: {textResult.Text}");
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while calling ParseApi: " + ex.Message);
    }
}
/// <summary>
/// Walks track ids from 0 up to (but not including) the value of the
/// "NumTracks" app setting. For each id it prints the fetched results via
/// <paramref name="outputType"/> and then waits for a UI interaction before
/// moving on.
/// </summary>
/// <param name="outputType">Output sink whose <c>Print</c> receives each track's results.</param>
public void Analyse(IOutputType outputType)
{
    var trackCount = int.Parse(ConfigurationManager.AppSettings["NumTracks"]);

    var currentTrack = 0;
    while (currentTrack < trackCount)
    {
        outputType.Print(ParseApi.GetResultsFor(currentTrack));
        _ui.WaitForInteraction();
        currentTrack++;
    }
}
/// <summary>
/// Requests text for the JPEG test file and expects an
/// <c>ApiException</c> whose message says the file type is not supported.
/// </summary>
public void TestText_NotSupportedFile()
{
    var source = TestFiles.JpegFile;
    var textOptions = new TextOptions
    {
        FileInfo = source.ToFileInfo()
    };
    var textRequest = new TextRequest(textOptions);

    var thrown = Assert.Throws<ApiException>(() => ParseApi.Text(textRequest));

    Assert.AreEqual(
        $"The specified file '{source.FullName}' has type which is not currently supported.",
        thrown.Message);
}
/// <summary>
/// Sends a parse request that carries only file info (no template or
/// template path) and expects an <c>ApiException</c> with the
/// "Request parameters missing or have incorrect format" message.
/// </summary>
public void TestParse_WithoutOptions()
{
    var source = TestFiles.JpegFile;
    var parseOptions = new ParseOptions
    {
        FileInfo = source.ToFileInfo()
    };
    var parseRequest = new ParseRequest(parseOptions);

    var thrown = Assert.Throws<ApiException>(() => ParseApi.Parse(parseRequest));

    Assert.AreEqual("Request parameters missing or have incorrect format", thrown.Message);
}
/// <summary>
/// Requests images for the <c>TestFiles.NotExist</c> entry and expects an
/// <c>ApiException</c> whose message reports that the file cannot be found.
/// </summary>
public void TestGetImage_FileNotFoundResult()
{
    var source = TestFiles.NotExist;
    var imagesOptions = new ImagesOptions
    {
        FileInfo = source.ToFileInfo()
    };
    var imagesRequest = new ImagesRequest(imagesOptions);

    var thrown = Assert.Throws<ApiException>(() => ParseApi.Images(imagesRequest));

    Assert.AreEqual($"Can't find file located at '{source.FullName}'.", thrown.Message);
}
/// <summary>
/// Requests the whole text of the one-page test file and checks that it is
/// non-null and equal to "First Page\r\r\f".
/// </summary>
public void TestText()
{
    var source = TestFiles.OnePage;
    var textOptions = new TextOptions
    {
        FileInfo = source.ToFileInfo()
    };

    var textResult = ParseApi.Text(new TextRequest(textOptions));

    Assert.IsNotNull(textResult.Text);
    Assert.AreEqual("First Page\r\r\f", textResult.Text);
}
/// <summary>
/// Requests text for the password-protected test file after overriding the
/// file info's password, and expects an <c>ApiException</c> whose message
/// reports that the password is incorrect.
/// </summary>
public void TestText_IncorrectPassword()
{
    var source = TestFiles.PasswordProtected;
    var textOptions = new TextOptions
    {
        FileInfo = source.ToFileInfo()
    };

    // Replace whatever password ToFileInfo() produced with the test value.
    textOptions.FileInfo.Password = "******";

    var textRequest = new TextRequest(textOptions);

    var thrown = Assert.Throws<ApiException>(() => ParseApi.Text(textRequest));

    Assert.AreEqual(
        $"Password provided for file '{source.FullName}' is incorrect.",
        thrown.Message);
}
/// <summary>
/// Collects the results for every track id from 0 up to (but not including)
/// the value of the "NumTracks" app setting, advancing a "Fetching Data"
/// loading bar after each fetch, then prints the whole collection in one
/// call to <paramref name="outputType"/>.
/// </summary>
/// <param name="outputType">Output sink whose <c>Print</c> receives the collected list.</param>
public void Analyse(IOutputType outputType)
{
    var trackCount = int.Parse(ConfigurationManager.AppSettings["NumTracks"]);
    var collected = new List<TrackData>();

    var progress = new LoadingBar("Fetching Data", trackCount);
    progress.Start();

    var currentTrack = 0;
    while (currentTrack < trackCount)
    {
        collected.Add(ParseApi.GetResultsFor(currentTrack));
        progress.Next();
        currentTrack++;
    }

    outputType.Print(collected);
}
/// <summary>
/// Sends a parse request (template path plus inline template) for the
/// <c>TestFiles.NotExist</c> entry and expects an <c>ApiException</c> whose
/// message reports that the file cannot be found.
/// </summary>
public void TestParse_FileNotFoundResult()
{
    var source = TestFiles.NotExist;
    var parseOptions = new ParseOptions
    {
        FileInfo = source.ToFileInfo(),
        TemplatePath = "templates/document-template.json",
        Template = GetTemplate()
    };
    var parseRequest = new ParseRequest(parseOptions);

    var thrown = Assert.Throws<ApiException>(() => ParseApi.Parse(parseRequest));

    Assert.AreEqual($"Can't find file located at '{source.FullName}'.", thrown.Message);
}
/// <summary>
/// Sends a parse request (template path plus inline template) for the JPEG
/// test file and expects an <c>ApiException</c> whose message says the file
/// type is not supported.
/// </summary>
public void TestParse_NotSupportedFile()
{
    var source = TestFiles.JpegFile;
    var parseOptions = new ParseOptions
    {
        FileInfo = source.ToFileInfo(),
        TemplatePath = "templates/document-template.json",
        Template = GetTemplate()
    };
    var parseRequest = new ParseRequest(parseOptions);

    var thrown = Assert.Throws<ApiException>(() => ParseApi.Parse(parseRequest));

    Assert.AreEqual(
        $"The specified file '{source.FullName}' has type which is not currently supported.",
        thrown.Message);
}
/// <summary>
/// Requests images for a page range (StartPageNumber = 1,
/// CountPagesToExtract = 2) of the Zip test file and expects an
/// <c>ApiException</c> whose message says the file type is not supported.
/// </summary>
public void ImageExtractTest_Pdf_Container_FromPages_Error()
{
    var source = TestFiles.Zip;
    var imagesOptions = new ImagesOptions
    {
        FileInfo = source.ToFileInfo(),
        StartPageNumber = 1,
        CountPagesToExtract = 2
    };
    var imagesRequest = new ImagesRequest(imagesOptions);

    var thrown = Assert.Throws<ApiException>(() => ParseApi.Images(imagesRequest));

    Assert.AreEqual(
        $"The specified file '{source.FullName}' has type which is not currently supported.",
        thrown.Message);
}
/// <summary>
/// Requests images from the PDF test file with StartPageNumber = 3 and
/// CountPagesToExtract = 5 and expects an <c>ApiException</c> with the
/// "Request parameters missing or have incorrect format" message.
/// </summary>
public void ImageExtractTest_Pdf_FromPages_OutOfThePageRange()
{
    var source = TestFiles.Pdf;
    var imagesOptions = new ImagesOptions
    {
        FileInfo = source.ToFileInfo(),
        StartPageNumber = 3,
        CountPagesToExtract = 5
    };
    var imagesRequest = new ImagesRequest(imagesOptions);

    var thrown = Assert.Throws<ApiException>(() => ParseApi.Images(imagesRequest));

    Assert.AreEqual("Request parameters missing or have incorrect format", thrown.Message);
}
/// <summary>
/// Requests images for the password-protected test file using a hand-built
/// file info (path and password only) and expects an <c>ApiException</c>
/// whose message reports that the password is incorrect.
/// </summary>
public void TestGetImage_IncorrectPassword()
{
    var source = TestFiles.PasswordProtected;
    var imagesOptions = new ImagesOptions
    {
        FileInfo = new FileInfo
        {
            FilePath = source.FullName,
            Password = "******"
        }
    };
    var imagesRequest = new ImagesRequest(imagesOptions);

    var thrown = Assert.Throws<ApiException>(() => ParseApi.Images(imagesRequest));

    Assert.AreEqual(
        $"Password provided for file '{source.FullName}' is incorrect.",
        thrown.Message);
}
/// <summary>
/// Fixture setup: builds one configuration from the app SID, app key and API
/// base URL fields, creates every API client from it, and then calls
/// <c>UploadTestFiles()</c>.
/// </summary>
public void BeforeAllTests()
{
    var configuration = new Configuration(_appSid, _appKey);
    configuration.ApiBaseUrl = _apiBaseUrl;

    // All clients share the same configuration instance.
    ParseApi = new ParseApi(configuration);
    InfoApi = new InfoApi(configuration);
    TemplateApi = new TemplateApi(configuration);
    FileApi = new FileApi(configuration);
    FolderApi = new FolderApi(configuration);
    StorageApi = new StorageApi(configuration);

    UploadTestFiles();
}
/// <summary>
/// Sends a parse request (template path plus inline template) for the
/// password-protected test file after overriding the file info's password,
/// and expects an <c>ApiException</c> whose message reports that the
/// password is incorrect.
/// </summary>
public void TestParse_IncorrectPassword()
{
    var source = TestFiles.PasswordProtected;
    var parseOptions = new ParseOptions
    {
        FileInfo = source.ToFileInfo(),
        TemplatePath = "templates/document-template.json",
        Template = GetTemplate()
    };

    // Replace whatever password ToFileInfo() produced with the test value.
    parseOptions.FileInfo.Password = "******";

    var parseRequest = new ParseRequest(parseOptions);

    var thrown = Assert.Throws<ApiException>(() => ParseApi.Parse(parseRequest));

    Assert.AreEqual(
        $"Password provided for file '{source.FullName}' is incorrect.",
        thrown.Message);
}
/// <summary>
/// Example: parses "words-processing/docx/companies.docx" with the template
/// stored at "templates/companies.json" and prints every returned field:
/// text fields as name/text pairs, table fields cell by cell.
/// </summary>
/// <remarks>
/// The template-creation call runs before the try block, so an exception it
/// throws is not caught here. Exceptions raised inside the try block are
/// caught and only their message is printed.
/// </remarks>
public static void Run()
{
    // For the purposes of this example, create the template if it does not exist yet.
    TemplateUtils.CreateIfNotExist("templates/companies.json");

    var config = new Configuration(Common.MyAppSid, Common.MyAppKey);
    var parseApi = new ParseApi(config);

    try
    {
        var parseOptions = new ParseOptions
        {
            FileInfo = new FileInfo
            {
                FilePath = "words-processing/docx/companies.docx",
                StorageName = Common.MyStorage
            },
            TemplatePath = "templates/companies.json"
        };

        var parseResult = parseApi.Parse(new ParseRequest(parseOptions));

        foreach (var fieldData in parseResult.FieldsData)
        {
            // A field may carry a text area, a table area, or both.
            if (fieldData.PageArea.PageTextArea != null)
            {
                Console.WriteLine($"Field name: {fieldData.Name}. Text : {fieldData.PageArea.PageTextArea.Text}");
            }

            if (fieldData.PageArea.PageTableArea != null)
            {
                Console.WriteLine($"Table name: {fieldData.Name}.");

                foreach (var tableCell in fieldData.PageArea.PageTableArea.PageTableAreaCells)
                {
                    Console.WriteLine(
                        $"Table cell. Row {tableCell.RowIndex} column {tableCell.ColumnIndex}. Text: {tableCell.PageArea.PageTextArea.Text}");
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while calling ParseApi: " + ex.Message);
    }
}
/// <summary>
/// Example: parses the container item "companies.docx" inside
/// "containers/archive/companies.zip" with the template returned by
/// <c>TemplateUtils.GetTemplate()</c> and prints every returned field:
/// text fields as name/text pairs, table fields cell by cell.
/// </summary>
/// <remarks>
/// Any exception raised inside the try block is caught and only its message
/// is printed.
/// </remarks>
public static void Run()
{
    var config = new Configuration(Common.MyAppSid, Common.MyAppKey);
    var parseApi = new ParseApi(config);

    try
    {
        var parseOptions = new ParseOptions
        {
            FileInfo = new FileInfo
            {
                FilePath = "containers/archive/companies.zip",
                StorageName = Common.MyStorage
            },
            ContainerItemInfo = new ContainerItemInfo
            {
                RelativePath = "companies.docx"
            },
            Template = TemplateUtils.GetTemplate()
        };

        var parseResult = parseApi.Parse(new ParseRequest(parseOptions));

        foreach (var fieldData in parseResult.FieldsData)
        {
            // A field may carry a text area, a table area, or both.
            if (fieldData.PageArea.PageTextArea != null)
            {
                Console.WriteLine($"Field name: {fieldData.Name}. Text : {fieldData.PageArea.PageTextArea.Text}");
            }

            if (fieldData.PageArea.PageTableArea != null)
            {
                Console.WriteLine($"Table name: {fieldData.Name}.");

                foreach (var tableCell in fieldData.PageArea.PageTableArea.PageTableAreaCells)
                {
                    Console.WriteLine(
                        $"Table cell. Row {tableCell.RowIndex} column {tableCell.ColumnIndex}. Text: {tableCell.PageArea.PageTextArea.Text}");
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception while calling ParseApi: " + ex.Message);
    }
}
/// <summary>
/// Requests one page (StartPageNumber = 1) of the formatted test document in
/// "Markdown" mode and checks that the first returned page contains the
/// expected bold and heading markup.
/// </summary>
public void TestExtractFormattedPage()
{
    var source = TestFiles.FormattedDocument;
    var textOptions = new TextOptions
    {
        FileInfo = source.ToFileInfo(),
        FormattedTextOptions = new FormattedTextOptions { Mode = "Markdown" },
        StartPageNumber = 1,
        CountPagesToExtract = 1
    };

    var textResult = ParseApi.Text(new TextRequest(textOptions));

    Assert.IsNotEmpty(textResult.Pages);
    Assert.IsTrue(textResult.Pages[0].Text.Contains("**Second page bold text**"));
    Assert.IsTrue(textResult.Pages[0].Text.Contains("# Second page heading"));
}
/// <summary>
/// Requests all images of the four-page test document and checks that each
/// returned image has the sequentially numbered storage path
/// ("…/image_0.jpeg", "…/image_1.jpeg", …) and a non-null download URL.
/// </summary>
public void TestGetImage_Docx()
{
    var source = TestFiles.FourPages;
    var imagesOptions = new ImagesOptions
    {
        FileInfo = source.ToFileInfo()
    };

    var imagesResult = ParseApi.Images(new ImagesRequest(imagesOptions));

    Assert.IsNotNull(imagesResult);

    // Position of the current image in enumeration order; it is part of the expected path.
    var imageIndex = 0;
    foreach (var picture in imagesResult.Images)
    {
        Assert.AreEqual($"parser/images/words/docx/four-pages_docx/image_{imageIndex}.jpeg", picture.Path);
        Assert.NotNull(picture.DownloadUrl);
        imageIndex++;
    }
}
/// <summary>
/// Requests images for two pages of the PDF test file (StartPageNumber = 1,
/// CountPagesToExtract = 2) and checks that exactly two pages come back and
/// that the first image of each has the expected storage path.
/// </summary>
public void TestGetImage_Pdf_FromPages()
{
    var source = TestFiles.Pdf;
    var imagesOptions = new ImagesOptions
    {
        FileInfo = source.ToFileInfo(),
        StartPageNumber = 1,
        CountPagesToExtract = 2
    };

    var imagesResult = ParseApi.Images(new ImagesRequest(imagesOptions));

    Assert.IsNotNull(imagesResult);
    Assert.IsNotEmpty(imagesResult.Pages);
    Assert.AreEqual(2, imagesResult.Pages.Count);

    Assert.AreEqual(
        "parser/images/pdf/template-document_pdf/page_1/image_0.jpeg",
        imagesResult.Pages[0].Images[0].Path);
    Assert.AreEqual(
        "parser/images/pdf/template-document_pdf/page_2/image_0.jpeg",
        imagesResult.Pages[1].Images[0].Path);
}
/// <summary>
/// Requests all images of the e-mail test file and checks that every
/// returned image path contains one of the expected storage folder strings.
/// </summary>
public void TestGetImage_Email()
{
    var source = TestFiles.ImageAndAttachment;
    var imagesOptions = new ImagesOptions
    {
        FileInfo = source.ToFileInfo()
    };

    var imagesResult = ParseApi.Images(new ImagesRequest(imagesOptions));

    Assert.IsNotNull(imagesResult);

    var expectedFolders = new[]
    {
        "parser/images/email/eml/embedded-image-and-attachment_eml/",
    };

    foreach (var picture in imagesResult.Images)
    {
        // Substring match: the folder may appear anywhere in the path.
        var picturePath = picture.Path;
        Assert.IsTrue(expectedFolders.Any(folder => picturePath.Contains(folder)));
    }
}
/// <summary>
/// Requests one page of plain text (StartPageNumber = 0) from the
/// password-protected test file and checks that the whole-document text is
/// null while the first returned page has the expected text.
/// </summary>
/// <remarks>
/// NOTE(review): no password is set explicitly here; presumably
/// <c>ToFileInfo()</c> supplies it — confirm against the test-file helper.
/// </remarks>
public void TestText_WithPassword()
{
    var source = TestFiles.PasswordProtected;
    var textOptions = new TextOptions
    {
        FileInfo = source.ToFileInfo(),
        StartPageNumber = 0,
        CountPagesToExtract = 1,
        FormattedTextOptions = new FormattedTextOptions { Mode = "PlainText" }
    };

    var textResult = ParseApi.Text(new TextRequest(textOptions));

    Assert.IsNull(textResult.Text);
    Assert.AreEqual(
        "Text inside a bookmark 1\r\n\r\nPage 1 heading!\r\n\r\nSample test text - Page 1!\r\n\r\n\fText inside a bookmark 2\r\n\r\n",
        textResult.Pages[0].Text);
}
/// <summary>
/// Requests the text of the formatted test document in "Html" mode and
/// checks that the result contains the expected bold, italic, heading,
/// table, list and hyperlink markup fragments.
/// </summary>
public void TestExtractFormatted()
{
    var source = TestFiles.FormattedDocument;
    var textOptions = new TextOptions
    {
        FileInfo = source.ToFileInfo(),
        FormattedTextOptions = new FormattedTextOptions { Mode = "Html" }
    };

    var textResult = ParseApi.Text(new TextRequest(textOptions));

    Assert.IsNotNull(textResult);

    // Checked in this order; the first missing fragment fails the test.
    var expectedFragments = new[]
    {
        "<b>Bold text</b>",
        "<i>Italic text</i>",
        "<h1>Heading 1</h1>",
        "<tr><td><p>table</p></td>",
        "<ol><li><i>First element</i>",
        "<a href=\"http://targetwebsite.domain\">Hyperlink </a>"
    };

    foreach (var fragment in expectedFragments)
    {
        Assert.IsTrue(textResult.Text.Contains(fragment));
    }
}