C# (CSharp) TextExtractor.SavePageTextToFile Exemples

Langage de programmation: C# (CSharp)

Class/Type: TextExtractor

Méthode/Fonction: SavePageTextToFile

Exemples sur hotexamples.com : 5

C# (CSharp) TextExtractor.SavePageTextToFile - 5 exemples trouvés. Ce sont les exemples réels les mieux notés de TextExtractor.SavePageTextToFile extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

LoadDocumentFromFile(30)

Extract(29)

GetPageCount(22)

Find(19)

SaveTextToFile(18)

FindNext(15)

ExtractText(15)

Dispose(13)

SetExtractionArea(13)

GetText(11)

GetTextFromPage(9)

FindAll(8)

IsValidFileType(6)

Begin(6)

Open(5)

SavePageTextToFile(5)

ExtractLine(4)

SaveTextToStream(4)

ExtractAll(4)

GetAsXML(4)

SavePageTextToStream(4)

GetFirstLine(3)

Reset(3)

TextExtractingWillBePotentiallySlow(3)

ResetExtractionArea(2)

PostImageAsync(2)

LoadProfiles(2)

ToString(2)

GetValue(2)

LoadDocumentFromStream(2)

GetPageRectangle(2)

Filter(2)

GetPageRect_Width(2)

GetPageRect_Height(2)

GetTextFromBitmapAsync(2)

GetWordCount(2)

Replace(1)

NextPage(1)

GetListValues(1)

IsOCRRecommendedForPage(1)

SelectStrategy(1)

ExtractFullText(1)

SupportedFormats(1)

SupportedLanguages(1)

CreateDocument(1)

AddFilter(1)

Méthodes fréquemment utilisées

LoadDocumentFromFile (30)

Extract (29)

GetPageCount (22)

Find (19)

SaveTextToFile (18)

FindNext (15)

ExtractText (15)

Dispose (13)

SetExtractionArea (13)

GetText (11)

Méthodes fréquemment utilisées

GetTextFromPage (9)

FindAll (8)

IsValidFileType (6)

Begin (6)

Open (5)

SavePageTextToFile (5)

ExtractLine (4)

SaveTextToStream (4)

ExtractAll (4)

GetAsXML (4)

SavePageTextToStream (4)

GetFirstLine (3)

Reset (3)

TextExtractingWillBePotentiallySlow (3)

ResetExtractionArea (2)

PostImageAsync (2)

LoadProfiles (2)

ToString (2)

GetValue (2)

LoadDocumentFromStream (2)

Méthodes fréquemment utilisées

SavePageTextToStream (4)

GetFirstLine (3)

Reset (3)

TextExtractingWillBePotentiallySlow (3)

ResetExtractionArea (2)

PostImageAsync (2)

LoadProfiles (2)

ToString (2)

GetValue (2)

LoadDocumentFromStream (2)

GetPageRectangle (2)

Filter (2)

GetPageRect_Width (2)

GetPageRect_Height (2)

GetTextFromBitmapAsync (2)

GetWordCount (2)

Replace (1)

NextPage (1)

GetListValues (1)

IsOCRRecommendedForPage (1)

SelectStrategy (1)

ExtractFullText (1)

SupportedFormats (1)

SupportedLanguages (1)

CreateDocument (1)

AddFilter (1)

Related in langs

Page (PHP)

AddressInterface (PHP)

buildGradient (C++)

execute_goto (C++)

PEMtoCertificateAndDER (Go)

NewAvgCollection (Go)

ListBase (Java)

Logger (Java)

strp_isoformat (Python)

Operand (Python)

Méthodes fréquemment utilisées

GetPageRectangle (2)

Filter (2)

GetPageRect_Width (2)

GetPageRect_Height (2)

GetTextFromBitmapAsync (2)

GetWordCount (2)

Replace (1)

NextPage (1)

GetListValues (1)

IsOCRRecommendedForPage (1)

SelectStrategy (1)

ExtractFullText (1)

SupportedFormats (1)

SupportedLanguages (1)

CreateDocument (1)

AddFilter (1)

Associées

RemindersDbContext

InteractableObjectBehaviour

DomainEventHub

ILavaEngine

DataSet.EmployeeDetail.EmpRecurringPaymentRow

Relationship

ListSolution

LineItemEntity

DefaultCollectorRegistry

ConfigDataFactory

Exemple #1

0

Afficher le fichier

/// <summary>
/// Saves the text of every page of <c>.\sample2.pdf</c> to its own
/// <c>page{index}.txt</c> file (indices start at 0), then opens the first
/// generated file in its default associated application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Create Bytescout.PDFExtractor.TextExtractor instance
    TextExtractor extractor = new TextExtractor();
    int pageCount = 0;

    try
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(@".\sample2.pdf");

        // Get page count
        pageCount = extractor.GetPageCount();

        for (int i = 0; i < pageCount; i++)
        {
            string fileName = "page" + i + ".txt";

            // Save extracted page text to file
            extractor.SavePageTextToFile(i, fileName);
        }
    }
    finally
    {
        // Cleanup: release the extractor even when loading or extraction throws
        extractor.Dispose();
    }

    // Output files are numbered from 0, so the first one is page0.txt.
    // Skip opening anything when the document produced no pages.
    if (pageCount > 0)
    {
        // Open first output file in default associated application
        ProcessStartInfo processStartInfo = new ProcessStartInfo(@".\page0.txt");
        processStartInfo.UseShellExecute = true;
        Process.Start(processStartInfo);
    }
}

Exemple #2

0

Afficher le fichier

/// <summary>
/// Saves the text of every page of <c>.\sample2.pdf</c> to its own
/// <c>page{index}.txt</c> file (indices start at 0), then opens the first
/// generated file in its default associated application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Create Bytescout.PDFExtractor.TextExtractor instance
    TextExtractor extractor = new TextExtractor();
    int pageCount = 0;

    try
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(@".\sample2.pdf");

        // Get page count
        pageCount = extractor.GetPageCount();

        for (int i = 0; i < pageCount; i++)
        {
            string fileName = "page" + i + ".txt";

            // Save extracted page text to file
            extractor.SavePageTextToFile(i, fileName);
        }
    }
    finally
    {
        // The extractor was never released before; dispose it on every path
        extractor.Dispose();
    }

    // Output files are numbered from 0, so the first one is page0.txt.
    // Skip opening anything when the document produced no pages.
    if (pageCount > 0)
    {
        // Open first output file in default associated application
        System.Diagnostics.Process.Start(@".\page0.txt");
    }
}

Exemple #3

0

Afficher le fichier

/// <summary>
/// Splits the first page of <c>columns.pdf</c> into equally wide vertical
/// columns, saves the text of each column to <c>columns-column{index}.txt</c>
/// and opens every output file in its default associated application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Number of equally distributed columns assumed on the page
    const int ColumnCount = 3;

    // Create Bytescout.PDFExtractor.TextExtractor instance
    TextExtractor extractor = new TextExtractor();

    try
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile("columns.pdf");

        // Read the size of the very first page (zero index)
        float pageWidth = extractor.GetPageRect_Width(0);
        float pageHeight = extractor.GetPageRect_Height(0);

        // Width of one column: page width divided by the number of columns
        float columnWidth = pageWidth / ColumnCount;

        // Iterate through the columns
        for (int i = 0; i < ColumnCount; i++)
        {
            // Set the extraction area to the #i column
            extractor.SetExtractionArea(i * columnWidth, 0, columnWidth, pageHeight);

            string outFileName = "columns-column" + i + ".txt";
            extractor.SavePageTextToFile(0, outFileName);

            // Open output file in default associated application
            System.Diagnostics.Process.Start(outFileName);
        }
    }
    finally
    {
        // The extractor was never released before; dispose it on every path
        extractor.Dispose();
    }
}

Exemple #4

0

Afficher le fichier

Fichier : Program.cs Projet : bytescout/pdf-extractor-sdk-samples-c-sharp

/// <summary>
/// Detects bordered tables (at least 3 columns and 3 rows) on every page of
/// <c>.\sample3.pdf</c> and saves the text of each detected table area to
/// <c>page-{page}-table-{n}.txt</c>. Afterwards the first file that was
/// actually written is opened in its default associated application.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Name of the first table file written; stays null when no table is found
    string firstOutputFile = null;

    // Create Bytescout.PDFExtractor.TextExtractor instance
    TextExtractor textExtractor = new TextExtractor();

    try
    {
        textExtractor.RegistrationName = "demo";
        textExtractor.RegistrationKey = "demo";

        // Create Bytescout.PDFExtractor.TableDetector instance
        TableDetector tableDetector = new TableDetector();

        try
        {
            tableDetector.RegistrationKey = "demo";
            tableDetector.RegistrationName = "demo";

            // Set table detection mode to "bordered tables" - best for tables with closed solid borders.
            tableDetector.ColumnDetectionMode = ColumnDetectionMode.BorderedTables;

            // We should define what kind of tables we should detect.
            // So we set min required number of columns to 3 ...
            tableDetector.DetectionMinNumberOfColumns = 3;
            // ... and we set min required number of rows to 3
            tableDetector.DetectionMinNumberOfRows = 3;

            // Load sample PDF document
            textExtractor.LoadDocumentFromFile(@".\sample3.pdf");
            tableDetector.LoadDocumentFromFile(@".\sample3.pdf");

            // Get page count
            int pageCount = tableDetector.GetPageCount();

            for (int i = 0; i < pageCount; i++)
            {
                // Table counter restarts at 1 on every page
                int t = 1;

                // Find first table and continue if found
                if (tableDetector.FindTable(i))
                {
                    do
                    {
                        // Set extraction area for the text extractor to the rectangle received from the table detector
                        textExtractor.SetExtractionArea(tableDetector.FoundTableLocation);

                        // Export the table to TEXT file
                        string outputFile = "page-" + i + "-table-" + t + ".txt";
                        textExtractor.SavePageTextToFile(i, outputFile);

                        if (firstOutputFile == null)
                        {
                            firstOutputFile = outputFile;
                        }

                        t++;
                    }
                    while (tableDetector.FindNextTable()); // search next table
                }
            }
        }
        finally
        {
            // Cleanup: release the detector even when detection or extraction throws
            tableDetector.Dispose();
        }
    }
    finally
    {
        // Cleanup: release the extractor even when detection or extraction throws
        textExtractor.Dispose();
    }

    // Open first output file in default associated application (for demo purposes).
    // Page 0 may contain no table, so open the file that was really produced, if any.
    if (firstOutputFile != null)
    {
        ProcessStartInfo processStartInfo = new ProcessStartInfo(firstOutputFile);
        processStartInfo.UseShellExecute = true;
        Process.Start(processStartInfo);
    }
}

Exemple #5

0

Afficher le fichier

/// <summary>
/// Click handler that extracts text, with OCR in automatic mode, from the
/// page currently shown in <c>pdfViewerControl1</c>, saves it to
/// <c>result.txt</c> and opens that file in its default associated application.
/// When the viewer has a selection, extraction is limited to its first rectangle.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnRunOCR_Click(object sender, EventArgs e)
{
    TextExtractor extractor = new TextExtractor();

    try
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(pdfViewerControl1.InputFile);

        // Enable Optical Character Recognition (OCR)
        // in .Auto mode (SDK automatically checks if needs to use OCR or not)
        extractor.OCRMode = OCRMode.Auto;

        // Set the location of "tessdata" folder containing language data files
        extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\net4.00\tessdata\";

        // Set OCR language
        // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in /tessdata
        // Find more language files at https://github.com/tesseract-ocr/tessdata/tree/3.04.00
        extractor.OCRLanguage = "eng";

        // Set PDF document rendering resolution
        extractor.OCRResolution = 300;

        // Set the extraction area to the viewer's selection rectangle
        RectangleF[] selection = pdfViewerControl1.SelectionInPoints;
        if (selection.Length > 0)
        {
            extractor.SetExtractionArea(selection[0]);
        }

        // Show wait cursor
        Cursor = Cursors.WaitCursor;

        try
        {
            // Perform OCR and save result to file
            extractor.SavePageTextToFile(pdfViewerControl1.CurrentPageIndex, "result.txt");
        }
        finally
        {
            // Revert cursor to default
            Cursor = Cursors.Default;
        }
    }
    finally
    {
        // Cleanup: release the extractor even when loading, setup or OCR throws
        extractor.Dispose();
    }

    // Open output file in default associated application
    System.Diagnostics.Process.Start("result.txt");
}