/// <summary>
/// Extracts text from "sample_ocr.pdf" twice: first with the predefined
/// "scanned, no-layout" profiles (written to "result1.txt"), then with
/// custom profiles loaded from "profiles.json" (written to "result2.txt").
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // The using statement guarantees the extractor is disposed even when
    // loading, OCR, or saving throws.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Folder with the OCR language data files (hard-coded SDK install path).
        extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\net2.00\tessdata";

        // First pass: predefined profiles.
        extractor.LoadDocumentFromFile("sample_ocr.pdf");
        extractor.Profiles = "scanned, no-layout";
        extractor.SaveTextToFile("result1.txt");

        // Reset the extractor before loading another document.
        extractor.Reset();

        // Second pass: custom profiles defined in profiles.json.
        extractor.LoadDocumentFromFile("sample_ocr.pdf");
        extractor.LoadProfiles("profiles.json");
        extractor.Profiles = "keep-formatting, ocr-forced-200dpi";
        extractor.SaveTextToFile("result2.txt");
    }
}
/// <summary>
/// Extracts the text of every "*.pdf" file in the current directory and
/// saves it next to the source file with a ".txt" extension.
/// </summary>
static void Main()
{
    // The using statement guarantees the extractor is disposed even when
    // one of the documents fails to load or save.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Get PDF files from the current directory.
        string[] pdfFiles = Directory.GetFiles(".", "*.pdf");

        foreach (string file in pdfFiles)
        {
            // Load the document and save its text as <name>.txt.
            extractor.LoadDocumentFromFile(file);
            extractor.SaveTextToFile(Path.ChangeExtension(file, ".txt"));

            // Reset the extractor before loading the next file.
            extractor.Reset();
        }
    }
}
/// <summary>
/// Page load handler: extracts the text of every "*.pdf" file found in the
/// site's "bin" folder and writes it to the HTTP response.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Directory containing test files.
    string inputFolder = Server.MapPath(@".\bin");

    // The original never disposed the extractor, leaking it on every request;
    // the using statement releases it on both success and failure.
    using (TextExtractor extractor = new TextExtractor())
    {
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        Response.Clear();
        // NOTE(review): the extracted text is written as-is while the response
        // is declared "text/html" - confirm this is intended.
        Response.ContentType = "text/html";

        // Get PDF files.
        string[] pdfFiles = Directory.GetFiles(inputFolder, "*.pdf");

        foreach (string file in pdfFiles)
        {
            // Load the document and write its text to the output stream.
            extractor.LoadDocumentFromFile(file);
            extractor.SaveTextToStream(Response.OutputStream);

            // Reset the extractor before loading the next file.
            extractor.Reset();
        }
    }

    // End the response only after the extractor has been released.
    Response.End();
}