/*
 * Run with your own PDF or use the sample Amtrak-Financials file
 * provided in this repo.
 *
 * Flow: submit the PDF named by the first command-line argument for
 * document extraction, await the job result, download the blob that the
 * result's "url" entry points to, and print that blob as JSON.
 */
static async Task Main(string[] args)
{
    // A missing argument is a usage error: report it on stderr and exit
    // with a non-zero code so scripts and CI can detect the failure.
    if (args.Length == 0)
    {
        Console.Error.WriteLine("Please provide the filepath of a PDF to OCR");
        Environment.Exit(1);
    }

    IndicoConfig config = new IndicoConfig(host: "app.indico.io");
    IndicoClient client = new IndicoClient(config);

    JObject extractConfig = new JObject()
    {
        { "preset_config", "standard" }
    };

    DocumentExtraction ocrQuery = client.DocumentExtraction(extractConfig);
    Job job = await ocrQuery.Exec(args[0]);

    // The job result only carries a "url" entry; the extraction output
    // itself is retrieved from that location as a blob.
    JObject result = await job.Result();
    string url = (string)result.GetValue("url");
    Blob blob = await client.RetrieveBlob(url).Exec();

    Console.WriteLine(blob.AsJSONObject());
}
/// <summary>
/// Builds a DocumentExtraction bound to this client, used to OCR files.
/// </summary>
/// <param name="jsonConfig">Optional extraction configuration as a JSON object. When null (the default), the new instance's JsonConfig is not assigned.</param>
/// <returns>The newly created DocumentExtraction.</returns>
public DocumentExtraction DocumentExtraction(JObject jsonConfig = null)
{
    DocumentExtraction extraction = new DocumentExtraction(this);

    // Nothing to apply: hand back the instance with whatever configuration it was constructed with.
    if (jsonConfig == null)
    {
        return extraction;
    }

    extraction.JsonConfig = jsonConfig;
    return extraction;
}
// Creates a DocumentExtraction with its parameterless constructor and stores
// it in _extractionService. Any exception thrown while doing so is reported
// as an assertion failure carrying the full exception text.
public void DocumentExtraction_Initialize()
{
    try
    {
        _extractionService = new DocumentExtraction();
    }
    catch (Exception error)
    {
        string details = error.ToString();
        Assert.Fail(details);
    }
}
/*
 * To actually run this example, you'll need to train a sequence
 * model on the Indico IPA Platform with the labeled-swaps-200.csv
 * file contained with this repo. Be aware that training will likely
 * take a couple hours. Once training is complete, you can run the
 * example by passing in the directory of the two sample PDF files in
 * the repo (Confirmation letter and Confirmation of Interest Rate Swap).
 *
 * Before running, replace the Model Group ID (mgId) with the
 * ID for your trained model. You can find it on the model's Review page.
 *
 * Flow: OCR every target file, collect the "text" entry of each
 * extraction result, load the model group, then run a prediction over
 * the collected texts and print the results.
 */
static async Task Main(string[] args)
{
    // Replace this with your Model Group ID
    int mgId = 4352;

    // args[0] is read just below; without this check a missing argument
    // would surface as an IndexOutOfRangeException instead of a usage message.
    if (args.Length == 0)
    {
        Console.Error.WriteLine("Please provide the directory containing the PDF files to process");
        Environment.Exit(1);
    }

    List<string> targetFiles = GetTargetFiles(args[0]);
    if (targetFiles.Count == 0)
    {
        Console.WriteLine("No files to process");
        Environment.Exit(0);
    }

    IndicoClient client = new IndicoClient();

    JObject extractConfig = new JObject()
    {
        { "preset_config", "legacy" }
    };

    List<string> texts = new List<string>();
    DocumentExtraction ocrQuery = client.DocumentExtraction(extractConfig);

    foreach (string path in targetFiles)
    {
        Console.WriteLine(path);

        Job ocrJob = await ocrQuery.Exec(path);
        JObject result = await ocrJob.Result();

        // The job result only carries a "url" entry; the extraction output
        // is downloaded from there and its "text" entry is kept for prediction.
        string resUrl = (string)result.GetValue("url");
        Blob blob = await client.RetrieveBlob(resUrl).Exec();
        JObject obj = blob.AsJSONObject();
        texts.Add((string)obj.GetValue("text"));
    }

    ModelGroup mg = await client.ModelGroupQuery(mgId).Exec();

    // The model group is loaded before the prediction is requested; the reported status is printed.
    string status = await client.ModelGroupLoad(mg).Exec();
    Console.WriteLine($"Model status = {status}");

    Job job = await client.ModelGroupPredict(mg).Data(texts).Exec();
    JArray jobResult = await job.Results();
    Console.WriteLine(jobResult);
}
/// <summary>
/// Submits the file at <paramref name="filePath"/> for document extraction and
/// returns the Id of the job that was started. The job's result is not awaited here.
/// </summary>
/// <param name="filePath">Path of the file handed to the extraction call.</param>
/// <param name="preset">Extraction preset; its name, lower-cased, is sent as the "preset_config" value.</param>
/// <param name="cancellationToken">Token forwarded to the extraction call.</param>
/// <returns>The Id of the job returned by the extraction call.</returns>
public async Task<string> ExtractDocumentAsync(string filePath, DocumentExtractionPreset preset, CancellationToken cancellationToken)
{
    // ToLowerInvariant keeps the configuration value independent of the current
    // culture (a culture-sensitive ToLower can map 'I' to a dotless 'ı', e.g. under tr-TR).
    var presetName = preset.ToString("F").ToLowerInvariant();

    var config = new JObject
    {
        { "preset_config", presetName }
    };

    var docExtraction = new DocumentExtraction(_indicoClientLegacy)
    {
        JsonConfig = config,
    };

    var job = await docExtraction.Exec(filePath, cancellationToken);

    return job.Id;
}
/// <summary>
/// Runs the given DocumentExtraction activity through the generic Invoke helper,
/// wiring the helper's output argument to the activity's Results property.
/// </summary>
/// <param name="documentExtractionActivity">The activity to invoke.</param>
/// <returns>The string returned by the generic Invoke call.</returns>
public static string Invoke(this DocumentExtraction documentExtractionActivity)
{
    return documentExtractionActivity.Invoke<DocumentExtraction, string>(
        (activity, resultArgument) => activity.Results = resultArgument);
}