Esempio n. 1
0
        /*
         * OCR a single PDF and print the extracted document as JSON.
         * Pass the path of your own PDF as the first argument, or use the
         * sample Amtrak-Financials file provided in this repo.
         */
        static async Task Main(string[] args)
        {
            // The PDF path is the only command-line argument this example reads.
            if (args.Length == 0)
            {
                Console.WriteLine("Please provide the filepath of a PDF to OCR");
                Environment.Exit(0);
            }

            IndicoClient client = new IndicoClient(new IndicoConfig(host: "app.indico.io"));

            // Extraction settings handed to the DocumentExtraction query.
            JObject settings = new JObject();
            settings.Add("preset_config", "standard");

            // Start the extraction job for the given file and wait for its result.
            DocumentExtraction extraction = client.DocumentExtraction(settings);
            Job extractionJob = await extraction.Exec(args[0]);
            JObject jobResult = await extractionJob.Result();

            // The job result carries a "url" entry; the blob behind it is fetched
            // and printed as a JSON object.
            string blobUrl = (string)jobResult.GetValue("url");
            Blob extracted = await client.RetrieveBlob(blobUrl).Exec();
            JObject document = extracted.AsJSONObject();

            Console.WriteLine(document);
        }
        /// <summary>
        /// Builds a <c>DocumentExtraction</c> bound to this client, used to OCR files.
        /// </summary>
        /// <param name="jsonConfig">
        /// Optional extraction configuration as a JSON object. When null (the default),
        /// the new instance's <c>JsonConfig</c> is left as constructed.
        /// </param>
        /// <returns>The newly created DocumentExtraction.</returns>
        public DocumentExtraction DocumentExtraction(JObject jsonConfig = null)
        {
            DocumentExtraction extraction = new DocumentExtraction(this);

            // Nothing to apply: hand back the instance untouched.
            if (jsonConfig == null)
            {
                return extraction;
            }

            extraction.JsonConfig = jsonConfig;
            return extraction;
        }
Esempio n. 3
0
 public void DocumentExtraction_Initialize()
 {
     // Constructs the extraction service and stores it in _extractionService.
     // Any exception thrown by the constructor is reported as an assertion
     // failure whose message is the full exception text.
     try
     {
         _extractionService = new DocumentExtraction();
     }
     catch (Exception constructionError)
     {
         string failureDetails = constructionError.ToString();
         Assert.Fail(failureDetails);
     }
 }
Esempio n. 4
0
        /*
         * To actually run this example, you'll need to train a sequence
         * model on the Indico IPA Platform with the labeled-swaps-200.csv
         * file included in this repo. Be aware that training will likely
         * take a couple of hours. Once training is complete, you can run the
         * example by passing in the directory of the two sample PDF files in
         * the repo (Confirmation letter and Confirmation of Interest Rate Swap).
         *
         * Before running, replace the Model Group ID (mgId) with the
         * ID for your trained model. You can find it on the model's Review page.
         */
        static async Task Main(string[] args)
        {
            // Replace this with your Model Group ID
            int mgId = 4352;

            // The target directory is a required argument. Without this guard,
            // args[0] throws IndexOutOfRangeException when nothing is passed.
            // Exit non-zero so a missing argument is still reported as a failure.
            if (args.Length == 0)
            {
                Console.WriteLine("Please provide the directory containing the PDF files to process");
                Environment.Exit(1);
            }

            List <string> targetFiles = GetTargetFiles(args[0]);

            if (targetFiles.Count == 0)
            {
                Console.WriteLine("No files to process");
                Environment.Exit(0);
            }

            IndicoClient client = new IndicoClient();

            JObject extractConfig = new JObject()
            {
                { "preset_config", "legacy" }
            };

            // One extracted text per input file, in the order the files are listed.
            List <string>      texts    = new List <string>();
            DocumentExtraction ocrQuery = client.DocumentExtraction(extractConfig);

            foreach (string path in targetFiles)
            {
                Console.WriteLine(path);

                // Run OCR on the file and wait for the job result.
                Job ocrJob = await ocrQuery.Exec(path);

                JObject result = await ocrJob.Result();

                // The job result carries a "url" entry; fetch the blob behind it.
                string resUrl = (string)result.GetValue("url");
                Blob   blob   = await client.RetrieveBlob(resUrl).Exec();

                // Keep only the "text" entry of the retrieved document.
                JObject obj = blob.AsJSONObject();
                texts.Add((string)obj.GetValue("text"));
            }

            // Look up the model group, load it, and report the returned status.
            ModelGroup mg = await client.ModelGroupQuery(mgId).Exec();

            string status = await client.ModelGroupLoad(mg).Exec();

            Console.WriteLine($"Model status = {status}");

            // Submit all collected texts for prediction and print the results.
            Job job = await client.ModelGroupPredict(mg).Data(texts).Exec();

            JArray jobResult = await job.Results();

            Console.WriteLine(jobResult);
        }
        /// <summary>
        /// Starts a document extraction for the given file using the requested preset
        /// and returns the identifier of the resulting job. The job's result is not
        /// awaited here.
        /// </summary>
        /// <param name="filePath">Path of the file passed to the extraction.</param>
        /// <param name="preset">Preset whose lower-cased name is sent as <c>preset_config</c>.</param>
        /// <param name="cancellationToken">Token forwarded to the extraction call.</param>
        /// <returns>The <c>Id</c> of the job returned by the extraction call.</returns>
        public async Task <string> ExtractDocumentAsync(string filePath, DocumentExtractionPreset preset, CancellationToken cancellationToken)
        {
            // Lower-case with the invariant culture: the value is a machine-readable
            // identifier, and culture-sensitive ToLower() would corrupt names
            // containing 'I' under e.g. the Turkish culture ("I" -> dotless "ı").
            var presetName = preset.ToString("F").ToLowerInvariant();

            var config = new JObject
            {
                { "preset_config", presetName }
            };
            var docExtraction =
                new DocumentExtraction(_indicoClientLegacy)
            {
                JsonConfig = config,
            };
            var job = await docExtraction.Exec(filePath, cancellationToken);

            return(job.Id);
        }
 /// <summary>
 /// Forwards to the generic Invoke helper for a <c>DocumentExtraction</c> activity,
 /// supplying a callback that assigns the provided output argument to the
 /// activity's <c>Results</c> property.
 /// </summary>
 /// <param name="documentExtractionActivity">The activity to invoke.</param>
 /// <returns>The string returned by the generic Invoke helper.</returns>
 public static string Invoke(this DocumentExtraction documentExtractionActivity)
 {
     return documentExtractionActivity.Invoke <DocumentExtraction, string>(
         (activity, resultArgument) => activity.Results = resultArgument);
 }