Пример #1
0
        /// <summary>
        /// Builds one result per positional data field, taking each value from a single batched
        /// <c>AreaLoader.GetTextFromPdf</c> call for the given PDF file.
        /// </summary>
        /// <param name="pdfFilePath">Path of the PDF file. When it is null, blank, or the file does not exist, an empty list is returned.</param>
        /// <param name="datafields">Data field templates; only entries whose <c>FieldMode</c> is <c>DataFieldMode.Position</c> are used. A null list yields an empty result.</param>
        /// <returns>One <see cref="DataFieldResult"/> per positional field, in the order the fields appear in <paramref name="datafields"/>.</returns>
        public async Task <List <DataFieldResult> > GetTextFromPdfForPositionalDataFields(string pdfFilePath, List <DataFieldClassTemplate> datafields)
        {
            var retVal = new List <DataFieldResult>();

            if (datafields == null || string.IsNullOrWhiteSpace(pdfFilePath) || File.Exists(pdfFilePath) == false)
            {
                return retVal;
            }

            // Filter once so the areas handed to the loader and the results built below
            // are guaranteed to be in the same order.
            var positionalFields = datafields.Where(x => x.FieldMode == DataFieldMode.Position).ToList();
            if (positionalFields.Count == 0)
            {
                // No positional fields: nothing to request from the loader.
                return retVal;
            }

            var areaInfoList = positionalFields.Select(x => x.ValueArea).ToList();
            var textList = await AreaLoader.GetTextFromPdf(pdfFilePath, areaInfoList);
            var textCount = textList == null ? 0 : textList.Count;

            for (var i = 0; i < positionalFields.Count; i++)
            {
                var field = positionalFields[i];
                var resultItem = new DataFieldResult()
                {
                    FieldType = field.FieldType, Name = field.Name
                };

                // Texts are paired with fields by position (the i-th text belongs to the i-th requested area).
                // If the loader returned fewer texts than requested, leave Value at its default
                // instead of throwing on an empty sequence.
                if (i < textCount)
                {
                    resultItem.Value = textList[i];
                }

                retVal.Add(resultItem);
            }

            return retVal;
        }
Пример #2
0
        /// <summary>
        /// Extracts data from input text based on the given class template and matching group template.
        /// </summary>
        /// <remarks>
        /// Conditional fields of the matching group template that are not flagged <c>OnlyStoreInGroupTemplate</c>
        /// and whose name (compared case-insensitively) is not yet present are added to
        /// <c>template.ConditionalFields</c> itself, so the passed-in class template may be modified by this call.
        /// </remarks>
        /// <param name="template">The class template to be used</param>
        /// <param name="groupTemplates">Available group templates. The correct group template for the given class template will be selected automatically. If it is the correct one, it's okay if only one group template is in the list. May be null, in which case no group template is applied.</param>
        /// <param name="inputText">Text that regex data fields and conditional fields are evaluated against.</param>
        /// <param name="pdfFilePath">Path of the PDF file that positional data fields are read from.</param>
        /// <returns>The extraction result holding the data fields, calculation fields and conditional fields.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="template"/> is null.</exception>
        public async Task <FieldExtractionResult> ExtractData(DocumentClassTemplate template, List <DocumentGroupTemplate> groupTemplates, string inputText, string pdfFilePath)
        {
            if (template == null)
            {
                throw new System.ArgumentNullException(nameof(template));
            }

            var retVal = new FieldExtractionResult()
            {
                TemplateClassName = template.TemplateClassName, TemplateGroupName = template.TemplateGroupName
            };

            // Every data field gets a result entry. Regex fields are resolved right away;
            // positional fields are filled in from the PDF further below.
            foreach (var item in template.DataFields)
            {
                var resultItem = new DataFieldResult()
                {
                    FieldType = item.FieldType, Name = item.Name
                };
                if (item.FieldMode == DataFieldMode.Regex)
                {
                    resultItem.Value = ExecuteRegexExpression(inputText, item.RegexExpressions);
                }
                retVal.DataFields.Add(resultItem);
            }

            var positionalTemplates = template.DataFields.Where(x => x.FieldMode == DataFieldMode.Position).ToList();
            var dataFieldsPosition  = await TextLoader.GetTextFromPdfForPositionalDataFields(pdfFilePath, positionalTemplates);

            // Copy positional values onto the result entries created above, matched by field name.
            foreach (var dataFieldPos in dataFieldsPosition)
            {
                var retValDataField = retVal.DataFields.FirstOrDefault(x => x.Name == dataFieldPos.Name);
                if (retValDataField != null)
                {
                    retValDataField.Value = dataFieldPos.Value;
                }
            }

            // A missing list or a missing match simply means "no group template".
            var groupTemplate = groupTemplates?.FirstOrDefault(x => x != null && x.TemplateGroupName == template.TemplateGroupName);

            if (groupTemplate != null)
            {
                var fieldCalculator = new FieldCalculator();
                foreach (var item in groupTemplate.CalculationFields)
                {
                    retVal.CalculationFields.Add(fieldCalculator.CompareExpressionResults(item, retVal.DataFields));
                }
            }

            var conditionProcessor        = new ConditionalFieldProcessor();
            var allConditionalClassFields = template.ConditionalFields;

            // Add conditional fields to class template if necessary.
            if (groupTemplate != null)
            {
                // Ordinal, case-insensitive name set: unlike ToLower() it does not depend on the current culture.
                var knownNames = new HashSet <string>(allConditionalClassFields.Select(x => x.Name), System.StringComparer.OrdinalIgnoreCase);
                foreach (var item in groupTemplate.ConditionalFields.Where(x => x.OnlyStoreInGroupTemplate == false))
                {
                    // HashSet.Add returns false for a name already seen, so a name repeated inside
                    // the group template is added to the class template only once.
                    if (knownNames.Add(item.Name))
                    {
                        allConditionalClassFields.Add(item);
                    }
                }
            }

            // Class-level conditional fields first; the first result for a given name wins.
            foreach (var item in allConditionalClassFields)
            {
                var conditionalFieldResult = conditionProcessor.ProcessConditions(inputText, item);
                if (!retVal.ConditionalFields.Any(x => x.Name == conditionalFieldResult.Name))
                {
                    retVal.ConditionalFields.Add(conditionalFieldResult);
                }
            }

            // Then the fields that live only in the group template, skipping names already produced above.
            if (groupTemplate != null)
            {
                foreach (var item in groupTemplate.ConditionalFields.Where(x => x.OnlyStoreInGroupTemplate == true))
                {
                    var conditionalFieldResult = conditionProcessor.ProcessConditions(inputText, item);
                    if (!retVal.ConditionalFields.Any(x => x.Name == conditionalFieldResult.Name))
                    {
                        retVal.ConditionalFields.Add(conditionalFieldResult);
                    }
                }
            }

            return retVal;
        }