FluentQuery, DocumentLab C# (CSharp) Exemples de code

Exemple #1

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

        /// <summary>
        /// Marks the query as a single capture, appends a "Text(labels) RD maxSteps [types]" statement to the script and executes it.
        /// </summary>
        /// <param name="response">The query being built; its QueryType and script are modified.</param>
        /// <param name="textTypeOfValue">Text types accepted for the captured value; combined through Or.</param>
        /// <param name="maxSteps">Step limit written after the RD token.</param>
        /// <param name="labelInDocument">Label alternatives, joined with "||".</param>
        /// <returns>Whatever ExecuteSingleCapture yields for the resulting script.</returns>
        private static string FindValueForLabelBase(FluentQuery response, string[] textTypeOfValue, int maxSteps, string[] labelInDocument)
        {
            response.QueryType = QueryType.SingleCapture;

            var labelAlternatives = string.Join("||", labelInDocument);
            var acceptedTypes = Or(textTypeOfValue);
            response.AppendToScript($"Text({labelAlternatives}) RD {maxSteps} [{acceptedTypes}]");

            return ExecuteSingleCapture(response);
        }

Exemple #2

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

        /// <summary>
        /// Marks the query as a single capture, appends a "Text(labels) direction [types]" statement to the script and executes it.
        /// </summary>
        /// <param name="response">The query being built; its QueryType and script are modified.</param>
        /// <param name="direction">Direction token written between the label and the capture.</param>
        /// <param name="textTypeOfValue">Text types accepted for the captured value; combined through Or.</param>
        /// <param name="labelInDocument">Label alternatives, joined with "||".</param>
        /// <returns>Whatever ExecuteSingleCapture yields for the resulting script.</returns>
        private static string GetValueAtLabelBase(FluentQuery response, Direction direction, string[] textTypeOfValue, string[] labelInDocument)
        {
            response.QueryType = QueryType.SingleCapture;

            var labelAlternatives = string.Join("||", labelInDocument);
            var acceptedTypes = Or(textTypeOfValue);
            response.AppendToScript($"Text({labelAlternatives}) {direction} [{acceptedTypes}]");

            return ExecuteSingleCapture(response);
        }

Exemple #3

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

        /// <summary>
        /// Marks the query as an Any query, appends "Any [types]" to the script, executes it and returns the captured values.
        /// </summary>
        /// <param name="response">The query being built; its QueryType and script are modified.</param>
        /// <param name="textType">Text types to capture; combined through Or.</param>
        /// <returns>The values of the query result, or an empty array when the query produced no result.</returns>
        private static string[] GetAnyBase(FluentQuery response, string[] textType)
        {
            response.QueryType = QueryType.Any;
            response.AppendToScript($"Any [{Or(textType)}]");

            var captures = response.ExecuteQuery();

            // ExecuteQuery signals "nothing found" with null; dereferencing it used to throw a NullReferenceException.
            if (captures == null)
            {
                return new string[0];
            }

            return captures.Select(x => x.Value).ToArray();
        }

Exemple #4

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

        /// <summary>
        /// Appends a "[types]" capture token to the script and executes the query as a single capture.
        /// </summary>
        /// <param name="response">The query being built. Its QueryType is switched to SingleCapture only when it is still None.</param>
        /// <param name="captureTextType">Text types accepted for the capture; combined through Or.</param>
        /// <returns>Whatever ExecuteSingleCapture yields for the resulting script.</returns>
        private static string CaptureBase(FluentQuery response, string[] captureTextType)
        {
            var hasNoTypeYet = response.QueryType == QueryType.None;
            if (hasNoTypeYet)
            {
                response.QueryType = QueryType.SingleCapture;
            }

            var captureToken = $"[{Or(captureTextType)}]";
            var extendedQuery = response.AppendToScript(captureToken);

            return ExecuteSingleCapture(extendedQuery);
        }

Exemple #5

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

        /// <summary>
        /// Appends a named capture token of the form "'propertyName': [types]" to the script and marks the query as a multi capture.
        /// </summary>
        /// <param name="response">The query being built; its QueryType and script are modified.</param>
        /// <param name="captureTextType">Text types accepted for the capture; combined through Or.</param>
        /// <param name="propertyName">Name under which the captured value is reported. Must not be null, empty or whitespace.</param>
        /// <returns>The result of appending the capture token to <paramref name="response"/>.</returns>
        /// <exception cref="FluentQueryException">Thrown when <paramref name="propertyName"/> is null, empty or whitespace.</exception>
        private static FluentQuery CaptureNamedBase(FluentQuery response, string[] captureTextType, string propertyName)
        {
            // Validate before touching the query so a rejected call leaves it unmodified.
            if (string.IsNullOrWhiteSpace(propertyName))
            {
                throw new FluentQueryException("The specified pattern has multiple captures, a property name must be specified when capturing more than one value.");
            }

            response.QueryType = QueryType.MultiCapture;

            // propertyName is known to be non-blank here, so the name prefix is always emitted.
            return response.AppendToScript($"'{propertyName}': [{Or(captureTextType)}]");
        }

Exemple #6

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

        /// <summary>
        /// Executes a multiple capture query
        /// </summary>
        /// <param name="fluentQuery">A FluentQuery object containing the script built so far.</param>
        /// <param name="query">A function that extends <paramref name="fluentQuery"/> with the captures to perform and returns the resulting query.</param>
        /// <returns>The data specified for capture in the query in a dictionary, or null when the query produced no result.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="fluentQuery"/> or <paramref name="query"/> is null.</exception>
        /// <exception cref="FluentQueryException">Thrown when the query returned by <paramref name="query"/> is not a multi capture query.</exception>
        public static Dictionary<string, string> Capture(this FluentQuery fluentQuery, Func<FluentQuery, FluentQuery> query)
        {
            // Fail with a descriptive exception instead of a NullReferenceException further down.
            if (fluentQuery == null)
            {
                throw new ArgumentNullException(nameof(fluentQuery));
            }

            if (query == null)
            {
                throw new ArgumentNullException(nameof(query));
            }

            var builtQuery = query(fluentQuery);

            if (builtQuery.QueryType != QueryType.MultiCapture)
            {
                throw new FluentQueryException("A multi capture query needs to have multiple captures specified");
            }

            return builtQuery.ExecuteQuery();
        }

Exemple #7

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

        /// <summary>
        /// Terminates the script with ";", runs it through the query's interpreter and converts the JSON output into a string dictionary.
        /// </summary>
        /// <param name="fluentQuery">The query supplying the interpreter, the analyzed page, the script and the query type.</param>
        /// <returns>
        /// The deserialized captures, or null when the interpreter's JSON is blank or the deserialized result is null or empty.
        /// For Any queries the keys are the zero-based positions of the values, as strings.
        /// </returns>
        /// <exception cref="FluentQueryException">Thrown when the query type is None or is not one of SingleCapture, MultiCapture or Any.</exception>
        private static Dictionary<string, string> ExecuteQuery(this FluentQuery fluentQuery)
        {
            if (fluentQuery.QueryType == QueryType.None)
            {
                throw new FluentQueryException("Query includes no capture tokens. Nothing to query for.");
            }

            fluentQuery.AppendToScript(";");

            var interpretation = fluentQuery.Interpreter.Interpret(fluentQuery.AnalyzedPage, fluentQuery.Script);
            var rawJson = interpretation.ConvertToJson(fluentQuery.Script);

            if (string.IsNullOrWhiteSpace(rawJson))
            {
                return null;
            }

            // The node stored under the generated script's name; used by the MultiCapture and Any branches.
            var queryNode = JObject.Parse(rawJson)[FluentQueryConstants.GeneratedScriptQuery];
            var queryType = fluentQuery.QueryType;
            Dictionary<string, string> captures;

            if (queryType == QueryType.SingleCapture)
            {
                // Single captures are deserialized from the complete JSON document rather than from the node.
                captures = JsonConvert.DeserializeObject<Dictionary<string, string>>(rawJson);
            }
            else if (queryType == QueryType.MultiCapture)
            {
                captures = JsonConvert.DeserializeObject<Dictionary<string, string>>(queryNode.ToString());
            }
            else if (queryType == QueryType.Any)
            {
                // The node is read as a string array; each value is keyed by its position in that array.
                captures = JsonConvert.DeserializeObject<string[]>(queryNode.ToString())
                           .Select((text, position) => new { Position = position, Text = text })
                           .ToDictionary(entry => entry.Position.ToString(), entry => entry.Text);
            }
            else
            {
                throw new FluentQueryException("Query type is invalid");
            }

            var hasCaptures = captures != null && captures.Count != 0;
            return hasCaptures ? captures : null;
        }

Exemple #8

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Looks up the value belonging to a label using the Right-Down search: the value closest to the label's text, to the right of or below it, is chosen as the result.
 /// </summary>
 /// <param name="response">The query to extend and execute.</param>
 /// <param name="textTypeOfValue">Names of the text types the captured value may have.</param>
 /// <param name="maxSteps">Maximum distance, in DocumentLab grid cells, between label and value. Defaults to 6, which suits close-by elements; raise or lower it depending on the type of document.</param>
 /// <param name="labelInDocument">The label texts expected in the document; several alternatives may be passed.</param>
 /// <returns>The string produced by executing the resulting single capture query.</returns>
 public static string FindValueForLabel(this FluentQuery response, string[] textTypeOfValue, int maxSteps = 6, params string[] labelInDocument)
 {
     return FindValueForLabelBase(response, textTypeOfValue, maxSteps, labelInDocument);
 }

Exemple #9

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Defines a table column for the preceding table query initializer, taking the column labels as TextType values.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <param name="columnName">Name given to the column in the result output.</param>
 /// <param name="columnTextType">Text type considered valid for this column's row data.</param>
 /// <param name="labels">Label values; each is converted with ToString() before being forwarded to the string-based overload.</param>
 /// <returns>A DocumentLab FluentQuery whose script contains the table column definition.</returns>
 public static FluentQuery TableColumn(this FluentQuery response, string columnName, string columnTextType, params TextType[] labels)
 {
     var labelNames = labels.Select(label => label.ToString()).ToArray();

     return response.TableColumn(columnName, columnTextType, labelNames);
 }

Exemple #10

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Reads the value next to a label when the direction from the label to the value is known. Accepts several candidate text types.
 /// </summary>
 /// <param name="response">The query to extend and execute.</param>
 /// <param name="direction">Where the value is located in the document relative to the label.</param>
 /// <param name="textTypeOfValue">Text types the captured value may have; each is converted with ToString().</param>
 /// <param name="labelInDocument">The label texts expected in the document; alternatives are joined with || in the script.</param>
 /// <returns>The string produced by executing the resulting single capture query.</returns>
 public static string GetValueAtLabel(this FluentQuery response, Direction direction, TextType[] textTypeOfValue, params string[] labelInDocument)
 {
     var typeNames = textTypeOfValue.Select(type => type.ToString()).ToArray();

     return GetValueAtLabelBase(response, direction, typeNames, labelInDocument);
 }

Exemple #11

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Reads the value next to a label when the direction from the label to the value is known. Accepts a single text type.
 /// </summary>
 /// <param name="response">The query to extend and execute.</param>
 /// <param name="direction">Where the value is located in the document relative to the label.</param>
 /// <param name="textTypeOfValue">Text type the captured value must have. Defaults to TextType.Text.</param>
 /// <param name="labelInDocument">The label texts expected in the document; alternatives are joined with || in the script.</param>
 /// <returns>The string produced by executing the resulting single capture query.</returns>
 public static string GetValueAtLabel(this FluentQuery response, Direction direction, TextType textTypeOfValue = TextType.Text, params string[] labelInDocument)
 {
     var typeNames = new[] { textTypeOfValue.ToString() };

     return GetValueAtLabelBase(response, direction, typeNames, labelInDocument);
 }

Exemple #12

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Collects every value of the given text type found in a document.
 /// </summary>
 /// <param name="response">The query to extend and execute.</param>
 /// <param name="textType">The text type whose instances are captured.</param>
 /// <returns>The captured values as an array of strings.</returns>
 public static string[] GetAny(this FluentQuery response, TextType textType)
 {
     var typeNames = new[] { textType.ToString() };

     return GetAnyBase(response, typeNames);
 }

Exemple #13

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Reads the value next to a label when the direction from the label to the value is known. Text types are given by name.
 /// </summary>
 /// <param name="response">The query to extend and execute.</param>
 /// <param name="direction">Where the value is located in the document relative to the label.</param>
 /// <param name="textTypeOfValue">Names of the text types the captured value may have.</param>
 /// <param name="labelInDocument">The label texts expected in the document; alternatives are joined with || in the script.</param>
 /// <returns>The string produced by executing the resulting single capture query.</returns>
 public static string GetValueAtLabel(this FluentQuery response, Direction direction, string[] textTypeOfValue, params string[] labelInDocument)
 {
     return GetValueAtLabelBase(response, direction, textTypeOfValue, labelInDocument);
 }

Exemple #14

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Extends the pattern to match with one or more text types and, optionally, text the matched element must correspond to.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <param name="textType">Text types to match in the pattern; each is converted with ToString().</param>
 /// <param name="matchText">*Optional* Text the matched element must also correspond to. The check tests whether the string from the document contains this text, combined with a Levenshtein distance 2 (by default) check, so abbreviations or simplified forms of common terms make the match more tolerant of OCR differences.</param>
 /// <returns>A DocumentLab FluentQuery whose script contains the match.</returns>
 public static FluentQuery Match(this FluentQuery response, TextType[] textType, params string[] matchText)
 {
     var typeNames = textType.Select(type => type.ToString()).ToArray();

     return MatchBase(response, typeNames, matchText);
 }

Exemple #15

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Looks up the value belonging to a label using the Right-Down search: the value closest to the label's text, to the right of or below it, is chosen as the result.
 /// </summary>
 /// <param name="response">The query to extend and execute.</param>
 /// <param name="textTypeOfValue">Text types the captured value may have; each is converted with ToString().</param>
 /// <param name="maxSteps">Maximum distance, in DocumentLab grid cells, between label and value. Defaults to 6, which suits close-by elements; raise or lower it depending on the type of document.</param>
 /// <param name="labelInDocument">The label texts expected in the document; alternatives are joined with || in the script.</param>
 /// <returns>The string produced by executing the resulting single capture query.</returns>
 public static string FindValueForLabel(this FluentQuery response, TextType[] textTypeOfValue, int maxSteps = 6, params string[] labelInDocument)
 {
     var typeNames = textTypeOfValue.Select(type => type.ToString()).ToArray();

     return FindValueForLabelBase(response, typeNames, maxSteps, labelInDocument);
 }

Exemple #16

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// After a pattern predicate or a capture, moves *Down* from that position so the next operation looks at the element below.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <returns>The result of appending the "Down" token to the query's script.</returns>
 public static FluentQuery Down(this FluentQuery response)
 {
     return response.AppendToScript("Down");
 }

Exemple #17

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// After a pattern predicate or a capture, moves *Right* from that position so the next operation looks at the element to the right.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <returns>The result of appending the "Right" token to the query's script.</returns>
 public static FluentQuery Right(this FluentQuery response)
 {
     return response.AppendToScript("Right");
 }

Exemple #18

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Defines a table column for the preceding table query initializer.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <param name="columnName">Name given to the column in the result output.</param>
 /// <param name="columnTextType">Text type considered valid for this column's row data.</param>
 /// <param name="labels">Labels that can be evaluated to identify the table on the page; joined with "||" in the script.</param>
 /// <returns>A DocumentLab FluentQuery whose script contains the table column definition.</returns>
 public static FluentQuery TableColumn(this FluentQuery response, string columnName, string columnTextType, params string[] labels)
 {
     var labelAlternatives = string.Join("||", labels);
     var columnDefinition = $"'{columnName}': [{columnTextType}({labelAlternatives})]";

     return response.AppendToScript(columnDefinition);
 }

Exemple #19

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Restricts the query to a single subset of the page.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <param name="subset">A subset definition. Use the static methods in the Subset class to instantiate.</param>
 /// <returns>A DocumentLab FluentQuery whose script contains the subset operation.</returns>
 public static FluentQuery Subset(this FluentQuery response, Subset subset)
 {
     var singleSubset = new[] { subset };

     return SubsetBase(response, singleSubset);
 }

Exemple #20

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Collects every value of the given text types found in a document. Text types are given by name.
 /// </summary>
 /// <param name="response">The query to extend and execute.</param>
 /// <param name="textType">Names of the text types whose instances are captured.</param>
 /// <returns>The captured values as an array of strings.</returns>
 public static string[] GetAny(this FluentQuery response, string[] textType)
 {
     return GetAnyBase(response, textType);
 }

Exemple #21

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Collects every value of the given text types found in a document.
 /// </summary>
 /// <param name="response">The query to extend and execute.</param>
 /// <param name="textType">Text types whose instances are captured; each is converted with ToString().</param>
 /// <returns>The captured values as an array of strings.</returns>
 public static string[] GetAny(this FluentQuery response, TextType[] textType)
 {
     var typeNames = textType.Select(type => type.ToString()).ToArray();

     return GetAnyBase(response, typeNames);
 }

Exemple #22

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Collects every value of the given text type found in a document. The text type is given by name.
 /// </summary>
 /// <param name="response">The query to extend and execute.</param>
 /// <param name="textType">Name of the text type whose instances are captured.</param>
 /// <returns>The captured values as an array of strings.</returns>
 public static string[] GetAny(this FluentQuery response, string textType)
 {
     var typeNames = new[] { textType };

     return GetAnyBase(response, typeNames);
 }

Exemple #23

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// After a pattern predicate or a capture, moves *Up* from that position so the next operation looks at the element above.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <returns>The result of appending the "Up" token to the query's script.</returns>
 public static FluentQuery Up(this FluentQuery response)
 {
     return response.AppendToScript("Up");
 }

Exemple #24

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Restricts the query to the given subsets of the page.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <param name="subsets">A subset definition array. Use the static methods in the Subset class to instantiate.</param>
 /// <returns>A DocumentLab FluentQuery whose script contains the subset operation.</returns>
 public static FluentQuery Subset(this FluentQuery response, Subset[] subsets)
 {
     return SubsetBase(response, subsets);
 }

Exemple #25

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// After a pattern predicate or a capture, moves *Left* from that position so the next operation looks at the element to the left.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <returns>The result of appending the "Left" token to the query's script.</returns>
 public static FluentQuery Left(this FluentQuery response)
 {
     return response.AppendToScript("Left");
 }

Exemple #26

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Executes the query and returns the value of the first entry in its result.
 /// </summary>
 /// <param name="fluentQuery">The query to execute.</param>
 /// <returns>The first captured value, or null when the query produced no result.</returns>
 private static string ExecuteSingleCapture(this FluentQuery fluentQuery)
 {
     var captures = fluentQuery.ExecuteQuery();

     // ExecuteQuery signals "nothing found" with null; dereferencing it used to throw a NullReferenceException.
     if (captures == null)
     {
         return null;
     }

     return captures.FirstOrDefault().Value;
 }

Exemple #27

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Extends the pattern to match with a single text type and, optionally, text the matched element must correspond to.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <param name="textType">The text type to match in the pattern.</param>
 /// <param name="matchText">*Optional* Text the matched element must also correspond to. The check tests whether the string from the document contains this text, combined with a Levenshtein distance 2 (by default) check, so abbreviations or simplified forms of common terms make the match more tolerant of OCR differences.</param>
 /// <returns>A DocumentLab FluentQuery whose script contains the match.</returns>
 public static FluentQuery Match(this FluentQuery response, TextType textType, params string[] matchText)
 {
     var typeNames = new[] { textType.ToString() };

     return MatchBase(response, typeNames, matchText);
 }

Exemple #28

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Appends a "Subset(...)" statement to the script, listing the string form of every given subset separated by ", ".
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <param name="subset">The subsets to list; each one is rendered through its ToString().</param>
 /// <returns>The result of appending the subset statement to <paramref name="response"/>.</returns>
 private static FluentQuery SubsetBase(FluentQuery response, Subset[] subset)
 {
     // Render each element individually: calling ToString() on the array itself yields the
     // array's type name, which is what previously ended up in the script.
     var subsetDefinitions = subset.Select(x => x.ToString());

     return response.AppendToScript($"Subset({string.Join(", ", subsetDefinitions)})");
 }

Exemple #29

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Extends the pattern to match with one or more text types, given by name, and optionally text the matched element must correspond to.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <param name="textType">Names of the text types to match in the pattern.</param>
 /// <param name="matchText">*Optional* Text the matched element must also correspond to. The check tests whether the string from the document contains this text, combined with a Levenshtein distance 2 (by default) check, so abbreviations or simplified forms of common terms make the match more tolerant of OCR differences.</param>
 /// <returns>A DocumentLab FluentQuery whose script contains the match.</returns>
 public static FluentQuery Match(this FluentQuery response, string[] textType, params string[] matchText)
 {
     return MatchBase(response, textType, matchText);
 }

Exemple #30

0

Afficher le fichier

Fichier : FluentQueryExtensions.cs Projet : mingyangzhu/DocumentLab

 /// <summary>
 /// Appends a match predicate to the script: the text types combined through Or, each optionally followed by the "||"-joined match texts in parentheses.
 /// </summary>
 /// <param name="response">The query to extend.</param>
 /// <param name="textType">Names of the text types to match.</param>
 /// <param name="matchText">Texts the match must also correspond to. A null or empty array means the text types are matched without a text predicate.</param>
 /// <returns>The result of appending the predicate to <paramref name="response"/>.</returns>
 private static FluentQuery MatchBase(FluentQuery response, string[] textType, string[] matchText)
 {
     // An explicitly passed null is treated like "no match text" instead of throwing.
     if (matchText == null || matchText.Length == 0)
     {
         return response.AppendToScript(Or(textType));
     }

     // The joined match text is the same for every text type, so build it once.
     var textAlternatives = string.Join("||", matchText);
     var predicates = textType.Select(x => $"{x}({textAlternatives})").ToArray();

     return response.AppendToScript(Or(predicates));
 }

C# (CSharp) DocumentLab FluentQuery Exemples