コード例 #1
0
ファイル: BatchParser.cs プロジェクト: sovren/sovren-dotnet
        private static IEnumerable <string> GetFiles(BatchParsingRules rules, string directory, SearchOption searchOption)
        {
            if (string.IsNullOrWhiteSpace(directory))
            {
                throw new ArgumentNullException(nameof(directory));
            }
            if (rules == null)
            {
                throw new ArgumentNullException(nameof(rules));
            }

            //use the rules to determine if this batch of files is valid (and which files)
            IEnumerable <string> files = Directory.EnumerateFiles(directory, "*", searchOption)
                                         .Where(f => rules.FileIsAllowed(f));

            if (files == null || files.Count() == 0)
            {
                throw new SovrenInvalidBatchException("No files found in given directory");
            }

            if (files.Count() > rules.MaxBatchSize)
            {
                throw new SovrenInvalidBatchException("This batch is too large to process.");
            }

            return(files);
        }
コード例 #2
0
ファイル: BatchParser.cs プロジェクト: sovren/sovren-dotnet
        /// <summary>
        /// Parses a batch of resumes
        /// </summary>
        /// <param name="apiClient">The API client to use to parse the files</param>
        /// <param name="parseOptions">Any parsing/indexing options</param>
        /// <param name="rules">
        /// The rules that should be applied to whatever files are found prior to parsing.
        /// This is important to reduce the number of invalid parse API calls and reduce parsing costs.
        /// </param>
        /// <param name="directory">The directory containing the files to be parsed</param>
        /// <param name="searchOption"></param>
        /// <param name="successCallback">A callback for when a file is parsed successfully</param>
        /// <param name="partialSuccessCallback">A callback for when some error happened during/after parsing, but there is still usable data in the response</param>
        /// <param name="errorCallback">A callback for when an error occurred when parsing the file, and there is no usable data</param>
        /// <param name="generateDocumentIdFn">A callback so you can specify a DocumentId for each file that is parsed</param>
        /// <exception cref="SovrenInvalidBatchException">Thrown when the directory provided does not meet the <see cref="BatchParsingRules"/></exception>
        public static async Task ParseResumes(
            SovrenClient apiClient,
            ParseOptions parseOptions,
            BatchParsingRules rules,
            string directory,
            SearchOption searchOption,
            Func <ResumeBatchSuccessResult, Task> successCallback,
            Func <ResumeBatchPartialSuccessResult, Task> partialSuccessCallback,
            Func <BatchErrorResult, Task> errorCallback,
            Func <string, string> generateDocumentIdFn = null)
        {
            if (apiClient == null)
            {
                throw new ArgumentNullException(nameof(apiClient));
            }

            IEnumerable <string> files = GetFiles(rules, directory, searchOption);

            //process the batch serially, since multi-threading could cause the customer to violate the AUP accidentally
            foreach (string file in files)
            {
                Document doc   = new Document(file);
                string   docId = generateDocumentIdFn == null?Guid.NewGuid().ToString() : generateDocumentIdFn(file);

                try
                {
                    //set document id if we plan to index these documents
                    if (parseOptions?.IndexingOptions != null)
                    {
                        parseOptions.IndexingOptions.DocumentId = docId;
                    }

                    ParseRequest        request  = new ParseRequest(doc, parseOptions);
                    ParseResumeResponse response = await apiClient.ParseResume(request);

                    if (successCallback != null)
                    {
                        await successCallback(new ResumeBatchSuccessResult(file, docId, response));
                    }
                }
                catch (SovrenUsableResumeException e)
                {
                    //this happens when something wasn't 100% successful, but there still might be usable data
                    if (partialSuccessCallback != null)
                    {
                        await partialSuccessCallback(new ResumeBatchPartialSuccessResult(file, docId, e));
                    }
                }
                catch (SovrenException e)
                {
                    //this happens where there is no usable data
                    if (errorCallback != null)
                    {
                        await errorCallback(new BatchErrorResult(file, docId, e));
                    }
                }
            }
        }