Example #1
0
        /// <summary>
        /// Parses, chunks and runs predictions on every file listed by the source storage, then
        /// stores one indented JSON document per file in the destination storage.
        /// </summary>
        /// <param name="sourceStorageType">Source storage type, passed to <c>InitializeStorage</c>.</param>
        /// <param name="destinationStorageType">Destination storage type, passed to <c>InitializeStorage</c>.</param>
        /// <param name="chunkType">Chunking method forwarded to the chunker service.</param>
        /// <param name="service">
        /// Which prediction service to call: <c>CustomText</c>, <c>TextAnalytics</c>, or <c>Both</c>.
        /// </param>
        /// <returns>A task representing the whole batch run.</returns>
        /// <remarks>
        /// One task is created per file and all of them are awaited together. A <c>CliException</c>
        /// thrown while handling a file is recorded against that file and logged without stopping
        /// the others; other exception types are not caught here.
        /// </remarks>
        public async Task Predict(StorageType sourceStorageType, StorageType destinationStorageType, ChunkMethod chunkType, CognitiveServiceType service)
        {
            InitializeStorage(sourceStorageType, destinationStorageType);
            // Fetch the chunker configuration once and read both settings from the same model.
            var chunkerConfig  = _configurationService.GetChunkerConfigModel();
            var charLimit      = chunkerConfig.CharLimit;
            var chunkLevel     = chunkerConfig.ChunkSectionLevel;
            var defaultOps     = _configurationService.GetTextAnalyticsConfigModel().DefaultOperations;
            var convertedFiles = new ConcurrentBag<string>();
            var failedFiles    = new ConcurrentDictionary<string, string>();
            // Check which service to run ("Both" enables each of them)
            var runCustomText    = service == CognitiveServiceType.CustomText || service == CognitiveServiceType.Both;
            var runTextAnalytics = service == CognitiveServiceType.TextAnalytics || service == CognitiveServiceType.Both;

            // read files from source storage
            var fileNames = await _sourceStorageService.ListFilesAsync();

            // parse files
            var tasks = fileNames.Select(async fileName =>
            {
                try
                {
                    // select parser according to type
                    var fileType       = Path.GetExtension(fileName);
                    var parsingService = _parserPoolManager.GetParser(fileType, fileName);
                    // read file
                    _loggerService.LogOperation(OperationType.ReadingFile, fileName);
                    var file = await _sourceStorageService.ReadFileAsync(fileName);
                    // parse file
                    _loggerService.LogOperation(OperationType.ParsingFile, fileName);
                    var parseResult = await parsingService.ParseFile(file);
                    // chunk file
                    _loggerService.LogOperation(OperationType.ChunkingFile, fileName);
                    var chunkedText = _chunkerService.Chunk(parseResult, chunkType, charLimit, chunkLevel);
                    // prediction service: each response stays null when its service/operation is not enabled
                    _loggerService.LogOperation(OperationType.RunningPrediction, fileName);
                    var queries            = chunkedText.Select(r => r.Text).ToList();
                    var customTextResponse = runCustomText ? await _customTextPredictionService.GetPredictionBatchAsync(queries) : null;
                    var sentimentResponse  = runTextAnalytics && defaultOps.Sentiment ? await _textAnalyticsPredictionService.PredictSentimentBatchAsync(queries) : null;
                    var nerResponse        = runTextAnalytics && defaultOps.Ner ? await _textAnalyticsPredictionService.PredictNerBatchAsync(queries) : null;
                    var keyphraseResponse  = runTextAnalytics && defaultOps.Keyphrase ? await _textAnalyticsPredictionService.PredictKeyphraseBatchAsync(queries) : null;
                    // concatenation service
                    var concatenatedResponse = _concatenationService.ConcatPredictionResult(chunkedText.ToArray(), customTextResponse, sentimentResponse, nerResponse, keyphraseResponse);
                    var responseAsJson       = JsonConvert.SerializeObject(concatenatedResponse, Formatting.Indented);
                    // store file
                    _loggerService.LogOperation(OperationType.StoringResult, fileName);
                    // NOTE(review): only the base name is kept, so inputs that differ just by extension
                    // or directory map to the same ".json" name - confirm this is acceptable.
                    var newFileName = Path.GetFileNameWithoutExtension(fileName) + ".json";
                    await _destinationStorageService.StoreDataAsync(responseAsJson, newFileName);
                    convertedFiles.Add(fileName);
                }
                catch (CliException e)
                {
                    // record the failure for the summary and keep processing the remaining files
                    failedFiles[fileName] = e.Message;
                    _loggerService.LogError(e);
                }
            });
            await Task.WhenAll(tasks);

            _loggerService.LogParsingResult(convertedFiles, failedFiles);
        }
        /// <summary>
        /// Parses and chunks every file listed by the source storage and stores the text of each
        /// chunk as a separate item in the destination storage.
        /// </summary>
        /// <param name="sourceStorageType">Source storage type, passed to <c>InitializeStorage</c>.</param>
        /// <param name="destinationStorageType">Destination storage type, passed to <c>InitializeStorage</c>.</param>
        /// <param name="chunkType">Chunking method forwarded to the chunker service.</param>
        /// <returns>A task representing the whole batch run.</returns>
        /// <remarks>
        /// One task is created per file and all of them are awaited together. A <c>CliException</c>
        /// thrown while handling a file is recorded against that file and logged without stopping
        /// the others; other exception types are not caught here.
        /// </remarks>
        public async Task ExtractText(StorageType sourceStorageType, StorageType destinationStorageType, ChunkMethod chunkType)
        {
            InitializeStorage(sourceStorageType, destinationStorageType);
            // Fetch the chunker configuration once and read both settings from the same model.
            var chunkerConfig  = _configurationService.GetChunkerConfigModel();
            var charLimit      = chunkerConfig.CharLimit;
            var chunkLevel     = chunkerConfig.ChunkSectionLevel;
            var convertedFiles = new ConcurrentBag<string>();
            var failedFiles    = new ConcurrentDictionary<string, string>();

            // read files from source storage
            var fileNames = await _sourceStorageService.ListFilesAsync();

            // parse files
            var tasks = fileNames.Select(async fileName =>
            {
                try
                {
                    // select parser according to type
                    var fileType       = Path.GetExtension(fileName);
                    var parsingService = _parserPoolManager.GetParser(fileType, fileName);
                    // read file
                    _loggerService.LogOperation(OperationType.ReadingFile, fileName);
                    var file = await _sourceStorageService.ReadFileAsync(fileName);
                    // parse file
                    _loggerService.LogOperation(OperationType.ParsingFile, fileName);
                    var parseResult = await parsingService.ParseFile(file);
                    // chunk file
                    _loggerService.LogOperation(OperationType.ChunkingFile, fileName);
                    var chunkedText = _chunkerService.Chunk(parseResult, chunkType, charLimit, chunkLevel);
                    // store file: one destination item per chunk, named from the source file name and chunk index
                    _loggerService.LogOperation(OperationType.StoringResult, fileName);
                    foreach (var item in chunkedText.Select((value, i) => (value, i)))
                    {
                        var newFileName = ChunkInfoHelper.GetChunkFileName(fileName, item.i);
                        await _destinationStorageService.StoreDataAsync(item.value.Text, newFileName);
                    }
                    // counted as converted only after every chunk has been stored
                    convertedFiles.Add(fileName);
                }
                catch (CliException e)
                {
                    // record the failure for the summary and keep processing the remaining files
                    failedFiles[fileName] = e.Message;
                    _loggerService.LogError(e);
                }
            });
            await Task.WhenAll(tasks);

            _loggerService.LogParsingResult(convertedFiles, failedFiles);
        }
        /// <summary>
        /// Reads every file listed by the source storage, chunks its parsed content with
        /// <c>ChunkMethod.Char</c> and the configured character limit, and stores the text of each
        /// chunk as a separate item in the destination storage.
        /// </summary>
        /// <param name="sourceStorageType">Source storage type, passed to <c>InitializeStorage</c>.</param>
        /// <param name="destinationStorageType">Destination storage type, passed to <c>InitializeStorage</c>.</param>
        /// <returns>A task representing the whole batch run.</returns>
        /// <remarks>
        /// A <c>CliException</c> thrown while handling a file is recorded against that file and
        /// logged without stopping the others; other exception types are not caught here.
        /// </remarks>
        public async Task ChunkTextAsync(StorageType sourceStorageType, StorageType destinationStorageType)
        {
            InitializeStorage(sourceStorageType, destinationStorageType);
            var maxChars  = _configurationService.GetChunkerConfigModel().CharLimit;
            var succeeded = new ConcurrentBag<string>();
            var failures  = new ConcurrentDictionary<string, string>();

            // Handles one file end to end; a CliException is recorded instead of propagating.
            async Task ChunkSingleFileAsync(string fileName)
            {
                try
                {
                    // reject unsupported file types before touching storage
                    _parserService.ValidateFileType(fileName);

                    // read and parse
                    _loggerService.LogOperation(OperationType.ReadingFile, fileName);
                    var content = await _sourceStorageService.ReadFileAsync(fileName);
                    var parsed  = await _parserService.ParseFile(content);

                    // chunk
                    _loggerService.LogOperation(OperationType.ChunkingFile, fileName);
                    List<ChunkInfo> chunks = _chunkerService.Chunk(parsed, ChunkMethod.Char, maxChars, ElementType.Other);

                    // store each chunk under a name built from the source file name and chunk index
                    _loggerService.LogOperation(OperationType.StoringResult, fileName);
                    foreach (var (chunk, index) in chunks.Select((value, position) => (value, position)))
                    {
                        var chunkFileName = ChunkInfoHelper.GetChunkFileName(fileName, index);
                        await _destinationStorageService.StoreDataAsync(chunk.Text, chunkFileName);
                    }
                    succeeded.Add(fileName);
                }
                catch (CliException error)
                {
                    failures[fileName] = error.Message;
                    _loggerService.LogError(error);
                }
            }

            // list the source files, then start one task per file and wait for all of them
            var sourceFiles = await _sourceStorageService.ListFilesAsync();
            await Task.WhenAll(sourceFiles.Select(fileName => ChunkSingleFileAsync(fileName)));

            _loggerService.LogParsingResult(succeeded, failures);
        }