Beispiel #1
0
        /// <summary>
        /// Moves the service referenced by <paramref name="settings"/> through the Busy state into the
        /// Prepared state and reports the outcome of the process to the process handler.
        /// </summary>
        /// <param name="processId">Identifier of the process whose state is reported via the process handler.</param>
        /// <param name="settings">Settings whose <c>ServiceId</c> selects the service to prepare.</param>
        /// <param name="token">Cancellation token; this overload does not observe it.</param>
        /// <remarks>
        /// Exceptions are not propagated: on any failure the service status is reset to New and the process
        /// is reported as Cancelled (when the failure is an <see cref="OperationCanceledException"/>, either
        /// directly or as the inner exception) or as Interrupted otherwise.
        /// </remarks>
        public void Prepare(string processId, SearchSettingsWrapperElastic settings, CancellationToken token)
        {
            try
            {
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                // Persist the final status BEFORE announcing completion: otherwise an observer reacting to
                // "finished" can still read a Busy service, and a failing update would report Interrupted
                // after Finished has already been sent.
                service.Status = (int)ServiceStatusEnum.Prepared;
                serviceQuery.Update(service.Id, service);

                // NOTE(review): the message uses ServiceTypeEnum.Classifier although the settings type is a
                // search wrapper - confirm whether this is intended.
                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyPrepared_0_Service_1, ServiceTypeEnum.Classifier, service.Name));
            }
            catch (Exception ex)
            {
                // Roll the service back so that it can be prepared again.
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.New;
                serviceQuery.Update(service.Id, service);

                // A cancellation may arrive directly or wrapped (e.g. inside an AggregateException).
                var wasCancelled = ex is OperationCanceledException || ex.InnerException is OperationCanceledException;
                if (wasCancelled)
                {
                    processHandler.Cancelled(processId);
                }
                else
                {
                    processHandler.Interrupted(processId, ex);
                }
            }
        }
        /// <summary>
        /// Prepares a classifier service: for every n-gram in <c>settings.NGramList</c> and every tag in
        /// <c>settings.Tags</c> a dictionary is computed and serialized below the service directory
        /// (<c>{service directory}/{nGram}/{dictionary file name}</c>).
        /// </summary>
        /// <param name="processId">Identifier of the process whose progress and final state are reported via the process handler.</param>
        /// <param name="settings">Classifier settings (service id, data set name, n-gram list, tags, compress settings).</param>
        /// <param name="token">Cancellation token, checked at the start of every per-tag work item.</param>
        /// <remarks>
        /// Exceptions are not propagated: on any failure the service status is reset to New, the service
        /// directory is deleted and the process is reported as Cancelled (when the failure is an
        /// <see cref="OperationCanceledException"/>, either directly or as the inner exception) or as
        /// Interrupted otherwise.
        /// </remarks>
        public void Prepare(string processId, ClassifierSettingsElastic settings, CancellationToken token)
        {
            var directoryPath = GetDirectoryPath(settings.ServiceId);

            try
            {
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                // Remove the output of any earlier preparation before writing new files.
                IOHelper.SafeDeleteDictionary(directoryPath, true);

                var globalStoreDataSet = GlobalStore.DataSets.Get(settings.DataSetName);
                var dataSet            = globalStoreDataSet.DataSet;
                // Total number of dictionaries to build; the denominator of the reported progress.
                var allDicCount        = settings.NGramList.Count * settings.Tags.Count;
                var counter            = 0;
                var lockObject         = new object();

                Directory.CreateDirectory(directoryPath);

                foreach (var nGram in settings.NGramList)
                {
                    var subsetCreator = new SubsetCreator(dataSet.Name, new List <string> {
                        DocumentElastic.TextField
                    }, dataSet.InterpretedFields.Select(DocumentQuery.MapDocumentObjectName).ToList(), nGram, queryFactory, globalStoreDataSet.AttachmentFields);
                    var actualDirectory = string.Format("{0}/{1}", directoryPath, nGram);

                    Directory.CreateDirectory(actualDirectory);

                    Parallel.ForEach(settings.Tags, parallelService.ParallelOptions(), (tag, loopState) => {
                        token.ThrowIfCancellationRequested();

                        var subset     = subsetCreator.CreateByTag(tag.Id, dataSet.TagField);
                        var dictionary = new TwisterAlgorithm(
                            subset, true, false,
                            settings.CompressSettings.CompressCategoryOccurence,
                            settings.CompressSettings.CompressDataSetOccurence,
                            (LogicalOperatorEnum)settings.CompressSettings.CompressOperator).GetDictionary();
                        var dicProtoBuf = new DictionaryProtoBuf
                        {
                            Id         = tag.Id,
                            Dictionary = dictionary,
                            NGram      = nGram
                        };

                        // Serialization and the shared progress counter are updated under one lock.
                        lock (lockObject)
                        {
                            dicProtoBuf.Serialize(string.Format("{0}/{1}", actualDirectory, dicProtoBuf.GetFileName()));
                            processHandler.Changed(processId, Math.Round(++counter / (double)allDicCount * 100, 2));
                        }
                    });
                }

                // Persist the final status BEFORE announcing completion: otherwise an observer reacting to
                // "finished" can still read a Busy service, and a failing update would report Interrupted
                // after Finished has already been sent.
                service.Status = (int)ServiceStatusEnum.Prepared;
                serviceQuery.Update(service.Id, service);

                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyPrepared_0_Service_1, ServiceTypeEnum.Classifier, service.Name));
            }
            catch (Exception ex)
            {
                // Roll the service back and drop partially written dictionaries.
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.New;
                serviceQuery.Update(service.Id, service);
                IOHelper.SafeDeleteDictionary(directoryPath, true);

                // Parallel.ForEach wraps exceptions thrown by the loop body in an AggregateException, but
                // throws OperationCanceledException directly when the token carried by its ParallelOptions
                // is cancelled - recognise both shapes as a cancellation.
                var wasCancelled = ex is OperationCanceledException || ex.InnerException is OperationCanceledException;
                if (wasCancelled)
                {
                    processHandler.Cancelled(processId);
                }
                else
                {
                    processHandler.Interrupted(processId, ex);
                }
            }
        }
        /// <summary>
        /// Copies (or moves, when <paramref name="isMove"/> is true) the given documents from one data set
        /// to another in parallel batches, writes the per-document results to a JSON file and reports
        /// progress and the final state through the process handler.
        /// </summary>
        /// <param name="processId">Identifier of the process; also used as the name of the result file (<c>{processId}.json</c>).</param>
        /// <param name="dataSetName">Name of the source data set.</param>
        /// <param name="documentIds">Ids of the documents to copy or move.</param>
        /// <param name="targetDataSetName">Name of the target data set.</param>
        /// <param name="parallelLimit">Maximum degree of parallelism for the batch loop.</param>
        /// <param name="isMove">When true, successfully indexed documents are deleted from the source data set afterwards.</param>
        /// <param name="token">Cancellation token, checked at the start of every batch.</param>
        /// <param name="hostUrl">Base URL used to build the download link of the result file.</param>
        /// <remarks>
        /// Exactly one terminal state is reported: Interrupted (with the first failure seen in a batch),
        /// Cancelled, or Finished. Batches that completed before a cancellation or failure are still
        /// flushed, removed from the source on a move, and written to the result file.
        /// </remarks>
        public void CopyOrMove(string processId, string dataSetName, IEnumerable <string> documentIds, string targetDataSetName, int parallelLimit, bool isMove, CancellationToken token, string hostUrl)
        {
            var results = new BulkResults();
            //// TODO: Validate target schema
            var parallelOptions = new ParallelOptions {
                MaxDegreeOfParallelism = parallelLimit
            };
            var sourceDocumentQuery = DocumentQuery(dataSetName);
            var targetDocumentQuery = DocumentQuery(targetDataSetName);

            // Materialize once so that counting and batching do not enumerate the caller's sequence twice.
            IEnumerable<string> materializedIds = documentIds as ICollection<string> ?? documentIds.ToList();
            var allCount                        = materializedIds.Count();

            // Guards the shared result list and the outcome flags written by the parallel iterations.
            var syncRoot       = new object();
            var wasCancelled   = false;
            Exception failure  = null;

            Parallel.ForEach(
                materializedIds.Batch(siteConfig.Resources.MaxSearchBulkCount),
                parallelOptions,
                (batchIds, loopState) =>
            {
                try
                {
                    if (token.IsCancellationRequested)
                    {
                        lock (syncRoot)
                        {
                            wasCancelled = true;
                        }
                        loopState.Stop();
                        return;
                    }
                    var batchDocuments    = sourceDocumentQuery.Get(batchIds);
                    var interpretedFields = DataSet(dataSetName).DataSet.InterpretedFields;

                    // Documents without text get it rebuilt from their interpreted fields.
                    foreach (var document in batchDocuments.Where(doc => string.IsNullOrEmpty(doc.Text)))
                    {
                        document.Text = DocumentHelper.GetConcatenatedText(document.DocumentObject, interpretedFields);
                    }

                    var bulkResponse = targetDocumentQuery.Index(batchDocuments);
                    var batchResults = bulkResponse.ToBulkResult();

                    double percent;
                    lock (syncRoot)
                    {
                        // The result list is shared by all iterations; appending and reading the count
                        // must not interleave with another batch.
                        results.Results.AddRange(batchResults);
                        percent = Math.Round(results.Results.Count / (double)allCount * 100, 2);
                    }
                    processHandler.Changed(processId, percent);
                }
                catch (Exception ex)
                {
                    lock (syncRoot)
                    {
                        // Keep only the first failure; it is reported once after the loop.
                        if (failure == null)
                        {
                            failure = ex;
                        }
                    }
                    loopState.Stop();
                }
            });

            // Report an abnormal end once, instead of once per affected batch.
            if (failure != null)
            {
                processHandler.Interrupted(processId, failure);
            }
            else if (wasCancelled)
            {
                processHandler.Cancelled(processId);
            }

            targetDocumentQuery.Flush();

            var succeedDocumentIds = results.Results.Where(r => r.StatusCode == StatusCodes.Status200OK).Select(r => r.Id).ToList();
            var failedDocumentIds  = results.Results.Where(r => r.StatusCode != StatusCodes.Status200OK).Select(r => r.Id).ToList();

            if (isMove && succeedDocumentIds.Any())
            {
                sourceDocumentQuery.Delete(succeedDocumentIds);
                sourceDocumentQuery.Flush();
            }

            // save the response
            var fileName   = string.Format("{0}.json", processId);
            var resultPath = string.Format("{0}/{1}", siteConfig.Directory.User, fileName);

            File.AppendAllText(resultPath, JsonConvert.SerializeObject(results.Results.OrderByDescending(r => r.StatusCode), Formatting.Indented));

            // A cancelled or interrupted process must not be reported as finished afterwards.
            if (failure != null || wasCancelled)
            {
                return;
            }

            var url = string.Format("{0}{1}/{2}", hostUrl, Constants.FilesPath, fileName);

            processHandler.Finished(processId,
                                    string.Format("{0}\n{1}",
                                                  string.Format(
                                                      isMove ?
                                                      DocumentResources.MoveFinishedFrom_0_To_1_Succeeded_2_Failed_3 :
                                                      DocumentResources.CopyFinishedFrom_0_To_1_Succeeded_2_Failed_3,
                                                      dataSetName, targetDataSetName, succeedDocumentIds.Count, failedDocumentIds.Count),
                                                  string.Format(DocumentResources.ResultFileCanBeDownloadFromHere_0, url)
                                                  ));
        }
Beispiel #4
0
        /// <summary>
        /// Prepares a Prc service: for every tag in <c>settings.Tags</c> a dictionary and a word-occurrence
        /// subset are computed and serialized into the service directory.
        /// </summary>
        /// <param name="processId">Identifier of the process whose progress and final state are reported via the process handler.</param>
        /// <param name="settings">Prc settings (service id, data set name, tags, compress settings).</param>
        /// <param name="token">Cancellation token, checked at the start of every per-tag work item.</param>
        /// <remarks>
        /// Exceptions are not propagated: on any failure the service status is reset to New, the service
        /// directory is deleted and the process is reported as Cancelled (when the failure is an
        /// <see cref="OperationCanceledException"/>, either directly or as the inner exception) or as
        /// Interrupted otherwise.
        /// </remarks>
        public void Prepare(string processId, PrcSettingsElastic settings, CancellationToken token)
        {
            var directoryPath = GetDirectoryPath(settings.ServiceId);

            try
            {
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                // Remove the output of any earlier preparation before writing new files.
                IOHelper.SafeDeleteDictionary(directoryPath, true);

                var globalStoreDataSet = GlobalStore.DataSets.Get(settings.DataSetName);
                var dataSet            = globalStoreDataSet.DataSet;
                var progress           = new Progress(settings.Tags.Count);
                var subsetCreator      = new SubsetCreator(dataSet.Name, new List <string> {
                    DocumentElastic.TextField
                }, dataSet.InterpretedFields.Select(DocumentQuery.MapDocumentObjectName).ToList(), 1, queryFactory, globalStoreDataSet.AttachmentFields);

                Directory.CreateDirectory(directoryPath);

                var logPrefix = $"Prc Prepare {processId}";
                logger.LogInformation($"{logPrefix} starts with ParallelLimit: {parallelService.ParallelLimit}, Tags Count: {settings.Tags.Count}");

                var lockObject = new object();

                Parallel.ForEach(settings.Tags, parallelService.ParallelOptions(), (tag, loopState) =>
                {
                    token.ThrowIfCancellationRequested();

                    logger.LogInformation($"{logPrefix} preparing Tag: `{tag}`");

                    var subset    = subsetCreator.CreateByTag(tag.Id, dataSet.TagField);
                    var algorithm = new TwisterAlgorithm(
                        subset, true, true,
                        settings.CompressSettings.CompressCategoryOccurence,
                        settings.CompressSettings.CompressDataSetOccurence,
                        (LogicalOperatorEnum)settings.CompressSettings.CompressOperator);


                    algorithm.InitTagDictionary();
                    // Words of the subset that have no tag-dictionary entry with a positive PMI.
                    var notNeededWords = subset.WordsWithOccurences.Keys.Except(
                        algorithm.TagDictionary
                        .Where(sd => sd.Value.PMI > 0)
                        .Select(sd => sd.Key)).ToList();

                    // The dictionary is taken before the subset is pruned below.
                    var td = algorithm.GetDictionary();

                    foreach (var word in notNeededWords)
                    {
                        subset.WordsWithOccurences.Remove(word);
                    }

                    // Serialization and the shared progress tracker are updated under one lock.
                    lock (lockObject)
                    {
                        //dictionary serialization
                        var dicProtoBuf = new DictionaryProtoBuf
                        {
                            Id         = tag.Id,
                            Dictionary = td,
                            NGram      = 1
                        };
                        dicProtoBuf.Serialize(string.Format("{0}/{1}", directoryPath, dicProtoBuf.GetFileName()));

                        //subset serialization
                        var subsetProtoBuf = new SubsetProtoBuf
                        {
                            Id = tag.Id,
                            WordsWithOccurences        = subset.WordsWithOccurences,
                            AllWordsOccurencesSumInTag = subset.AllWordsOccurencesSumInTag,
                            AllOccurencesSumInCorpus   = subset.AllWordsOccurencesSumInCorpus
                        };
                        subsetProtoBuf.Serialize(string.Format("{0}/{1}", directoryPath, subsetProtoBuf.GetFileName()));

                        progress.Step();
                        processHandler.Changed(processId, progress.Percent.Round(2));
                    }

                    logger.LogInformation($"{logPrefix} prepared Tag: `{tag}`");
                });

                // Persist the final status BEFORE announcing completion: otherwise an observer reacting to
                // "finished" can still read a Busy service, and a failing update would report Interrupted
                // after Finished has already been sent.
                service.Status = (int)ServiceStatusEnum.Prepared;
                serviceQuery.Update(service.Id, service);

                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyPrepared_0_Service_1, ServiceTypeEnum.Prc, service.Name));
            }
            catch (Exception ex)
            {
                // Roll the service back and drop partially written files.
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.New;
                serviceQuery.Update(service.Id, service);
                IOHelper.SafeDeleteDictionary(directoryPath, true);

                // Parallel.ForEach wraps exceptions thrown by the loop body in an AggregateException, but
                // throws OperationCanceledException directly when the token carried by its ParallelOptions
                // is cancelled - recognise both shapes as a cancellation.
                var wasCancelled = ex is OperationCanceledException || ex.InnerException is OperationCanceledException;
                if (wasCancelled)
                {
                    processHandler.Cancelled(processId);
                }
                else
                {
                    processHandler.Interrupted(processId, ex);
                }
            }
        }