Exemplo n.º 1
0
        /// <summary>
        /// Creates and starts an activation process for every stored service of the given type
        /// whose status is <c>ServiceStatusEnum.Active</c>.
        /// </summary>
        /// <typeparam name="TServiceSettings">Settings type loaded for each service via <c>serviceQuery.GetSettings</c>.</typeparam>
        /// <typeparam name="THandler">Handler type resolved from <c>serviceProvider</c>; its <c>Activate</c> method is what the started process runs.</typeparam>
        /// <param name="serviceType">Type of the services to warm up. Only Classifier, Prc and Search map to an activation process type.</param>
        /// <exception cref="Exception">
        /// Thrown when at least one service of <paramref name="serviceType"/> exists and the type has no
        /// activation process type.
        /// </exception>
        private void WarmUpService <TServiceSettings, THandler>(ServiceTypeEnum serviceType)
            where TServiceSettings : BaseServiceSettingsElastic
            where THandler : IServiceHandler <TServiceSettings>
        {
            var candidates     = serviceQuery.GetByType((int)serviceType).ToList();
            var serviceHandler = serviceProvider.GetService <THandler>();

            // With nothing stored for this type there is no process type to resolve and nothing to start.
            if (candidates.Count == 0)
            {
                return;
            }

            // The activation process type depends only on serviceType, so it is resolved once up front.
            ProcessTypeEnum activationType;
            if (serviceType == ServiceTypeEnum.Classifier)
            {
                activationType = ProcessTypeEnum.ClassifierActivate;
            }
            else if (serviceType == ServiceTypeEnum.Prc)
            {
                activationType = ProcessTypeEnum.PrcActivate;
            }
            else if (serviceType == ServiceTypeEnum.Search)
            {
                activationType = ProcessTypeEnum.SearchActivate;
            }
            else
            {
                throw new Exception("Try to warm up service with undefined process activation type!");
            }

            foreach (var candidate in candidates)
            {
                // Only services that were active are activated again.
                if (candidate.Status == (int)ServiceStatusEnum.Active)
                {
                    var serviceSettings = serviceQuery.GetSettings <TServiceSettings>(candidate.Id);
                    var description     = string.Format(Resources.ServiceResources.Activating_0_Service_1, serviceType, candidate.Name);
                    var activation      = processHandler.Create(activationType, candidate.Id, candidate, description);

                    // Record the new process on the service before it starts running.
                    candidate.ProcessIdList.Add(activation.Id);
                    serviceQuery.Update(candidate.Id, candidate);

                    processHandler.Start(
                        activation,
                        (tokenSource) => serviceHandler.Activate(activation.Id, serviceSettings, tokenSource.Token));
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs the prepare step of a Search service: marks the service Busy, reports the process as
        /// finished and then marks the service Prepared.
        /// </summary>
        /// <param name="processId">Id of the process that is reported as finished, cancelled or interrupted.</param>
        /// <param name="settings">Search settings; only <c>ServiceId</c> is read here.</param>
        /// <param name="token">Cancellation token. It is not observed by this method.</param>
        /// <remarks>
        /// Exceptions are not propagated: on any failure the service status is reset to New and the
        /// process is reported as cancelled or interrupted.
        /// </remarks>
        public void Prepare(string processId, SearchSettingsWrapperElastic settings, CancellationToken token)
        {
            try
            {
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                // This handler prepares Search services, so the completion message must name
                // ServiceTypeEnum.Search (not Classifier).
                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyPrepared_0_Service_1, ServiceTypeEnum.Search, service.Name));
                service.Status = (int)ServiceStatusEnum.Prepared;
                serviceQuery.Update(service.Id, service);
            }
            catch (Exception ex)
            {
                // Roll the service back to New so that it can be prepared again.
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.New;
                serviceQuery.Update(service.Id, service);

                // `is` already evaluates to false for a null InnerException, so no separate null test is needed.
                if (ex.InnerException is OperationCanceledException)
                {
                    processHandler.Cancelled(processId);
                }
                else
                {
                    processHandler.Interrupted(processId, ex);
                }
            }
        }
        /// <summary>
        /// Prepares a Classifier service by building one dictionary per (n-gram, tag) pair and
        /// serializing each of them into a per-n-gram sub-directory of the service directory.
        /// </summary>
        /// <param name="processId">Id of the process that receives progress, finished, cancelled and interrupted notifications.</param>
        /// <param name="settings">Classifier settings: service id, data set name, n-gram list, tags and compress settings.</param>
        /// <param name="token">Checked at the start of every tag iteration; a cancellation request aborts the run.</param>
        /// <remarks>
        /// Exceptions are not propagated: on any failure the service status is reset to New, the service
        /// directory is removed and the process is reported as cancelled or interrupted.
        /// </remarks>
        public void Prepare(string processId, ClassifierSettingsElastic settings, CancellationToken token)
        {
            var serviceDirectory = GetDirectoryPath(settings.ServiceId);

            try
            {
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                // Clear the target path before anything is generated into it.
                IOHelper.SafeDeleteDictionary(serviceDirectory, true);

                var storedDataSet        = GlobalStore.DataSets.Get(settings.DataSetName);
                var dataSet              = storedDataSet.DataSet;
                var totalDictionaries    = settings.NGramList.Count * settings.Tags.Count;
                var finishedDictionaries = 0;
                var writeGate            = new object();

                Directory.CreateDirectory(serviceDirectory);

                foreach (var nGram in settings.NGramList)
                {
                    var textFields        = new List <string> { DocumentElastic.TextField };
                    var interpretedFields = dataSet.InterpretedFields.Select(DocumentQuery.MapDocumentObjectName).ToList();
                    var subsetCreator     = new SubsetCreator(
                        dataSet.Name,
                        textFields,
                        interpretedFields,
                        nGram,
                        queryFactory,
                        storedDataSet.AttachmentFields);

                    // Each n-gram gets its own sub-directory.
                    var nGramDirectory = string.Format("{0}/{1}", serviceDirectory, nGram);
                    Directory.CreateDirectory(nGramDirectory);

                    Parallel.ForEach(settings.Tags, parallelService.ParallelOptions(), (tag, loopState) =>
                    {
                        token.ThrowIfCancellationRequested();

                        var subset    = subsetCreator.CreateByTag(tag.Id, dataSet.TagField);
                        var compress  = settings.CompressSettings;
                        var algorithm = new TwisterAlgorithm(
                            subset, true, false,
                            compress.CompressCategoryOccurence,
                            compress.CompressDataSetOccurence,
                            (LogicalOperatorEnum)compress.CompressOperator);
                        var tagDictionary = algorithm.GetDictionary();

                        var serialized = new DictionaryProtoBuf
                        {
                            Id         = tag.Id,
                            Dictionary = tagDictionary,
                            NGram      = nGram
                        };

                        // File writes and the shared progress counter are guarded by one lock.
                        lock (writeGate)
                        {
                            var targetFile = string.Format("{0}/{1}", nGramDirectory, serialized.GetFileName());
                            serialized.Serialize(targetFile);

                            finishedDictionaries++;
                            var percent = Math.Round(finishedDictionaries / (double)totalDictionaries * 100, 2);
                            processHandler.Changed(processId, percent);
                        }
                    });
                }

                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyPrepared_0_Service_1, ServiceTypeEnum.Classifier, service.Name));
                service.Status = (int)ServiceStatusEnum.Prepared;
                serviceQuery.Update(service.Id, service);
            }
            catch (Exception ex)
            {
                // Roll back: the service becomes New again and partial output is removed.
                var failedService = serviceQuery.Get(settings.ServiceId);
                failedService.Status = (int)ServiceStatusEnum.New;
                serviceQuery.Update(failedService.Id, failedService);
                IOHelper.SafeDeleteDictionary(serviceDirectory, true);

                // Only an OperationCanceledException carried as the inner exception counts as a cancellation.
                var wasCancelled = ex.InnerException is OperationCanceledException;
                if (!wasCancelled)
                {
                    processHandler.Interrupted(processId, ex);
                }
                else
                {
                    processHandler.Cancelled(processId);
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Validates a prepare request for a Classifier service, stores the resulting settings and
        /// hands the prepare work to <c>processHandler.Start</c>.
        /// </summary>
        /// <param name="id">Id of the service to prepare.</param>
        /// <param name="classifierPrepareSettings">Request body: data set name, n-gram list, optional tag id list and compress settings.</param>
        /// <returns>
        /// 202 with the created process model when the request is accepted; 404 when the service does
        /// not exist; 400 when the service is not a Classifier, is not in New status, the data set is
        /// unknown, the n-gram list is invalid or a requested tag id does not exist.
        /// </returns>
        public IActionResult Prepare(string id, [FromBody] ClassifierPrepareSettings classifierPrepareSettings)
        {
            //SERVICE VALIDATION
            var service = serviceQuery.Get(id);

            if (service == null)
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status404NotFound, ServiceResources.InvalidIdNotExistingService);
            }
            if (service.Type != (int)ServiceTypeEnum.Classifier)
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status400BadRequest, string.Format(ServiceResources.InvalidServiceTypeOnly_0_ServicesAreValidForThisRequest, "Classifier"));
            }
            if (service.Status != (int)ServiceStatusEnum.New)
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status400BadRequest, ServiceResources.InvalidStatusOnlyTheServicesWithNewStatusCanBePrepared);
            }

            //DATASET VALIDATION
            // NOTE(review): classifierPrepareSettings is dereferenced here without a null check;
            // confirm that an empty request body is rejected before this action runs.
            if (!GlobalStore.DataSets.IsExist(classifierPrepareSettings.DataSetName))
            {
                return new HttpStatusCodeWithErrorResult(
                    StatusCodes.Status400BadRequest,
                    string.Format(ServiceResources.DataSet_0_NotFound, classifierPrepareSettings.DataSetName));
            }

            var globalStoreDataSet = GlobalStore.DataSets.Get(classifierPrepareSettings.DataSetName);
            var dataSet            = globalStoreDataSet.DataSet;

            //NGRAM COUNT LIST VALIDATION
            var nGramResult = CommonValidators.ValidateNGramList(classifierPrepareSettings.NGramList, dataSet.NGramCount);

            if (nGramResult.IsFailure)
            {
                return HttpErrorResult(StatusCodes.Status400BadRequest, nGramResult.Error);
            }

            //TAGS VALIDATION
            var tagQuery = queryFactory.GetTagQuery(dataSet.Name);
            List <TagElastic> tags;

            if (classifierPrepareSettings.TagIdList?.Any() == true)
            {
                tags = tagQuery.Get(classifierPrepareSettings.TagIdList).ToList();

                // Decide by id, not by count: comparing counts rejects a request that merely repeats
                // a tag id and then reports an empty list of missing ids.
                var missingTagIds = classifierPrepareSettings.TagIdList.Except(tags.Select(t => t.Id)).ToList();
                if (missingTagIds.Count > 0)
                {
                    return new HttpStatusCodeWithErrorResult(
                        StatusCodes.Status400BadRequest,
                        string.Format(ServiceResources.TheFollowingTagIdsNotExistInTheDataSet_0, string.Join(", ", missingTagIds)));
                }
            }
            else
            {
                // No explicit tag list: fall back to every leaf tag of the data set.
                tags = tagQuery.GetAll().Items.Where(i => i.IsLeaf).ToList();
            }

            //SAVE SETTINGS TO ELASTIC
            var serviceSettings = new ClassifierSettingsElastic {
                DataSetName      = globalStoreDataSet.IndexName,
                ServiceId        = service.Id,
                NGramList        = classifierPrepareSettings.NGramList,
                Tags             = tags,
                CompressSettings = CompressHelper.ToCompressSettingsElastic(classifierPrepareSettings.CompressSettings, classifierPrepareSettings.CompressLevel)
            };

            serviceQuery.IndexSettings(serviceSettings);

            var process = processHandler.Create(
                ProcessTypeEnum.ClassifierPrepare,
                service.Id, classifierPrepareSettings,
                string.Format(ServiceResources.Preparing_0_Service_1, ServiceTypeEnum.Classifier, service.Name));

            // Record the process on the service before the prepare work is started.
            service.ProcessIdList.Add(process.Id);
            serviceQuery.Update(service.Id, service);

            processHandler.Start(process, (tokenSource) => classifierHandler.Prepare(process.Id, serviceSettings, tokenSource.Token));

            return new HttpStatusCodeWithObjectResult(StatusCodes.Status202Accepted, process.ToProcessModel());
        }
Exemplo n.º 5
0
        /// <summary>
        /// Validates a prepare request for a Search service, stores default auto-complete and search
        /// settings for it and hands the prepare work to <c>processHandler.Start</c>.
        /// </summary>
        /// <param name="id">Id of the service to prepare.</param>
        /// <param name="searchPrepareSettings">Request body; only its <c>DataSetName</c> is read here, the object itself is passed on to <c>processHandler.Create</c>.</param>
        /// <returns>
        /// 202 with the created process model when the request is accepted; 404 when the service does
        /// not exist; 400 when the service is not a Search service, is not in New status or the data
        /// set is unknown.
        /// </returns>
        public IActionResult Prepare(string id, [FromBody] SearchPrepareSettings searchPrepareSettings)
        {
            //SERVICE VALIDATION
            var service = serviceQuery.Get(id);

            if (service == null)
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status404NotFound, ServiceResources.InvalidIdNotExistingService);
            }
            if (service.Type != (int)ServiceTypeEnum.Search)
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status400BadRequest, string.Format(ServiceResources.InvalidServiceTypeOnly_0_ServicesAreValidForThisRequest, "Search"));
            }
            if (service.Status != (int)ServiceStatusEnum.New)
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status400BadRequest, ServiceResources.InvalidStatusOnlyTheServicesWithNewStatusCanBePrepared);
            }

            //DATASET VALIDATION
            // NOTE(review): searchPrepareSettings is dereferenced here without a null check;
            // confirm that an empty request body is rejected before this action runs.
            if (!GlobalStore.DataSets.IsExist(searchPrepareSettings.DataSetName))
            {
                return new HttpStatusCodeWithErrorResult(
                    StatusCodes.Status400BadRequest,
                    string.Format(ServiceResources.DataSet_0_NotFound, searchPrepareSettings.DataSetName));
            }

            var globalStoreDataSet = GlobalStore.DataSets.Get(searchPrepareSettings.DataSetName);
            var dataSet            = globalStoreDataSet.DataSet;

            var serviceSettings = new SearchSettingsWrapperElastic
            {
                DataSetName = dataSet.Name,
                ServiceId   = service.Id
            };

            // Default values stored at prepare time; more accurate, data-set specific settings
            // could be calculated here.
            serviceSettings.AutoCompleteSettings = new AutoCompleteSettingsElastic
            {
                Confidence    = 2.0,
                Count         = 3,
                MaximumErrors = 0.5
            };

            serviceSettings.SearchSettings = new SearchSettingsElastic
            {
                Count             = 3,
                CutOffFrequency   = 0.001,
                Filter            = null,
                Fuzziness         = -1,
                // The response carries the interpreted fields plus the id and tag fields.
                ResponseFieldList = dataSet.InterpretedFields.Union(new List <string> {
                    dataSet.IdField, dataSet.TagField
                }).ToList(),
                SearchFieldList   = dataSet.InterpretedFields,
                Type              = (int)SearchTypeEnum.Match,
                Weights           = null,
                Operator          = (int)LogicalOperatorEnum.OR,
                UseDefaultFilter  = true,
                UseDefaultWeights = true,
                Order             = null
            };

            serviceQuery.IndexSettings(serviceSettings);

            var process = processHandler.Create(
                ProcessTypeEnum.SearchPrepare,
                service.Id, searchPrepareSettings,
                string.Format(ServiceResources.Preparing_0_Service_1, ServiceTypeEnum.Search, service.Name));

            // Record the process on the service before the prepare work is started.
            service.ProcessIdList.Add(process.Id);
            serviceQuery.Update(service.Id, service);

            processHandler.Start(process, (tokenSource) => searchHandler.Prepare(process.Id, serviceSettings, tokenSource.Token));

            return new HttpStatusCodeWithObjectResult(StatusCodes.Status202Accepted, process.ToProcessModel());
        }
Exemplo n.º 6
0
        /// <summary>
        /// Prepares a Prc service: for every tag in the settings it builds a dictionary and a pruned
        /// word subset and serializes both into the service directory.
        /// </summary>
        /// <param name="processId">Id of the process that receives progress, finished, cancelled and interrupted notifications.</param>
        /// <param name="settings">Prc settings: service id, data set name, tags and compress settings.</param>
        /// <param name="token">Checked at the start of every tag iteration; a cancellation request aborts the run.</param>
        /// <remarks>
        /// Exceptions are not propagated: on any failure the service status is reset to New, the service
        /// directory is removed and the process is reported as cancelled or interrupted.
        /// </remarks>
        public void Prepare(string processId, PrcSettingsElastic settings, CancellationToken token)
        {
            var directoryPath = GetDirectoryPath(settings.ServiceId);

            try
            {
                // Mark the service as Busy for the duration of the preparation.
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                // Clear the target path before anything is generated into it.
                IOHelper.SafeDeleteDictionary(directoryPath, true);

                var globalStoreDataSet = GlobalStore.DataSets.Get(settings.DataSetName);
                var dataSet            = globalStoreDataSet.DataSet;
                // One progress step per tag.
                var progress           = new Progress(settings.Tags.Count);
                // The literal 1 presumably is the n-gram size (the dictionaries below are stored with
                // NGram = 1) — confirm against the SubsetCreator constructor.
                var subsetCreator      = new SubsetCreator(dataSet.Name, new List <string> {
                    DocumentElastic.TextField
                }, dataSet.InterpretedFields.Select(DocumentQuery.MapDocumentObjectName).ToList(), 1, queryFactory, globalStoreDataSet.AttachmentFields);

                Directory.CreateDirectory(directoryPath);

                var logPrefix = $"Prc Prepare {processId}";
                logger.LogInformation($"{logPrefix} starts with ParallelLimit: {parallelService.ParallelLimit}, Tags Count: {settings.Tags.Count}");

                // Guards the file writes and the progress reporting done by the parallel workers.
                var lockObject = new object();

                Parallel.ForEach(settings.Tags, parallelService.ParallelOptions(), (tag, loopState) =>
                {
                    token.ThrowIfCancellationRequested();

                    // NOTE(review): this interpolates the tag object itself, not tag.Id — the output
                    // depends on the tag type's ToString(); confirm that is intended.
                    logger.LogInformation($"{logPrefix} preparing Tag: `{tag}`");

                    var subset    = subsetCreator.CreateByTag(tag.Id, dataSet.TagField);
                    var algorithm = new TwisterAlgorithm(
                        subset, true, true,
                        settings.CompressSettings.CompressCategoryOccurence,
                        settings.CompressSettings.CompressDataSetOccurence,
                        (LogicalOperatorEnum)settings.CompressSettings.CompressOperator);


                    algorithm.InitTagDictionary();
                    // Words of the subset that have no tag-dictionary entry with PMI > 0.
                    var notNeededWords = subset.WordsWithOccurences.Keys.Except(
                        algorithm.TagDictionary
                        .Where(sd => sd.Value.PMI > 0)
                        .Select(sd => sd.Key)).ToList();

                    // NOTE(review): GetDictionary() runs before the subset is pruned below; whether it
                    // depends on the unpruned subset is not visible here, so keep this order.
                    var td = algorithm.GetDictionary();

                    // Prune the subset in place so that only the needed words are serialized.
                    foreach (var word in notNeededWords)
                    {
                        subset.WordsWithOccurences.Remove(word);
                    }

                    lock (lockObject)
                    {
                        // Serialize the dictionary of this tag.
                        var dicProtoBuf = new DictionaryProtoBuf
                        {
                            Id         = tag.Id,
                            Dictionary = td,
                            NGram      = 1
                        };
                        dicProtoBuf.Serialize(string.Format("{0}/{1}", directoryPath, dicProtoBuf.GetFileName()));

                        // Serialize the pruned subset of this tag.
                        var subsetProtoBuf = new SubsetProtoBuf
                        {
                            Id = tag.Id,
                            WordsWithOccurences        = subset.WordsWithOccurences,
                            AllWordsOccurencesSumInTag = subset.AllWordsOccurencesSumInTag,
                            AllOccurencesSumInCorpus   = subset.AllWordsOccurencesSumInCorpus
                        };
                        subsetProtoBuf.Serialize(string.Format("{0}/{1}", directoryPath, subsetProtoBuf.GetFileName()));

                        // Advance and report progress while still holding the lock.
                        progress.Step();
                        processHandler.Changed(processId, progress.Percent.Round(2));
                    }

                    logger.LogInformation($"{logPrefix} prepared Tag: `{tag}`");
                });

                // The process is reported as finished before the service status is set to Prepared.
                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyPrepared_0_Service_1, ServiceTypeEnum.Prc, service.Name));
                service.Status = (int)ServiceStatusEnum.Prepared;
                serviceQuery.Update(service.Id, service);
            }
            catch (Exception ex)
            {
                // Roll back: the service becomes New again and partial output is removed.
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.New;
                serviceQuery.Update(service.Id, service);
                IOHelper.SafeDeleteDictionary(directoryPath, true);
                // Only an OperationCanceledException carried as the inner exception counts as a
                // cancellation — presumably because exceptions thrown inside the Parallel.ForEach body
                // arrive wrapped in an AggregateException; a directly thrown one is reported as interrupted.
                if (ex.InnerException != null && ex.InnerException is OperationCanceledException)
                {
                    processHandler.Cancelled(processId);
                }
                else
                {
                    processHandler.Interrupted(processId, ex);
                }
            }
        }