/// <summary>
/// Loads the dictionary files of a classifier service into memory, builds its scorers and tag
/// lookups, and registers the result in <c>GlobalStore.ActivatedClassifiers</c>.
/// </summary>
/// <param name="processId">Id passed to the process handler for progress, finished, cancelled and interrupted notifications.</param>
/// <param name="settings">Classifier settings: service id, data set name, activated n-grams and tag ids, tags and emphasized tag ids.</param>
/// <param name="token">Cancellation token, checked before each dictionary file is deserialized.</param>
/// <remarks>
/// No exception leaves this method. On any failure the service status is set back to Prepared,
/// a registered classifier with this service id is removed from the global store, and the
/// process is reported as cancelled (for cancellation) or interrupted (for everything else).
/// </remarks>
public void Activate(string processId, ClassifierSettingsElastic settings, CancellationToken token)
        {
            try
            {
                // Force a collection and refresh the resource status before reading the free memory.
                GC.Collect();
                machineResourceService.UpdateResourcesManually();

                // NOTE(review): FreeMemory is presumably reported in megabytes. If the property is a
                // 32-bit int this multiplication can overflow above ~2 GB - confirm its type.
                var freeMemInBytes = machineResourceService.Status.FreeMemory * 1024 * 1024;

                // Collect the dictionary files of every activated n-gram whose file name (without
                // extension) is one of the activated tag ids.
                var dictionaryPaths = new List<string>();
                foreach (var nGram in settings.ActivatedNGramList)
                {
                    var directoryPath = string.Format("{0}/{1}/{2}", _dictionaryRootPath, settings.ServiceId, nGram);
                    var fileList = IOHelper.GetFilesInFolder(directoryPath, DictionaryProtoBuf.GetExtension())
                                           .Where(file => settings.ActivatedTagIdList.Contains(Path.GetFileNameWithoutExtension(file)));
                    dictionaryPaths.AddRange(fileList);
                }

                // Refuse to activate when the on-disk size, scaled by the in-memory multiplier, exceeds
                // the free memory. A non-positive free memory value skips the check.
                var sizeInBytes = dictionaryPaths.Sum(f => new FileInfo(f).Length);
                if (freeMemInBytes > 0 && freeMemInBytes < sizeInBytes * Constants.DictionaryInMemoryMultiplier)
                {
                    throw new Common.Exceptions.OutOfResourceException(ServiceResources.NotEnoughResourceToActivateService);
                }

                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                var lockObject = new object();
                var counter = 0;
                var allCount = dictionaryPaths.Count;

                // Deserialize the dictionaries in parallel; progress is reported after every 15th file.
                var deserializedDics = new ConcurrentBag<DictionaryProtoBuf>();
                Parallel.ForEach(dictionaryPaths, parallelService.ParallelOptions(), (path, loopState) =>
                {
                    token.ThrowIfCancellationRequested();
                    deserializedDics.Add(BaseProtoBuf.DeSerialize<DictionaryProtoBuf>(path));
                    lock (lockObject)
                    {
                        if (++counter % 15 == 0)
                        {
                            processHandler.Changed(processId, Math.Round(counter / (double)allCount * 100, 2));
                        }
                    }
                });

                var globalStoreClassifier = new GlobalStoreClassifier();

                if (deserializedDics.Any())
                {
                    // One scorer per dictionary id, built from that id's n-gram -> dictionary map.
                    var scorersDic = deserializedDics
                        .GroupBy(d => d.Id)
                        .ToDictionary(
                            g => g.Key,
                            g => new Cerebellum.Scorer.PeSScorer(g.ToDictionary(di => di.NGram, di => di.Dictionary)));
                    globalStoreClassifier.ClassifierScorers = scorersDic;
                }

                var tagsDics = settings.Tags.ToDictionary(t => t.Id, t => t);
                var tagsDic = settings.Tags.ToDictionary(
                    t => t.Id,
                    t => tagService.GetTagModel(settings.DataSetName, t.Id, false, tagsDics));

                var analyzeQuery = queryFactory.GetAnalyzeQuery(settings.DataSetName);

                // Analyze the name of every emphasized tag and keep the resulting tokens per tag id.
                var emphasizedTagsWords = new Dictionary<string, List<string>>();
                foreach (var tagId in settings.EmphasizedTagIdList)
                {
                    var tokens = analyzeQuery.Analyze(tagsDic[tagId].Name, 1).ToList();
                    emphasizedTagsWords.Add(tagId, tokens);
                }

                globalStoreClassifier.ClassifierEmphasizedTagIds = emphasizedTagsWords;
                globalStoreClassifier.ClassifiersSettings = settings;
                globalStoreClassifier.ClassifierTags = tagsDic;
                // Distinct ids of all path elements of all tags, stored as an id -> id map.
                globalStoreClassifier.ClassifierParentTagIds = tagsDic
                    .SelectMany(td => td.Value.Properties.Paths.Select(p => p.Id))
                    .Distinct()
                    .ToDictionary(p => p, p => p);

                GlobalStore.ActivatedClassifiers.Add(settings.ServiceId, globalStoreClassifier);

                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyActivated_0_Service_1, ServiceTypeEnum.Classifier, service.Name));
                service.Status = (int)ServiceStatusEnum.Active;
                serviceQuery.Update(service.Id, service);
            }
            catch (Exception ex)
            {
                // Roll back: the service returns to Prepared and must not stay registered as activated.
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Prepared;
                serviceQuery.Update(service.Id, service);
                if (GlobalStore.ActivatedClassifiers.IsExist(settings.ServiceId))
                {
                    GlobalStore.ActivatedClassifiers.Remove(settings.ServiceId);
                }

                // Parallel.ForEach wraps exceptions thrown by the loop body in an AggregateException, so
                // a cancellation raised inside the loop arrives as the inner exception. A cancellation
                // that reaches this handler unwrapped must be reported as cancelled as well.
                var isCancellation = ex is OperationCanceledException
                                     || ex.InnerException is OperationCanceledException;
                if (isCancellation)
                {
                    processHandler.Cancelled(processId);
                }
                else
                {
                    processHandler.Interrupted(processId, ex);
                }
                GC.Collect();
            }
        }
예제 #2
0
        /// <summary>
        /// Returns the tokens of the request text that occur in the word dictionary of a PRC tag,
        /// each with the score computed by <c>Cerebellum.Dictionary.TwisterAlgorithm</c>, ordered by
        /// descending score.
        /// </summary>
        /// <param name="id">Id or alias of an activated PRC service.</param>
        /// <param name="request">Request body with the text to analyze and an optional tag id.</param>
        /// <param name="isStrict">When true, keywords scoring below the average score are dropped.</param>
        /// <returns>
        /// 400 when the service is not activated or the given tag is not among the service's tags;
        /// 406 when the selected tag has no word occurrences; otherwise 200 with the keyword list,
        /// which is empty when no tag id was given and no scorer produced a positive score.
        /// </returns>
        public IActionResult Keywords(string id, [FromBody] PrcKeywordsRequest request, [FromQuery] bool isStrict = false)
        {
            // If Id is Alias, translate to Id
            if (GlobalStore.ServiceAliases.IsExist(id))
            {
                id = GlobalStore.ServiceAliases.Get(id);
            }

            if (!GlobalStore.ActivatedPrcs.IsExist(id))
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status400BadRequest, string.Format(ServiceResources.ServiceNotExistsOrNotActivated, ServiceTypeEnum.Prc));
            }

            // Read the activated service once so the whole request works on the same instance.
            var activatedPrc = GlobalStore.ActivatedPrcs.Get(id);

            var hasRequestedTag = !string.IsNullOrEmpty(request.TagId);
            if (hasRequestedTag && !activatedPrc.PrcsSettings.Tags.Any(t => t.Id == request.TagId))
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status400BadRequest, ServiceResources.TheGivenTagIsMissingFromThePRCService);
            }

            var dataSet = GlobalStore.DataSets.Get(activatedPrc.PrcsSettings.DataSetName).DataSet;
            var analyzeQuery = queryFactory.GetAnalyzeQuery(dataSet.Name);

            var tokens = analyzeQuery.Analyze(request.Text, 1).ToList();
            var text = string.Join(" ", tokens);

            string tagId;
            if (hasRequestedTag)
            {
                tagId = request.TagId;
            }
            else
            {
                // If no tagId is given, determine it with the PRC scorers: the tag with the highest
                // positive score wins.
                var allResults = new List<KeyValuePair<string, double>>();
                foreach (var scorerKvp in activatedPrc.PrcScorers)
                {
                    var score = scorerKvp.Value.GetScore(text, 1.7, true);
                    allResults.Add(new KeyValuePair<string, double>(scorerKvp.Key, score));
                }

                var resultsList = allResults.Where(r => r.Value > 0).OrderByDescending(r => r.Value).ToList();
                if (resultsList.Count == 0)
                {
                    // No tag matches the text: answer with an empty list of this endpoint's own result type.
                    return new OkObjectResult(new List<PrcKeywordsResult>());
                }
                tagId = resultsList[0].Key;
            }

            var globalSubset = activatedPrc.PrcSubsets[tagId];

            if (globalSubset.WordsWithOccurences == null)
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status406NotAcceptable, ServiceResources.TheGivenTagHasNoWordsInDictionary);
            }

            // Restrict the tag's subset to the words that actually occur in the request text.
            var wordsInDic = globalSubset.WordsWithOccurences.Keys.Intersect(tokens).ToList();

            var baseSubset = new Cerebellum.Subset
            {
                AllWordsOccurencesSumInCorpus = globalSubset.AllWordsOccurencesSumInCorpus,
                AllWordsOccurencesSumInTag    = globalSubset.AllWordsOccurencesSumInTag,
                WordsWithOccurences           = wordsInDic.ToDictionary(w => w, w => globalSubset.WordsWithOccurences[w])
            };
            var baseDic = new Cerebellum.Dictionary.TwisterAlgorithm(baseSubset, true, false)
                .GetDictionary()
                .OrderByDescending(d => d.Value)
                .ToList();

            // Strict mode keeps only the keywords scoring at least the average. With an empty list
            // there is no average to compute, so the division is skipped.
            if (isStrict && baseDic.Count > 0)
            {
                var avg = baseDic.Sum(d => d.Value) / baseDic.Count;
                baseDic.RemoveAll(d => d.Value < avg);
            }

            return new OkObjectResult(baseDic.Select(d => new PrcKeywordsResult
            {
                Word = d.Key,
                Score = d.Value
            }));
        }