/// <summary>
/// Activates a classifier service: deserializes the dictionary files of the activated
/// n-grams and tags, builds one scorer per dictionary id, resolves the tag models and
/// registers the assembled classifier in <c>GlobalStore.ActivatedClassifiers</c>.
/// </summary>
/// <param name="processId">Id of the process that receives progress, finished, cancelled and interrupted notifications.</param>
/// <param name="settings">Classifier settings (service id, data set name, tags, activated tag ids and n-grams, emphasized tag ids).</param>
/// <param name="token">Cancellation token, checked before each dictionary file is deserialized.</param>
/// <remarks>
/// Exceptions are not rethrown. On any failure the service status is set back to
/// <c>Prepared</c>, a possibly registered classifier is removed from the global store and
/// the process is reported as cancelled or interrupted through <c>processHandler</c>.
/// </remarks>
public void Activate(string processId, ClassifierSettingsElastic settings, CancellationToken token)
{
    try
    {
        GC.Collect();
        machineResourceService.UpdateResourcesManually();

        // Long literals keep the multiplication out of 32-bit arithmetic. NOTE(review): if
        // FreeMemory is an int, the former `* 1024 * 1024` overflowed above 2047 MB and the
        // `> 0` guard below then silently skipped the memory check - confirm the property type.
        var freeMemInBytes = machineResourceService.Status.FreeMemory * 1024L * 1024L;

        // Collect the dictionary files of every activated n-gram, keeping only files whose
        // name (without extension) is one of the activated tag ids.
        var dictionaryPaths = new List<string>();
        foreach (var nGram in settings.ActivatedNGramList)
        {
            var directoryPath = string.Format("{0}/{1}/{2}", _dictionaryRootPath, settings.ServiceId, nGram);
            var fileList = IOHelper.GetFilesInFolder(directoryPath, DictionaryProtoBuf.GetExtension())
                .Where(file => settings.ActivatedTagIdList.Contains(Path.GetFileNameWithoutExtension(file)));
            dictionaryPaths.AddRange(fileList);
        }

        // Refuse to activate when the files, scaled by the in-memory multiplier, would not fit
        // into the reported free memory. A non-positive free memory value disables the check.
        var sizeInBytes = dictionaryPaths.Sum(f => new FileInfo(f).Length);
        if (freeMemInBytes > 0 && freeMemInBytes < sizeInBytes * Constants.DictionaryInMemoryMultiplier)
        {
            throw new Common.Exceptions.OutOfResourceException(ServiceResources.NotEnoughResourceToActivateService);
        }

        var service = serviceQuery.Get(settings.ServiceId);
        service.Status = (int)ServiceStatusEnum.Busy;
        serviceQuery.Update(service.Id, service);

        var lockObject = new object();
        var counter = 0;
        var allCount = dictionaryPaths.Count;
        var deserializedDics = new ConcurrentBag<DictionaryProtoBuf>();

        Parallel.ForEach(dictionaryPaths, parallelService.ParallelOptions(), (path, loopState) =>
        {
            token.ThrowIfCancellationRequested();
            deserializedDics.Add(BaseProtoBuf.DeSerialize<DictionaryProtoBuf>(path));

            // The counter is shared between worker threads; progress is reported after every 15th file.
            lock (lockObject)
            {
                if (++counter % 15 == 0)
                {
                    processHandler.Changed(processId, Math.Round(counter / (double)allCount * 100, 2));
                }
            }
        });

        var globalStoreClassifier = new GlobalStoreClassifier();
        if (deserializedDics.Any())
        {
            // One scorer per dictionary id, fed with that id's dictionaries keyed by n-gram.
            var scorersDic = deserializedDics
                .GroupBy(d => d.Id)
                .ToDictionary(
                    d => d.Key,
                    d => new Cerebellum.Scorer.PeSScorer(d.ToDictionary(di => di.NGram, di => di.Dictionary)));
            globalStoreClassifier.ClassifierScorers = scorersDic;
        }

        var tagsDics = settings.Tags.ToDictionary(t => t.Id, t => t);
        var tagsDic = settings.Tags.ToDictionary(
            t => t.Id,
            t => tagService.GetTagModel(settings.DataSetName, t.Id, false, tagsDics));

        // Analyze the name of every emphasized tag into its token list.
        var analyzeQuery = queryFactory.GetAnalyzeQuery(settings.DataSetName);
        var emphasizedTagsWords = new Dictionary<string, List<string>>();
        foreach (var tagId in settings.EmphasizedTagIdList)
        {
            var tokens = analyzeQuery.Analyze(tagsDic[tagId].Name, 1).ToList();
            emphasizedTagsWords.Add(tagId, tokens);
        }

        globalStoreClassifier.ClassifierEmphasizedTagIds = emphasizedTagsWords;
        globalStoreClassifier.ClassifiersSettings = settings;
        globalStoreClassifier.ClassifierTags = tagsDic;
        globalStoreClassifier.ClassifierParentTagIds = tagsDic
            .SelectMany(td => td.Value.Properties.Paths.Select(p => p.Id))
            .Distinct()
            .ToDictionary(p => p, p => p);

        GlobalStore.ActivatedClassifiers.Add(settings.ServiceId, globalStoreClassifier);

        processHandler.Finished(
            processId,
            string.Format(ServiceResources.SuccessfullyActivated_0_Service_1, ServiceTypeEnum.Classifier, service.Name));
        service.Status = (int)ServiceStatusEnum.Active;
        serviceQuery.Update(service.Id, service);
    }
    catch (Exception ex)
    {
        // Roll the service back to Prepared. The null check keeps a missing service from
        // throwing inside the handler, which would leave the process without a final state.
        var service = serviceQuery.Get(settings.ServiceId);
        if (service != null)
        {
            service.Status = (int)ServiceStatusEnum.Prepared;
            serviceQuery.Update(service.Id, service);
        }

        if (GlobalStore.ActivatedClassifiers.IsExist(settings.ServiceId))
        {
            GlobalStore.ActivatedClassifiers.Remove(settings.ServiceId);
        }

        // Cancellation can arrive wrapped (e.g. as the inner exception of an AggregateException
        // from the parallel loop) or as a bare OperationCanceledException; treat both as cancelled.
        var cancelled = ex is OperationCanceledException || ex.InnerException is OperationCanceledException;
        if (cancelled)
        {
            processHandler.Cancelled(processId);
        }
        else
        {
            processHandler.Interrupted(processId, ex);
        }

        GC.Collect();
    }
}
/// <summary>
/// Returns the scored keywords of the request text for a tag of an activated PRC service.
/// </summary>
/// <param name="id">Service id or service alias; an alias is translated to the service id first.</param>
/// <param name="request">Request body carrying the text and, optionally, the tag id to use.</param>
/// <param name="isStrict">When true, keywords scoring below the average score are dropped.</param>
/// <returns>
/// 400 when the service is not activated or the given tag is not part of the service,
/// 406 when the selected tag has no words, otherwise 200 with the keywords ordered by
/// descending score (an empty list when no tag id is given and no scorer returns a positive score).
/// </returns>
public IActionResult Keywords(string id, [FromBody] PrcKeywordsRequest request, [FromQuery] bool isStrict = false)
{
    // If Id is Alias, translate to Id
    if (GlobalStore.ServiceAliases.IsExist(id))
    {
        id = GlobalStore.ServiceAliases.Get(id);
    }

    if (!GlobalStore.ActivatedPrcs.IsExist(id))
    {
        return new HttpStatusCodeWithErrorResult(
            StatusCodes.Status400BadRequest,
            string.Format(ServiceResources.ServiceNotExistsOrNotActivated, ServiceTypeEnum.Prc));
    }

    // Resolve the activated service once so every step below works on the same instance.
    var activatedPrc = GlobalStore.ActivatedPrcs.Get(id);

    if (!string.IsNullOrEmpty(request.TagId) && !activatedPrc.PrcsSettings.Tags.Any(t => t.Id == request.TagId))
    {
        return new HttpStatusCodeWithErrorResult(
            StatusCodes.Status400BadRequest,
            ServiceResources.TheGivenTagIsMissingFromThePRCService);
    }

    var dataSet = GlobalStore.DataSets.Get(activatedPrc.PrcsSettings.DataSetName).DataSet;
    var analyzeQuery = queryFactory.GetAnalyzeQuery(dataSet.Name);

    var tokens = analyzeQuery.Analyze(request.Text, 1).ToList();
    var text = string.Join(" ", tokens);

    var tagId = string.Empty;
    if (!string.IsNullOrEmpty(request.TagId))
    {
        tagId = request.TagId;
    }
    else
    {
        // If no tagId is given, compute it with the PRC scorers: the tag with the highest positive score wins.
        var allResults = new List<KeyValuePair<string, double>>();
        foreach (var scorerKvp in activatedPrc.PrcScorers)
        {
            var score = scorerKvp.Value.GetScore(text, 1.7, true);
            allResults.Add(new KeyValuePair<string, double>(scorerKvp.Key, score));
        }

        var resultsList = allResults.Where(r => r.Value > 0).OrderByDescending(r => r.Value).ToList();
        if (resultsList.Count == 0)
        {
            // NOTE(review): the element type differs from the PrcKeywordsResult items returned
            // below; kept as-is because callers may rely on it - confirm whether this is intended.
            return new OkObjectResult(new List<PrcRecommendationResult>());
        }

        tagId = resultsList.First().Key;
    }

    // A tag without a loaded subset is answered like a tag without words instead of
    // failing with a KeyNotFoundException.
    if (!activatedPrc.PrcSubsets.ContainsKey(tagId))
    {
        return new HttpStatusCodeWithErrorResult(
            StatusCodes.Status406NotAcceptable,
            ServiceResources.TheGivenTagHasNoWordsInDictionary);
    }

    var globalSubset = activatedPrc.PrcSubsets[tagId];
    if (globalSubset.WordsWithOccurences == null)
    {
        return new HttpStatusCodeWithErrorResult(
            StatusCodes.Status406NotAcceptable,
            ServiceResources.TheGivenTagHasNoWordsInDictionary);
    }

    // Restrict the tag's subset to the words that occur in the analyzed request text.
    var wordsInDic = globalSubset.WordsWithOccurences.Keys.Intersect(tokens).ToList();

    var baseSubset = new Cerebellum.Subset
    {
        AllWordsOccurencesSumInCorpus = globalSubset.AllWordsOccurencesSumInCorpus,
        AllWordsOccurencesSumInTag = globalSubset.AllWordsOccurencesSumInTag,
        WordsWithOccurences = wordsInDic.ToDictionary(w => w, w => globalSubset.WordsWithOccurences[w])
    };
    var baseDic = new Cerebellum.Dictionary.TwisterAlgorithm(baseSubset, true, false)
        .GetDictionary()
        .OrderByDescending(d => d.Value)
        .ToList();

    // Strict mode keeps only the words scoring at least the average; an empty result has
    // no average, so the division is skipped for it.
    if (isStrict && baseDic.Count > 0)
    {
        var avg = baseDic.Sum(d => d.Value) / baseDic.Count;
        baseDic.RemoveAll(d => d.Value < avg);
    }

    return new OkObjectResult(baseDic.Select(d => new PrcKeywordsResult { Word = d.Key, Score = d.Value }));
}