Пример #1
0
        /// <summary>
        /// Prepares the service identified by <c>settings.ServiceId</c>: for every tag in
        /// <c>settings.Tags</c> a subset and a dictionary are built and serialized into the
        /// service's directory.
        /// </summary>
        /// <remarks>
        /// The service status is set to <c>Busy</c> while the work runs and to <c>Prepared</c>
        /// on success. On any failure the status is reset to <c>New</c>, the service directory
        /// is deleted and the process is reported as cancelled or interrupted; the exception
        /// raised by the preparation itself is not rethrown.
        /// </remarks>
        /// <param name="processId">Identifier used for log messages and for the progress/finish notifications.</param>
        /// <param name="settings">Service settings; supplies the service id, data set name, tags and compress settings.</param>
        /// <param name="token">Checked at the start of every tag iteration to stop the work early.</param>
        public void Prepare(string processId, PrcSettingsElastic settings, CancellationToken token)
        {
            var directoryPath = GetDirectoryPath(settings.ServiceId);

            try
            {
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                // Start from a clean directory; it is re-created below once the inputs are loaded.
                IOHelper.SafeDeleteDictionary(directoryPath, true);

                var globalStoreDataSet = GlobalStore.DataSets.Get(settings.DataSetName);
                var dataSet            = globalStoreDataSet.DataSet;
                var progress           = new Progress(settings.Tags.Count);
                var subsetCreator      = new SubsetCreator(dataSet.Name, new List <string> {
                    DocumentElastic.TextField
                }, dataSet.InterpretedFields.Select(DocumentQuery.MapDocumentObjectName).ToList(), 1, queryFactory, globalStoreDataSet.AttachmentFields);

                Directory.CreateDirectory(directoryPath);

                var logPrefix = $"Prc Prepare {processId}";
                logger.LogInformation($"{logPrefix} starts with ParallelLimit: {parallelService.ParallelLimit}, Tags Count: {settings.Tags.Count}");

                var lockObject = new object();

                Parallel.ForEach(settings.Tags, parallelService.ParallelOptions(), (tag, loopState) =>
                {
                    token.ThrowIfCancellationRequested();

                    logger.LogInformation($"{logPrefix} preparing Tag: `{tag}`");

                    var subset    = subsetCreator.CreateByTag(tag.Id, dataSet.TagField);
                    var algorithm = new TwisterAlgorithm(
                        subset, true, true,
                        settings.CompressSettings.CompressCategoryOccurence,
                        settings.CompressSettings.CompressDataSetOccurence,
                        (LogicalOperatorEnum)settings.CompressSettings.CompressOperator);

                    algorithm.InitTagDictionary();

                    // Words of the subset that have no tag-dictionary entry with a positive PMI.
                    // Materialized with ToList() because the subset is modified further down.
                    var notNeededWords = subset.WordsWithOccurences.Keys.Except(
                        algorithm.TagDictionary
                        .Where(sd => sd.Value.PMI > 0)
                        .Select(sd => sd.Key)).ToList();

                    // The dictionary is taken before the words are removed from the subset.
                    var td = algorithm.GetDictionary();

                    foreach (var word in notNeededWords)
                    {
                        subset.WordsWithOccurences.Remove(word);
                    }

                    // Serialization and progress reporting are done by one worker at a time.
                    lock (lockObject)
                    {
                        //dictionary serialization
                        var dicProtoBuf = new DictionaryProtoBuf
                        {
                            Id         = tag.Id,
                            Dictionary = td,
                            NGram      = 1
                        };
                        dicProtoBuf.Serialize($"{directoryPath}/{dicProtoBuf.GetFileName()}");

                        //subset serialization
                        var subsetProtoBuf = new SubsetProtoBuf
                        {
                            Id = tag.Id,
                            WordsWithOccurences        = subset.WordsWithOccurences,
                            AllWordsOccurencesSumInTag = subset.AllWordsOccurencesSumInTag,
                            AllOccurencesSumInCorpus   = subset.AllWordsOccurencesSumInCorpus
                        };
                        subsetProtoBuf.Serialize($"{directoryPath}/{subsetProtoBuf.GetFileName()}");

                        progress.Step();
                        processHandler.Changed(processId, progress.Percent.Round(2));
                    }

                    logger.LogInformation($"{logPrefix} prepared Tag: `{tag}`");
                });

                // Persist the final status before announcing completion, so that anything
                // reacting to the notification does not read the stale Busy status.
                service.Status = (int)ServiceStatusEnum.Prepared;
                serviceQuery.Update(service.Id, service);
                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyPrepared_0_Service_1, ServiceTypeEnum.Prc, service.Name));
            }
            catch (Exception ex)
            {
                // Roll back: the service returns to New and partial output is discarded.
                // The lookup result is guarded so that a missing service cannot make the
                // handler itself throw before the process outcome is reported.
                var service = serviceQuery.Get(settings.ServiceId);
                if (service != null)
                {
                    service.Status = (int)ServiceStatusEnum.New;
                    serviceQuery.Update(service.Id, service);
                }
                IOHelper.SafeDeleteDictionary(directoryPath, true);

                // Parallel.ForEach wraps exceptions thrown by the loop body in an
                // AggregateException, but throws OperationCanceledException directly when the
                // cancellation is observed through ParallelOptions.CancellationToken.
                // Both shapes are treated as a cancellation.
                if (ex is OperationCanceledException || ex.InnerException is OperationCanceledException)
                {
                    processHandler.Cancelled(processId);
                }
                else
                {
                    processHandler.Interrupted(processId, ex);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Activates the service identified by <c>settings.ServiceId</c>: the serialized
        /// dictionaries and subsets found in the service's directory are deserialized and
        /// registered in <c>GlobalStore.ActivatedPrcs</c>.
        /// </summary>
        /// <remarks>
        /// Before loading, the total size of the files (scaled by
        /// <c>Constants.DictionaryInMemoryMultiplier</c>) is compared with the reported free
        /// memory. The service status is set to <c>Busy</c> while loading and to <c>Active</c>
        /// on success. On any failure the status is set to <c>Prepared</c>, the entry is removed
        /// from <c>GlobalStore.ActivatedPrcs</c> if present, and the process is reported as
        /// cancelled or interrupted; the exception raised by the activation itself is not rethrown.
        /// </remarks>
        /// <param name="processId">Identifier used for the progress/finish notifications.</param>
        /// <param name="settings">Service settings; supplies the service id and is stored with the activated service.</param>
        /// <param name="token">Checked at the start of every file iteration to stop the work early.</param>
        public void Activate(string processId, PrcSettingsElastic settings, CancellationToken token)
        {
            try
            {
                // NOTE(review): presumably forces a collection so that the free-memory reading
                // below is taken after reclaimable memory has been released - confirm.
                GC.Collect();
                machineResourceService.UpdateResourcesManually();

                // Long literals keep the multiplication out of 32-bit arithmetic, so the value
                // cannot overflow if FreeMemory is a 32-bit integer.
                // NOTE(review): FreeMemory looks like megabytes (scaled by 1024 * 1024) - confirm.
                var freeMemInBytes = machineResourceService.Status.FreeMemory * 1024L * 1024L;

                var directoryPath   = $"{_dictionaryRootPath}/{settings.ServiceId}";
                var dictionaryPaths = IOHelper.GetFilesInFolder(directoryPath, DictionaryProtoBuf.GetExtension());
                var subsetPaths     = new List <string>(IOHelper.GetFilesInFolder(directoryPath, SubsetProtoBuf.GetExtension()));

                var sizeInBytes = dictionaryPaths.Sum(f => new FileInfo(f).Length)
                                  + subsetPaths.Sum(f => new FileInfo(f).Length);

                // A non-positive free-memory reading skips the check instead of failing the activation.
                if (freeMemInBytes > 0 && freeMemInBytes < sizeInBytes * Constants.DictionaryInMemoryMultiplier)
                {
                    throw new Common.Exceptions.OutOfResourceException(ServiceResources.NotEnoughResourceToActivateService);
                }

                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                var deserializedDics    = new ConcurrentBag <DictionaryProtoBuf>();
                var deserializedSubsets = new ConcurrentBag <SubsetProtoBuf>();

                var lockObject = new object();
                var allPaths   = dictionaryPaths.Concat(subsetPaths).ToList();
                var progress   = new Progress(allPaths.Count);

                // Membership sets used to decide how each path is deserialized. They are only
                // read inside the parallel loop, so plain hash sets are sufficient.
                var dictionaryPathSet = new HashSet <string>(dictionaryPaths);
                var subsetPathSet     = new HashSet <string>(subsetPaths);

                Parallel.ForEach(allPaths, parallelService.ParallelOptions(), (path, loopState) =>
                {
                    token.ThrowIfCancellationRequested();

                    if (dictionaryPathSet.Contains(path))
                    {
                        deserializedDics.Add(BaseProtoBuf.DeSerialize <DictionaryProtoBuf>(path));
                    }
                    else if (subsetPathSet.Contains(path))
                    {
                        deserializedSubsets.Add(BaseProtoBuf.DeSerialize <SubsetProtoBuf>(path));
                    }

                    lock (lockObject)
                    {
                        progress.Step();

                        // Progress is published only on every 15th step.
                        if (progress.Value % 15 == 0)
                        {
                            processHandler.Changed(processId, progress.Percent.Round(2));
                        }
                    }
                });

                var globalStorePrc = new GlobalStorePrc();

                if (deserializedDics.Any())
                {
                    // One scorer per Id, built from that Id's dictionaries keyed by NGram.
                    var scorersDic = deserializedDics.GroupBy(d => d.Id).ToDictionary(d => d.Key, d => new Cerebellum.Scorer.PeSScorer(d.ToDictionary(di => di.NGram, di => di.Dictionary)));
                    globalStorePrc.PrcScorers = scorersDic;
                }

                if (deserializedSubsets.Any())
                {
                    var subsetsDic = deserializedSubsets.ToDictionary(d => d.Id, d => new Subset
                    {
                        AllWordsOccurencesSumInCorpus = d.AllOccurencesSumInCorpus,
                        AllWordsOccurencesSumInTag    = d.AllWordsOccurencesSumInTag,
                        WordsWithOccurences           = d.WordsWithOccurences
                    });
                    globalStorePrc.PrcSubsets = subsetsDic;
                }

                globalStorePrc.PrcsSettings = settings;

                GlobalStore.ActivatedPrcs.Add(settings.ServiceId, globalStorePrc);

                // Persist the final status before announcing completion, so that anything
                // reacting to the notification does not read the stale Busy status.
                service.Status = (int)ServiceStatusEnum.Active;
                serviceQuery.Update(service.Id, service);
                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyActivated_0_Service_1, ServiceTypeEnum.Prc, service.Name));
            }
            catch (Exception ex)
            {
                // Roll back: the service returns to Prepared and is removed from the store.
                // The lookup result is guarded so that a missing service cannot make the
                // handler itself throw before the process outcome is reported.
                var service = serviceQuery.Get(settings.ServiceId);
                if (service != null)
                {
                    service.Status = (int)ServiceStatusEnum.Prepared;
                    serviceQuery.Update(service.Id, service);
                }
                if (GlobalStore.ActivatedPrcs.IsExist(settings.ServiceId))
                {
                    GlobalStore.ActivatedPrcs.Remove(settings.ServiceId);
                }

                // Parallel.ForEach wraps exceptions thrown by the loop body in an
                // AggregateException, but throws OperationCanceledException directly when the
                // cancellation is observed through ParallelOptions.CancellationToken.
                // Both shapes are treated as a cancellation.
                if (ex is OperationCanceledException || ex.InnerException is OperationCanceledException)
                {
                    processHandler.Cancelled(processId);
                }
                else
                {
                    processHandler.Interrupted(processId, ex);
                }
                GC.Collect();
            }
        }