Ejemplo n.º 1
0
        /// <summary>
        /// Writes one CSV file per tag from the serialized dictionaries of a service, compresses the
        /// CSV files into <c>{processId}.zip</c> inside the user directory and reports progress and
        /// the final outcome through <c>processHandler</c>.
        /// </summary>
        /// <param name="processId">Id of the tracked process; also used as the temp directory name and the ZIP file name.</param>
        /// <param name="settings">Service settings; <c>ServiceId</c> locates both the service and its dictionary folder.</param>
        /// <param name="tagIdList">Ids of the tags whose dictionaries are exported.</param>
        /// <param name="token">Checked before each tag; when cancellation is requested the process is marked as cancelled.</param>
        /// <param name="hostUrl">Prefix used to build the download URL placed in the success message.</param>
        /// <remarks>
        /// Exceptions are not propagated: any failure is reported through <c>processHandler.Interrupted</c>.
        /// </remarks>
        public void ExportDictionaries(string processId, PrcSettingsElastic settings, List <string> tagIdList, CancellationToken token, string hostUrl)
        {
            // Declared outside the try block so the finally block can remove the directory
            // regardless of how the export ends (success, cancellation or failure).
            string tempDirectoryPath = null;
            try
            {
                var service = serviceQuery.Get(settings.ServiceId);

                var allDicCount = tagIdList.Count;
                // Reserve roughly 10% extra progress steps for the ZIP phase.
                allDicCount += (allDicCount / 10);
                var progress = new Progress(allDicCount);

                var dictionariesPath = string.Format("{0}/{1}", _dictionaryRootPath, settings.ServiceId);

                tempDirectoryPath = string.Format("{0}/{1}", siteConfig.Directory.Temp, processId);
                System.IO.Directory.CreateDirectory(tempDirectoryPath);

                foreach (var tagId in tagIdList)
                {
                    if (token.IsCancellationRequested)
                    {
                        processHandler.Cancelled(processId);
                        return;
                    }
                    var filePath    = $"{dictionariesPath}/{DictionaryProtoBuf.GetFileName(tagId)}";
                    var dicProtoBuf = BaseProtoBuf.DeSerialize <DictionaryProtoBuf>(filePath);

                    var csvPath = $"{tempDirectoryPath}/{tagId}.csv";
                    if (dicProtoBuf.Dictionary != null)
                    {
                        // One CSV row per dictionary entry: key, value.
                        CsvHelper.CreateCsv(csvPath, dicProtoBuf.Dictionary.Select(d => new List <string> {
                            d.Key, d.Value.ToString()
                        }).ToList());
                    }
                    else
                    {
                        // No dictionary content: still emit an (empty) CSV for the tag.
                        CsvHelper.CreateCsv(csvPath, new List <List <string> >());
                    }

                    progress.Step();
                    processHandler.Changed(processId, progress.Percent.Round(2));
                }

                // Compress the generated CSV files into the user directory.
                var zipFileName   = string.Format("{0}.zip", processId);
                var resultZipPath = string.Format("{0}/{1}", siteConfig.Directory.User, zipFileName);
                ZipHelper.CompressFolder(tempDirectoryPath, resultZipPath);

                var zipUrl = string.Format("{0}{1}/{2}", hostUrl, Common.Constants.FilesPath, zipFileName);

                processHandler.Finished(processId,
                                        string.Format("{0}\n{1}",
                                                      string.Format(ServiceResources.SuccessfullyExportedDictionariesFrom_0_Service_1, ServiceTypeEnum.Prc, service.Name),
                                                      string.Format(ServiceResources.ExportFileCanBeDownloadFromHere_0, zipUrl)));
            }
            catch (Exception ex)
            {
                processHandler.Interrupted(processId, ex);
            }
            finally
            {
                // The CSV files are only an intermediate product of the ZIP; remove them so every
                // export does not leave a directory behind in the temp folder.
                if (tempDirectoryPath != null)
                {
                    try
                    {
                        if (System.IO.Directory.Exists(tempDirectoryPath))
                        {
                            System.IO.Directory.Delete(tempDirectoryPath, true);
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // Best-effort cleanup: a locked file must not change the reported outcome.
                    }
                    catch (UnauthorizedAccessException)
                    {
                        // Best-effort cleanup: missing permissions must not change the reported outcome.
                    }
                }
            }
        }
        /// <summary>
        /// Activates a classifier service: loads the dictionaries of the activated n-grams and tags
        /// in parallel, builds the scorers and tag lookups, registers the result in
        /// <c>GlobalStore.ActivatedClassifiers</c> and marks the service as Active.
        /// </summary>
        /// <param name="processId">Id of the tracked process used for progress and outcome reporting.</param>
        /// <param name="settings">Classifier settings (service id, data set name, activated n-grams/tags, emphasized tags).</param>
        /// <param name="token">Cancellation token checked before each dictionary file is deserialized.</param>
        /// <remarks>
        /// Exceptions are not propagated. On any failure the service status is set to Prepared, a
        /// registered classifier is removed again and the process is reported as cancelled or interrupted.
        /// </remarks>
        public void Activate(string processId, ClassifierSettingsElastic settings, CancellationToken token)
        {
            try
            {
                GC.Collect();
                machineResourceService.UpdateResourcesManually();
                // 64-bit literals keep the product from overflowing should FreeMemory be a 32-bit
                // integer (NOTE(review): FreeMemory is presumably in megabytes - confirm).
                var freeMemInBytes = machineResourceService.Status.FreeMemory * 1024L * 1024L;

                // Collect the dictionary files of every activated n-gram, restricted to activated tags.
                var dictionaryPaths = new List <string>();
                foreach (var nGram in settings.ActivatedNGramList)
                {
                    var directoryPath = string.Format("{0}/{1}/{2}", _dictionaryRootPath, settings.ServiceId, nGram);
                    var fileList      = IOHelper.GetFilesInFolder(directoryPath, DictionaryProtoBuf.GetExtension())
                                        .Where(file => settings.ActivatedTagIdList.Contains(Path.GetFileNameWithoutExtension(file)));
                    dictionaryPaths.AddRange(fileList);
                }

                // Refuse to activate when the estimated in-memory size exceeds the free memory.
                // A non-positive free memory value skips the check.
                var sizeInBytes = dictionaryPaths.Sum(f => new FileInfo(f).Length);
                if (freeMemInBytes > 0 && freeMemInBytes < sizeInBytes * Constants.DictionaryInMemoryMultiplier)
                {
                    throw new Common.Exceptions.OutOfResourceException(ServiceResources.NotEnoughResourceToActivateService);
                }

                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                var lockObject = new object();
                var counter    = 0;
                var allCount   = dictionaryPaths.Count;

                var deserializedDics = new ConcurrentBag <DictionaryProtoBuf>();
                Parallel.ForEach(dictionaryPaths, parallelService.ParallelOptions(), (path, loopState) => {
                    token.ThrowIfCancellationRequested();
                    deserializedDics.Add(BaseProtoBuf.DeSerialize <DictionaryProtoBuf>(path));
                    lock (lockObject)
                    {
                        // Report progress only on every 15th file.
                        if (++counter % 15 == 0)
                        {
                            processHandler.Changed(processId, Math.Round(counter / (double)allCount * 100, 2));
                        }
                    }
                });

                var globalStoreClassifier = new GlobalStoreClassifier();

                if (deserializedDics.Any())
                {
                    // One scorer per dictionary id, fed with that id's dictionaries keyed by n-gram.
                    var scorersDic = deserializedDics.GroupBy(d => d.Id).ToDictionary(d => d.Key, d => new Cerebellum.Scorer.PeSScorer(d.ToDictionary(di => di.NGram, di => di.Dictionary)));
                    globalStoreClassifier.ClassifierScorers = scorersDic;
                }
                var tagsDics = settings.Tags.ToDictionary(t => t.Id, t => t);
                var tagsDic  = settings.Tags.ToDictionary(
                    t => t.Id,
                    t => tagService.GetTagModel(settings.DataSetName, t.Id, false, tagsDics)
                    );

                var analyzeQuery = queryFactory.GetAnalyzeQuery(settings.DataSetName);

                // Analyze the name of every emphasized tag and store the resulting tokens per tag id.
                var emphasizedTagsWords = new Dictionary <string, List <string> >();
                foreach (var tagId in settings.EmphasizedTagIdList)
                {
                    var tokens = analyzeQuery.Analyze(tagsDic[tagId].Name, 1).ToList();
                    emphasizedTagsWords.Add(tagId, tokens);
                }

                globalStoreClassifier.ClassifierEmphasizedTagIds = emphasizedTagsWords;
                globalStoreClassifier.ClassifiersSettings        = settings;
                globalStoreClassifier.ClassifierTags             = tagsDic;
                globalStoreClassifier.ClassifierParentTagIds     = tagsDic.SelectMany(td => td.Value.Properties.Paths
                                                                                      .Select(p => p.Id)).Distinct().ToDictionary(p => p, p => p);

                GlobalStore.ActivatedClassifiers.Add(settings.ServiceId, globalStoreClassifier);

                // Persist the Active status before reporting success, so the process is never
                // reported as finished while the service is still stored as Busy, and a failing
                // status update cannot be reported on an already finished process.
                service.Status = (int)ServiceStatusEnum.Active;
                serviceQuery.Update(service.Id, service);
                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyActivated_0_Service_1, ServiceTypeEnum.Classifier, service.Name));
            }
            catch (Exception ex)
            {
                // Roll back: the service returns to Prepared and a registered classifier is removed.
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Prepared;
                serviceQuery.Update(service.Id, service);
                if (GlobalStore.ActivatedClassifiers.IsExist(settings.ServiceId))
                {
                    GlobalStore.ActivatedClassifiers.Remove(settings.ServiceId);
                }

                // A cancellation raised inside the parallel loop usually arrives wrapped in an
                // AggregateException, but it is thrown directly when the loop observes the token
                // itself, so both shapes are treated as a cancellation.
                var wasCancelled = ex is OperationCanceledException || ex.InnerException is OperationCanceledException;
                if (wasCancelled)
                {
                    processHandler.Cancelled(processId);
                }
                else
                {
                    processHandler.Interrupted(processId, ex);
                }
                GC.Collect();
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Activates a PRC service: loads all dictionary and subset files of the service in parallel,
        /// builds the scorers and subsets, registers the result in <c>GlobalStore.ActivatedPrcs</c>
        /// and marks the service as Active.
        /// </summary>
        /// <param name="processId">Id of the tracked process used for progress and outcome reporting.</param>
        /// <param name="settings">PRC settings; <c>ServiceId</c> locates the service and its dictionary folder.</param>
        /// <param name="token">Cancellation token checked before each file is deserialized.</param>
        /// <remarks>
        /// Exceptions are not propagated. On any failure the service status is set to Prepared, a
        /// registered PRC is removed again and the process is reported as cancelled or interrupted.
        /// </remarks>
        public void Activate(string processId, PrcSettingsElastic settings, CancellationToken token)
        {
            try
            {
                GC.Collect();
                machineResourceService.UpdateResourcesManually();
                // 64-bit literals keep the product from overflowing should FreeMemory be a 32-bit
                // integer (NOTE(review): FreeMemory is presumably in megabytes - confirm).
                var freeMemInBytes = machineResourceService.Status.FreeMemory * 1024L * 1024L;

                var directoryPath   = string.Format("{0}/{1}", _dictionaryRootPath, settings.ServiceId);
                var dictionaryPaths = IOHelper.GetFilesInFolder(directoryPath, DictionaryProtoBuf.GetExtension());
                var subsetPaths     = new List <string>(IOHelper.GetFilesInFolder(directoryPath, SubsetProtoBuf.GetExtension()));

                // Refuse to activate when the estimated in-memory size exceeds the free memory.
                // A non-positive free memory value skips the check.
                var sizeInBytes = dictionaryPaths.Sum(f => new FileInfo(f).Length);
                sizeInBytes += subsetPaths.Sum(f => new FileInfo(f).Length);

                if (freeMemInBytes > 0 && freeMemInBytes < sizeInBytes * Constants.DictionaryInMemoryMultiplier)
                {
                    throw new Common.Exceptions.OutOfResourceException(ServiceResources.NotEnoughResourceToActivateService);
                }

                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Busy;
                serviceQuery.Update(service.Id, service);

                var deserializedDics    = new ConcurrentBag <DictionaryProtoBuf>();
                var deserializedSubsets = new ConcurrentBag <SubsetProtoBuf>();

                var lockObject = new object();
                var allCount   = dictionaryPaths.Count + subsetPaths.Count;
                var progress   = new Progress(allCount);

                var allPaths = new List <string>(allCount);
                allPaths.AddRange(dictionaryPaths);
                allPaths.AddRange(subsetPaths);

                // Read-only lookup that tells dictionary files from subset files inside the loop;
                // it is never modified after construction, so concurrent reads are safe.
                var dictionaryPathSet = new HashSet <string>(dictionaryPaths);

                Parallel.ForEach(allPaths, parallelService.ParallelOptions(), (path, loopState) =>
                {
                    token.ThrowIfCancellationRequested();

                    if (dictionaryPathSet.Contains(path))
                    {
                        deserializedDics.Add(BaseProtoBuf.DeSerialize <DictionaryProtoBuf>(path));
                    }
                    else
                    {
                        // Every path comes from one of the two lists, so anything else is a subset file.
                        deserializedSubsets.Add(BaseProtoBuf.DeSerialize <SubsetProtoBuf>(path));
                    }

                    lock (lockObject)
                    {
                        progress.Step();
                        // Report progress only on every 15th file.
                        if (progress.Value % 15 == 0)
                        {
                            processHandler.Changed(processId, progress.Percent.Round(2));
                        }
                    }
                });

                var globalStorePrc = new GlobalStorePrc();

                if (deserializedDics.Any())
                {
                    // One scorer per dictionary id, fed with that id's dictionaries keyed by n-gram.
                    var scorersDic = deserializedDics.GroupBy(d => d.Id).ToDictionary(d => d.Key, d => new Cerebellum.Scorer.PeSScorer(d.ToDictionary(di => di.NGram, di => di.Dictionary)));
                    globalStorePrc.PrcScorers = scorersDic;
                }

                if (deserializedSubsets.Any())
                {
                    var subsetsDic = deserializedSubsets.ToDictionary(d => d.Id, d => new Subset
                    {
                        AllWordsOccurencesSumInCorpus = d.AllOccurencesSumInCorpus,
                        AllWordsOccurencesSumInTag    = d.AllWordsOccurencesSumInTag,
                        WordsWithOccurences           = d.WordsWithOccurences
                    });
                    globalStorePrc.PrcSubsets = subsetsDic;
                }

                globalStorePrc.PrcsSettings = settings;

                GlobalStore.ActivatedPrcs.Add(settings.ServiceId, globalStorePrc);

                // Persist the Active status before reporting success, so the process is never
                // reported as finished while the service is still stored as Busy, and a failing
                // status update cannot be reported on an already finished process.
                service.Status = (int)ServiceStatusEnum.Active;
                serviceQuery.Update(service.Id, service);
                processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyActivated_0_Service_1, ServiceTypeEnum.Prc, service.Name));
            }
            catch (Exception ex)
            {
                // Roll back: the service returns to Prepared and a registered PRC is removed.
                var service = serviceQuery.Get(settings.ServiceId);
                service.Status = (int)ServiceStatusEnum.Prepared;
                serviceQuery.Update(service.Id, service);
                if (GlobalStore.ActivatedPrcs.IsExist(settings.ServiceId))
                {
                    GlobalStore.ActivatedPrcs.Remove(settings.ServiceId);
                }

                // A cancellation raised inside the parallel loop usually arrives wrapped in an
                // AggregateException, but it is thrown directly when the loop observes the token
                // itself, so both shapes are treated as a cancellation.
                var wasCancelled = ex is OperationCanceledException || ex.InnerException is OperationCanceledException;
                if (wasCancelled)
                {
                    processHandler.Cancelled(processId);
                }
                else
                {
                    processHandler.Interrupted(processId, ex);
                }
                GC.Collect();
            }
        }