Ejemplo n.º 1
0
 /// <summary>
 /// Schedules a similarity analysis for every dump created at or after <paramref name="timeFrom"/>,
 /// walking the dumps from newest to oldest.
 /// </summary>
 /// <param name="force">
 /// Forwarded unchanged to ScheduleSimilarityAnalysis. When true, analysis is run again even if a result
 /// already exists; when false, only relationships that have not been analyzed yet are processed.
 /// </param>
 /// <param name="timeFrom">Inclusive lower bound on the creation time of the dumps to process.</param>
 public void TriggerSimilarityAnalysisForAllDumps(bool force, DateTime timeFrom)
 {
     Console.WriteLine($"Triggering similarity analysis for all dumps new thatn {timeFrom}. force={force}");

     // Newest dumps come first. Each dump is scheduled with its own creation time as the
     // third argument so that it is only compared against newer dumps; as a result the
     // most recent dumps get compared with each other before anything older is touched.
     var newestFirst =
         from dump in dumpRepo.GetAll()
         where dump.Created >= timeFrom
         orderby dump.Created descending
         select dump;

     foreach (var dump in newestFirst)
     {
         ScheduleSimilarityAnalysis(dump, force, dump.Created);
     }
 }
        /// <summary>
        /// Loads the stored relationships of every dump into <c>relationShips</c>.
        /// Does nothing when similarity detection is disabled in the settings.
        /// </summary>
        /// <remarks>
        /// One task per dump is started via <c>Task.Run</c> and all of them are awaited together.
        /// A missing relationship file is ignored; any other read error is logged and
        /// <c>relationshipStorage.Wipe</c> is called for that dump.
        /// <c>IsPopulated</c> is set to true and the semaphore is released even if loading fails.
        /// </remarks>
        public async Task Populate()
        {
            if (!settings.Value.SimilarityDetectionEnabled)
            {
                return;
            }

            await BlockIfBundleRepoNotReady("RelationshipRepository.Populate");
            await semaphoreSlim.WaitAsync().ConfigureAwait(false);

            var stopwatch = Stopwatch.StartNew();

            try {
                await Task.WhenAll(dumpRepo.GetAll().Select(dumpInfo => Task.Run(async() => {
                    try {
                        relationShips[dumpInfo.Id] = await relationshipStorage.ReadRelationships(dumpInfo.Id);
                    } catch (FileNotFoundException) {
                        // No relationship file stored for this dump; nothing to load.
                    } catch (Exception readError) {
                        Console.WriteLine($"RelationshipRepository.Populate: Error reading relationship file for dump {dumpInfo.Id}: {readError.Message}");
                        relationshipStorage.Wipe(dumpInfo.Id);
                    }
                })));
            } finally {
                // Mark as populated before releasing the semaphore, regardless of the outcome.
                IsPopulated = true;
                semaphoreSlim.Release();
            }

            stopwatch.Stop();
            Console.WriteLine($"Finished populating RelationshipRepository in {stopwatch.Elapsed}");
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds a view model for every dump in the repository, applies <c>SimpleFilter</c> with
        /// <paramref name="searchFilter"/> and returns the matches ordered by creation time, newest first.
        /// </summary>
        /// <param name="searchFilter">Filter text handed to <c>SimpleFilter</c>.</param>
        /// <param name="includeSimilarities">
        /// When true, <c>similarityService</c> is passed to <c>ToDumpViewModel</c>; when false, null is passed instead.
        /// </param>
        /// <returns>The filtered view models, ordered by <c>DumpInfo.Created</c> descending.</returns>
        public async Task <IOrderedEnumerable <DumpViewModel> > SearchBySimpleFilter(string searchFilter, bool includeSimilarities = true)
        {
            // One conversion task per dump; they are all awaited together below.
            var conversions = dumpRepo.GetAll().Select(
                dumpInfo => ToDumpViewModel(dumpInfo, dumpRepo, bundleRepo, includeSimilarities ? similarityService : null));

            var viewModels = await Task.WhenAll(conversions);

            return SimpleFilter(searchFilter, viewModels).OrderByDescending(viewModel => viewModel.DumpInfo.Created);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads the stored relationships of every dump, one after another, into <c>relationShips</c>
        /// while holding <c>semaphoreSlim</c>.
        /// </summary>
        /// <remarks>
        /// A missing relationship file is ignored. Any other read error is logged and
        /// <c>relationshipStorage.Wipe</c> is called for that dump. The semaphore is always released.
        /// </remarks>
        public async Task Populate()
        {
            await semaphoreSlim.WaitAsync().ConfigureAwait(false);

            try {
                foreach (var dumpInfo in dumpRepo.GetAll())
                {
                    try {
                        var loaded = await relationshipStorage.ReadRelationships(dumpInfo.Id);
                        relationShips[dumpInfo.Id] = loaded;
                    } catch (FileNotFoundException) {
                        // No relationship file stored for this dump; nothing to load.
                    } catch (Exception readError) {
                        Console.WriteLine($"error reading relationship file: {readError}");
                        relationshipStorage.Wipe(dumpInfo.Id);
                    }
                }
            } finally {
                semaphoreSlim.Release();
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Pushes dump analysis results into Elasticsearch.
        /// </summary>
        /// <param name="clean">
        /// When true, the index is deleted and re-created and all results are pushed in bulk batches of 100 dumps.
        /// When false, only dumps whose document id ("bundleId/dumpId") is not yet known to the index are pushed,
        /// one at a time.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the elastic client has not been initialized, or when the bundle repository returns null
        /// in the non-clean path.
        /// </exception>
        public async Task PushAllResultsAsync(bool clean)
        {
            if (elasticClient == null)
            {
                throw new InvalidOperationException("ElasticSearch has not been initialized! Please verify that the settings specify a correct elastic search host.");
            }

            await BlockIfBundleRepoNotReady("ElasticSearchService.PushAllResultsAsync");

            if (clean)
            {
                DeleteIndex();
                CreateIndex();

                // since we are clean, we can do everything in one bulk
                var dumps = dumpRepo.GetAll().OrderByDescending(x => x.Created);
                foreach (var dumpsBatch in dumps.Batch(100))
                {
                    var tasks = dumpsBatch.Select(x => Task.Run(async() => new { res = await dumpRepo.GetResult(x.Id), bundleInfo = bundleRepo.Get(x.BundleId), dumpInfo = x }));

                    // Materialize once: the results are both counted for the log line and projected below.
                    var results = (await Task.WhenAll(tasks)).Where(x => x.res != null).ToList();

                    Console.WriteLine($"pushing {results.Count} results into elasticsearch");
                    var sdResults = results.Select(x => ElasticSDResult.FromResultOrDefault(x.res, x.bundleInfo, x.dumpInfo, pathHelper)).Where(x => x != null);
                    await PushBulk(sdResults);
                }
                return;
            }

            IEnumerable <string> documentIds = GetAllDocumentIds();

            int nErrorsLogged = 0;
            var bundles       = bundleRepo.GetAll();

            if (bundles == null)
            {
                throw new InvalidOperationException("Bundle repository must be populated before pushing data into ES.");
            }

            // Snapshot the known document ids into a set once. Looking them up through the raw
            // IEnumerable would scan (and, for a lazy source, re-evaluate) the whole sequence per dump.
            var knownDocumentIds = new HashSet <string>(documentIds);

            // In order to check if a dump has already been added, we go through them all and add one at the time
            // There is potential to optimize this and still do a bulk add.
            foreach (BundleMetainfo bundle in bundles)
            {
                var dumps = dumpRepo.Get(bundle.BundleId);
                if (dumps == null)
                {
                    continue;
                }
                foreach (DumpMetainfo dump in dumps)
                {
                    if (knownDocumentIds.Contains(bundle.BundleId + "/" + dump.DumpId))
                    {
                        // Already indexed; nothing to do for this dump.
                        continue;
                    }
                    SDResult result = await dumpRepo.GetResult(dump.Id);

                    if (result != null)
                    {
                        bool success = await PushResultAsync(result, bundle, dump);

                        // Cap the number of logged failures so a broken index does not flood the console.
                        if (!success && nErrorsLogged < 20)
                        {
                            Console.WriteLine($"Failed to create document for {dump.BundleId}/{dump.DumpId}");
                            nErrorsLogged++;
                        }
                    }
                }
            }
        }