/// <summary>
/// Builds the list of <see cref="EntityInfo"/> projections for the current
/// result page, covering hits from First() through Max(first, hits) inclusive.
/// Returns an empty list when the page is empty.
/// </summary>
private IList<EntityInfo> ExtractEntityInfos(IndexSearcher searcher)
{
    TopDocs topDocs = GetTopDocs(searcher);
    SetResultSize(topDocs);

    int firstIndex = First();
    int lastIndex = Max(firstIndex, topDocs);
    // Clamp to zero: an empty window yields first > last.
    int pageSize = Math.Max(0, lastIndex - firstIndex + 1);

    IList<EntityInfo> entityInfos = new List<EntityInfo>(pageSize);
    if (pageSize == 0)
    {
        return entityInfos;
    }

    DocumentExtractor documentExtractor = new DocumentExtractor(SearchFactory, indexProjection);
    for (int i = firstIndex; i <= lastIndex; i++)
    {
        //TODO use indexSearcher.getIndexReader().document( hits.id(index), FieldSelector(indexProjection) );
        entityInfos.Add(documentExtractor.Extract(topDocs, searcher, i));
    }

    return entityInfos;
}
/// <summary>
/// Handles the crawler's "links found" event: queues documents whose extension
/// we can parse for metadata extraction, and registers every discovered URL
/// under its (possibly newly created) domain entry.
/// </summary>
private void wsSearch_SearcherLinkFoundEvent(object sender, EventsThreads.CollectionFound<Uri> e)
{
    foreach (Uri url in e.Data)
    {
        try
        {
            try
            {
                // Queue the document for size lookup / later metadata
                // extraction when its extension is a supported type.
                string extension = Path.GetExtension(url.AbsolutePath).ToLowerInvariant();
                if (!String.IsNullOrWhiteSpace(extension) && DocumentExtractor.IsSupportedExtension(extension))
                {
                    var fileItem = new FilesItem
                    {
                        Ext = extension,
                        URL = url.ToString(),
                        Downloaded = false,
                        MetadataExtracted = false,
                        Date = DateTime.MinValue,
                        ModifiedDate = DateTime.MinValue,
                        Path = string.Empty,
                        Size = -1
                    };
                    Program.data.files.Items.Add(fileItem);
                    Program.FormMainInstance.treeViewMetadata_UpdateDocumentsNumber();
                    Program.FormMainInstance.panelMetadataSearch.listViewDocuments_Update(fileItem);
                    Program.FormMainInstance.panelMetadataSearch.HttpSizeDaemonInst.AddURL(fileItem);
                }
            }
            catch (Exception)
            {
                // Best-effort: a malformed path must not stop processing this URL.
            }

            // Register the URL under its domain, creating the domain on first sight.
            DomainsItem domain = Program.data.GetDomain(url.Host);
            if (domain == null)
            {
                Program.data.AddDomain(url.Host, "Crawling", Program.cfgCurrent.MaxRecursion, Program.cfgCurrent);
                Program.LogThis(new Log(Log.ModuleType.Crawling, "Domain found: " + url.Host, Log.LogType.medium));
                domain = Program.data.GetDomain(url.Host);
            }
            domain.map.AddUrl(url.ToString());

            if (domain.techAnalysis.domain == null)
            {
                domain.techAnalysis.domain = domain.Domain;
            }
            domain.techAnalysis.eventLinkFoundDetailed(null, new EventsThreads.CollectionFound<Uri>(new List<Uri> { url }));
        }
        catch
        {
            // Swallow per-URL failures so one bad link cannot abort the whole batch.
        }
    }
}
/// <summary>
/// Return an iterator over the results, retrieving each object one by one
/// (initialized during the MoveNext operation).
/// The iterator keeps only the id/class for each hit and loads the object on
/// demand, because the searcher (and hence the hits) cannot be kept open —
/// there is no hook to know when the caller stops using it. A scrollable
/// result would be better in this area.
/// </summary>
public override IEnumerable<T> Enumerable<T>()
{
    using (new SessionIdLoggingContext(Session.SessionId))
    {
        // Find the index directories; no searcher means no results at all.
        IndexSearcher searcher = BuildSearcher();
        if (searcher == null)
        {
            return new IteratorImpl<T>(new List<EntityInfo>(), noLoader).Iterate();
        }

        try
        {
            var topDocs = GetTopDocs(searcher);
            SetResultSize(topDocs);

            int firstIndex = First();
            int lastIndex = Max(firstIndex, topDocs);
            // Clamp to zero: an empty window yields first > last.
            int pageSize = Math.Max(0, lastIndex - firstIndex + 1);

            IList<EntityInfo> entityInfos = new List<EntityInfo>(pageSize);
            DocumentExtractor documentExtractor = new DocumentExtractor(SearchFactory, indexProjection);
            for (int i = firstIndex; i <= lastIndex; i++)
            {
                //TODO use indexSearcher.getIndexReader().document( hits.id(index), FieldSelector(indexProjection) );
                entityInfos.Add(documentExtractor.Extract(topDocs, searcher, i));
            }

            return new IteratorImpl<T>(entityInfos, GetLoader((ISession)Session)).Iterate();
        }
        catch (IOException e)
        {
            throw new HibernateException("Unable to query Lucene index", e);
        }
        finally
        {
            // Runs even on the early return above the catch — the searcher is
            // always released before the lazy iterator is handed to the caller.
            CloseSearcher(searcher);
        }
    }
}