/// <summary>
/// Builds the list of <see cref="EntityInfo"/> projections for the current
/// result page, covering hits from First() through Max(first, hits) inclusive.
/// Returns an empty list when the page is empty.
/// </summary>
private IList<EntityInfo> ExtractEntityInfos(IndexSearcher searcher)
{
    TopDocs topDocs = GetTopDocs(searcher);
    SetResultSize(topDocs);

    int firstIndex = First();
    int lastIndex = Max(firstIndex, topDocs);
    // Clamp to zero: an empty window yields first > last.
    int pageSize = Math.Max(0, lastIndex - firstIndex + 1);

    IList<EntityInfo> entityInfos = new List<EntityInfo>(pageSize);
    if (pageSize == 0)
    {
        return entityInfos;
    }

    DocumentExtractor documentExtractor = new DocumentExtractor(SearchFactory, indexProjection);
    for (int i = firstIndex; i <= lastIndex; i++)
    {
        //TODO use indexSearcher.getIndexReader().document( hits.id(index), FieldSelector(indexProjection) );
        entityInfos.Add(documentExtractor.Extract(topDocs, searcher, i));
    }

    return entityInfos;
}
/// <summary>
/// Handles the crawler's "links found" event: queues documents whose extension
/// we can parse for metadata extraction, and registers every discovered URL
/// under its (possibly newly created) domain entry.
/// </summary>
private void wsSearch_SearcherLinkFoundEvent(object sender, EventsThreads.CollectionFound<Uri> e)
{
    foreach (Uri url in e.Data)
    {
        try
        {
            try
            {
                // Queue the document for size lookup / later metadata
                // extraction when its extension is a supported type.
                string extension = Path.GetExtension(url.AbsolutePath).ToLowerInvariant();
                if (!String.IsNullOrWhiteSpace(extension) && DocumentExtractor.IsSupportedExtension(extension))
                {
                    var fileItem = new FilesItem
                    {
                        Ext = extension,
                        URL = url.ToString(),
                        Downloaded = false,
                        MetadataExtracted = false,
                        Date = DateTime.MinValue,
                        ModifiedDate = DateTime.MinValue,
                        Path = string.Empty,
                        Size = -1
                    };
                    Program.data.files.Items.Add(fileItem);
                    Program.FormMainInstance.treeViewMetadata_UpdateDocumentsNumber();
                    Program.FormMainInstance.panelMetadataSearch.listViewDocuments_Update(fileItem);
                    Program.FormMainInstance.panelMetadataSearch.HttpSizeDaemonInst.AddURL(fileItem);
                }
            }
            catch (Exception)
            {
                // Best-effort: a malformed path must not stop processing this URL.
            }

            // Register the URL under its domain, creating the domain on first sight.
            DomainsItem domain = Program.data.GetDomain(url.Host);
            if (domain == null)
            {
                Program.data.AddDomain(url.Host, "Crawling", Program.cfgCurrent.MaxRecursion, Program.cfgCurrent);
                Program.LogThis(new Log(Log.ModuleType.Crawling, "Domain found: " + url.Host, Log.LogType.medium));
                domain = Program.data.GetDomain(url.Host);
            }
            domain.map.AddUrl(url.ToString());

            if (domain.techAnalysis.domain == null)
            {
                domain.techAnalysis.domain = domain.Domain;
            }
            domain.techAnalysis.eventLinkFoundDetailed(null, new EventsThreads.CollectionFound<Uri>(new List<Uri> { url }));
        }
        catch
        {
            // Swallow per-URL failures so one bad link cannot abort the whole batch.
        }
    }
}
/// <summary>
/// Return an iterator over the results, retrieving each object one by one
/// (initialized during the MoveNext operation).
/// The iterator keeps only the id/class for each hit and loads the object on
/// demand, because the searcher (and hence the hits) cannot be kept open —
/// there is no hook to know when the caller stops using it. A scrollable
/// result would be better in this area.
/// </summary>
public override IEnumerable<T> Enumerable<T>()
{
    using (new SessionIdLoggingContext(Session.SessionId))
    {
        // Find the index directories; no searcher means no results at all.
        IndexSearcher searcher = BuildSearcher();
        if (searcher == null)
        {
            return new IteratorImpl<T>(new List<EntityInfo>(), noLoader).Iterate();
        }

        try
        {
            var topDocs = GetTopDocs(searcher);
            SetResultSize(topDocs);

            int firstIndex = First();
            int lastIndex = Max(firstIndex, topDocs);
            // Clamp to zero: an empty window yields first > last.
            int pageSize = Math.Max(0, lastIndex - firstIndex + 1);

            IList<EntityInfo> entityInfos = new List<EntityInfo>(pageSize);
            DocumentExtractor documentExtractor = new DocumentExtractor(SearchFactory, indexProjection);
            for (int i = firstIndex; i <= lastIndex; i++)
            {
                //TODO use indexSearcher.getIndexReader().document( hits.id(index), FieldSelector(indexProjection) );
                entityInfos.Add(documentExtractor.Extract(topDocs, searcher, i));
            }

            return new IteratorImpl<T>(entityInfos, GetLoader((ISession)Session)).Iterate();
        }
        catch (IOException e)
        {
            throw new HibernateException("Unable to query Lucene index", e);
        }
        finally
        {
            // Runs even on the early return above the catch — the searcher is
            // always released before the lazy iterator is handed to the caller.
            CloseSearcher(searcher);
        }
    }
}