Exemplo n.º 1
0
        /// <summary>
        /// Runs a Lucene query (default field "content") against the library index and groups
        /// the hits by document fingerprint, recording the page and score of every hit.
        /// </summary>
        /// <param name="query">Query text handed to the Lucene <c>QueryParser</c>.</param>
        /// <returns>
        /// One <c>IndexPageResult</c> per distinct fingerprint, in the order each fingerprint was
        /// first encountered. The top-level score is the score of the first hit seen for that
        /// fingerprint; every hit (including the first) contributes a <c>PageResult</c>.
        /// If anything throws, a warning is logged and the results gathered so far are returned.
        /// </returns>
        public List <IndexPageResult> GetDocumentPagesWithQuery(string query)
        {
            List <IndexPageResult> results = new List <IndexPageResult>();
            Dictionary <string, IndexPageResult> results_by_fingerprint = new Dictionary <string, IndexPageResult>();

            try
            {
                using (IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
                {
                    using (IndexSearcher index_searcher = new IndexSearcher(index_reader))
                    {
                        QueryParser query_parser = new QueryParser(Version.LUCENE_29, "content", analyzer);
                        Query       parsed_query = query_parser.Parse(query);

                        Lucene.Net.Search.Hits hits = index_searcher.Search(parsed_query);

                        for (var hit_iterator = hits.Iterator(); hit_iterator.MoveNext();)
                        {
                            Hit    hit         = (Hit)hit_iterator.Current;
                            string fingerprint = hit.Get("fingerprint");
                            int    page_number = Convert.ToInt32(hit.Get("page"));
                            double score       = hit.GetScore();

                            // Fetch the per-document record, creating it on first sight of this fingerprint
                            IndexPageResult document_result;
                            if (!results_by_fingerprint.TryGetValue(fingerprint, out document_result))
                            {
                                document_result = new IndexPageResult {
                                    fingerprint = fingerprint, score = score
                                };

                                results.Add(document_result);
                                results_by_fingerprint[fingerprint] = document_result;
                            }

                            // Every hit contributes a page-level record
                            document_result.page_results.Add(new PageResult {
                                page = page_number, score = score
                            });
                        }

                        // Close the searcher explicitly once all hits have been consumed
                        index_searcher.Close();
                    }

                    // Close the reader explicitly on the success path
                    index_reader.Close();
                }
            }
            catch (Exception ex)
            {
                Logging.Warn(ex, "GetDocumentPagesWithQuery: There was a problem opening the index file for searching.");
            }

            return(results);
        }
        /// <summary>
        /// Finds documents in the library index whose "content" field resembles the text of the
        /// given file, using a <c>LuceneMoreLikeThis</c> query built from the file's contents.
        /// </summary>
        /// <param name="document_filename">Path of the file whose text is used as the comparison document.</param>
        /// <returns>
        /// The "fingerprint" field of every hit, in hit order. Duplicates are not removed.
        /// </returns>
        /// <remarks>
        /// Exceptions (for example from opening the index or the file) are not caught here and
        /// propagate to the caller. The index reader, the searcher and the file reader are
        /// released even when that happens.
        /// </remarks>
        public List <string> GetDocumentsSimilarToDocument(string document_filename)
        {
            List <string> fingerprints = new List <string>();

            using (IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
            {
                using (IndexSearcher index_searcher = new IndexSearcher(index_reader))
                {
                    LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(index_reader);

                    mlt.SetFieldNames(new string[] { "content" });
                    mlt.SetMinTermFreq(0);

                    // Keep the file open until every hit has been consumed, then release the handle
                    using (StreamReader document_reader = new StreamReader(document_filename))
                    {
                        Query query = mlt.Like(document_reader);
                        Hits  hits  = index_searcher.Search(query);
                        var   i     = hits.Iterator();

                        while (i.MoveNext())
                        {
                            Hit    hit         = (Hit)i.Current;
                            string fingerprint = hit.Get("fingerprint");
                            fingerprints.Add(fingerprint);
                        }
                    }

                    // Close the index (the enclosing using blocks cover the exceptional path)
                    index_searcher.Close();
                }
                index_reader.Close();
            }

            return(fingerprints);
        }
        /// <summary>
        /// Searches the library index with a query written in the Lucene query syntax
        /// (default field "content") and returns one result per distinct document fingerprint.
        /// </summary>
        /// <param name="query">Query text handed to the Lucene <c>QueryParser</c>.</param>
        /// <returns>
        /// An <c>IndexResult</c> for each distinct fingerprint, in the order first encountered,
        /// carrying the score of the first hit seen for that fingerprint. If anything throws
        /// (for example a query that fails to parse), a warning is logged and the results
        /// gathered so far are returned.
        /// </returns>
        public List <IndexResult> GetDocumentsWithQuery(string query)
        {
            List <IndexResult> fingerprints = new List <IndexResult>();
            HashSet <string>   fingerprints_already_seen = new HashSet <string>();

            try
            {
                // The using blocks release the reader and searcher even when parsing or searching throws
                using (IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
                {
                    using (IndexSearcher index_searcher = new IndexSearcher(index_reader))
                    {
                        QueryParser query_parser = new QueryParser(Version.LUCENE_29, "content", analyzer);

                        Query query_object = query_parser.Parse(query);
                        Hits  hits         = index_searcher.Search(query_object);

                        var i = hits.Iterator();
                        while (i.MoveNext())
                        {
                            Hit    hit         = (Hit)i.Current;
                            string fingerprint = hit.Get("fingerprint");

                            // HashSet.Add returns false for a fingerprint we have already recorded
                            if (fingerprints_already_seen.Add(fingerprint))
                            {
                                IndexResult index_result = new IndexResult {
                                    fingerprint = fingerprint, score = hit.GetScore()
                                };
                                fingerprints.Add(index_result);
                            }
                        }

                        // Close the index
                        index_searcher.Close();
                    }
                    index_reader.Close();
                }
            }
            catch (Exception ex)
            {
                Logging.Warn(ex, "There was a problem opening the index file for searching.");
            }

            return(fingerprints);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Collects the fingerprints of all indexed documents whose "content" field contains the given word.
        /// </summary>
        /// <param name="keyword">
        /// Word to look up. It is first passed through <c>ReasonableWord.MakeReasonableWord</c>,
        /// and the returned value is what is actually searched for.
        /// </param>
        /// <returns>
        /// The set of matching fingerprints. It is empty when <c>MakeReasonableWord</c> returns null,
        /// and may be partial when an exception occurs (the exception is logged as a warning, not rethrown).
        /// </returns>
        public HashSet <string> GetDocumentsWithWord(string keyword)
        {
            HashSet <string> matching_fingerprints = new HashSet <string>();

            try
            {
                string search_word = ReasonableWord.MakeReasonableWord(keyword);
                if (null == search_word)
                {
                    // Nothing searchable was produced from the keyword
                    return(matching_fingerprints);
                }

                // NOTE: a quick pre-check for segments files (to avoid IndexReader.Open throwing many
                // exceptions in a very tight loop) was added by Nik and is disabled until the underlying
                // problem is understood:
                //var segments_files = Directory.GetFiles(LIBRARY_INDEX_BASE_PATH, "segments*", SearchOption.AllDirectories);
                //if (segments_files.Length <= 0)
                //{
                //    Logging.Debug("No index segments files found");
                //    return fingerprints;
                //}

                using (IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
                {
                    using (IndexSearcher index_searcher = new IndexSearcher(index_reader))
                    {
                        Term      content_term = new Term("content", search_word);
                        TermQuery term_query   = new TermQuery(content_term);
                        Hits      hits         = index_searcher.Search(term_query);

                        for (var hit_iterator = hits.Iterator(); hit_iterator.MoveNext();)
                        {
                            Hit hit = (Hit)hit_iterator.Current;
                            matching_fingerprints.Add(hit.Get("fingerprint"));
                        }

                        // Close the searcher explicitly once all hits have been consumed
                        index_searcher.Close();
                    }

                    // Close the reader explicitly on the success path
                    index_reader.Close();
                }
            }
            catch (Exception ex)
            {
                Logging.Warn(ex, "GetDocumentsWithWord: There was a problem opening the index file for searching.");
            }

            return(matching_fingerprints);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Finds documents in the library index whose "content" field resembles the text of the
        /// given file, using a <c>LuceneMoreLikeThis</c> query built from the file's contents.
        /// </summary>
        /// <param name="document_filename">Path of the file whose text is used as the comparison document.</param>
        /// <returns>
        /// The "fingerprint" field of every hit, in hit order. Duplicates are not removed.
        /// If anything throws (for example the file or the index cannot be opened), a warning is
        /// logged and the fingerprints gathered so far are returned.
        /// </returns>
        public List <string> GetDocumentsSimilarToDocument(string document_filename)
        {
            List <string> fingerprints = new List <string>();

            try
            {
                using (IndexReader index_reader = IndexReader.Open(LIBRARY_INDEX_BASE_PATH, true))
                {
                    using (IndexSearcher index_searcher = new IndexSearcher(index_reader))
                    {
                        LuceneMoreLikeThis mlt = new LuceneMoreLikeThis(index_reader);
                        mlt.SetFieldNames(new string[] { "content" });
                        mlt.SetMinTermFreq(0);

                        // Dispose the file reader when done so the file handle is not left open;
                        // it stays alive until every hit has been consumed.
                        using (StreamReader document_reader = new StreamReader(document_filename))
                        {
                            Query query = mlt.Like(document_reader);
                            Hits  hits  = index_searcher.Search(query);
                            var   i     = hits.Iterator();
                            while (i.MoveNext())
                            {
                                Hit    hit         = (Hit)i.Current;
                                string fingerprint = hit.Get("fingerprint");
                                fingerprints.Add(fingerprint);
                            }
                        }

                        // Close the index
                        index_searcher.Close();
                    }
                    index_reader.Close();
                }
            }
            catch (Exception ex)
            {
                Logging.Warn(ex, "GetDocumentsSimilarToDocument: There was a problem opening the index file for searching.");
            }

            return(fingerprints);
        }