/*
         * public void testTermRepeatedQuery() throws IOException, ParseException {
         * // TODO: this corner case yields different results.
         * checkQuery("multi* multi* foo");
         * }
         */

        /// <summary>
        /// Verifies that a query produces the same hits, position by position,
        /// whether it is run against the single IndexSearcher or against the
        /// MultiSearcher: equal hit counts, scores equal within 0.001 and equal
        /// FIELD_NAME values.
        /// </summary>
        /// <param name="queryStr">Query text, parsed against FIELD_NAME with a StandardAnalyzer.</param>
        private void  CheckQuery(System.String queryStr)
        {
            if (verbose)
            {
                System.Console.Out.WriteLine("Query: " + queryStr);
            }

            Query parsed = new QueryParser(FIELD_NAME, new StandardAnalyzer()).Parse(queryStr);

            ScoreDoc[] fromMulti  = multiSearcher.Search(parsed, null, 1000).scoreDocs;
            ScoreDoc[] fromSingle = singleSearcher.Search(parsed, null, 1000).scoreDocs;
            Assert.AreEqual(fromMulti.Length, fromSingle.Length);

            for (int rank = 0; rank < fromMulti.Length; rank++)
            {
                ScoreDoc multiHit  = fromMulti[rank];
                ScoreDoc singleHit = fromSingle[rank];

                Document multiDoc  = multiSearcher.Doc(multiHit.doc);
                Document singleDoc = singleSearcher.Doc(singleHit.doc);

                if (verbose)
                {
                    // Dump both sides of the comparison before asserting on them.
                    System.Console.Out.WriteLine("Multi:  " + multiDoc.Get(FIELD_NAME) + " score=" + multiHit.score);
                    System.Console.Out.WriteLine("Single: " + singleDoc.Get(FIELD_NAME) + " score=" + singleHit.score);
                }

                Assert.AreEqual(multiHit.score, singleHit.score, 0.001f);
                Assert.AreEqual(multiDoc.Get(FIELD_NAME), singleDoc.Get(FIELD_NAME));
            }

            if (verbose)
            {
                System.Console.Out.WriteLine();
            }
        }
Example #2
0
        /// <summary>
        /// Parses <paramref name="queryText"/> against the "body" field and returns
        /// one <c>SourceFile</c> per matching document.
        /// </summary>
        /// <param name="queryText">The query text.</param>
        /// <returns>
        /// The matching files, built from the stored "id", "type", "name" and
        /// "fileName" fields; the last constructor argument is always null.
        /// </returns>
        public SourceFile[] Search(string queryText)
        {
            var queryParser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "body", _analyzer);
            Lucene.Net.Search.Query parsedQuery = queryParser.Parse(queryText);

            using (var indexSearcher = new Lucene.Net.Search.IndexSearcher(_directory, true))
            {
                Lucene.Net.Search.ScoreDoc[] hits = indexSearcher.Search(parsedQuery, int.MaxValue).ScoreDocs;

                var matches = new List <SourceFile>();
                for (int i = 0; i < hits.Length; i++)
                {
                    Lucene.Net.Documents.Document stored = indexSearcher.Doc(hits[i].Doc);

                    string id       = stored.Get("id");
                    string type     = stored.Get("type");
                    string name     = stored.Get("name");
                    string fileName = stored.Get("fileName");

                    matches.Add(new SourceFile(id, type, name, fileName, null));
                }

                return matches.ToArray();
            }
        }
Example #3
0
 /// <summary>
 /// Copies the stored "SpecialtyId" and "Name" fields of a Lucene document
 /// onto this instance. A null document leaves the instance untouched.
 /// </summary>
 /// <param name="doc">Document to read from; may be null.</param>
 public void SetProperty(Lucene.Net.Documents.Document doc)
 {
     if (doc == null)
     {
         return;
     }

     string rawSpecialtyId = doc.Get("SpecialtyId");
     this.SpecialtyId = Convert.ToInt32(rawSpecialtyId);
     this.Name = doc.Get("Name");
 }
Example #4
0
 /// <summary>
 /// Builds a <c>Message</c> from a Lucene document: "MessageId" is converted
 /// to int via the ToType extension, "Message" is copied as-is.
 /// </summary>
 /// <param name="doc">Document holding the stored message fields.</param>
 private Message MapLuceneData(Lucene.Net.Documents.Document doc)
 {
     Message mapped = new Message();
     mapped.ID          = doc.Get("MessageId").ToType <int>();
     mapped.MessageText = doc.Get("Message");
     return mapped;
 }
Example #5
0
 /// <summary>
 /// Copies the stored university fields of a Lucene document onto this
 /// instance. A null document leaves the instance untouched.
 /// </summary>
 /// <param name="doc">Document to read from; may be null.</param>
 public void SetProperty(Lucene.Net.Documents.Document doc)
 {
     if (doc == null)
     {
         return;
     }

     this.UniversityId = Convert.ToInt32(doc.Get("UniversityId"));

     // NOTE(review): Name is filled from the "CnName" field, exactly like CnName.
     // This may be a copy-paste slip for "Name" - confirm against the index schema.
     string chineseName = doc.Get("CnName");
     this.Name   = chineseName;
     this.CnName = chineseName;

     this.Pinyin = doc.Get("Pinyin");
 }
Example #6
0
        /// <summary>
        /// Runs a query over the "title" and "shortDescription" fields of either
        /// the public or the komin index and maps the best hits to
        /// <c>indexVideo</c> objects.
        /// </summary>
        /// <param name="queryString">User query; a trailing "~" is appended before parsing. Null or empty yields an empty list.</param>
        /// <param name="numberOfResults">Maximum number of hits to collect.</param>
        /// <param name="komin">True to search <c>kominIndex</c>, false to search <c>publicIndex</c>.</param>
        /// <returns>The matching videos in collector order; empty when nothing matches or the search fails.</returns>
        public List <indexVideo> searchArticles(string queryString, int numberOfResults, bool komin)
        {
            List <indexVideo> resultsList = new List <indexVideo>();

            if (string.IsNullOrEmpty(queryString))
            {
                return(resultsList);
            }

            Lucene.Net.Store.Directory index = publicIndex;
            if (komin)
            {
                index = kominIndex;
            }

            Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new[] { "title", "shortDescription" }, analyser);
            try
            {
                Lucene.Net.Search.Query query = parser.Parse(queryString + "~");

                // The searcher opens its own reader on the directory; dispose it
                // so the reader is released after every call.
                using (Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(index, true))
                {
                    Lucene.Net.Search.TopScoreDocCollector collector = Lucene.Net.Search.TopScoreDocCollector.Create(numberOfResults, true);
                    searcher.Search(query, collector);

                    foreach (Lucene.Net.Search.ScoreDoc hit in collector.TopDocs().ScoreDocs)
                    {
                        Lucene.Net.Documents.Document doc = searcher.Doc(hit.Doc);

                        indexVideo video = new indexVideo();
                        video.bctid            = doc.Get("bctid");
                        video.title            = doc.Get("title");
                        video.score            = hit.Score;
                        video.shortDescription = doc.Get("shortDescription");
                        video.imageURL         = doc.Get("imageURL");

                        resultsList.Add(video);
                    }
                }
            }
            catch (Exception e)
            {
                // The search stays best-effort (callers still get whatever was
                // collected), but the failure is traced instead of vanishing.
                System.Diagnostics.Trace.TraceError("searchArticles failed for query '{0}': {1}", queryString, e);
            }

            return(resultsList);
        }
        /* Opens an existing (pre-lockless) index WITHOUT adding documents:
         * searches it, performs a delete and a setNorm, optimizes it, and
         * re-checks the search results after each modification.
         *
         * dirName    - index directory name, resolved through FullDir.
         * autoCommit - passed through to the IndexWriter used for Optimize. */
        public virtual void  ChangeIndexNoAdds(System.String dirName, bool autoCommit)
        {
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.FileInfo(dirName));

            // make sure searching sees right # hits
            IndexSearcher searcher = new IndexSearcher(dir);

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length, "wrong number of hits");
            Document d = searcher.Doc(hits[0].doc);

            Assert.AreEqual("21", d.Get("id"), "wrong first document");
            searcher.Close();

            // make sure we can do a delete & setNorm against this
            // pre-lockless segment:
            IndexReader reader     = IndexReader.Open(dir);
            Term        searchTerm = new Term("id", "6");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "wrong delete count");
            reader.SetNorm(22, "content", (float)2.0);
            reader.Close();

            // make sure they "took": one hit fewer, and the first hit is now
            // the document with id "22"
            searcher = new IndexSearcher(dir);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(33, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 33, searcher.GetIndexReader());
            searcher.Close();

            // optimize (the writer is opened with create == false, so the
            // existing index is kept)
            IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);

            writer.Optimize();
            writer.Close();

            // the same expectations must still hold after optimizing
            searcher = new IndexSearcher(dir);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(33, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 33, searcher.GetIndexReader());
            searcher.Close();

            dir.Close();
        }
Example #8
0
        /// <summary>
        /// Constructs a search hit: loads the id and date from the index document,
        /// reads the article header from the externally stored ".gz" file and
        /// builds a short body excerpt around the keyword position.
        /// </summary>
        /// <param name="doc">The index document.</param>
        /// <param name="offset">Position of the keyword within the body text.</param>
        internal Hit(Lucene.Net.Documents.Document doc, int offset)
        {
            base.id        = doc.Get("id");
            base.lastIndex = Lucene.Net.Documents.DateField.StringToDate(doc.Get("date"));

            // Open the externally stored document entity. The sub-folder name is
            // ceil(id / 10000), formatted with "f0".
            StoreReader story = new StoreReader(Directorys.StoreDirectory + Math.Ceiling(Double.Parse(base.id) / 10000D).ToString("f0") + @"\" + base.id + ".gz");

            // Read the saved article header: five lines, in this order.
            base.author = story.ReadLine();
            base.cat    = story.ReadLine();
            base.tag    = story.ReadLine();
            base.title  = story.ReadLine();
            base.path   = story.ReadLine();

            int readed = 0;

            int len = 126;// length of the displayed excerpt

            char[] block = new char[offset + len];

            // Read the body up to len characters past the keyword.
            // NOTE(review): presumably ReadBlock returns the number of characters
            // actually read - confirm against StoreReader.
            readed = story.ReadBlock(block, 0, block.Length);

            // NOTE(review): the reader is not closed if any call above throws.
            story.Close();

            int index = offset;

            // If the keyword is not near the end of the text (the buffer was filled
            // completely), start the excerpt right after the punctuation mark that
            // precedes the keyword; otherwise take the last len characters read.
            if (readed == block.Length)
            {
                UnicodeCategory category;
                // Walk backwards from the keyword; stops at index 0 if no
                // OtherPunctuation character is found.
                for (; index > 0; index--)
                {
                    category = Char.GetUnicodeCategory(Char.ToLower(block[index]));
                    if (category == UnicodeCategory.OtherPunctuation)
                    {
                        index += 1;
                        break;
                    }
                }
            }
            else
            {
                index = Math.Max(0, readed - len);
            }

            // Append "..." when the excerpt does not reach the end of what was read.
            // NOTE(review): in the else-branch above, readed >= index + len also holds
            // whenever readed >= len, so "..." is appended although the excerpt is the
            // tail of the text - confirm this is intended.
            base.body = (new String(block, index, Math.Min(len - 1, readed))) + ((readed >= index + len) ? "..." : "");
        }
        /// <summary>
        /// Checks that a MultiSearcher honours a SetBasedFieldSelector: a document
        /// loaded with a selector exposes only the selected field ("other" for the
        /// first hit, "contents" for the second).
        /// </summary>
        public virtual void  TestFieldSelector()
        {
            Query termQuery = new TermQuery(new Term("contents", "doc0"));

            // Two separate indexes: the first holds documents with a single token
            // ("doc0", "doc1", ...), the second adds the extra token "x" to each.
            RAMDirectory firstDir  = new RAMDirectory();
            RAMDirectory secondDir = new RAMDirectory();
            InitIndex(firstDir, 10, true, null);
            InitIndex(secondDir, 10, true, "x");

            IndexSearcher firstSearcher  = new IndexSearcher(firstDir, true);
            IndexSearcher secondSearcher = new IndexSearcher(secondDir, true);

            MultiSearcher multi = GetMultiSearcherInstance(new Searcher[] { firstSearcher, secondSearcher });
            Assert.IsTrue(multi != null, "searcher is null and it shouldn't be");

            ScoreDoc[] found = multi.Search(termQuery, null, 1000).ScoreDocs;
            Assert.IsTrue(found != null, "hits is null and it shouldn't be");
            Assert.IsTrue(found.Length == 2, found.Length + " does not equal: " + 2);

            // Without a selector both fields (contents and other) are loaded.
            Document loaded = multi.Doc(found[0].Doc);
            Assert.IsTrue(loaded != null, "document is null and it shouldn't be");
            int fieldCount = loaded.GetFields().Count;
            Assert.IsTrue(fieldCount == 2, "document.getFields() Size: " + fieldCount + " is not: " + 2);

            // Load only "other" from the first hit.
            ISet <string> fieldsToLoad = Support.Compatibility.SetFactory.CreateHashSet <string>();
            fieldsToLoad.Add("other");
            SetBasedFieldSelector selector = new SetBasedFieldSelector(fieldsToLoad, Support.Compatibility.SetFactory.CreateHashSet <string>());

            loaded = multi.Doc(found[0].Doc, selector);
            Assert.IsTrue(loaded != null, "document is null and it shouldn't be");
            fieldCount = loaded.GetFields().Count;
            Assert.IsTrue(fieldCount == 1, "document.getFields() Size: " + fieldCount + " is not: " + 1);

            System.String storedValue = loaded.Get("contents");
            Assert.IsTrue(storedValue == null, "value is not null and it should be");
            storedValue = loaded.Get("other");
            Assert.IsTrue(storedValue != null, "value is null and it shouldn't be");

            // Load only "contents" from the second hit.
            fieldsToLoad.Clear();
            fieldsToLoad.Add("contents");
            selector = new SetBasedFieldSelector(fieldsToLoad, Support.Compatibility.SetFactory.CreateHashSet <string>());
            loaded   = multi.Doc(found[1].Doc, selector);

            storedValue = loaded.Get("contents");
            Assert.IsTrue(storedValue != null, "value is null and it shouldn't be");
            storedValue = loaded.Get("other");
            Assert.IsTrue(storedValue == null, "value is not null and it should be");
        }
Example #10
0
        /// <summary>
        /// End-to-end demo: indexes one document in an in-memory directory,
        /// searches for the word "text" and checks that the single hit returns
        /// the original stored text.
        /// </summary>
        public virtual void  TestDemo_Renamed()
        {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

            // Store the index in memory:
            Directory directory = new RAMDirectory();
            // To store an index on disk, use this instead:
            //Directory directory = FSDirectory.open("/tmp/testindex");
            IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
            Document    doc     = new Document();

            System.String text = "This is the text to be indexed.";
            doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
            iwriter.AddDocument(doc);
            iwriter.Close();

            // Now search the index:
            IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
            // Parse a simple query that searches for "text":
            QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fieldname", analyzer);
            Query       query  = parser.Parse("text");

            ScoreDoc[] hits = isearcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            // Iterate through the results:
            for (int i = 0; i < hits.Length; i++)
            {
                Document hitDoc = isearcher.Doc(hits[i].Doc);
                // Expected value goes first so a failure message reports the two
                // sides correctly; compare against the text that was indexed
                // instead of a second copy of the literal.
                Assert.AreEqual(text, hitDoc.Get("fieldname"));
            }
            isearcher.Close();
            directory.Close();
        }
Example #11
0
 /// <summary>
 /// Runs <paramref name="query"/> against both the parallel and the single
 /// searcher and asserts that the hit counts match and that, position by
 /// position, the scores (within 0.001) and the f1..f4 field values match.
 /// </summary>
 private void  QueryTest(Query query)
 {
     ScoreDoc[] fromParallel = parallel.Search(query, null, 1000).scoreDocs;
     ScoreDoc[] fromSingle   = single.Search(query, null, 1000).scoreDocs;
     Assert.AreEqual(fromParallel.Length, fromSingle.Length);

     System.String[] comparedFields = new System.String[] { "f1", "f2", "f3", "f4" };
     for (int rank = 0; rank < fromParallel.Length; rank++)
     {
         Assert.AreEqual(fromParallel[rank].score, fromSingle[rank].score, 0.001f);

         Document parallelDoc = parallel.Doc(fromParallel[rank].doc);
         Document singleDoc   = single.Doc(fromSingle[rank].doc);
         foreach (System.String fieldName in comparedFields)
         {
             Assert.AreEqual(parallelDoc.Get(fieldName), singleDoc.Get(fieldName));
         }
     }
 }
Example #12
0
        /// <summary>
        /// Searches for "Document" sorted by DATE_TIME_FIELD as a string in
        /// reverse order and expects the TEXT_FIELD values "Document 5" down to
        /// "Document 1".
        /// </summary>
        public virtual void  TestReverseDateSort()
        {
            IndexSearcher searcher = new IndexSearcher(directory, true, null);

            // Third SortField argument: reverse == true.
            Sort reverseByDate = new Sort(new SortField(DATE_TIME_FIELD, SortField.STRING, true));
            Query parsed = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, new WhitespaceAnalyzer()).Parse("Document");

            // Collect the text of every hit in the order the search returned it.
            System.String[] actualOrder = new System.String[5];
            ScoreDoc[]      found       = searcher.Search(parsed, null, 1000, reverseByDate, null).ScoreDocs;
            int position = 0;
            foreach (ScoreDoc hit in found)
            {
                actualOrder[position] = searcher.Doc(hit.Doc, null).Get(TEXT_FIELD, null);
                position++;
            }
            searcher.Close();

            // Expected order is Document 5, 4, 3, 2, 1.
            System.String[] expectedOrder = new System.String[]
            {
                "Document 5",
                "Document 4",
                "Document 3",
                "Document 2",
                "Document 1"
            };

            Assert.AreEqual(new System.Collections.ArrayList(expectedOrder), new System.Collections.ArrayList(actualOrder));
        }
 /// <summary>
 /// Builds a <c>LuceneData</c> whose Actor is the document's stored
 /// "actors" field.
 /// </summary>
 /// <param name="doc">Document to read from.</param>
 private LuceneData MapLuceneData(Lucene.Net.Documents.Document doc)
 {
     LuceneData mapped = new LuceneData();
     mapped.Actor = doc.Get("actors");
     return mapped;
 }
Example #14
0
        } // End Sub BuildIndex

        // https://lucenenet.apache.org/
        // https://www.codeproject.com/Articles/609980/Small-Lucene-NET-Demo-App
        // https://stackoverflow.com/questions/12600196/lucene-how-to-index-file-names
        /// <summary>
        /// Searches the "file_name" field of the index at <paramref name="indexPath"/>
        /// for <paramref name="phrase"/> and writes the score and the stored
        /// "full_name" of up to ten hits to the console.
        /// </summary>
        /// <param name="phrase">Search text; query-syntax characters are escaped before parsing.</param>
        /// <param name="indexPath">File-system path of the Lucene index.</param>
        private static void SearchPath(string phrase, string indexPath)
        {
            Lucene.Net.Util.LuceneVersion version = Lucene.Net.Util.LuceneVersion.LUCENE_48;

            // Directory and reader are both disposable; release them when the
            // search is finished (the reader first, then the directory).
            using (Lucene.Net.Store.Directory luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath))
            using (Lucene.Net.Index.IndexReader r = Lucene.Net.Index.DirectoryReader.Open(luceneIndexDirectory))
            {
                Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(r);
                Lucene.Net.Analysis.Analyzer    analyzer = GetWrappedAnalyzer();

                Lucene.Net.QueryParsers.Classic.QueryParser parser = new Lucene.Net.QueryParsers.Classic.QueryParser(version, "file_name", analyzer);

                // https://stackoverflow.com/questions/15170097/how-to-search-across-all-the-fields
                // Lucene.Net.QueryParsers.Classic.MultiFieldQueryParser parser = new Lucene.Net.QueryParsers.Classic.MultiFieldQueryParser(version, GetFields(r), analyzer);

                Lucene.Net.Search.Query query = parser.Parse(Lucene.Net.QueryParsers.Classic.QueryParser.Escape(phrase));

                Lucene.Net.Search.ScoreDoc[] hits = searcher.Search(query, 10).ScoreDocs;
                foreach (Lucene.Net.Search.ScoreDoc hit in hits)
                {
                    Lucene.Net.Documents.Document foundDoc = searcher.Doc(hit.Doc);
                    System.Console.WriteLine(hit.Score);
                    string full_name = foundDoc.Get("full_name");
                    System.Console.WriteLine(full_name);
                    // string favoritePhrase = foundDoc.Get("favoritePhrase");
                    // System.Console.WriteLine(favoritePhrase);
                } // Next hit
            }
        }     // End Sub SearchPath
Example #15
0
        /// <summary>
        /// Looks up the stored "title" of the document whose "LID" field matches
        /// <paramref name="ID"/> in the index folder "Index" (resolved relative to
        /// the current directory).
        /// </summary>
        /// <param name="ID">Lemma id to look up.</param>
        /// <returns>The title of the first hit, or null when nothing matches or the hit has no stored title.</returns>
        /// <remarks>
        /// Side effect: when the current directory's last path segment is "Debug",
        /// the process-wide current directory is moved two levels up.
        /// </remarks>
        public string getLemmaNamebyLemmaID(int ID)
        {
            string directory = System.IO.Directory.GetCurrentDirectory();

            string[] splitDir = directory.Split('\\');
            if (splitDir[splitDir.Length - 1] == "Debug")
            {
                System.IO.Directory.SetCurrentDirectory("..\\..\\");
            }

            string results  = null;
            string indexDir = "Index";

            using (Lucene.Net.Store.Directory dir = FSDirectory.Open(indexDir))
                using (IndexSearcher searcher = new IndexSearcher(dir))
                {
                    BooleanQuery bq = new BooleanQuery();
                    bq.Add(new WildcardQuery(new Term("LID", ID.ToString())), Occur.SHOULD);

                    // At most one hit is requested, so the loop runs zero or one time.
                    TopDocs hits = searcher.Search(bq, 1);
                    foreach (ScoreDoc d in hits.ScoreDocs)
                    {
                        Lucene.Net.Documents.Document doc = searcher.Doc(d.Doc);
                        // Get already returns a string (null when the field is not
                        // stored); the former .ToString() only added a
                        // NullReferenceException for that case.
                        results = doc.Get("title");
                    }
                }
            return(results);
        }
        /// <summary>
        /// Finds up to ten documents whose <c>IndexFieldNames.Text</c> field contains
        /// <paramref name="term"/> and records, for each, the document id, its stored
        /// <c>IndexFieldNames.File</c> value and the term vector of the text field.
        /// </summary>
        /// <param name="term">Exact term to look for.</param>
        /// <returns>The collected occurrences; empty when nothing matches.</returns>
        public DocumentOccurence GetOccurences(string term)
        {
            var result = new DocumentOccurence();

            // The writer and the reader obtained from it are disposed even when the
            // search throws: reader first, then the writer, as in the normal path.
            var indexWriter = GetIndexWriter();
            try
            {
                var reader = indexWriter.GetReader(true);
                try
                {
                    TermQuery     query    = new TermQuery(new Term(IndexFieldNames.Text, term));
                    IndexSearcher searcher = new IndexSearcher(reader);

                    var searchResults = searcher.Search(query, 10);

                    foreach (var docId in searchResults.ScoreDocs)
                    {
                        Lucene.Net.Documents.Document doc = searcher.Doc(docId.Doc);
                        result.AddOccurence(docId.Doc, doc.Get(IndexFieldNames.File), reader.GetTermVector(docId.Doc, IndexFieldNames.Text));
                    }
                }
                finally
                {
                    reader.Dispose();
                }
            }
            finally
            {
                indexWriter.Dispose();
            }

            return(result);
        }
Example #17
0
        /// <summary>
        /// Searches the "IndexCategory" index for documents whose "Cname" field
        /// contains <paramref name="categoryName"/> (wildcard match on both sides).
        /// </summary>
        /// <param name="categoryName">Category name, or part of one, to look for.</param>
        /// <returns>
        /// The stored "title" of each hit, at most 100 entries; an entry is null
        /// when the hit has no stored title.
        /// </returns>
        public string[] SearchByCategory(string categoryName)
        {
            string[] results  = null;
            string   indexDir = "IndexCategory";

            using (Lucene.Net.Store.Directory dir = FSDirectory.Open(indexDir))
                using (IndexSearcher searcher = new IndexSearcher(dir))
                {
                    BooleanQuery bq = new BooleanQuery();
                    bq.Add(new WildcardQuery(new Term("Cname", "*" + categoryName + "*")), Occur.MUST);

                    TopDocs hits = searcher.Search(bq, 100);

                    // Size the array by the documents actually returned. TotalHits
                    // counts every match, even beyond the 100 fetched, which used to
                    // leave trailing null entries.
                    ScoreDoc[] scoreDocs = hits.ScoreDocs;
                    results = new string[scoreDocs.Length];
                    for (int j = 0; j < scoreDocs.Length; j++)
                    {
                        Lucene.Net.Documents.Document doc = searcher.Doc(scoreDocs[j].Doc);
                        // Get already returns a string; no .ToString() needed, which
                        // would throw when the field is missing.
                        results[j] = doc.Get("title");
                    }
                }
            return(results);
        }
Example #18
0
        /// <summary>
        /// Searches for "Document" sorted by DATE_TIME_FIELD with SortField.AUTO in
        /// reverse order and expects the TEXT_FIELD values "Document 5" down to
        /// "Document 1".
        /// </summary>
        public virtual void  TestReverseDateSort()
        {
            IndexSearcher searcher = new IndexSearcher(directory);

            // Reverse sort (third argument true). Per the original note, the
            // problem under test occurs only with SortField.AUTO.
            Sort reverseByDate = new Sort(new SortField(DATE_TIME_FIELD, SortField.AUTO, true));
            Query parsed = new QueryParser(TEXT_FIELD, new WhitespaceAnalyzer()).Parse("Document");

            // Collect the text of every hit in the order the search returned it.
            System.String[] actualOrder = new System.String[5];
            ScoreDoc[]      found       = searcher.Search(parsed, null, 1000, reverseByDate).ScoreDocs;
            int position = 0;
            foreach (ScoreDoc hit in found)
            {
                actualOrder[position] = searcher.Doc(hit.doc).Get(TEXT_FIELD);
                position++;
            }
            searcher.Close();

            // Expected order is Document 5, 4, 3, 2, 1.
            System.String[] expectedOrder = new System.String[]
            {
                "Document 5",
                "Document 4",
                "Document 3",
                "Document 2",
                "Document 1"
            };

            Assert.AreEqual(new System.Collections.ArrayList(expectedOrder), new System.Collections.ArrayList(actualOrder));
        }
Example #19
0
        /// <summary>
        /// Searches the member index at <c>_indexFileLocation</c> and maps every hit
        /// to a <c>LuceneData</c> record.
        /// </summary>
        /// <param name="searchTerm">Query text, parsed with "SearchContent" as the default field.</param>
        /// <returns>
        /// The matching members. On failure the error is traced and whatever was
        /// collected so far (possibly nothing) is returned; the method does not throw.
        /// </returns>
        public List <LuceneData> MemberSearch(string searchTerm)
        {
            var searchData = new List <LuceneData>();

            try
            {
                Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(_indexFileLocation);

                //create an analyzer to process the text
                Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();

                //create the query parser, with the default search field set to "SearchContent"
                Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser("SearchContent", analyzer);

                //parse the query string into a Query object
                Lucene.Net.Search.Query query = queryParser.Parse(searchTerm);

                //create an index searcher that will perform the search
                Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
                try
                {
                    //execute the query
                    Lucene.Net.Search.Hits hits = searcher.Search(query);

                    //iterate over the results; the searcher stays open until the
                    //loop is done because the documents are fetched through hits
                    for (int i = 0; i < hits.Length(); i++)
                    {
                        Lucene.Net.Documents.Document doc = hits.Doc(i);
                        searchData.Add(new LuceneData
                        {
                            MemberID    = Convert.ToInt32(doc.Get("MemberID")),
                            FirstName   = doc.Get("FirstName"),
                            LastName    = doc.Get("LastName"),
                            CompanyName = doc.Get("CompanyName"),
                            City        = doc.Get("City"),
                            State       = doc.Get("State"),
                            PostalCode  = doc.Get("PostalCode")
                        });
                    }
                }
                finally
                {
                    //always release the searcher; it was previously never closed
                    searcher.Close();
                }
            }
            catch (Exception ex)
            {
                // The search stays best-effort, but the failure is traced instead
                // of being silently discarded.
                System.Diagnostics.Trace.TraceError("MemberSearch failed for '{0}': {1}", searchTerm, ex);
            }

            return(searchData);
        }
Example #20
0
        /// <summary>
        /// Convenience method: loads the document <paramref name="docId"/> from
        /// <paramref name="reader"/> and re-analyzes the stored text of
        /// <paramref name="field"/> with <paramref name="analyzer"/>.
        /// </summary>
        /// <returns>The analyzer's token stream over the stored field text.</returns>
        /// <exception cref="System.ArgumentException">The document has no value for the field.</exception>
        public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field, Analyzer analyzer)
        {
            System.String storedText = reader.Document(docId).Get(field);
            if (storedText != null)
            {
                return analyzer.TokenStream(field, new System.IO.StringReader(storedText));
            }

            throw new System.ArgumentException("Field " + field + " in document #" + docId + " is not stored and cannot be analyzed");
        }
Example #21
0
        /// <summary>
        /// Looks up the "LID" of the best-matching lemma for <paramref name="lemmaName"/>
        /// in the index folder "Index" (resolved relative to the current directory).
        /// Each word is matched against "title" as a lower-cased *word* wildcard.
        /// </summary>
        /// <param name="lemmaName">Lemma name, optionally followed by a parenthesised suffix.</param>
        /// <returns>The lemma id of the first hit, or -1 when nothing matches or the stored id is not a number.</returns>
        /// <remarks>
        /// Side effect: when the current directory's last path segment is "Debug",
        /// the process-wide current directory is moved two levels up.
        /// </remarks>
        public int getLemmaIDbyLemmaName(string lemmaName)
        {
            string directory = System.IO.Directory.GetCurrentDirectory();

            string[] splitDir = directory.Split('\\');
            if (splitDir[splitDir.Length - 1] == "Debug")
            {
                System.IO.Directory.SetCurrentDirectory("..\\..\\");
            }

            // Text before the first '(' decides whether the name has several words.
            string[] splitLemmaName = lemmaName.Split('(');

            string results  = null;
            string indexDir = "Index";

            using (Lucene.Net.Store.Directory dir = FSDirectory.Open(indexDir))
                using (IndexSearcher searcher = new IndexSearcher(dir))
                {
                    BooleanQuery bq = new BooleanQuery();
                    if (splitLemmaName[0].Split(' ').Length > 1)
                    {
                        // Multi-word name: one optional wildcard clause per word of
                        // the full name (including any parenthesised part).
                        string[] splited = lemmaName.Split(' ');
                        for (int i = 0; i < splited.Length; i++)
                        {
                            bq.Add(new WildcardQuery(new Term("title", "*" + splited[i].ToLower() + "*")), Occur.SHOULD);
                        }
                    }
                    else
                    {
                        bq.Add(new WildcardQuery(new Term("title", "*" + splitLemmaName[0].ToLower() + "*")), Occur.SHOULD);
                    }

                    // At most one hit is requested, so the loop runs zero or one time.
                    TopDocs hits = searcher.Search(bq, 1);
                    foreach (ScoreDoc d in hits.ScoreDocs)
                    {
                        Lucene.Net.Documents.Document doc = searcher.Doc(d.Doc);
                        // Get already returns a string (null when the field is not
                        // stored); the former .ToString() threw in that case.
                        results = doc.Get("LID");
                    }
                }

            // TryParse writes 0 to its out argument on failure, which used to
            // overwrite the intended -1 "not found" sentinel.
            int id;
            return(Int32.TryParse(results, out id) ? id : -1);
        }
        /// <summary>
        /// Runs a long range query with no lower bound and an inclusive upper bound
        /// on the field "field" + <paramref name="precisionStep"/>, then checks the
        /// hit count and the values of the first and last hit in index order.
        /// </summary>
        /// <param name="precisionStep">Precision step of the numeric field under test.</param>
        private void  TestLeftOpenRange(int precisionStep)
        {
            int           expectedCount = 3000;
            System.String fieldName     = "field" + precisionStep;
            long          upperBound    = (expectedCount - 1) * distance + (distance / 3) + startOffset;

            // A null minimum leaves the range open on the left side.
            System.Int64      boxedUpper = (long)upperBound;
            NumericRangeQuery rangeQuery = NumericRangeQuery.NewLongRange(fieldName, precisionStep, null, boxedUpper, true, true);
            TopDocs           found      = searcher.Search(rangeQuery, null, noDocs, Sort.INDEXORDER);

            // The term count is read after the search has run.
            System.Console.Out.WriteLine("Found " + rangeQuery.GetTotalNumberOfTerms() + " distinct terms in left open range for field '" + fieldName + "'.");

            ScoreDoc[] scoreDocs = found.scoreDocs;
            Assert.IsNotNull(scoreDocs);
            Assert.AreEqual(expectedCount, scoreDocs.Length, "Score doc count");

            Document firstDoc = searcher.Doc(scoreDocs[0].doc);
            Assert.AreEqual(startOffset, System.Int64.Parse(firstDoc.Get(fieldName)), "First doc");

            Document lastDoc = searcher.Doc(scoreDocs[scoreDocs.Length - 1].doc);
            Assert.AreEqual((expectedCount - 1) * distance + startOffset, System.Int64.Parse(lastDoc.Get(fieldName)), "Last doc");
        }
Example #23
0
        /// <summary>
        /// Rebuilds <c>ResultSet</c> (one display string per hit) and
        /// <c>ResultSavingInfo</c> (one string[] row per hit) from the given search
        /// results. Ranks start at 1 and follow the order of
        /// <paramref name="results"/>.
        /// </summary>
        /// <param name="results">Search results whose documents store the ".I", ".T", ".A", ".B" and ".W" fields.</param>
        private void SetResults(TopDocs results)
        {
            ResultSet        = new System.Collections.ArrayList();
            ResultSavingInfo = new System.Collections.ArrayList();
            int    rank  = 0;
            string CPath = init.getCollectionPath();

            foreach (ScoreDoc scoreDoc in results.ScoreDocs)
            {
                rank++;

                // retrieve the document from the 'ScoreDoc' object
                Lucene.Net.Documents.Document doc = searcher.Doc(scoreDoc.Doc);

                // Only the first line of the ".W" text is shown. When the text has
                // no newline, IndexOf returns -1 and Substring(0, -1) used to
                // throw; the whole text is shown instead. The char overload keeps
                // the search ordinal.
                string abstractText     = doc.Get(".W").ToString();
                int    firstNewLineIndx = abstractText.IndexOf('\n');
                string firstLine        = firstNewLineIndx >= 0 ? abstractText.Substring(0, firstNewLineIndx) : abstractText;

                string DocId = doc.Get(".I").ToString().Replace("\n", String.Empty);

                ResultSet.Add("[" + DocId + "]. " +
                              doc.Get(".T").ToString().Replace("\n", String.Empty) + " ( R" + rank + " )\r\n" +
                              "       " + doc.Get(".A").ToString().Replace("\n", String.Empty) + "\r\n" +
                              "       " + doc.Get(".B").ToString().Replace("\n", String.Empty) + "\r\n" +
                              "       " + firstLine + " ...\r\n" +
                              "       " + "file://" + CPath + "/" + DocId + ".txt");

                ResultSavingInfo.Add(new string[] { QueryID, "Q0", DocId, rank.ToString(), scoreDoc.Score.ToString(), "9583131_9837809_9539361_ACH" });
            }
        }
Example #24
0
 /// <summary>
 /// Populates this instance from the stored fields of a Lucene document.
 /// A null document leaves the instance untouched.
 /// </summary>
 /// <param name="doc">Document to read the school fields from; may be null.</param>
 public void SetProperty(Lucene.Net.Documents.Document doc)
 {
     if (doc == null)
     {
         return;
     }

     // -1 is handed to XConvert.ToInt32 as its second argument for every id field
     // (presumably the fallback for unparsable values - confirm against XConvert).
     SchoolId = XConvert.ToInt32(doc.Get("SchoolId"), -1);
     Type     = (SchoolType)XConvert.ToByte(doc.Get("Type"));
     Name     = doc.Get("Name");
     CnName   = doc.Get("CnName");
     Pinyin   = doc.Get("Pinyin");
     RegionId = XConvert.ToInt32(doc.Get("RegionId"), -1);
     StateId  = XConvert.ToInt32(doc.Get("StateId"), -1);
     CityId   = XConvert.ToInt32(doc.Get("CityId"), -1);
 }
Example #25
0
        /// <summary>
        /// Recursively walks <paramref name="file"/> and indexes every entry whose name
        /// ends in ".html", ".htm" or ".txt". When <c>uidIter</c> is set, the walk is
        /// merged against the existing "uid" terms: with <c>deleting</c> set, terms that
        /// sort before the current file's uid are deleted; otherwise files without a
        /// matching term are added. When <c>uidIter</c> is null every matching file is added.
        /// </summary>
        /// <param name="file">File or directory to process.</param>
        private static void  IndexDocs(System.IO.FileInfo file)
        {
            if (System.IO.Directory.Exists(file.FullName))
            {
                // if a directory
                System.String[] files = System.IO.Directory.GetFileSystemEntries(file.FullName); // list its files
                // sorted so entries are visited in a fixed order (presumably the same
                // order in which the uid terms enumerate - confirm against HTMLDocument.Uid)
                System.Array.Sort(files);                                                        // sort the files
                for (int i = 0; i < files.Length; i++)
                {
                    // recursively index them
                    IndexDocs(new System.IO.FileInfo(System.IO.Path.Combine(file.FullName, files[i])));
                }
            }
            else if (file.FullName.EndsWith(".html") || file.FullName.EndsWith(".htm") || file.FullName.EndsWith(".txt"))
            {
                // index .html, .htm and .txt files; anything else is ignored

                if (uidIter != null)
                {
                    System.String uid = HTMLDocument.Uid(file);                     // construct uid for doc

                    // Advance over every "uid" term that sorts (ordinally) before this file's uid.
                    // The casts to System.Object make the field check a reference comparison
                    // (presumably relies on field names being interned - confirm).
                    while (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid" && String.CompareOrdinal(uidIter.Term().Text(), uid) < 0)
                    {
                        if (deleting)
                        {
                            // delete stale docs
                            System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text()));
                            reader.DeleteDocuments(uidIter.Term());
                        }
                        uidIter.Next();
                    }
                    // The enumerator now sits on the first term that is not smaller than uid (or is exhausted).
                    if (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid" && String.CompareOrdinal(uidIter.Term().Text(), uid) == 0)
                    {
                        uidIter.Next();                         // keep matching docs
                    }
                    else if (!deleting)
                    {
                        // add new docs
                        Document doc = HTMLDocument.Document(file);
                        System.Console.Out.WriteLine("adding " + doc.Get("path"));
                        writer.AddDocument(doc);
                    }
                }
                else
                {
                    // creating a new index
                    Document doc = HTMLDocument.Document(file);
                    System.Console.Out.WriteLine("adding " + doc.Get("path"));
                    writer.AddDocument(doc);                     // add docs unconditionally
                }
            }
        }
Example #26
0
        /// <summary>
        /// Loads documents through a <c>ParallelReader</c> built over two indexes and
        /// checks that each <c>MapFieldSelector</c> returns only the requested fields,
        /// with the expected stored values.
        /// </summary>
        public virtual void  TestDocument()
        {
            Directory      dir1 = GetDir1();
            Directory      dir2 = GetDir2();
            ParallelReader pr   = new ParallelReader();

            try
            {
                pr.Add(IndexReader.Open(dir1));
                pr.Add(IndexReader.Open(dir2));

                // The selector is built both from an array and from an ArrayList to cover both constructors.
                Document doc11  = pr.Document(0, new MapFieldSelector(new System.String[] { "f1" }));
                Document doc24  = pr.Document(1, new MapFieldSelector(new System.Collections.ArrayList(new System.String[] { "f4" })));
                Document doc223 = pr.Document(1, new MapFieldSelector(new System.String[] { "f2", "f3" }));

                Assert.AreEqual(1, doc11.GetFields().Count);
                Assert.AreEqual(1, doc24.GetFields().Count);
                Assert.AreEqual(2, doc223.GetFields().Count);

                Assert.AreEqual("v1", doc11.Get("f1"));
                Assert.AreEqual("v2", doc24.Get("f4"));
                Assert.AreEqual("v2", doc223.Get("f2"));
                Assert.AreEqual("v2", doc223.Get("f3"));
            }
            finally
            {
                // The readers were opened by this test and were previously never released,
                // even on success. Closing the ParallelReader is expected to release the
                // readers added to it as well (NOTE(review): confirm for this Lucene.Net version).
                pr.Close();
            }
        }
Example #27
0
        /// <summary>
        /// Adds a second index to a writer that already handed out a reader and checks
        /// that the old reader becomes stale, that a fresh reader stays current across
        /// a commit, and that documents from both indexes are visible afterwards.
        /// </summary>
        public virtual void  TestAddIndexes()
        {
            bool optimize = false;

            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);
            // create the index
            CreateIndexNoClose(!optimize, "index1", writer);
            writer.Flush(false, true, true);

            // create a 2nd index
            Directory   dir2    = new MockRAMDirectory();
            IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer2.SetInfoStream(infoStream);
            CreateIndexNoClose(!optimize, "index2", writer2);
            writer2.Close();

            // A reader obtained before the add must report itself stale afterwards.
            IndexReader r0 = writer.GetReader();

            Assert.IsTrue(r0.IsCurrent());
            writer.AddIndexesNoOptimize(new Directory[] { dir2 });
            Assert.IsFalse(r0.IsCurrent());
            r0.Close();

            // A reader obtained after the add is current, and stays current across a commit.
            IndexReader r1 = writer.GetReader();

            Assert.IsTrue(r1.IsCurrent());

            writer.Commit();
            Assert.IsTrue(r1.IsCurrent());

            Assert.AreEqual(200, r1.MaxDoc());

            int index2df = r1.DocFreq(new Term("indexname", "index2"));

            Assert.AreEqual(100, index2df);

            // verify the docs are from different indexes
            Document doc5 = r1.Document(5);

            Assert.AreEqual("index1", doc5.Get("indexname"));
            Document doc150 = r1.Document(150);

            Assert.AreEqual("index2", doc150.Get("indexname"));
            r1.Close();
            writer.Close();
            dir1.Close();
            // dir2 is created by this test as well; it was previously left open.
            dir2.Close();
        }
Example #28
0
        // Name of the text field that backs the "Passage Text" column.
        const string TEXT_PASSAGE = "Passage Text";

        /// <summary>
        /// Writes every hit in <paramref name="results"/> to the results view via
        /// <c>ResultsUpdate</c>: its 1-based rank, the URL field and the passage text.
        /// </summary>
        /// <param name="results">Hits to display, in the order given by <c>ScoreDocs</c>.</param>
        private void DisplayTopDoc(Lucene.Net.Search.TopDocs results)
        {
            var searcher = myLuceneApp.CreateSearcher();
            int position = 0;

            foreach (ScoreDoc hit in results.ScoreDocs)
            {
                position++;

                // Load the stored document for this hit and read both fields before emitting anything.
                Lucene.Net.Documents.Document hitDoc = searcher.Doc(hit.Doc);
                string url     = hitDoc.Get(TEXT_URL).ToString();
                string passage = hitDoc.Get(TEXT_PASSAGE).ToString();

                string rankLine = "Rank #" + position;

                // NOTE(review): the rank line is emitted twice (before and after the
                // newline), exactly as before - confirm whether one of them is a leftover.
                ResultsUpdate(rankLine);
                ResultsUpdate("\n");
                ResultsUpdate(rankLine);
                ResultsUpdate("URL: " + url);
                ResultsUpdate("Passage Text: ");
                ResultsUpdate(passage);
                ResultsUpdate("\n");
            }
        }
Example #29
0
            /// <summary>
            /// Task executor: opens the index writer, then loops forever draining the
            /// delete, update and add queues into the index.
            /// </summary>
            public void IndexWriteHandler()
            {
                // Create a new index only when no "segments.gen" file is found.
                // NOTE(review): the path is built by plain concatenation, so it is only
                // correct if IndexDirectory ends with a separator - confirm.
                bool createIndex = !File.Exists(Directorys.IndexDirectory + "segments.gen");

                // Create the index writer.
                writer = new Lucene.Net.Index.IndexWriter(Directorys.IndexDirectory, new ThesaurusAnalyzer(), createIndex);
                // Set the maximum number of buffered documents.
                writer.SetMaxBufferedDocs(maxBufferLength);
                // Optimize once at start-up.
                writer.Optimize();

                // Number of queue items handled since the last optimize.
                int handled = 0;

                // Processing loop. NOTE(review): there is no exit condition and the
                // writer is never closed here - confirm that this is intended.
                for (;;)
                {
                    // Drain the delete queue.
                    while (deleteQueue.Count > 0 && handled < maxBufferLength)
                    {
                        handled++;
                        writer.DeleteDocuments(deleteQueue.Dequeue());
                    }

                    // Drain the update queue; documents are replaced by their "id" term.
                    while (updateQueue.Count > 0 && handled < maxBufferLength)
                    {
                        handled++;
                        Lucene.Net.Documents.Document pending = updateQueue.Dequeue();
                        writer.UpdateDocument(new Lucene.Net.Index.Term("id", pending.Get("id")), pending);
                    }

                    // Drain the add queue.
                    while (addQueue.Count > 0 && handled < maxBufferLength)
                    {
                        handled++;
                        writer.AddDocument(addQueue.Dequeue());
                    }

                    // Flush when documents are buffered in memory.
                    if (writer.NumRamDocs() > 0)
                    {
                        writer.Flush();
                    }

                    // Below the buffer limit the thread pauses for 100 ms; once the limit
                    // is reached the index is optimized and the counter starts over.
                    if (handled < maxBufferLength)
                    {
                        Thread.Sleep(100);
                    }
                    else
                    {
                        writer.Optimize();
                        handled = 0;
                    }
                }
            }
Example #30
0
		/// <summary>
		/// Checks that binary fields can be stored next to a string field, read back
		/// one by one or all together by name, and removed again.
		/// </summary>
		public virtual void  TestBinaryField()
		{
			System.Text.Encoding ascii = System.Text.Encoding.ASCII;
			System.Text.Encoding utf8 = System.Text.Encoding.UTF8;

			Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
			Fieldable textField = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO);
			Fieldable firstBinary = new Field("binary", ascii.GetBytes(binaryVal), Field.Store.YES);
			Fieldable secondBinary = new Field("binary", ascii.GetBytes(binaryVal2), Field.Store.YES);

			document.Add(textField);
			document.Add(firstBinary);
			Assert.AreEqual(2, document.GetFieldsCount());

			// A binary field is stored but neither indexed nor tokenized.
			Assert.IsTrue(firstBinary.IsBinary());
			Assert.IsTrue(firstBinary.IsStored());
			Assert.IsFalse(firstBinary.IsIndexed());
			Assert.IsFalse(firstBinary.IsTokenized());

			// The single binary value decodes back to the original text ...
			System.String decoded = ascii.GetString(document.GetBinaryValue("binary"));
			Assert.IsTrue(decoded.Equals(binaryVal));

			// ... and equals the value kept in the plain string field.
			System.String stored = document.Get("string");
			Assert.IsTrue(decoded.Equals(stored));

			// A second field under the same name yields two binary values.
			document.Add(secondBinary);
			Assert.AreEqual(3, document.GetFieldsCount());

			byte[][] rawValues = document.GetBinaryValues("binary");
			Assert.AreEqual(2, rawValues.Length);

			decoded = utf8.GetString(rawValues[0]);
			System.String decodedSecond = utf8.GetString(rawValues[1]);

			Assert.IsFalse(decoded.Equals(decodedSecond));
			Assert.IsTrue(decoded.Equals(binaryVal));
			Assert.IsTrue(decodedSecond.Equals(binaryVal2));

			// RemoveField drops the string field; RemoveFields drops both binary fields.
			document.RemoveField("string");
			Assert.AreEqual(2, document.GetFieldsCount());

			document.RemoveFields("binary");
			Assert.AreEqual(0, document.GetFieldsCount());
		}
        /// <summary>
        /// Runs an int range query with an inclusive lower bound and no upper bound
        /// against the field for the given precision step, then checks the hit count
        /// and the values of the first and last hit in index order.
        /// </summary>
        /// <param name="precisionStep">Precision step; also selects the field name "field" + precisionStep.</param>
        private void  TestRightOpenRange(int precisionStep)
        {
            int           count = 3000;
            System.String field = "field" + precisionStep;

            // Lower bound lies a third of a step above value number (count - 1).
            int lower = (count - 1) * distance + (distance / 3) + startOffset;

            NumericRangeQuery<int> rangeQuery = NumericRangeQuery.NewIntRange(field, precisionStep, lower, null, true, true);
            TopDocs                found      = searcher.Search(rangeQuery, null, noDocs, Sort.INDEXORDER, null);

            System.Console.Out.WriteLine("Found " + rangeQuery.TotalNumberOfTerms + " distinct terms in right open range for field '" + field + "'.");

            ScoreDoc[] hits = found.ScoreDocs;
            Assert.IsNotNull(hits);
            Assert.AreEqual(noDocs - count, hits.Length, "Score doc count");

            // First hit in index order.
            System.String firstValue = searcher.Doc(hits[0].Doc, null).Get(field, null);
            Assert.AreEqual(count * distance + startOffset, System.Int32.Parse(firstValue), "First doc");

            // Last hit in index order.
            System.String lastValue = searcher.Doc(hits[hits.Length - 1].Doc, null).Get(field, null);
            Assert.AreEqual((noDocs - 1) * distance + startOffset, System.Int32.Parse(lastValue), "Last doc");
        }
Example #32
0
        /// <summary>
        /// Wraps a Lucene document and eagerly reads its key, site id, page/module
        /// titles, title, intro, view page, query-string settings and author.
        /// </summary>
        /// <param name="doc">Lucene document backing this item.</param>
        /// <param name="score">
        /// Score of the hit. NOTE(review): not used anywhere in this constructor - confirm whether it should be stored.
        /// </param>
        public IndexItem(Lucene.Net.Documents.Document doc, float score)
        {
            luceneDoc = doc;

            // Identity of the item.
            docKey = luceneDoc.Get("Key");
            siteID = Convert.ToInt32(luceneDoc.Get("SiteID"), CultureInfo.InvariantCulture);

            // Display fields.
            PageName            = luceneDoc.Get("PageName");
            ModuleTitle         = luceneDoc.Get("ModuleTitle");
            Title               = luceneDoc.Get("Title");
            intro               = luceneDoc.Get("Intro");
            ViewPage            = luceneDoc.Get("ViewPage");
            QueryStringAddendum = luceneDoc.Get("QueryStringAddendum");

            // The flag is only overwritten when the stored text parses as a boolean.
            bool parsedFlag;
            if (bool.TryParse(luceneDoc.Get("UseQueryStringParams"), out parsedFlag))
            {
                useQueryStringParams = parsedFlag;
            }

            Author = luceneDoc.GetNullSafeString("Author");

            // Everything else is deliberately not read here; per the original note it is
            // lazy loaded when accessed through the public getters. That covers the
            // "ViewRoles", "ModuleRole", "PageID", "PageIndex", "PageNumber",
            // "FeatureId", "FeatureName", "ItemID", "ModuleID", "PublishBeginDate",
            // "PublishEndDate", "CreatedUtc" and "LastModUtc" fields as well as the
            // document boost.
        }