/* * public void testTermRepeatedQuery() throws IOException, ParseException { * // TODO: this corner case yields different results. * checkQuery("multi* multi* foo"); * } */
/// <summary> checks if a query yields the same result when executed on
/// a single IndexSearcher containing all documents and on a
/// MultiSearcher aggregating sub-searchers
/// </summary>
/// <param name="queryStr"> the query to check.
/// </param>
/// <throws> IOException </throws>
/// <throws> ParseException </throws>
private void CheckQuery(System.String queryStr)
{
    // check result hit ranking
    if (verbose)
    {
        System.Console.Out.WriteLine("Query: " + queryStr);
    }
    QueryParser queryParser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
    Query query = queryParser.Parse(queryStr);
    // Run the identical query against both searchers; hit count, rank order,
    // score (within tolerance) and stored field content must all agree.
    ScoreDoc[] multiSearcherHits = multiSearcher.Search(query, null, 1000).scoreDocs;
    ScoreDoc[] singleSearcherHits = singleSearcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(multiSearcherHits.Length, singleSearcherHits.Length);
    for (int i = 0; i < multiSearcherHits.Length; i++)
    {
        Document docMulti = multiSearcher.Doc(multiSearcherHits[i].doc);
        Document docSingle = singleSearcher.Doc(singleSearcherHits[i].doc);
        if (verbose)
        {
            System.Console.Out.WriteLine("Multi:  " + docMulti.Get(FIELD_NAME) + " score=" + multiSearcherHits[i].score);
        }
        if (verbose)
        {
            System.Console.Out.WriteLine("Single: " + docSingle.Get(FIELD_NAME) + " score=" + singleSearcherHits[i].score);
        }
        // scores are allowed to differ only by floating-point rounding
        Assert.AreEqual(multiSearcherHits[i].score, singleSearcherHits[i].score, 0.001f);
        Assert.AreEqual(docMulti.Get(FIELD_NAME), docSingle.Get(FIELD_NAME));
    }
    if (verbose)
    {
        System.Console.Out.WriteLine();
    }
}
/// <summary>
/// Search for files.
/// </summary>
/// <param name="queryText">The query text.</param>
/// <returns>The files that match the query text.</returns>
public SourceFile[] Search(string queryText)
{
    var queryParser = new Lucene.Net.QueryParsers.QueryParser(
        Lucene.Net.Util.Version.LUCENE_30, "body", _analyzer);
    var parsedQuery = queryParser.Parse(queryText);

    using (var indexSearcher = new Lucene.Net.Search.IndexSearcher(_directory, true))
    {
        var topDocs = indexSearcher.Search(parsedQuery, int.MaxValue);
        var matches = new List<SourceFile>();

        // Materialize each hit into a SourceFile from its stored fields.
        foreach (var scoreDoc in topDocs.ScoreDocs)
        {
            var document = indexSearcher.Doc(scoreDoc.Doc);
            matches.Add(new SourceFile(
                document.Get("id"),
                document.Get("type"),
                document.Get("name"),
                document.Get("fileName"),
                null));
        }

        return matches.ToArray();
    }
}
/// <summary>Populates this instance from the stored fields of a Lucene document.</summary>
/// <param name="doc">The source document; the call is a no-op when null.</param>
public void SetProperty(Lucene.Net.Documents.Document doc)
{
    if (doc == null)
    {
        return;
    }

    this.SpecialtyId = Convert.ToInt32(doc.Get("SpecialtyId"));
    this.Name = doc.Get("Name");
}
// Mapping Lucene data: copies the stored "MessageId" and "Message" fields
// of a hit document onto a new Message entity.
private Message MapLuceneData(Lucene.Net.Documents.Document doc)
{
    var message = new Message();
    message.ID = doc.Get("MessageId").ToType<int>();
    message.MessageText = doc.Get("Message");
    return message;
}
/// <summary>Populates this university instance from the stored fields of a Lucene document.</summary>
/// <param name="doc">The source document; the call is a no-op when null.</param>
public void SetProperty(Lucene.Net.Documents.Document doc)
{
    if (doc != null)
    {
        this.UniversityId = Convert.ToInt32(doc.Get("UniversityId"));
        // NOTE(review): Name is read from the "CnName" field, not "Name" — both
        // Name and CnName end up with the same value. Confirm this is intentional.
        this.Name = doc.Get("CnName");
        this.CnName = doc.Get("CnName");
        this.Pinyin = doc.Get("Pinyin");
    }
}
/// <summary>
/// Searches the public (or, when <paramref name="komin"/> is true, the komin)
/// index for videos whose title or short description matches the query.
/// </summary>
/// <param name="queryString">User query; a "~" is appended so the last term matches fuzzily.</param>
/// <param name="numberOfResults">Maximum number of hits to collect.</param>
/// <param name="komin">Selects the komin index instead of the public one.</param>
/// <returns>Matched videos in score order; empty on blank input or on failure.</returns>
public List<indexVideo> searchArticles(string queryString, int numberOfResults, bool komin)
{
    List<indexVideo> resultsList = new List<indexVideo>();
    Lucene.Net.Store.Directory index = komin ? kominIndex : publicIndex;

    if (!string.IsNullOrEmpty(queryString))
    {
        Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(
            Lucene.Net.Util.Version.LUCENE_30, new[] { "title", "shortDescription" }, analyser);
        try
        {
            Lucene.Net.Search.Query query = parser.Parse(queryString + "~");
            // FIX: dispose the searcher when done — the original leaked it.
            using (Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(index, true))
            {
                Lucene.Net.Search.TopScoreDocCollector collector =
                    Lucene.Net.Search.TopScoreDocCollector.Create(numberOfResults, true);
                searcher.Search(query, collector);
                Lucene.Net.Search.ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
                // (The original's "hits.Length >= 1" guard was redundant with the loop.)
                for (int i = 0; i < hits.Length; i++)
                {
                    indexVideo video = new indexVideo();
                    int docId = hits[i].Doc;
                    float score = hits[i].Score;
                    Lucene.Net.Documents.Document doc = searcher.Doc(docId);
                    video.bctid = doc.Get("bctid");
                    video.title = doc.Get("title");
                    video.score = score;
                    video.shortDescription = doc.Get("shortDescription");
                    video.imageURL = doc.Get("imageURL");
                    resultsList.Add(video);
                }
            }
        }
        catch (Exception)
        {
            // FIX: the original bound the exception to an unused variable and
            // swallowed it silently. The search stays best-effort (returns what
            // was collected so far), but the intent is now explicit. TODO: log.
        }
    }
    return resultsList;
}
/* Open pre-lockless index, add docs, do a delete & * setNorm, and search */
/// <summary>
/// Back-compat check for a pre-lockless index format: verifies searching,
/// deleting by term, setNorm, and optimizing all work and that the hit counts
/// and first-document ids match the expected fixture contents.
/// </summary>
/// <param name="dirName">Name of the fixture index directory (resolved via FullDir).</param>
/// <param name="autoCommit">Passed through to the IndexWriter used for optimizing.</param>
public virtual void ChangeIndexNoAdds(System.String dirName, bool autoCommit)
{
    dirName = FullDir(dirName);
    Directory dir = FSDirectory.Open(new System.IO.FileInfo(dirName));

    // make sure searching sees right # hits
    IndexSearcher searcher = new IndexSearcher(dir);
    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length, "wrong number of hits");
    Document d = searcher.Doc(hits[0].doc);
    Assert.AreEqual("21", d.Get("id"), "wrong first document");
    searcher.Close();

    // make sure we can do a delete & setNorm against this
    // pre-lockless segment:
    IndexReader reader = IndexReader.Open(dir);
    Term searchTerm = new Term("id", "6");
    int delCount = reader.DeleteDocuments(searchTerm);
    Assert.AreEqual(1, delCount, "wrong delete count");
    reader.SetNorm(22, "content", (float)2.0);
    reader.Close();

    // make sure they "took": one fewer hit, and a new top document because of
    // the boosted norm on doc 22.
    searcher = new IndexSearcher(dir);
    hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(33, hits.Length, "wrong number of hits");
    d = searcher.Doc(hits[0].doc);
    Assert.AreEqual("22", d.Get("id"), "wrong first document");
    TestHits(hits, 33, searcher.GetIndexReader());
    searcher.Close();

    // optimize
    IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false);
    writer.Optimize();
    writer.Close();

    // results must be unchanged after optimizing
    searcher = new IndexSearcher(dir);
    hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(33, hits.Length, "wrong number of hits");
    d = searcher.Doc(hits[0].doc);
    Assert.AreEqual("22", d.Get("id"), "wrong first document");
    TestHits(hits, 33, searcher.GetIndexReader());
    searcher.Close();
    dir.Close();
}
/// <summary>
/// Hit result constructor: loads the externally stored article header and
/// builds a display snippet around the keyword position.
/// </summary>
/// <param name="doc">The index document for this hit.</param>
/// <param name="offset">Offset of the keyword within the article body.</param>
internal Hit(Lucene.Net.Documents.Document doc, int offset)
{
    base.id = doc.Get("id");
    base.lastIndex = Lucene.Net.Documents.DateField.StringToDate(doc.Get("date"));
    // Open the externally stored article entity; files are bucketed into
    // folders of 10000 by ceil(id / 10000).
    StoreReader story = new StoreReader(Directorys.StoreDirectory + Math.Ceiling(Double.Parse(base.id) / 10000D).ToString("f0") + @"\" + base.id + ".gz");
    // Read the saved article header lines, in fixed order.
    base.author = story.ReadLine();
    base.cat = story.ReadLine();
    base.tag = story.ReadLine();
    base.title = story.ReadLine();
    base.path = story.ReadLine();
    int readed = 0;
    int len = 126; // length of the displayed snippet
    char[] block = new char[offset + len];
    // Read body text up to len characters past the keyword position.
    readed = story.ReadBlock(block, 0, block.Length);
    story.Close();
    int index = offset;
    // If the keyword is not at the end of the text, start the snippet just
    // after the punctuation mark preceding it; otherwise take the trailing
    // len characters of what was actually read.
    if (readed == block.Length)
    {
        UnicodeCategory category;
        for (; index > 0; index--)
        {
            category = Char.GetUnicodeCategory(Char.ToLower(block[index]));
            if (category == UnicodeCategory.OtherPunctuation)
            {
                index += 1;
                break;
            }
        }
    }
    else
    {
        index = Math.Max(0, readed - len);
    }
    // Append "..." when the snippet does not reach the end of the read text.
    // NOTE(review): new String(block, index, Math.Min(len - 1, readed)) assumes
    // index + count stays within block — confirm for small offsets.
    base.body = (new String(block, index, Math.Min(len - 1, readed))) + ((readed >= index + len) ? "..." : "");
}
/// <summary>
/// Verifies that MultiSearcher.Doc honours a SetBasedFieldSelector: only the
/// selected stored fields are loaded from hit documents.
/// </summary>
public virtual void TestFieldSelector()
{
    RAMDirectory ramDirectory1, ramDirectory2;
    IndexSearcher indexSearcher1, indexSearcher2;
    ramDirectory1 = new RAMDirectory();
    ramDirectory2 = new RAMDirectory();
    Query query = new TermQuery(new Term("contents", "doc0"));
    // Now put the documents in a different index
    InitIndex(ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
    InitIndex(ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
    indexSearcher1 = new IndexSearcher(ramDirectory1, true);
    indexSearcher2 = new IndexSearcher(ramDirectory2, true);
    MultiSearcher searcher = GetMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 });
    Assert.IsTrue(searcher != null, "searcher is null and it shouldn't be");
    ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
    Assert.IsTrue(hits.Length == 2, hits.Length + " does not equal: " + 2);
    // Without a selector, the full document (both fields) is loaded.
    Document document = searcher.Doc(hits[0].Doc);
    Assert.IsTrue(document != null, "document is null and it shouldn't be");
    Assert.IsTrue(document.GetFields().Count == 2, "document.getFields() Size: " + document.GetFields().Count + " is not: " + 2);
    //Should be one document from each directory
    //they both have two fields, contents and other
    ISet<string> ftl = Support.Compatibility.SetFactory.CreateHashSet<string>();
    ftl.Add("other");
    SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Support.Compatibility.SetFactory.CreateHashSet<string>());
    // Selecting only "other": "contents" must come back null.
    document = searcher.Doc(hits[0].Doc, fs);
    Assert.IsTrue(document != null, "document is null and it shouldn't be");
    Assert.IsTrue(document.GetFields().Count == 1, "document.getFields() Size: " + document.GetFields().Count + " is not: " + 1);
    System.String value_Renamed = document.Get("contents");
    Assert.IsTrue(value_Renamed == null, "value is not null and it should be");
    value_Renamed = document.Get("other");
    Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
    // And the inverse selection on the other hit: only "contents" is loaded.
    ftl.Clear();
    ftl.Add("contents");
    fs = new SetBasedFieldSelector(ftl, Support.Compatibility.SetFactory.CreateHashSet<string>());
    document = searcher.Doc(hits[1].Doc, fs);
    value_Renamed = document.Get("contents");
    Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
    value_Renamed = document.Get("other");
    Assert.IsTrue(value_Renamed == null, "value is not null and it should be");
}
/// <summary>
/// End-to-end demo: index one document in a RAMDirectory, query it back with the
/// QueryParser, and verify the stored field round-trips.
/// </summary>
public virtual void TestDemo_Renamed()
{
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.open("/tmp/testindex");

    // Index a single document with one analyzed, stored text field.
    IndexWriter writer = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    Document indexedDoc = new Document();
    System.String text = "This is the text to be indexed.";
    indexedDoc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(indexedDoc);
    writer.Close();

    // Now search the index:
    IndexSearcher searcher = new IndexSearcher(directory, true); // read-only=true
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fieldname", analyzer);
    Query query = parser.Parse("text");
    ScoreDoc[] matches = searcher.Search(query, null, 1000).ScoreDocs;
    Assert.AreEqual(1, matches.Length);

    // Every hit must carry the original stored text.
    foreach (ScoreDoc match in matches)
    {
        Document hitDoc = searcher.Doc(match.Doc);
        Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed.");
    }

    searcher.Close();
    directory.Close();
}
/// <summary>
/// Runs the same query against the parallel and the single searcher and asserts
/// identical hit counts, scores (within tolerance) and stored field values.
/// </summary>
private void QueryTest(Query query)
{
    ScoreDoc[] hitsParallel = parallel.Search(query, null, 1000).scoreDocs;
    ScoreDoc[] hitsSingle = single.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(hitsParallel.Length, hitsSingle.Length);

    for (int i = 0; i < hitsParallel.Length; i++)
    {
        Assert.AreEqual(hitsParallel[i].score, hitsSingle[i].score, 0.001f);
        Document fromParallel = parallel.Doc(hitsParallel[i].doc);
        Document fromSingle = single.Doc(hitsSingle[i].doc);
        // All four stored fields must agree between the two searchers.
        foreach (string fieldName in new[] { "f1", "f2", "f3", "f4" })
        {
            Assert.AreEqual(fromParallel.Get(fieldName), fromSingle.Get(fieldName));
        }
    }
}
/// <summary>
/// Verifies that sorting on the date/time field with reverse=true returns the
/// five fixture documents newest-first.
/// </summary>
public virtual void TestReverseDateSort()
{
    IndexSearcher searcher = new IndexSearcher(directory, true, null);

    // Sort by the string date/time field, descending.
    Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.STRING, true));
    QueryParser queryParser = new QueryParser(Util.Version.LUCENE_CURRENT, TEXT_FIELD, new WhitespaceAnalyzer());
    Query query = queryParser.Parse("Document");

    // Run the query and capture each hit's text field in rank order.
    System.String[] actualOrder = new System.String[5];
    ScoreDoc[] hits = searcher.Search(query, null, 1000, sort, null).ScoreDocs;
    for (int i = 0; i < hits.Length; i++)
    {
        actualOrder[i] = searcher.Doc(hits[i].Doc, null).Get(TEXT_FIELD, null);
    }
    searcher.Close();

    // Expected order is newest first: Document 5 down to Document 1.
    System.String[] expectedOrder = { "Document 5", "Document 4", "Document 3", "Document 2", "Document 1" };
    Assert.AreEqual(new System.Collections.ArrayList(expectedOrder), new System.Collections.ArrayList(actualOrder));
}
// Mapping Lucene data: copies the stored "actors" field of a hit document
// onto a new LuceneData entity.
private LuceneData MapLuceneData(Lucene.Net.Documents.Document doc)
{
    var data = new LuceneData();
    data.Actor = doc.Get("actors");
    return data;
}
} // End Sub BuildIndex

// https://lucenenet.apache.org/
// https://www.codeproject.com/Articles/609980/Small-Lucene-NET-Demo-App
// https://stackoverflow.com/questions/12600196/lucene-how-to-index-file-names
/// <summary>
/// Runs a query against the "file_name" field of the Lucene 4.8 index at
/// <paramref name="indexPath"/> and prints the score and stored "full_name"
/// of up to 10 hits to the console.
/// </summary>
/// <param name="phrase">Raw user text; escaped before parsing so query-syntax characters match literally.</param>
/// <param name="indexPath">Filesystem path of the index directory.</param>
private static void SearchPath(string phrase, string indexPath)
{
    Lucene.Net.Util.LuceneVersion version = Lucene.Net.Util.LuceneVersion.LUCENE_48;
    Lucene.Net.Store.Directory luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    Lucene.Net.Index.IndexReader r = Lucene.Net.Index.DirectoryReader.Open(luceneIndexDirectory);
    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(r);
    Lucene.Net.Analysis.Analyzer analyzer = GetWrappedAnalyzer();
    Lucene.Net.QueryParsers.Classic.QueryParser parser = new Lucene.Net.QueryParsers.Classic.QueryParser(version, "file_name", analyzer);
    // https://stackoverflow.com/questions/15170097/how-to-search-across-all-the-fields
    // Lucene.Net.QueryParsers.Classic.MultiFieldQueryParser parser = new Lucene.Net.QueryParsers.Classic.MultiFieldQueryParser(version, GetFields(r), analyzer);
    Lucene.Net.Search.Query query = parser.Parse(Lucene.Net.QueryParsers.Classic.QueryParser.Escape(phrase));
    Lucene.Net.Search.ScoreDoc[] hits = searcher.Search(query, 10).ScoreDocs;
    foreach (Lucene.Net.Search.ScoreDoc hit in hits)
    {
        Lucene.Net.Documents.Document foundDoc = searcher.Doc(hit.Doc);
        System.Console.WriteLine(hit.Score);
        string full_name = foundDoc.Get("full_name");
        System.Console.WriteLine(full_name);
        // string favoritePhrase = foundDoc.Get("favoritePhrase");
        // System.Console.WriteLine(favoritePhrase);
    } // Next hit
} // End Sub SearchPath
/// <summary>
/// Looks up a lemma's title by its lemma id ("LID" field) in the "Index"
/// Lucene index.
/// </summary>
/// <param name="ID">The lemma id to look up.</param>
/// <returns>The stored "title" of the matching document, or null when nothing matches.</returns>
public string getLemmaNamebyLemmaID(int ID)
{
    // When running from bin\Debug, step up to the project root so the relative
    // "Index" directory resolves (NOTE: this mutates the process-wide CWD).
    string directory = System.IO.Directory.GetCurrentDirectory();
    string[] splitDir = directory.Split('\\');
    if (splitDir[splitDir.Length - 1] == "Debug")
    {
        System.IO.Directory.SetCurrentDirectory("..\\..\\");
    }

    string results = null;
    string indexDir = "Index";
    using (Lucene.Net.Store.Directory dir = FSDirectory.Open(indexDir))
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        // Single optional clause matching the exact id term (no wildcard chars).
        BooleanQuery bq = new BooleanQuery();
        bq.Add(new WildcardQuery(new Term("LID", ID.ToString())), Occur.SHOULD);
        TopDocs hits = searcher.Search(bq, 1);
        foreach (ScoreDoc d in hits.ScoreDocs)
        {
            Lucene.Net.Documents.Document doc = searcher.Doc(d.Doc);
            // FIX: dropped the redundant .ToString() on the string returned by
            // Get — it would throw NullReferenceException when "title" is not
            // stored on the document.
            results = doc.Get("title");
        }
    }
    return results;
}
/// <summary>
/// Finds up to 10 indexed documents containing <paramref name="term"/> in the
/// text field and records each hit's doc id, file name and term vector.
/// </summary>
/// <param name="term">The exact term to look up.</param>
/// <returns>The collected occurrences.</returns>
public DocumentOccurence GetOccurences(string term)
{
    var result = new DocumentOccurence();
    var indexWriter = GetIndexWriter();
    var reader = indexWriter.GetReader(true);

    // FIX: removed dead code from the original — a single-clause BooleanQuery
    // ("theQuery") and a HashSet ("matches") were built but never used; the
    // plain TermQuery below is what was actually searched.
    TermQuery query = new TermQuery(new Term(IndexFieldNames.Text, term));
    IndexSearcher searcher = new IndexSearcher(reader);
    var searchResults = searcher.Search(query, 10);
    foreach (var scoreDoc in searchResults.ScoreDocs)
    {
        Lucene.Net.Documents.Document doc = searcher.Doc(scoreDoc.Doc);
        result.AddOccurence(scoreDoc.Doc, doc.Get(IndexFieldNames.File),
            reader.GetTermVector(scoreDoc.Doc, IndexFieldNames.Text));
    }
    reader.Dispose();
    indexWriter.Dispose();
    return result;
}
/// <summary>
/// Search in the Database to find Lemmas that belong to the choosen Category.
/// <para><returns>Returns table of string (string[]) </returns> with the Lemma Names.</para>
/// </summary>
public string[] SearchByCategory(string categoryName)
{
    string[] results;
    string indexDir = "IndexCategory";
    using (Lucene.Net.Store.Directory dir = FSDirectory.Open(indexDir))
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        // Contains-match on the category name field.
        BooleanQuery bq = new BooleanQuery();
        bq.Add(new WildcardQuery(new Term("Cname", "*" + categoryName + "*")), Occur.MUST);
        TopDocs hits = searcher.Search(bq, 100);
        // FIX: size the result array by the docs actually returned. The original
        // used hits.TotalHits, which leaves null entries at the tail whenever
        // more than 100 documents match (only 100 ScoreDocs are fetched).
        ScoreDoc[] scoreDocs = hits.ScoreDocs;
        results = new string[scoreDocs.Length];
        int j = 0;
        foreach (ScoreDoc d in scoreDocs)
        {
            Lucene.Net.Documents.Document doc = searcher.Doc(d.Doc);
            // FIX: dropped the redundant .ToString() that would throw when
            // "title" is not stored.
            results[j++] = doc.Get("title");
        }
    }
    return results;
}
/// <summary>
/// Verifies reverse sorting on the date/time field (SortField.AUTO): the five
/// fixture documents must come back newest-first.
/// </summary>
public virtual void TestReverseDateSort()
{
    IndexSearcher searcher = new IndexSearcher(directory);

    // Create a Sort object. reverse is set to true.
    // problem occurs only with SortField.AUTO:
    Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.AUTO, true));

    QueryParser queryParser = new QueryParser(TEXT_FIELD, new WhitespaceAnalyzer());
    Query query = queryParser.Parse("Document");

    // Run the query and capture each hit's text field in rank order.
    System.String[] actualOrder = new System.String[5];
    ScoreDoc[] hits = searcher.Search(query, null, 1000, sort).ScoreDocs;
    for (int i = 0; i < hits.Length; i++)
    {
        Document document = searcher.Doc(hits[i].doc);
        actualOrder[i] = document.Get(TEXT_FIELD);
    }
    searcher.Close();

    // Reverse chronological order: Document 5 down to Document 1.
    System.String[] expectedOrder = { "Document 5", "Document 4", "Document 3", "Document 2", "Document 1" };
    Assert.AreEqual(new System.Collections.ArrayList(expectedOrder), new System.Collections.ArrayList(actualOrder));
}
/// <summary>
/// Searches the member index on the "SearchContent" field and maps every hit's
/// stored fields onto a LuceneData record.
/// </summary>
/// <param name="searchTerm">User query, parsed with the classic QueryParser.</param>
/// <returns>Matched members; empty when nothing matches or the search fails.</returns>
public List<LuceneData> MemberSearch(string searchTerm)
{
    var searchData = new List<LuceneData>();
    try
    {
        Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(_indexFileLocation);
        //create an analyzer to process the text
        Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
        //create the query parser, with the default search field set to "SearchContent"
        Lucene.Net.QueryParsers.QueryParser queryParser = new Lucene.Net.QueryParsers.QueryParser("SearchContent", analyzer);
        //parse the query string into a Query object
        Lucene.Net.Search.Query query = queryParser.Parse(searchTerm);
        //create an index searcher that will perform the search (read-only)
        Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true);
        //execute the query (legacy Hits API, kept for behavioral compatibility)
        Lucene.Net.Search.Hits hits = searcher.Search(query);
        //iterate over the results.
        for (int i = 0; i < hits.Length(); i++)
        {
            Lucene.Net.Documents.Document doc = hits.Doc(i);
            searchData.Add(new LuceneData
            {
                MemberID = Convert.ToInt32(doc.Get("MemberID")),
                FirstName = doc.Get("FirstName"),
                LastName = doc.Get("LastName"),
                CompanyName = doc.Get("CompanyName"),
                City = doc.Get("City"),
                State = doc.Get("State"),
                PostalCode = doc.Get("PostalCode")
            });
        }
    }
    catch (Exception)
    {
        // FIX: the original bound the exception to an unused variable and
        // swallowed it silently. Search stays best-effort (returns whatever was
        // collected), but the intent is now explicit. TODO: log the failure.
    }
    return searchData;
}
//convenience method: re-analyzes the stored value of a document field into a
//TokenStream; the field must have been stored at index time.
public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field, Analyzer analyzer)
{
    System.String storedText = reader.Document(docId).Get(field);
    if (storedText != null)
    {
        return analyzer.TokenStream(field, new System.IO.StringReader(storedText));
    }
    throw new System.ArgumentException("Field " + field + " in document #" + docId + " is not stored and cannot be analyzed");
}
/// <summary>
/// Looks up a lemma id ("LID" field) by fuzzy-matching the lemma name against
/// the "title" field of the "Index" Lucene index (lower-cased wildcard
/// contains-match, one clause per word for multi-word names).
/// </summary>
/// <param name="lemmaName">Lemma name; text before the first '(' drives the single-word path.</param>
/// <returns>The matching lemma id, or 0 when no document matches or the id
/// does not parse (TryParse overwrites the initial -1 with 0 on failure).</returns>
public int getLemmaIDbyLemmaName(string lemmaName)
{
    // When running from bin\Debug, step up to the project root so the relative
    // "Index" directory resolves (NOTE: mutates the process-wide CWD).
    string directory = System.IO.Directory.GetCurrentDirectory();
    string[] splitDir = directory.Split('\\');
    if (splitDir[splitDir.Length - 1] == "Debug")
    {
        System.IO.Directory.SetCurrentDirectory("..\\..\\");
    }
    string[] splitLemmaName = lemmaName.Split('(');
    string results = null;
    // NOTE(review): the -1 default is dead — Int32.TryParse below sets id to 0
    // when results is null or unparsable.
    int id = -1;
    string indexDir = "Index";
    using (Lucene.Net.Store.Directory dir = FSDirectory.Open(indexDir))
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        string[] splited;
        Term term;
        WildcardQuery q = null;
        BooleanQuery bq = new BooleanQuery();
        if (splitLemmaName[0].Split(' ').Length > 1)
        {
            // Multi-word name: OR together one wildcard clause per word.
            // NOTE(review): this splits the FULL name, not the part before '('
            // — confirm that is intended.
            splited = lemmaName.Split(' ');
            for (int i = 0; i < splited.Length; i++)
            {
                term = new Term("title", "*" + splited[i].ToLower() + "*");
                q = new WildcardQuery(term);
                bq.Add(q, Occur.SHOULD);
            }
        }
        else
        {
            term = new Term("title", "*" + splitLemmaName[0].ToLower() + "*");
            q = new WildcardQuery(term);
            bq.Add(q, Occur.SHOULD);
        }
        // Only the single best-scoring hit is considered.
        TopDocs hits = searcher.Search(bq, 1);
        foreach (ScoreDoc d in hits.ScoreDocs)
        {
            Lucene.Net.Documents.Document doc = searcher.Doc(d.Doc);
            results = doc.Get("LID").ToString();
        }
    }
    Int32.TryParse(results, out id);
    return id;
}
/// <summary>
/// Tests a left-open numeric long range (null lower bound) for the given
/// precision step: exactly the first 'count' documents must match, in index
/// order, bounded only from above.
/// </summary>
private void TestLeftOpenRange(int precisionStep)
{
    System.String field = "field" + precisionStep;
    int count = 3000;
    // Upper bound lies a third of one distance step past document (count-1).
    long upper = (count - 1) * distance + (distance / 3) + startOffset;
    //UPGRADE_TODO: The 'System.Int64' structure does not have an equivalent to NULL. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1291'"
    System.Int64 tempAux = (long)upper;
    // null lower bound = open on the left.
    NumericRangeQuery q = NumericRangeQuery.NewLongRange(field, precisionStep, null, tempAux, true, true);
    TopDocs topDocs = searcher.Search(q, null, noDocs, Sort.INDEXORDER);
    System.Console.Out.WriteLine("Found " + q.GetTotalNumberOfTerms() + " distinct terms in left open range for field '" + field + "'.");
    ScoreDoc[] sd = topDocs.scoreDocs;
    Assert.IsNotNull(sd);
    Assert.AreEqual(count, sd.Length, "Score doc count");
    // First and last hits must be the extremes of the matched range.
    Document doc = searcher.Doc(sd[0].doc);
    Assert.AreEqual(startOffset, System.Int64.Parse(doc.Get(field)), "First doc");
    doc = searcher.Doc(sd[sd.Length - 1].doc);
    Assert.AreEqual((count - 1) * distance + startOffset, System.Int64.Parse(doc.Get(field)), "Last doc");
}
/// <summary>
/// Converts the ranked TopDocs into display strings (ResultSet) and
/// TREC-style run rows (ResultSavingInfo).
/// </summary>
/// <param name="results">Top docs from a previous search against this searcher.</param>
private void SetResults(TopDocs results)
{
    ResultSet = new System.Collections.ArrayList();
    ResultSavingInfo = new System.Collections.ArrayList();
    int rank = 0;
    string CPath = init.getCollectionPath();
    foreach (ScoreDoc scoreDoc in results.ScoreDocs)
    {
        rank++;
        // retrieve the document from the 'ScoreDoc' object
        Lucene.Net.Documents.Document doc = searcher.Doc(scoreDoc.Doc);
        // The first line of the .W (abstract) field is used as the snippet.
        int firstNewLineIndx = doc.Get(".W").ToString().IndexOf("\n");
        string DocId = doc.Get(".I").ToString().Replace("\n", String.Empty);
        // Display block: "[id]. title ( Rrank )" then author, source, first
        // abstract line, and a file:// link into the collection directory.
        ResultSet.Add("[" + DocId + "]. " + doc.Get(".T").ToString().Replace("\n", String.Empty) + " ( R" + rank + " )\r\n" + "          " + doc.Get(".A").ToString().Replace("\n", String.Empty) + "\r\n" + "          " + doc.Get(".B").ToString().Replace("\n", String.Empty) + "\r\n" + "          " + doc.Get(".W").ToString().Substring(0, firstNewLineIndx) + " ...\r\n" + "          " + "file://" + CPath + "/" + DocId + ".txt");
        // TREC run line: query id, iteration, doc id, rank, score, run tag.
        ResultSavingInfo.Add(new string[] { QueryID, "Q0", DocId, rank.ToString(), scoreDoc.Score.ToString(), "9583131_9837809_9539361_ACH" });
    }
}
/// <summary>Populates this school instance from the stored fields of a Lucene document.</summary>
/// <param name="doc">The source document; the call is a no-op when null.</param>
public void SetProperty(Lucene.Net.Documents.Document doc)
{
    if (doc == null)
    {
        return;
    }

    // Numeric ids fall back to -1 when the field is missing or unparsable.
    this.SchoolId = XConvert.ToInt32(doc.Get("SchoolId"), -1);
    this.Type = (SchoolType)XConvert.ToByte(doc.Get("Type"));
    this.Name = doc.Get("Name");
    this.CnName = doc.Get("CnName");
    this.Pinyin = doc.Get("Pinyin");
    this.RegionId = XConvert.ToInt32(doc.Get("RegionId"), -1);
    this.StateId = XConvert.ToInt32(doc.Get("StateId"), -1);
    this.CityId = XConvert.ToInt32(doc.Get("CityId"), -1);
}
/// <summary>
/// Recursively indexes a file or directory tree. Only .html/.htm/.txt files
/// are indexed. In incremental mode (uidIter != null) stale documents are
/// deleted and unchanged ones skipped by walking the sorted "uid" term
/// enumeration in lock-step with the sorted file list.
/// </summary>
private static void IndexDocs(System.IO.FileInfo file)
{
    if (System.IO.Directory.Exists(file.FullName))
    {
        // if a directory
        System.String[] files = System.IO.Directory.GetFileSystemEntries(file.FullName); // list its files
        System.Array.Sort(files); // sort the files
        for (int i = 0; i < files.Length; i++)
        {
            // recursively index them
            // NOTE(review): GetFileSystemEntries already returns qualified paths;
            // Path.Combine then relies on its rooted-second-argument behavior —
            // confirm this holds when the root path is relative.
            IndexDocs(new System.IO.FileInfo(System.IO.Path.Combine(file.FullName, files[i])));
        }
    }
    else if (file.FullName.EndsWith(".html") || file.FullName.EndsWith(".htm") || file.FullName.EndsWith(".txt"))
    {
        // index .txt files
        if (uidIter != null)
        {
            System.String uid = HTMLDocument.Uid(file); // construct uid for doc
            // The reference comparison against the literal "uid" mirrors the
            // Java original — presumably field names are interned; confirm.
            while (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid" && String.CompareOrdinal(uidIter.Term().Text(), uid) < 0)
            {
                if (deleting)
                {
                    // delete stale docs
                    System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text()));
                    reader.DeleteDocuments(uidIter.Term());
                }
                uidIter.Next();
            }
            if (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid" && String.CompareOrdinal(uidIter.Term().Text(), uid) == 0)
            {
                uidIter.Next(); // keep matching docs
            }
            else if (!deleting)
            {
                // add new docs
                Document doc = HTMLDocument.Document(file);
                System.Console.Out.WriteLine("adding " + doc.Get("path"));
                writer.AddDocument(doc);
            }
        }
        else
        {
            // creating a new index
            Document doc = HTMLDocument.Document(file);
            System.Console.Out.WriteLine("adding " + doc.Get("path"));
            writer.AddDocument(doc); // add docs unconditionally
        }
    }
}
/// <summary>
/// Verifies that ParallelReader.Document honours MapFieldSelector: only the
/// requested fields are loaded, with values drawn from both parallel indexes.
/// </summary>
public virtual void TestDocument()
{
    Directory dir1 = GetDir1();
    Directory dir2 = GetDir2();
    ParallelReader pr = new ParallelReader();
    pr.Add(IndexReader.Open(dir1));
    pr.Add(IndexReader.Open(dir2));
    // Load documents through selectors restricted to different field subsets.
    Document doc11 = pr.Document(0, new MapFieldSelector(new System.String[] { "f1" }));
    Document doc24 = pr.Document(1, new MapFieldSelector(new System.Collections.ArrayList(new System.String[] { "f4" })));
    Document doc223 = pr.Document(1, new MapFieldSelector(new System.String[] { "f2", "f3" }));
    // Each loaded document exposes exactly the selected fields...
    Assert.AreEqual(1, doc11.GetFields().Count);
    Assert.AreEqual(1, doc24.GetFields().Count);
    Assert.AreEqual(2, doc223.GetFields().Count);
    // ...with the values stored in the parallel indexes.
    Assert.AreEqual("v1", doc11.Get("f1"));
    Assert.AreEqual("v2", doc24.Get("f4"));
    Assert.AreEqual("v2", doc223.Get("f2"));
    Assert.AreEqual("v2", doc223.Get("f3"));
}
/// <summary>
/// Verifies near-real-time reader behavior around AddIndexesNoOptimize: a
/// reader taken before the add goes stale, a fresh reader sees the combined
/// 200 docs (with the asserted pre/post Commit currency), and documents from
/// both source indexes are present.
/// </summary>
public virtual void TestAddIndexes()
{
    bool optimize = false;

    Directory dir1 = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.SetInfoStream(infoStream);
    // create the index
    CreateIndexNoClose(!optimize, "index1", writer);
    writer.Flush(false, true, true);

    // create a 2nd index
    Directory dir2 = new MockRAMDirectory();
    IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer2.SetInfoStream(infoStream);
    CreateIndexNoClose(!optimize, "index2", writer2);
    writer2.Close();

    // An NRT reader obtained before AddIndexes must report stale afterwards.
    IndexReader r0 = writer.GetReader();
    Assert.IsTrue(r0.IsCurrent());
    writer.AddIndexesNoOptimize(new Directory[] { dir2 });
    Assert.IsFalse(r0.IsCurrent());
    r0.Close();

    // A fresh NRT reader sees the combined index (2 x 100 docs) and the
    // asserted currency around Commit.
    IndexReader r1 = writer.GetReader();
    Assert.IsTrue(r1.IsCurrent());
    writer.Commit();
    Assert.IsTrue(r1.IsCurrent());
    Assert.AreEqual(200, r1.MaxDoc());
    int index2df = r1.DocFreq(new Term("indexname", "index2"));
    Assert.AreEqual(100, index2df);

    // verify the docs are from different indexes
    Document doc5 = r1.Document(5);
    Assert.AreEqual("index1", doc5.Get("indexname"));
    Document doc150 = r1.Document(150);
    Assert.AreEqual("index2", doc150.Get("indexname"));
    r1.Close();
    writer.Close();
    dir1.Close();
}
const string TEXT_PASSAGE = "Passage Text"; //Text fields. For the Passage Text column.

/// <summary>
/// Writes ranked search results to the UI: for each hit, its rank, stored URL
/// and passage text, separated by blank lines.
/// </summary>
/// <param name="results">Top docs returned by a previous search.</param>
private void DisplayTopDoc(Lucene.Net.Search.TopDocs results)
{
    int rank = 0;
    var searcher = myLuceneApp.CreateSearcher();
    Lucene.Net.Documents.Document doc = null;
    foreach (ScoreDoc scoreDoc in results.ScoreDocs)
    {
        rank++;
        // retrieve the document from the 'ScoreDoc' object
        doc = searcher.Doc(scoreDoc.Doc);
        string field_URL = doc.Get(TEXT_URL).ToString();
        string field_Text = doc.Get(TEXT_PASSAGE).ToString();
        // NOTE(review): "Rank #" is emitted twice per hit (before and after the
        // blank line) — confirm the duplicate is intentional.
        ResultsUpdate("Rank #" + rank);
        ResultsUpdate("\n");
        ResultsUpdate("Rank #" + rank);
        ResultsUpdate("URL: " + field_URL);
        ResultsUpdate("Passage Text: ");
        ResultsUpdate(field_Text);
        ResultsUpdate("\n");
    }
}
/// <summary>
/// Index maintenance worker: loops forever, draining the delete/update/add
/// queues into the IndexWriter and periodically flushing and optimizing.
/// </summary>
public void IndexWriteHandler()
{
    // Create the index writer; a fresh index is created only when
    // segments.gen does not exist yet in the index directory.
    writer = new Lucene.Net.Index.IndexWriter(Directorys.IndexDirectory, new ThesaurusAnalyzer(), !File.Exists(Directorys.IndexDirectory + "segments.gen"));
    // Cap the number of documents buffered in RAM.
    writer.SetMaxBufferedDocs(maxBufferLength);
    // Optimize once at startup.
    writer.Optimize();
    int count = 0;
    // Processing loop — runs until the hosting thread is torn down.
    while (true)
    {
        // Drain the delete queue (at most maxBufferLength operations per cycle).
        while (deleteQueue.Count > 0 && count < maxBufferLength)
        {
            count++;
            writer.DeleteDocuments(deleteQueue.Dequeue());
        }
        // Drain the update queue: replace existing documents by their "id" term.
        while (updateQueue.Count > 0 && count < maxBufferLength)
        {
            count++;
            Lucene.Net.Documents.Document doc = updateQueue.Dequeue();
            writer.UpdateDocument(new Lucene.Net.Index.Term("id", doc.Get("id")), doc);
        }
        // Drain the add queue.
        while (addQueue.Count > 0 && count < maxBufferLength)
        {
            count++;
            writer.AddDocument(addQueue.Dequeue());
        }
        // Flush buffered segments if any documents were written.
        if (writer.NumRamDocs() > 0)
        {
            writer.Flush();
        }
        // Once a full buffer's worth of operations was processed, optimize the
        // index and reset the counter; otherwise idle for 100 ms.
        if (count >= maxBufferLength)
        {
            writer.Optimize();
            count = 0;
        }
        else
        {
            Thread.Sleep(100);
        }
    }
}
/// <summary>
/// Exercises binary field storage on a Document: binary/stored/indexed flags,
/// retrieval via GetBinaryValue/GetBinaryValues, and removal by field name.
/// </summary>
public virtual void TestBinaryField()
{
    Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
    Fieldable stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO);
    Fieldable binaryFld = new Field("binary", (new System.Text.ASCIIEncoding()).GetBytes(binaryVal), Field.Store.YES);
    Fieldable binaryFld2 = new Field("binary", (new System.Text.ASCIIEncoding()).GetBytes(binaryVal2), Field.Store.YES);
    doc.Add(stringFld);
    doc.Add(binaryFld);
    Assert.AreEqual(2, doc.GetFieldsCount());
    // A byte[]-backed field is binary and stored but neither indexed nor tokenized.
    Assert.IsTrue(binaryFld.IsBinary());
    Assert.IsTrue(binaryFld.IsStored());
    Assert.IsFalse(binaryFld.IsIndexed());
    Assert.IsFalse(binaryFld.IsTokenized());
    // Round-trip: the stored bytes decode back to the original string value.
    System.String binaryTest = (new System.Text.ASCIIEncoding()).GetString(doc.GetBinaryValue("binary"));
    Assert.IsTrue(binaryTest.Equals(binaryVal));
    System.String stringTest = doc.Get("string");
    Assert.IsTrue(binaryTest.Equals(stringTest));
    doc.Add(binaryFld2);
    Assert.AreEqual(3, doc.GetFieldsCount());
    // Two same-named binary fields come back together from GetBinaryValues.
    // NOTE(review): values were encoded with ASCII but are decoded with UTF-8
    // below — equivalent only while binaryVal/binaryVal2 are pure ASCII.
    byte[][] binaryTests = doc.GetBinaryValues("binary");
    Assert.AreEqual(2, binaryTests.Length);
    binaryTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(binaryTests[0]));
    System.String binaryTest2 = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(binaryTests[1]));
    Assert.IsFalse(binaryTest.Equals(binaryTest2));
    Assert.IsTrue(binaryTest.Equals(binaryVal));
    Assert.IsTrue(binaryTest2.Equals(binaryVal2));
    // RemoveField removes a single field by name; RemoveFields removes all
    // fields sharing the name.
    doc.RemoveField("string");
    Assert.AreEqual(2, doc.GetFieldsCount());
    doc.RemoveFields("binary");
    Assert.AreEqual(0, doc.GetFieldsCount());
}
/// <summary>
/// Tests a right-open numeric int range (null upper bound) for the given
/// precision step: all documents at or above 'lower' must match, in index order.
/// </summary>
private void TestRightOpenRange(int precisionStep)
{
    System.String field = "field" + precisionStep;
    int count = 3000;
    // Lower bound lies a third of one distance step past document (count-1).
    int lower = (count - 1) * distance + (distance / 3) + startOffset;

    NumericRangeQuery<int> rangeQuery = NumericRangeQuery.NewIntRange(field, precisionStep, lower, null, true, true);
    TopDocs topDocs = searcher.Search(rangeQuery, null, noDocs, Sort.INDEXORDER, null);
    System.Console.Out.WriteLine("Found " + rangeQuery.TotalNumberOfTerms + " distinct terms in right open range for field '" + field + "'.");

    ScoreDoc[] scoreDocs = topDocs.ScoreDocs;
    Assert.IsNotNull(scoreDocs);
    Assert.AreEqual(noDocs - count, scoreDocs.Length, "Score doc count");

    // First and last hits must be the extremes of the matched range.
    Document firstDoc = searcher.Doc(scoreDocs[0].Doc, null);
    Assert.AreEqual(count * distance + startOffset, System.Int32.Parse(firstDoc.Get(field, null)), "First doc");

    Document lastDoc = searcher.Doc(scoreDocs[scoreDocs.Length - 1].Doc, null);
    Assert.AreEqual((noDocs - 1) * distance + startOffset, System.Int32.Parse(lastDoc.Get(field, null)), "Last doc");
}
/// <summary>
/// Builds a search-result item from a matched Lucene document, eagerly copying
/// the commonly displayed stored fields; remaining fields are lazy-loaded via
/// the public getters (see retained notes below).
/// </summary>
/// <param name="doc">The matched Lucene document.</param>
/// <param name="score">Relevance score of the hit (not used in this visible body).</param>
public IndexItem(Lucene.Net.Documents.Document doc, float score)
{
    luceneDoc = doc;
    docKey = luceneDoc.Get("Key");
    siteID = Convert.ToInt32(luceneDoc.Get("SiteID"), CultureInfo.InvariantCulture);
    PageName = luceneDoc.Get("PageName");
    ModuleTitle = luceneDoc.Get("ModuleTitle");
    Title = luceneDoc.Get("Title");
    intro = luceneDoc.Get("Intro");
    ViewPage = luceneDoc.Get("ViewPage");
    QueryStringAddendum = luceneDoc.Get("QueryStringAddendum");
    // Only override the default when the stored flag parses as a bool.
    bool useQString;
    if (bool.TryParse(luceneDoc.Get("UseQueryStringParams"), out useQString))
    {
        useQueryStringParams = useQString;
    }
    Author = luceneDoc.GetNullSafeString("Author");
    // the below are lazy loaded if accessed from the public getters
    //ViewRoles = luceneDoc.Get("ViewRoles");
    //ModuleViewRoles = luceneDoc.Get("ModuleRole");
    //SiteId = Convert.ToInt32(luceneDoc.Get("SiteID"), CultureInfo.InvariantCulture);
    //PageId = Convert.ToInt32(luceneDoc.Get("PageID"), CultureInfo.InvariantCulture);
    //PageIndex = Convert.ToInt32(luceneDoc.Get("PageIndex"), CultureInfo.InvariantCulture);
    //PageNumber = Convert.ToInt32(luceneDoc.Get("PageNumber"), CultureInfo.InvariantCulture);
    //string fid = luceneDoc.Get("FeatureId");
    //if ((fid != null)&&(fid.Length > 0))
    //{
    //    FeatureId = fid;
    //}
    //FeatureName = luceneDoc.Get("FeatureName");
    //ItemId = Convert.ToInt32(luceneDoc.Get("ItemID"), CultureInfo.InvariantCulture);
    //ModuleId = Convert.ToInt32(luceneDoc.Get("ModuleID"), CultureInfo.InvariantCulture);
    //DateTime pubBegin = DateTime.MinValue;
    //if (DateTime.TryParse(luceneDoc.Get("PublishBeginDate"), out pubBegin))
    //{
    //    this.publishBeginDate = pubBegin;
    //}
    //DateTime pubEnd = DateTime.MaxValue;
    //if (DateTime.TryParse(luceneDoc.Get("PublishEndDate"), out pubEnd))
    //{
    //    this.publishEndDate = pubEnd;
    //}
    //try
    //{
    //    long createdTicks = Convert.ToInt64(luceneDoc.Get("CreatedUtc"));
    //    CreatedUtc = new DateTime(createdTicks);
    //}
    //catch (FormatException) { }
    //try
    //{
    //    long lastModTicks = Convert.ToInt64(luceneDoc.Get("LastModUtc"));
    //    LastModUtc = new DateTime(lastModTicks);
    //}
    //catch (FormatException) { }
    //boost = doc.GetBoost();
    //boost = luceneDoc.Boost;
}