private void Initialize()
{
    _directory = LuceneFSDirectory.Open(new DirectoryInfo(Path.Combine(_basePath, Name))); // Creates the index directory
    using (CreateIndexWriter()) { }
}
/// <summary>
/// Initializes a new instance of the <see cref="Indexer"/> class.
/// </summary>
/// <param name="path">The path to the .xml.bz2 dump of Wikipedia.</param>
public Indexer(string path)
{
    filePath = path;
    indexPath = Path.ChangeExtension(path, ".idx");
    Lucene.Net.Store.Directory idxDir = FSDirectory.Open(new DirectoryInfo(indexPath));
    if (Directory.Exists(indexPath) && IndexReader.IndexExists(idxDir))
    {
        indexExists = true;
    }
    if (indexExists)
    {
        searcher = new IndexSearcher(idxDir, true);
    }
    textAnalyzer = GuessAnalyzer(filePath, out _IsRTL);
    queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "title", textAnalyzer);
    queryParser.SetDefaultOperator(QueryParser.Operator.AND);
    abortIndexing = false;
    multithreadedIndexing = (Environment.ProcessorCount > 1);
}
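// A minimal usage sketch (not part of the original class), assuming an existing index and the
// searcher/queryParser fields initialized above. The method name SearchTitles and the hit limit
// are illustrative assumptions; member names such as ScoreDocs and Doc follow the Lucene.Net
// 2.9/3.0 style used elsewhere in these snippets and may differ slightly between versions.
public IEnumerable<string> SearchTitles(string queryText, int maxHits)
{
    Query query = queryParser.Parse(queryText);         // parsed against the "title" field, AND by default
    TopDocs topDocs = searcher.Search(query, maxHits);  // search the existing, read-only index
    foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
    {
        yield return searcher.Doc(scoreDoc.Doc).Get("title");
    }
}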
public static void Main(System.String[] args)
{
    System.String usage = typeof(DeleteFiles) + " <unique_term>";
    if (args.Length == 0)
    {
        System.Console.Error.WriteLine("Usage: " + usage);
        System.Environment.Exit(1);
    }
    try
    {
        Directory directory = FSDirectory.Open(new System.IO.FileInfo("index"));
        IndexReader reader = IndexReader.Open(directory, false); // we don't want read-only because we are about to delete
        Term term = new Term("path", args[0]);
        int deleted = reader.DeleteDocuments(term);
        System.Console.Out.WriteLine("deleted " + deleted + " documents containing " + term);
        // one can also delete documents by their internal id:
        /*
         * for (int i = 0; i < reader.maxDoc(); i++) {
         *     System.out.println("Deleting document with id " + i);
         *     reader.delete(i);
         * }
         */
        reader.Close();
        directory.Close();
    }
    catch (System.Exception e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
public virtual void TestOptimizeOldIndex()
{
    int hasTested29 = 0;

    for (int i = 0; i < oldNames.Length; i++)
    {
        System.String dirName = Paths.CombinePath(Paths.ProjectRootDirectory, "test/core/index/index." + oldNames[i]);
        Unzip(dirName, oldNames[i]);
        System.String fullPath = FullDir(oldNames[i]);
        Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(fullPath));

        if (oldNames[i].StartsWith("29."))
        {
            assertCompressedFields29(dir, true);
            hasTested29++;
        }

        IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
        w.Optimize();
        w.Close();

        _TestUtil.CheckIndex(dir);

        if (oldNames[i].StartsWith("29."))
        {
            assertCompressedFields29(dir, false);
            hasTested29++;
        }

        dir.Close();
        RmDir(oldNames[i]);
    }

    Assert.AreEqual(4, hasTested29, "test for compressed field should have run 4 times");
}
static void Query()
{
    try
    {
        // Counterpart to the IndexWriter used above.
        IndexReader reader = IndexReader.Open(FSDirectory.Open(INDEX_DIR), true);
        Console.Out.WriteLine("Number of indexed docs: " + reader.NumDocs());
        IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(INDEX_DIR));

        // Construct a search query. The first string of the term is the field name,
        // the second is the content to match against. The field name "content" refers
        // to the "content" field we filled using the IndexWriter above.
        // A BooleanQuery can be used to combine several such queries.
        Term searchTerm = new Term("content", "defg");
        TermQuery query = new TermQuery(searchTerm);

        // Used to collect the highest scoring hits when searching.
        TopScoreDocCollector topDocColl = TopScoreDocCollector.Create(10, true);
        searcher.Search(query, topDocColl);

        // Collection of documents matched using the search query.
        TopDocs topDocs = topDocColl.TopDocs();
        Console.WriteLine("Number of hits: " + topDocs.TotalHits);

        // Traverse the search hits, printing each hit's document id and the document's name.
        foreach (var searchHit in topDocs.ScoreDocs)
        {
            Console.WriteLine(searchHit.Doc + ". " + searcher.Doc(searchHit.Doc).GetField("name").StringValue);
        }
    }
    catch (IOException excep)
    {
        Console.Out.WriteLine(excep.Message);
    }
}
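// A hedged sketch (not from the original source) of the BooleanQuery combination hinted at above:
// both clauses must match, so only documents containing "abc" and "defg" in the "content" field
// are returned. Depending on the Lucene.Net version, the occur flag is Occur.MUST or
// BooleanClause.Occur.MUST; the method name is illustrative only.
static Query BuildCombinedQuery()
{
    var combined = new BooleanQuery();
    combined.Add(new TermQuery(new Term("content", "abc")), Occur.MUST);   // must contain "abc"
    combined.Add(new TermQuery(new Term("content", "defg")), Occur.MUST);  // and must contain "defg"
    return combined;
}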
public static void Main(System.String[] args)
{
    System.String usage = typeof(IndexFiles) + " <root_directory>";
    if (args.Length == 0)
    {
        System.Console.Error.WriteLine("Usage: " + usage);
        System.Environment.Exit(1);
    }
    bool tmpBool;
    if (System.IO.File.Exists(INDEX_DIR.FullName))
    {
        tmpBool = true;
    }
    else
    {
        tmpBool = System.IO.Directory.Exists(INDEX_DIR.FullName);
    }
    if (tmpBool)
    {
        System.Console.Out.WriteLine("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
        System.Environment.Exit(1);
    }
    System.IO.FileInfo docDir = new System.IO.FileInfo(args[0]);
    bool tmpBool2;
    if (System.IO.File.Exists(docDir.FullName))
    {
        tmpBool2 = true;
    }
    else
    {
        tmpBool2 = System.IO.Directory.Exists(docDir.FullName);
    }
    if (!tmpBool2) // || !docDir.canRead()) // {{Aroush}} what is canRead() in C#?
    {
        System.Console.Out.WriteLine("Document directory '" + docDir.FullName + "' does not exist or is not readable, please check the path");
        System.Environment.Exit(1);
    }
    System.DateTime start = System.DateTime.Now;
    try
    {
        IndexWriter writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
        System.Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");
        IndexDocs(writer, docDir);
        System.Console.Out.WriteLine("Optimizing...");
        writer.Optimize();
        writer.Close();
        System.DateTime end = System.DateTime.Now;
        System.Console.Out.WriteLine((end - start).TotalMilliseconds + " total milliseconds");
    }
    catch (System.IO.IOException e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
/* Walk directory hierarchy in uid order, while keeping uid iterator from
 * existing index in sync. Mismatches indicate one of: (a) old documents to
 * be deleted; (b) unchanged documents, to be left alone; or (c) new
 * documents, to be indexed.
 */
private static void IndexDocs(System.IO.FileInfo file, System.IO.FileInfo index, bool create)
{
    if (!create)
    {
        // incrementally update
        reader = IndexReader.Open(FSDirectory.Open(index), false); // open existing index
        uidIter = reader.Terms(new Term("uid", "")); // init uid iterator
        IndexDocs(file);
        if (deleting)
        {
            // delete rest of stale docs
            while (uidIter.Term() != null && (System.Object) uidIter.Term().Field() == (System.Object) "uid")
            {
                System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text()));
                reader.DeleteDocuments(uidIter.Term());
                uidIter.Next();
            }
            deleting = false;
        }
        uidIter.Close(); // close uid iterator
        reader.Close();  // close existing index
    }
    else
    {
        // don't have an existing index
        IndexDocs(file);
    }
}
public virtual void TestThreadedOptimize_Renamed()
{
    Directory directory = new MockRAMDirectory();
    runTest(directory, false, new SerialMergeScheduler());
    runTest(directory, true, new SerialMergeScheduler());
    runTest(directory, false, new ConcurrentMergeScheduler());
    runTest(directory, true, new ConcurrentMergeScheduler());
    directory.Close();

    System.String tempDir = SupportClass.AppSettings.Get("tempDir", "");
    if (tempDir == null)
    {
        throw new System.IO.IOException("tempDir undefined, cannot run test");
    }
    System.String dirName = tempDir + "/luceneTestThreadedOptimize";
    directory = FSDirectory.Open(new System.IO.FileInfo(dirName));
    runTest(directory, false, new SerialMergeScheduler());
    runTest(directory, true, new SerialMergeScheduler());
    runTest(directory, false, new ConcurrentMergeScheduler());
    runTest(directory, true, new ConcurrentMergeScheduler());
    directory.Close();
    _TestUtil.RmDir(dirName);
}
/* Walk directory hierarchy in uid order, while keeping uid iterator from
 * existing index in sync. Mismatches indicate one of: (a) old documents to
 * be deleted; (b) unchanged documents, to be left alone; or (c) new
 * documents, to be indexed.
 */
private static void IndexDocs(IndexWriter writer, DirectoryInfo file, DirectoryInfo index, Operation operation)
{
    if (operation == Operation.CompleteReindex)
    {
        // Perform a full reindexing.
        IndexDirectory(writer, null, file, operation);
    }
    else
    {
        // Perform an incremental reindexing.
        using (var reader = IndexReader.Open(FSDirectory.Open(index), true)) // open existing index
        using (var uidIter = reader.Terms(new Term("uid", "")))              // init uid iterator
        {
            IndexDirectory(writer, uidIter, file, operation);

            if (operation == Operation.RemoveStale)
            {
                // Delete the remaining, presumably stale, documents. This works because the
                // call to IndexDirectory above should have positioned uidIter past any uids
                // matching existing documents. Any remaining uid belongs to a document that
                // has been deleted since it was indexed.
                while (uidIter.Term != null && uidIter.Term.Field == "uid")
                {
                    Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term.Text));
                    writer.DeleteDocuments(uidIter.Term);
                    uidIter.Next();
                }
            }
        }
    }
}
static void Main(string[] args)
{
    var docDir = new DirectoryInfo(@"C:\clsdev");
    indexUpdateTime = Directory.GetLastWriteTime(indexLocation);
    try
    {
        using (var writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new CustomAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");
            IndexDirectory(writer, docDir);
            Console.Out.WriteLine("Optimizing...");
            writer.Optimize();
            writer.Commit();
        }
        var end = DateTime.Now;
        //Console.Out.WriteLine(end.Millisecond - start.Millisecond + " total milliseconds");
    }
    catch (IOException e)
    {
        Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
    deleteDocs();
}
void BuildIndex()
{
    IndexWriter writer = new IndexWriter(FSDirectory.Open(m_indexDir), new SnowballAnalyzer(Version.LUCENE_30, "English"), true, IndexWriter.MaxFieldLength.UNLIMITED);
    var documents = new List<Document>();
    foreach (var fileInfo in new DirectoryInfo(m_xmlRoot).EnumerateFiles())
    {
        // Read and parse XML file
        string xmlText = File.ReadAllText(m_xmlRoot + fileInfo.Name);
        string topicName = fileInfo.Name.Substring(0, fileInfo.Name.IndexOf('.'));
        XmlReader reader = XmlReader.Create(new StringReader(xmlText));

        // Create index data from XML
        while (reader.Read())
        {
            var doc = new Document();
            doc.Add(new Field("Topic", topicName, Field.Store.YES, Field.Index.ANALYZED));
            for (int i = 0; i < reader.AttributeCount; i++)
            {
                reader.MoveToAttribute(i);
                doc.Add(new Field(reader.Name, reader.Value, Field.Store.YES, Field.Index.ANALYZED));
            }
            writer.AddDocument(doc);
        }
    }
    writer.Optimize();
    writer.Commit();
    writer.Dispose();
}
static void Index()
{
    // Used to write out an index for later usage.
    IndexWriter writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED);
    try
    {
        // A series of fields, used for indexing and searching.
        Document doc1 = new Document();
        // https://lucenenet.apache.org/docs/3.0.3/db/d65/class_lucene_1_1_net_1_1_documents_1_1_document.html#details
        // https://lucene.apache.org/core/3_0_3/api/core/org/apache/lucene/document/Field.Index.html
        // A field contains a name and a string value.
        // If a field is stored, it will be returned with the document when a search lands a hit on it.
        // A field that is not indexed is not searchable.
        doc1.Add(new Field("name", "doc 1", Field.Store.YES, Field.Index.NO));
        // This field is indexed, meaning it is searchable using a query, and analyzed, meaning it is
        // run through an analyzer (which splits it into a searchable, tokenized form).
        // Alternatively, a field may be indexed but not analyzed, in which case it is stored as a
        // single token. This may be useful for single-value fields like names or ids.
        doc1.Add(new Field("content", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
        // Finally, we add the document to the index writer.
        writer.AddDocument(doc1);

        Document doc2 = new Document();
        doc2.Add(new Field("name", "doc 2", Field.Store.YES, Field.Index.NO));
        doc2.Add(new Field("content", "abc defg defg defg", Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc2);

        Document doc3 = new Document();
        doc3.Add(new Field("name", "doc 3", Field.Store.YES, Field.Index.NO));
        doc3.Add(new Field("content", "qwerty defg defg", Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc3);

        Console.Out.WriteLine("Optimizing...");
        // Optimizes the created index for fast search. Ideally called once the index has been fully built.
        writer.Optimize();
        // Merges all pending changes into the index. After this call the changes are permanently written.
        writer.Commit();
    }
    catch (IOException excep)
    {
        Console.WriteLine(excep.Message);
    }
    writer.Dispose();
}
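// A hedged illustration of the "indexed but not analyzed" case described in the comments above:
// an id-like field stored as a single token, so it can only be matched exactly (e.g. by a TermQuery).
// The field name "sku" and its value are made up for this example; the snippet would sit inside
// Index() next to the other AddDocument calls.
Document doc4 = new Document();
doc4.Add(new Field("sku", "ABC-123", Field.Store.YES, Field.Index.NOT_ANALYZED)); // exact-match id field
doc4.Add(new Field("content", "some searchable text", Field.Store.YES, Field.Index.ANALYZED));
writer.AddDocument(doc4);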
/* Open pre-lockless index, add docs, do a delete &
 * setNorm, and search */
public virtual void ChangeIndexNoAdds(System.String dirName)
{
    dirName = FullDir(dirName);
    Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

    // make sure searching sees right # hits
    IndexSearcher searcher = new IndexSearcher(dir, true);
    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length, "wrong number of hits");
    Document d = searcher.Doc(hits[0].Doc);
    Assert.AreEqual("21", d.Get("id"), "wrong first document");
    searcher.Close();

    // make sure we can do a delete & setNorm against this
    // pre-lockless segment:
    IndexReader reader = IndexReader.Open(dir, false);
    Term searchTerm = new Term("id", "6");
    int delCount = reader.DeleteDocuments(searchTerm);
    Assert.AreEqual(1, delCount, "wrong delete count");
    reader.SetNorm(22, "content", (float) 2.0);
    reader.Close();

    // make sure they "took":
    searcher = new IndexSearcher(dir, true);
    hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(33, hits.Length, "wrong number of hits");
    d = searcher.Doc(hits[0].Doc);
    Assert.AreEqual("22", d.Get("id"), "wrong first document");
    TestHits(hits, 33, searcher.IndexReader);
    searcher.Close();

    // optimize
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(dir, true);
    hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(33, hits.Length, "wrong number of hits");
    d = searcher.Doc(hits[0].Doc);
    Assert.AreEqual("22", d.Get("id"), "wrong first document");
    TestHits(hits, 33, searcher.IndexReader);
    searcher.Close();

    dir.Close();
}
public virtual void TestPayloadsEncoding()
{
    rnd = NewRandom();

    // first perform the test using a RAMDirectory
    Directory dir = new RAMDirectory();
    PerformTest(dir);

    // now use a FSDirectory and repeat same test
    System.IO.DirectoryInfo dirName = _TestUtil.GetTempDir("test_payloads");
    dir = FSDirectory.Open(dirName);
    PerformTest(dir);
    _TestUtil.RmDir(dirName);
}
public virtual void TestOptimizeOldIndex()
{
    for (int i = 0; i < oldNames.Length; i++)
    {
        System.String dirName = "../../test/core/index/index." + oldNames[i];
        Unzip(dirName, oldNames[i]);
        System.String fullPath = FullDir(oldNames[i]);
        Directory dir = FSDirectory.Open(new System.IO.FileInfo(fullPath));
        IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
        w.Optimize();
        w.Close();
        _TestUtil.CheckIndex(dir);
        dir.Close();
        RmDir(oldNames[i]);
    }
}
public IndexController()
{
    m_xmlRoot = Config.Get().m_pathToXML;
    if (!Directory.Exists(m_xmlRoot + @"..\index"))
    {
        Directory.CreateDirectory(m_xmlRoot + @"..\index");
    }
    m_indexDir = new DirectoryInfo(m_xmlRoot + @"..\index");
    if (m_indexDir.GetFiles().Length == 0)
    {
        BuildIndex();
    }
    m_searcher = new IndexSearcher(FSDirectory.Open(m_indexDir));
}
private void TestIndexInternal(int maxWait)
{
    bool create = true;
    //Directory rd = new RAMDirectory();
    // work on disk to make sure potential lock problems are tested:
    System.String tempDir = System.IO.Path.GetTempPath();
    if (tempDir == null)
    {
        throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
    }
    System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex"));
    Directory rd = FSDirectory.Open(indexDir);
    IndexThread.id = 0;
    IndexThread.idStack.Clear();
    IndexModifier index = new IndexModifier(rd, new StandardAnalyzer(), create);
    IndexThread thread1 = new IndexThread(index, maxWait, 1);
    thread1.Start();
    IndexThread thread2 = new IndexThread(index, maxWait, 2);
    thread2.Start();
    while (thread1.IsAlive || thread2.IsAlive)
    {
        System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 100));
    }
    index.Optimize();
    int added = thread1.added + thread2.added;
    int deleted = thread1.deleted + thread2.deleted;
    Assert.AreEqual(added - deleted, index.DocCount());
    index.Close();
    try
    {
        index.Close();
        Assert.Fail();
    }
    catch (System.SystemException e)
    {
        // expected exception
    }
    RmDir(indexDir);
}
public static void Main(String[] args)
{
    var usage = typeof(IndexFiles) + " <root_directory>";
    if (args.Length == 0)
    {
        Console.Error.WriteLine("Usage: " + usage);
        Environment.Exit(1);
    }
    if (File.Exists(INDEX_DIR.FullName) || Directory.Exists(INDEX_DIR.FullName))
    {
        Console.Out.WriteLine("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
        Environment.Exit(1);
    }
    var docDir = new DirectoryInfo(args[0]);
    var docDirExists = File.Exists(docDir.FullName) || Directory.Exists(docDir.FullName);
    if (!docDirExists) // || !docDir.canRead()) // {{Aroush}} what is canRead() in C#?
    {
        Console.Out.WriteLine("Document directory '" + docDir.FullName + "' does not exist or is not readable, please check the path");
        Environment.Exit(1);
    }
    var start = DateTime.Now;
    try
    {
        using (var writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED))
        {
            Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");
            IndexDirectory(writer, docDir);
            Console.Out.WriteLine("Optimizing...");
            writer.Optimize();
            writer.Commit();
        }
        var end = DateTime.Now;
        Console.Out.WriteLine((end - start).TotalMilliseconds + " total milliseconds");
    }
    catch (IOException e)
    {
        Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
public override void SetUp()
{
    base.SetUp();
    workDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "TestDoc"));
    System.IO.Directory.CreateDirectory(workDir.FullName);
    indexDir = new System.IO.FileInfo(System.IO.Path.Combine(workDir.FullName, "testIndex"));
    System.IO.Directory.CreateDirectory(indexDir.FullName);
    Directory directory = FSDirectory.Open(indexDir);
    directory.Close();
    files = new System.Collections.ArrayList();
    files.Add(CreateOutput("test.txt", "This is the first test file"));
    files.Add(CreateOutput("test2.txt", "This is the second test file"));
}
public virtual void TestSnapshotDeletionPolicy_Renamed()
{
    System.IO.DirectoryInfo dir = _TestUtil.GetTempDir(INDEX_PATH);
    try
    {
        Directory fsDir = FSDirectory.Open(dir);
        RunTest(fsDir);
        fsDir.Close();
    }
    finally
    {
        _TestUtil.RmDir(dir);
    }

    MockRAMDirectory dir2 = new MockRAMDirectory();
    RunTest(dir2);
    dir2.Close();
}
public virtual void TestSnapshotDeletionPolicy_Renamed()
{
    System.IO.FileInfo dir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), INDEX_PATH));
    try
    {
        // Sometimes past test leaves the dir
        _TestUtil.RmDir(dir);
        Directory fsDir = FSDirectory.Open(dir);
        RunTest(fsDir);
        fsDir.Close();
    }
    finally
    {
        _TestUtil.RmDir(dir);
    }

    MockRAMDirectory dir2 = new MockRAMDirectory();
    RunTest(dir2);
    dir2.Close();
}
public static void deleteDocs()
{
    IndexReader reader = IndexReader.Open(FSDirectory.Open(INDEX_DIR), true);
    var writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new CustomAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < reader.NumDocs(); i++)
    {
        Document doc = reader.Document(i);
        String docId = doc.Get("path");
        if (!File.Exists(docId))
        {
            //var writer = new IndexWriter(INDEX_DIR, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
            var term = new Term("path", docId);
            writer.DeleteDocuments(term);
            Console.WriteLine("Deleting " + docId);
        }
    }
    writer.Optimize();
    writer.Commit();
    writer.Dispose();
}
/// <summary>
/// Indexes all files under dataDir and stores the resulting index under indexDir.
/// </summary>
/// <param name="indexDir"></param>
/// <param name="dataDir"></param>
public static void Index(string indexDir, string dataDir)
{
    if (Directory.Exists(indexDir))
    {
        Console.Out.WriteLine("Cannot save index to '" + indexDir + "' directory, please delete it first");
        Environment.Exit(1);
    }
    try
    {
        using (var writer = new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexDir)), new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            Console.Out.WriteLine("Indexing to directory '" + indexDir + "'...");
            IndexDirectory(writer, new DirectoryInfo(dataDir));
            writer.Commit();
        }
    }
    catch (IOException e)
    {
        Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
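// IndexDirectory is referenced above but not shown in these snippets. A minimal sketch of what such
// a helper might look like follows; the field names ("path", "contents"), the recursion strategy and
// reading every file as plain text are assumptions for illustration only.
private static void IndexDirectory(IndexWriter writer, DirectoryInfo directory)
{
    foreach (var subDirectory in directory.GetDirectories())
    {
        IndexDirectory(writer, subDirectory); // recurse into sub-directories
    }
    foreach (var file in directory.GetFiles())
    {
        var doc = new Document();
        // Store the full path as a single token so it can later be matched exactly by a TermQuery.
        doc.Add(new Field("path", file.FullName, Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Index the file contents for full-text search without storing them.
        doc.Add(new Field("contents", File.ReadAllText(file.FullName), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
}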
public static void Main(System.String[] args)
{
    var usage = typeof(DeleteFiles) + " <unique_term>";
    if (args.Length == 0)
    {
        Console.Error.WriteLine("Usage: " + usage);
        Environment.Exit(1);
    }
    try
    {
        // We don't want a read-only reader because we are about to delete.
        using (var directory = FSDirectory.Open("index"))
        using (var reader = IndexReader.Open(directory, false))
        {
            var term = new Term("path", args[0]);
            var deleted = reader.DeleteDocuments(term);
            Console.Out.WriteLine("deleted " + deleted + " documents containing " + term);
            // one can also delete documents by their internal id:
            /*
             * for (int i = 0; i < reader.MaxDoc; i++) {
             *     Console.Out.WriteLine("Deleting document with id " + i);
             *     reader.DeleteDocument(i);
             * }
             */
            reader.Commit();
        }
    }
    catch (Exception e)
    {
        Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
public virtual void CreateIndex(System.String dirName, bool doCFS)
{
    RmDir(dirName);
    dirName = FullDir(dirName);
    Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.UseCompoundFile = doCFS;
    writer.SetMaxBufferedDocs(10);
    for (int i = 0; i < 35; i++)
    {
        AddDoc(writer, i);
    }
    Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count");
    writer.Close();

    // open fresh writer so we get no prx file in the added segment
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
    writer.UseCompoundFile = doCFS;
    writer.SetMaxBufferedDocs(10);
    AddNoProxDoc(writer);
    writer.Close();

    // Delete one doc so we get a .del file:
    IndexReader reader = IndexReader.Open(dir, false);
    Term searchTerm = new Term("id", "7");
    int delCount = reader.DeleteDocuments(searchTerm);
    Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

    // Set one norm so we get a .s0 file:
    reader.SetNorm(21, "content", (float) 1.5);
    reader.Close();
}
public static void Main(String[] args)
{
    String usage = "Usage:\t" + typeof(SearchFiles) + " [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
    usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
    if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
    {
        Console.Out.WriteLine(usage);
        Environment.Exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    bool raw = false;
    String normsField = null;
    bool paging = true;
    int hitsPerPage = 10;

    for (int i = 0; i < args.Length; i++)
    {
        if ("-index".Equals(args[i]))
        {
            index = args[i + 1];
            i++;
        }
        else if ("-field".Equals(args[i]))
        {
            field = args[i + 1];
            i++;
        }
        else if ("-queries".Equals(args[i]))
        {
            queries = args[i + 1];
            i++;
        }
        else if ("-repeat".Equals(args[i]))
        {
            repeat = Int32.Parse(args[i + 1]);
            i++;
        }
        else if ("-raw".Equals(args[i]))
        {
            raw = true;
        }
        else if ("-norms".Equals(args[i]))
        {
            normsField = args[i + 1];
            i++;
        }
        else if ("-paging".Equals(args[i]))
        {
            if (args[i + 1].Equals("false"))
            {
                paging = false;
            }
            else
            {
                hitsPerPage = Int32.Parse(args[i + 1]);
                if (hitsPerPage == 0)
                {
                    paging = false;
                }
            }
            i++;
        }
    }

    IndexReader indexReader = null;
    try
    {
        indexReader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo(index)), true); // only searching, so read-only=true
        if (normsField != null)
        {
            indexReader = new OneNormsReader(indexReader, normsField);
        }
        Searcher searcher = new IndexSearcher(indexReader);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        StreamReader queryReader;
        if (queries != null)
        {
            queryReader = new StreamReader(new StreamReader(queries, Encoding.Default).BaseStream, new StreamReader(queries, Encoding.Default).CurrentEncoding);
        }
        else
        {
            queryReader = new StreamReader(new StreamReader(Console.OpenStandardInput(), Encoding.UTF8).BaseStream, new StreamReader(Console.OpenStandardInput(), Encoding.UTF8).CurrentEncoding);
        }
        var parser = new QueryParser(Version.LUCENE_30, field, analyzer);
        while (true)
        {
            if (queries == null)
            {
                // prompt the user
                Console.Out.WriteLine("Enter query: ");
            }
            String line = queryReader.ReadLine();
            if (line == null || line.Length == -1)
            {
                break;
            }
            line = line.Trim();
            if (line.Length == 0)
            {
                break;
            }
            Query query = parser.Parse(line);
            Console.Out.WriteLine("Searching for: " + query.ToString(field));
            if (repeat > 0)
            {
                // repeat & time as benchmark
                DateTime start = DateTime.Now;
                for (int i = 0; i < repeat; i++)
                {
                    searcher.Search(query, null, 100);
                }
                DateTime end = DateTime.Now;
                Console.Out.WriteLine("Time: " + (end - start).TotalMilliseconds + "ms");
            }
            if (paging)
            {
                DoPagingSearch(queryReader, searcher, query, hitsPerPage, raw, queries == null);
            }
            else
            {
                DoStreamingSearch(searcher, query);
            }
        }
        queryReader.Close();
    }
    finally
    {
        if (indexReader != null)
        {
            indexReader.Dispose();
        }
    }
}
public virtual void TestNorms()
{
    // tmp dir
    System.String tempDir = System.IO.Path.GetTempPath();
    if (tempDir == null)
    {
        throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
    }

    // test with a single index: index1
    System.IO.FileInfo indexDir1 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex1"));
    Directory dir1 = FSDirectory.Open(indexDir1);
    IndexWriter.Unlock(dir1);
    norms = new System.Collections.ArrayList();
    modifiedNorms = new System.Collections.ArrayList();
    CreateIndex(dir1);
    DoTestNorms(dir1);

    // test with a single index: index2
    System.Collections.ArrayList norms1 = norms;
    System.Collections.ArrayList modifiedNorms1 = modifiedNorms;
    int numDocNorms1 = numDocNorms;
    norms = new System.Collections.ArrayList();
    modifiedNorms = new System.Collections.ArrayList();
    numDocNorms = 0;
    System.IO.FileInfo indexDir2 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex2"));
    Directory dir2 = FSDirectory.Open(indexDir2);
    CreateIndex(dir2);
    DoTestNorms(dir2);

    // add index1 and index2 to a third index: index3
    System.IO.FileInfo indexDir3 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex3"));
    Directory dir3 = FSDirectory.Open(indexDir3);
    CreateIndex(dir3);
    IndexWriter iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(5);
    iw.SetMergeFactor(3);
    iw.AddIndexes(new Directory[] { dir1, dir2 });
    iw.Close();
    norms1.AddRange(norms);
    norms = norms1;
    modifiedNorms1.AddRange(modifiedNorms);
    modifiedNorms = modifiedNorms1;
    numDocNorms += numDocNorms1;

    // test with index3
    VerifyIndex(dir3);
    DoTestNorms(dir3);

    // now with optimize
    iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(5);
    iw.SetMergeFactor(3);
    iw.Optimize();
    iw.Close();
    VerifyIndex(dir3);

    dir1.Close();
    dir2.Close();
    dir3.Close();
}
public virtual void TestExactFileNames()
{
    System.String outputDir = "lucene.backwardscompat0.index";
    RmDir(outputDir);
    try
    {
        Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(FullDir(outputDir)));
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.SetRAMBufferSizeMB(16.0);
        for (int i = 0; i < 35; i++)
        {
            AddDoc(writer, i);
        }
        Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count");
        writer.Close();

        // Delete one doc so we get a .del file:
        IndexReader reader = IndexReader.Open(dir, false);
        Term searchTerm = new Term("id", "7");
        int delCount = reader.DeleteDocuments(searchTerm);
        Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

        // Set one norm so we get a .s0 file:
        reader.SetNorm(21, "content", (float) 1.5);
        reader.Close();

        // The numbering of fields can vary depending on which
        // JRE is in use. On some JREs we see content bound to
        // field 0; on others, field 1. So, here we have to
        // figure out which field number corresponds to
        // "content", and then set our expected file names below
        // accordingly:
        CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
        FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
        int contentFieldIndex = -1;
        for (int i = 0; i < fieldInfos.Size(); i++)
        {
            FieldInfo fi = fieldInfos.FieldInfo(i);
            if (fi.name_ForNUnit.Equals("content"))
            {
                contentFieldIndex = i;
                break;
            }
        }
        cfsReader.Close();
        Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _2.cfs segment");

        // Now verify file names:
        System.String[] expected;
        expected = new System.String[] { "_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen" };
        System.String[] actual = dir.ListAll();
        System.Array.Sort(expected);
        System.Array.Sort(actual);
        if (!CollectionsHelper.Equals(expected, actual))
        {
            Assert.Fail("incorrect filenames in index: expected:\n " + AsString(expected) + "\n actual:\n " + AsString(actual));
        }
        dir.Close();
    }
    finally
    {
        RmDir(outputDir);
    }
}
public virtual void searchIndex(System.String dirName, System.String oldName)
{
    //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer());
    //Query query = parser.parse("handle:1");

    dirName = FullDir(dirName);
    Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));
    IndexSearcher searcher = new IndexSearcher(dir, true);
    IndexReader reader = searcher.IndexReader;

    _TestUtil.CheckIndex(dir);

    for (int i = 0; i < 35; i++)
    {
        if (!reader.IsDeleted(i))
        {
            Document d = reader.Document(i);
            var fields = d.GetFields();
            if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
            {
                if (d.GetField("content3") == null)
                {
                    int numFields = oldName.StartsWith("29.") ? 7 : 5;
                    Assert.AreEqual(numFields, fields.Count);
                    Field f = d.GetField("id");
                    Assert.AreEqual("" + i, f.StringValue);

                    f = (Field) d.GetField("utf8");
                    Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                    f = (Field) d.GetField("autf8");
                    Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                    f = (Field) d.GetField("content2");
                    Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                    f = (Field) d.GetField("fie\u2C77ld");
                    Assert.AreEqual("field with non-ascii name", f.StringValue);
                }
            }
        }
        else
        {
            // Only ID 7 is deleted
            Assert.AreEqual(7, i);
        }
    }

    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

    // First document should be #21 since its norm was increased:
    Document d2 = searcher.Doc(hits[0].Doc);
    Assert.AreEqual("21", d2.Get("id"), "didn't get the right document first");

    TestHits(hits, 34, searcher.IndexReader);

    if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
    {
        // Test on indices >= 2.3
        hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
        Assert.AreEqual(34, hits.Length);
        hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
        Assert.AreEqual(34, hits.Length);
        hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
        Assert.AreEqual(34, hits.Length);
    }

    searcher.Close();
    dir.Close();
}
/* Open pre-lockless index, add docs, do a delete &
 * setNorm, and search */
public virtual void ChangeIndexWithAdds(System.String dirName)
{
    System.String origDirName = dirName;
    dirName = FullDir(dirName);
    Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

    // open writer
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);

    // add 10 docs
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer, 35 + i);
    }

    // make sure writer sees right total -- writer seems not to know about deletes in .del?
    int expected;
    if (Compare(origDirName, "24") < 0)
    {
        expected = 45;
    }
    else
    {
        expected = 46;
    }
    Assert.AreEqual(expected, writer.MaxDoc(), "wrong doc count");
    writer.Close();

    // make sure searching sees right # hits
    IndexSearcher searcher = new IndexSearcher(dir, true);
    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Document d = searcher.Doc(hits[0].Doc);
    Assert.AreEqual("21", d.Get("id"), "wrong first document");
    TestHits(hits, 44, searcher.IndexReader);
    searcher.Close();

    // make sure we can do delete & setNorm against this
    // pre-lockless segment:
    IndexReader reader = IndexReader.Open(dir, false);
    Term searchTerm = new Term("id", "6");
    int delCount = reader.DeleteDocuments(searchTerm);
    Assert.AreEqual(1, delCount, "wrong delete count");
    reader.SetNorm(22, "content", (float) 2.0);
    reader.Close();

    // make sure they "took":
    searcher = new IndexSearcher(dir, true);
    hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(43, hits.Length, "wrong number of hits");
    d = searcher.Doc(hits[0].Doc);
    Assert.AreEqual("22", d.Get("id"), "wrong first document");
    TestHits(hits, 43, searcher.IndexReader);
    searcher.Close();

    // optimize
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(dir, true);
    hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(43, hits.Length, "wrong number of hits");
    d = searcher.Doc(hits[0].Doc);
    TestHits(hits, 43, searcher.IndexReader);
    Assert.AreEqual("22", d.Get("id"), "wrong first document");
    searcher.Close();
    dir.Close();
}