protected internal FSDirectory GetDirectory(bool create)
{
    if (directory == null)
    {
        directory = FSDirectory.GetDirectory(SupportClass.AppSettings.Get("test.index.dir", "."), create);
    }
    return directory;
}
public FaultyFSDirectory(System.IO.DirectoryInfo dir)
{
    fsDir = FSDirectory.Open(dir);
    internalLockFactory = fsDir.LockFactory;
}
public FaultyFSDirectory(System.IO.FileInfo dir)
{
    fsDir = FSDirectory.Open(dir);
    lockFactory = fsDir.GetLockFactory();
}
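// Usage sketch for the faulty directory above in a unit test. Hedged: only the
// constructors are shown in these snippets, so StartFailing() below is a
// hypothetical switch that makes subsequent reads throw IOException; the path
// and query are placeholders.
[Test]
public void TestSearcherSurfacesIOFaults()
{
    var faulty = new FaultyFSDirectory(new System.IO.DirectoryInfo("testFaultyIndex"));

    // Build a small index while the directory still behaves normally.
    var writer = new IndexWriter(faulty, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    var doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    // faulty.StartFailing();  // hypothetical: flip the wrapper into failure mode
    // A search should now surface the simulated IOException, e.g.:
    // Assert.Throws<System.IO.IOException>(
    //     () => new IndexSearcher(faulty, true).Search(new TermQuery(new Term("content", "aaa")), 10));
}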
public virtual void TestNorms()
{
    // tmp dir
    System.String tempDir = System.IO.Path.GetTempPath();
    if (tempDir == null)
    {
        throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
    }

    // test with a single index: index1
    System.IO.FileInfo indexDir1 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex1"));
    Directory dir1 = FSDirectory.Open(indexDir1);
    IndexWriter.Unlock(dir1);

    norms = new System.Collections.ArrayList();
    modifiedNorms = new System.Collections.ArrayList();

    CreateIndex(dir1);
    DoTestNorms(dir1);

    // test with a single index: index2
    System.Collections.ArrayList norms1 = norms;
    System.Collections.ArrayList modifiedNorms1 = modifiedNorms;
    int numDocNorms1 = numDocNorms;

    norms = new System.Collections.ArrayList();
    modifiedNorms = new System.Collections.ArrayList();
    numDocNorms = 0;

    System.IO.FileInfo indexDir2 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex2"));
    Directory dir2 = FSDirectory.Open(indexDir2);

    CreateIndex(dir2);
    DoTestNorms(dir2);

    // add index1 and index2 to a third index: index3
    System.IO.FileInfo indexDir3 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex3"));
    Directory dir3 = FSDirectory.Open(indexDir3);

    CreateIndex(dir3);
    IndexWriter iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(5);
    iw.SetMergeFactor(3);
    iw.AddIndexes(new Directory[] { dir1, dir2 });
    iw.Close();

    norms1.AddRange(norms);
    norms = norms1;
    modifiedNorms1.AddRange(modifiedNorms);
    modifiedNorms = modifiedNorms1;
    numDocNorms += numDocNorms1;

    // test with index3
    VerifyIndex(dir3);
    DoTestNorms(dir3);

    // now with optimize
    iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(5);
    iw.SetMergeFactor(3);
    iw.Optimize();
    iw.Close();
    VerifyIndex(dir3);

    dir1.Close();
    dir2.Close();
    dir3.Close();
}
// Methods
public RemoteIndex(string name, string folder)
{
    this._name = name;
    this._folder = folder;
    this._directory = this.CreateDirectory(FileUtil.MapPath(FileUtil.MakePath(Config.RemoteIndexingServer, folder)));
}
/// <summary>
/// Creates the index for the bz2 file on a separate thread.
/// </summary>
private void CreateIndexAsync()
{
    bool failed = false;
    startTime = DateTime.Now;

    try
    {
        // Close any searchers
        if (searcher != null)
        {
            searcher.Close();
            searcher = null;
        }
        indexExists = false;

        // Create the index writers
        FSDirectory idxDir = FSDirectory.Open(new DirectoryInfo(indexPath));
        indexer = new IndexWriter(idxDir, textAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        memoryIndexer = new IndexWriter(new RAMDirectory(), textAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

        memoryIndexer.SetMaxBufferedDocs(1000);
        memoryIndexer.SetMergeFactor(100);
        indexer.SetMaxBufferedDocs(1000);
        indexer.SetMergeFactor(100);

        // Locate the bzip2 blocks in the file
        LocateBlocks();

        // Twice the size of the first block, but no less than 100 bytes
        long bufSize = ((ends[0] - beginnings[0]) / 8) * 2 + 100;

        // Buffers for the current and next block
        blockBuf = new byte[bufSize];
        charBuf = new char[bufSize];

        // Whether there was a Wiki topic carryover from the current block to the next one
        char[] charCarryOver = new char[0];

        // The length of the currently loaded data
        long loadedLength = 0;

        StringBuilder sb = new StringBuilder();

        // Start indexing
        startTime = DateTime.Now;
        elapsed = new TimeSpan(0);
        ReportProgress(0, IndexingProgress.State.Running, Properties.Resources.ProgressIndexing);

        for (long i = 0; i < totalBlocks && !abortIndexing; i++)
        {
            ReportProgress((int)((double)(i * 100) / (double)totalBlocks), IndexingProgress.State.Running, String.Empty);

            #region Indexing logic

            loadedLength = LoadBlock(beginnings[i], ends[i], ref blockBuf);
            if (charBuf.Length < blockBuf.Length)
            {
                charBuf = new char[blockBuf.Length];
            }

            int bytesUsed = 0;
            int charsUsed = 0;
            bool completed = false;

            // Decode the UTF8 text
            utf8.Convert(blockBuf, 0, (int)loadedLength, charBuf, 0, charBuf.Length, i == totalBlocks - 1, out bytesUsed, out charsUsed, out completed);
            if (!completed)
            {
                throw new Exception(Properties.Resources.UTFDecoderError);
            }

            // Construct the current string
            sb.Length = 0;
            if (charCarryOver.Length > 0)
            {
                sb.Append(charCarryOver);
            }
            sb.Append(charBuf, 0, charsUsed);

            int carryOverLength = charCarryOver.Length;
            int charsMatched = IndexString(sb.ToString(), beginnings[i], ends[i], carryOverLength, i == totalBlocks - 1);

            // There's a Wiki topic carryover; store the characters which need to be carried over
            if (charsMatched > 0)
            {
                charCarryOver = new char[charsMatched];
                sb.CopyTo(charsUsed + carryOverLength - charsMatched, charCarryOver, 0, charsMatched);
            }
            else
            {
                charCarryOver = new char[0];
            }

            #endregion
        }

        // Wait till all the threads finish
        while (activeThreads != 0)
        {
            ReportProgress(0, IndexingProgress.State.Running, String.Format(Properties.Resources.WaitingForTokenizers, activeThreads));
            Thread.Sleep(TimeSpan.FromSeconds(5));
        }

        ReportProgress(0, IndexingProgress.State.Running, Properties.Resources.FlushingDocumentsToDisk);

        Lucene.Net.Store.Directory dir = memoryIndexer.GetDirectory();
        memoryIndexer.Close();
        indexer.AddIndexesNoOptimize(new Lucene.Net.Store.Directory[] { dir });
        memoryIndexer = null;

        ReportProgress(0, IndexingProgress.State.Running, Properties.Resources.OptimizingIndex);
        indexer.Optimize();
        indexExists = true;
    }
    catch (Exception ex)
    {
        ReportProgress(0, IndexingProgress.State.Failure, ex.ToString());
        failed = true;
    }

    // Try to release some memory
    if (indexer != null)
    {
        indexer.Close();
        indexer = null;
    }

    if (failed || abortIndexing)
    {
        Directory.Delete(indexPath, true);
        indexExists = false;
    }
    else if (indexExists)
    {
        FSDirectory idxDir = FSDirectory.Open(new DirectoryInfo(indexPath));
        searcher = new IndexSearcher(idxDir, true);
    }

    ReportProgress(0, IndexingProgress.State.Finished, String.Empty);
}
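// The method above illustrates a common pattern: batch documents into a fast
// RAMDirectory, then fold the in-memory segments into the on-disk index in one
// shot. A minimal sketch of just that pattern, assuming Lucene.NET 2.9/3.0-era
// APIs (the path and analyzer choice are placeholders):
var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
var ramDir = new RAMDirectory();
var fsDir = FSDirectory.Open(new DirectoryInfo("index"));

var ramWriter = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
var fsWriter = new IndexWriter(fsDir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

// ... add documents to ramWriter ...

ramWriter.Close();  // flush the in-memory segments
fsWriter.AddIndexesNoOptimize(new Lucene.Net.Store.Directory[] { ramDir });
fsWriter.Optimize();
fsWriter.Close();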
/* Open pre-lockless index, add docs, do a delete &
 * setNorm, and search */
public virtual void ChangeIndexWithAdds(System.String dirName)
{
    System.String origDirName = dirName;
    dirName = FullDir(dirName);
    Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

    // open writer
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);

    // add 10 docs
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer, 35 + i);
    }

    // make sure writer sees right total -- writer seems not to know about deletes in .del?
    int expected;
    if (Compare(origDirName, "24") < 0)
    {
        expected = 45;
    }
    else
    {
        expected = 46;
    }
    Assert.AreEqual(expected, writer.MaxDoc(), "wrong doc count");
    writer.Close();

    // make sure searching sees right # hits
    IndexSearcher searcher = new IndexSearcher(dir, true);
    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Document d = searcher.Doc(hits[0].Doc);
    Assert.AreEqual("21", d.Get("id"), "wrong first document");
    TestHits(hits, 44, searcher.IndexReader);
    searcher.Close();

    // make sure we can do delete & setNorm against this
    // pre-lockless segment:
    IndexReader reader = IndexReader.Open(dir, false);
    Term searchTerm = new Term("id", "6");
    int delCount = reader.DeleteDocuments(searchTerm);
    Assert.AreEqual(1, delCount, "wrong delete count");
    reader.SetNorm(22, "content", (float)2.0);
    reader.Close();

    // make sure they "took":
    searcher = new IndexSearcher(dir, true);
    hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(43, hits.Length, "wrong number of hits");
    d = searcher.Doc(hits[0].Doc);
    Assert.AreEqual("22", d.Get("id"), "wrong first document");
    TestHits(hits, 43, searcher.IndexReader);
    searcher.Close();

    // optimize
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(dir, true);
    hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
    Assert.AreEqual(43, hits.Length, "wrong number of hits");
    d = searcher.Doc(hits[0].Doc);
    TestHits(hits, 43, searcher.IndexReader);
    Assert.AreEqual("22", d.Get("id"), "wrong first document");
    searcher.Close();
    dir.Close();
}
// Methods
public RemoteIndex(string name, string folder)
{
    this._name = name;
    this._folder = folder;
    this._directory = this.CreateDirectory(FileUtil.MapPath(FileUtil.MakePath(Sitecore.Configuration.Settings.GetSetting("RemoteIndexLocation"), folder)));
}
public static void Main(System.String[] args)
{
    System.String filename = null;
    bool extract = false;

    for (int i = 0; i < args.Length; ++i)
    {
        if (args[i].Equals("-extract"))
        {
            extract = true;
        }
        else if (filename == null)
        {
            filename = args[i];
        }
    }

    if (filename == null)
    {
        System.Console.Out.WriteLine("Usage: Lucene.Net.index.IndexReader [-extract] <cfsfile>");
        return;
    }

    Directory dir = null;
    CompoundFileReader cfr = null;

    try
    {
        System.IO.FileInfo file = new System.IO.FileInfo(filename);
        System.String dirname = new System.IO.FileInfo(file.FullName).DirectoryName;
        filename = file.Name;
        dir = FSDirectory.GetDirectory(dirname, false);
        cfr = new CompoundFileReader(dir, filename);

        System.String[] files = cfr.List();
        System.Array.Sort(files); // sort the array of filenames so that the output is more readable

        for (int i = 0; i < files.Length; ++i)
        {
            long len = cfr.FileLength(files[i]);

            if (extract)
            {
                System.Console.Out.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
                IndexInput ii = cfr.OpenInput(files[i]);

                System.IO.FileStream f = new System.IO.FileStream(files[i], System.IO.FileMode.Create);

                // read and write with a small buffer, which is more effective than reading byte by byte
                byte[] buffer = new byte[1024];
                int chunk = buffer.Length;
                while (len > 0)
                {
                    int bufLen = (int)System.Math.Min(chunk, len);
                    ii.ReadBytes(buffer, 0, bufLen);
                    f.Write(buffer, 0, bufLen);
                    len -= bufLen;
                }

                f.Close();
                ii.Close();
            }
            else
            {
                System.Console.Out.WriteLine(files[i] + ": " + len + " bytes");
            }
        }
    }
    catch (System.IO.IOException ioe)
    {
        System.Console.Error.WriteLine(ioe.StackTrace);
    }
    finally
    {
        try
        {
            if (dir != null)
            {
                dir.Close();
            }
            if (cfr != null)
            {
                cfr.Close();
            }
        }
        catch (System.IO.IOException ioe)
        {
            System.Console.Error.WriteLine(ioe.StackTrace);
        }
    }
}
/// <summary> Returns the time the index in the named directory was last modified.
/// Do not use this to check whether the reader is still up-to-date; use
/// <see cref="IsCurrent()" /> instead.
/// </summary>
public static long LastModified(System.IO.FileInfo directory)
{
    return FSDirectory.FileModified(directory, IndexFileNames.SEGMENTS);
}
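// Usage sketch: LastModified is only a coarse filesystem timestamp, so for
// staleness checks the docs above point to IsCurrent() on an open reader.
// A minimal example (the path is a placeholder):
var reader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo("index")), true);
// ... later, after another process may have committed changes ...
if (!reader.IsCurrent())
{
    var reopened = reader.Reopen();  // picks up new segments cheaply
    if (reopened != reader)
    {
        reader.Close();
        reader = reopened;
    }
}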
/// <summary>Returns an IndexReader reading the index in an FSDirectory in the named
/// path.
/// </summary>
public static IndexReader Open(System.IO.FileInfo path)
{
    return Open(FSDirectory.GetDirectory(path, false), true);
}
public static void Main(String[] args)
{
    String usage = "Usage:\t" + typeof(SearchFiles) + " [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
    usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
    if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
    {
        Console.Out.WriteLine(usage);
        Environment.Exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    bool raw = false;
    String normsField = null;
    bool paging = true;
    int hitsPerPage = 10;

    for (int i = 0; i < args.Length; i++)
    {
        if ("-index".Equals(args[i]))
        {
            index = args[i + 1];
            i++;
        }
        else if ("-field".Equals(args[i]))
        {
            field = args[i + 1];
            i++;
        }
        else if ("-queries".Equals(args[i]))
        {
            queries = args[i + 1];
            i++;
        }
        else if ("-repeat".Equals(args[i]))
        {
            repeat = Int32.Parse(args[i + 1]);
            i++;
        }
        else if ("-raw".Equals(args[i]))
        {
            raw = true;
        }
        else if ("-norms".Equals(args[i]))
        {
            normsField = args[i + 1];
            i++;
        }
        else if ("-paging".Equals(args[i]))
        {
            if (args[i + 1].Equals("false"))
            {
                paging = false;
            }
            else
            {
                hitsPerPage = Int32.Parse(args[i + 1]);
                if (hitsPerPage == 0)
                {
                    paging = false;
                }
            }
            i++;
        }
    }

    IndexReader indexReader = null;
    try
    {
        // only searching, so read-only=true
        indexReader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo(index)), true);
        if (normsField != null)
        {
            indexReader = new OneNormsReader(indexReader, normsField);
        }

        Searcher searcher = new IndexSearcher(indexReader);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

        StreamReader queryReader;
        if (queries != null)
        {
            queryReader = new StreamReader(queries, Encoding.Default);
        }
        else
        {
            queryReader = new StreamReader(Console.OpenStandardInput(), Encoding.UTF8);
        }

        var parser = new QueryParser(Version.LUCENE_30, field, analyzer);
        while (true)
        {
            if (queries == null)
            {
                // prompt the user
                Console.Out.WriteLine("Enter query: ");
            }

            String line = queryReader.ReadLine();
            if (line == null)
            {
                break;
            }

            line = line.Trim();
            if (line.Length == 0)
            {
                break;
            }

            Query query = parser.Parse(line);
            Console.Out.WriteLine("Searching for: " + query.ToString(field));

            if (repeat > 0)
            {
                // repeat & time as benchmark
                DateTime start = DateTime.Now;
                for (int i = 0; i < repeat; i++)
                {
                    searcher.Search(query, null, 100);
                }
                DateTime end = DateTime.Now;
                Console.Out.WriteLine("Time: " + (end - start).TotalMilliseconds + "ms");
            }

            if (paging)
            {
                DoPagingSearch(queryReader, searcher, query, hitsPerPage, raw, queries == null);
            }
            else
            {
                DoStreamingSearch(searcher, query);
            }
        }

        queryReader.Close();
    }
    finally
    {
        if (indexReader != null)
        {
            indexReader.Dispose();
        }
    }
}
/// <summary> Constructs an IndexWriter for the index in <code>path</code>.
/// Text will be analyzed with <code>a</code>. If <code>create</code>
/// is true, then a new, empty index will be created in
/// <code>path</code>, replacing the index already there, if any.
/// </summary>
/// <param name="path">the path to the index directory
/// </param>
/// <param name="a">the analyzer to use
/// </param>
/// <param name="create"><code>true</code> to create the index or overwrite
/// the existing one; <code>false</code> to append to the existing
/// index
/// </param>
/// <throws> IOException if the directory cannot be read/written to, or
/// if it does not exist and <code>create</code> is <code>false</code>
/// </throws>
public IndexWriter(System.IO.FileInfo path, Analyzer a, bool create)
    : this(FSDirectory.GetDirectory(path, create), a, create, true)
{
}
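// The FileInfo overload above is a legacy convenience that wraps
// FSDirectory.GetDirectory. A sketch of the equivalent explicit form used by
// later snippets in this section (path and analyzer are placeholders):
Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("index"));
IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED);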
public virtual void searchIndex(System.String dirName, System.String oldName)
{
    //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer());
    //Query query = parser.parse("handle:1");

    dirName = FullDir(dirName);

    Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));
    IndexSearcher searcher = new IndexSearcher(dir, true);
    IndexReader reader = searcher.IndexReader;

    _TestUtil.CheckIndex(dir);

    for (int i = 0; i < 35; i++)
    {
        if (!reader.IsDeleted(i))
        {
            Document d = reader.Document(i);
            var fields = d.GetFields();
            if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
            {
                if (d.GetField("content3") == null)
                {
                    int numFields = oldName.StartsWith("29.") ? 7 : 5;
                    Assert.AreEqual(numFields, fields.Count);

                    Field f = d.GetField("id");
                    Assert.AreEqual("" + i, f.StringValue);

                    f = (Field)d.GetField("utf8");
                    Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                    f = (Field)d.GetField("autf8");
                    Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                    f = (Field)d.GetField("content2");
                    Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                    f = (Field)d.GetField("fie\u2C77ld");
                    Assert.AreEqual("field with non-ascii name", f.StringValue);
                }
            }
        }
        else
        {
            // Only ID 7 is deleted
            Assert.AreEqual(7, i);
        }
    }

    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

    // First document should be #21 since its norm was
    // increased:
    Document d2 = searcher.Doc(hits[0].Doc);
    Assert.AreEqual("21", d2.Get("id"), "didn't get the right document first");

    TestHits(hits, 34, searcher.IndexReader);

    if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22."))
    {
        // Test on indices >= 2.3
        hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
        Assert.AreEqual(34, hits.Length);

        hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
        Assert.AreEqual(34, hits.Length);

        hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
        Assert.AreEqual(34, hits.Length);
    }

    searcher.Close();
    dir.Close();
}
private void Demo_FSIndexInputBug(FSDirectory fsdir, System.String file)
{
    // Setup the test file - we need more than 1024 bytes
    IndexOutput os = fsdir.CreateOutput(file);
    for (int i = 0; i < 2000; i++)
    {
        os.WriteByte((byte)i);
    }
    os.Close();

    IndexInput in_Renamed = fsdir.OpenInput(file);

    // This read primes the buffer in IndexInput
    byte b = in_Renamed.ReadByte();

    // Close the file
    in_Renamed.Close();

    // ERROR: this call should fail, but succeeds because the buffer
    // is still filled
    b = in_Renamed.ReadByte();

    // ERROR: this call should fail, but succeeds for some reason as well
    in_Renamed.Seek(1099);

    try
    {
        // OK: this call correctly fails. We are now past the 1024 internal
        // buffer, so an actual IO is attempted, which fails
        b = in_Renamed.ReadByte();
        Assert.Fail("expected readByte() to throw exception");
    }
    catch (System.Exception)
    {
        // expected exception
    }
}
public virtual void TestExactFileNames()
{
    System.String outputDir = "lucene.backwardscompat0.index";
    RmDir(outputDir);

    try
    {
        Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(FullDir(outputDir)));

        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.SetRAMBufferSizeMB(16.0);
        for (int i = 0; i < 35; i++)
        {
            AddDoc(writer, i);
        }
        Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count");
        writer.Close();

        // Delete one doc so we get a .del file:
        IndexReader reader = IndexReader.Open(dir, false);
        Term searchTerm = new Term("id", "7");
        int delCount = reader.DeleteDocuments(searchTerm);
        Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

        // Set one norm so we get a .s0 file:
        reader.SetNorm(21, "content", (float)1.5);
        reader.Close();

        // The numbering of fields can vary depending on which
        // JRE is in use. On some JREs we see content bound to
        // field 0; on others, field 1. So, here we have to
        // figure out which field number corresponds to
        // "content", and then set our expected file names below
        // accordingly:
        CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
        FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
        int contentFieldIndex = -1;
        for (int i = 0; i < fieldInfos.Size(); i++)
        {
            FieldInfo fi = fieldInfos.FieldInfo(i);
            if (fi.name_ForNUnit.Equals("content"))
            {
                contentFieldIndex = i;
                break;
            }
        }
        cfsReader.Close();
        Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _0.cfs segment");

        // Now verify file names:
        System.String[] expected;
        expected = new System.String[] { "_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen" };

        System.String[] actual = dir.ListAll();
        System.Array.Sort(expected);
        System.Array.Sort(actual);
        if (!CollectionsHelper.Equals(expected, actual))
        {
            Assert.Fail("incorrect filenames in index: expected:\n    " + AsString(expected) + "\n  actual:\n    " + AsString(actual));
        }
        dir.Close();
    }
    finally
    {
        RmDir(outputDir);
    }
}
/// <summary>
/// Constructor: takes the directory path where the index is stored.
/// </summary>
/// <param name="indexDirectoryPath">path of the index directory</param>
public Searcher(string indexDirectoryPath)
{
    indexSearch = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(indexDirectoryPath)));
    queryParser = new QueryParser(Version.LUCENE_30, Params.CONTENTS, new StandardAnalyzer(Version.LUCENE_30));
}
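// What a query through this class boils down to, as a minimal sketch (hedged:
// Params.CONTENTS is assumed to name the indexed body field, and the class's
// own search method is not shown above):
Query q = queryParser.Parse("lucene");
TopDocs topDocs = indexSearch.Search(q, 10);
foreach (ScoreDoc sd in topDocs.ScoreDocs)
{
    Document doc = indexSearch.Doc(sd.Doc);
    // read stored fields from doc, e.g. doc.Get(Params.CONTENTS)
}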
public virtual void TestLazyPerformance()
{
    System.String tmpIODir = AppSettings.Get("tempDir", "");
    System.String userName = System.Environment.UserName;
    System.String path = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + userName;
    System.IO.DirectoryInfo file = new System.IO.DirectoryInfo(path);
    _TestUtil.RmDir(file);
    FSDirectory tmpDir = FSDirectory.Open(file);
    Assert.IsTrue(tmpDir != null);

    IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.UseCompoundFile = false;
    writer.AddDocument(testDoc);
    writer.Close();

    Assert.IsTrue(fieldInfos != null);
    FieldsReader reader;
    long lazyTime = 0;
    long regularTime = 0;
    int length = 50;
    ISet<string> lazyFieldNames = Support.Compatibility.SetFactory.CreateHashSet<string>();
    lazyFieldNames.Add(DocHelper.LARGE_LAZY_FIELD_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Support.Compatibility.SetFactory.CreateHashSet<string>(), lazyFieldNames);

    for (int i = 0; i < length; i++)
    {
        reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
        Assert.IsTrue(reader != null);
        Assert.IsTrue(reader.Size() == 1);

        Document doc;
        doc = reader.Doc(0, null); // Load all of them
        Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
        IFieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy == false, "field is lazy");

        System.String value_Renamed;
        long start;
        long finish;
        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        // On my machine this was always 0ms.
        value_Renamed = field.StringValue;
        finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        Assert.IsTrue(field != null, "field is null and it shouldn't be");
        regularTime += (finish - start);

        reader.Dispose();
        reader = null;
        doc = null;

        // Hmmm, are we still in cache???
        System.GC.Collect();

        reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
        doc = reader.Doc(0, fieldSelector);
        field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
        Assert.IsTrue(field.IsLazy == true, "field is not lazy");

        start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        // On my machine this took around 50 - 70ms
        value_Renamed = field.StringValue;
        finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
        lazyTime += (finish - start);

        reader.Dispose();
    }

    System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
    System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
}
public virtual void SetUp()
{
    //dir = new RAMDirectory();
    dir = FSDirectory.GetDirectory(new System.IO.FileInfo(SupportClass.AppSettings.Get("tempDir", "testIndex")), true);
}
internal static void IndexDocs(IndexWriter writer, FileInfo file)
{
    //Console.Out.WriteLine("adding " + file);
    if (!exts.Contains(file.Extension))
    {
        return;
    }

    try
    {
        IndexReader indexReader = null;
        try
        {
            // only searching, so read-only=true
            indexReader = IndexReader.Open(FSDirectory.Open(INDEX_DIR), true);
            Searcher searcher = new IndexSearcher(indexReader);
            Analyzer analyzer = new CustomAnalyzer();

            var q = new TermQuery(new Term("path", file.FullName));
            var search = searcher.Search(q, null, 1);
            var hits = search.ScoreDocs;

            if (hits.Length > 0)
            {
                Document doc = searcher.Doc(hits[0].Doc);
                DateTime modifiedStored = DateTools.StringToDate(doc.Get("modified"));
                DateTime actuallyModified = File.GetLastWriteTime(file.FullName);
                // Round-trip through MINUTE resolution so both timestamps
                // compare at the same granularity as the stored value.
                string actuallyModifiedString = DateTools.DateToString(actuallyModified, DateTools.Resolution.MINUTE);
                actuallyModified = DateTools.StringToDate(actuallyModifiedString);

                if (modifiedStored < actuallyModified)
                {
                    writer.UpdateDocument(new Term("path", doc.Get("path")), FileDocument.Document(file));
                    Console.WriteLine("Updating " + file.FullName);
                }
            }
            else
            {
                writer.AddDocument(FileDocument.Document(file));
                Console.WriteLine("Adding " + file.FullName);
            }
        }
        finally
        {
            if (indexReader != null)
            {
                indexReader.Dispose();
            }
        }
    }
    catch (FileNotFoundException)
    {
        // At least on Windows, some temporary files raise this exception with an
        // "access denied" message; checking if the file can be read doesn't help.
    }
    catch (UnauthorizedAccessException)
    {
        // Handle any access-denied errors that occur while reading the file.
    }
    catch (IOException)
    {
        // Generic handler for any IO-related exceptions that occur.
    }
}
public IndexableFileInfo[] getRelatedFiles(string title, int maxResultsToReturn)
{
    // http://blogs.intesoft.net/post/2008/04/NHibernateSearch-using-LuceneNET-Full-Text-Index-(Part-3).aspx
    Analyzer analyzer = new StandardAnalyzer();
    BooleanQuery query = new BooleanQuery();

    if (title.Trim() != "")
    {
        Query titleQ = Similarity.Net.SimilarityQueries.FormSimilarQuery(title, analyzer, "title", null);
        titleQ.SetBoost(LuceneIndexer.TitleFieldBoost);
        query.Add(titleQ, BooleanClause.Occur.SHOULD);

        Query contents = Similarity.Net.SimilarityQueries.FormSimilarQuery(title, analyzer, "contents", null);
        query.Add(contents, BooleanClause.Occur.SHOULD);
    }

    // avoid the page being similar to itself!
    // query.Add(new TermQuery(new Term("title", title)), BooleanClause.Occur.MUST_NOT);

    // An alternative approach, quoted from the MoreLikeThis docs:
    //   IndexReader ir = ...
    //   IndexSearcher is = ...
    //   MoreLikeThis mlt = new MoreLikeThis(ir);
    //   Reader target = ... // orig source of doc you want to find similarities to
    //   Query query = mlt.Like(target);
    //   Hits hits = is.Search(query);

    FSDirectory indexDir = FSDirectory.GetDirectory(this.luceneIndexDir, false);
    IndexSearcher searcher;
    try
    {
        searcher = new IndexSearcher(indexDir);
    }
    catch
    {
        // if the luceneIndexDir does not contain index files (yet), IndexSearcher
        // throws a nice Exception.
        return new IndexableFileInfo[0];
    }

    List<IndexableFileInfo> arrayList = new List<IndexableFileInfo>();
    Hits hits = searcher.Search(query);
    try
    {
        int num = Math.Min(maxResultsToReturn, hits.Length());
        for (int i = 0; i < num; i++)
        {
            Document d = hits.Doc(i);
            string filename = d.Get("filename");
            string plainText = d.Get("contents");
            string doctitle = d.Get("title");
            string filenameParams = d.Get("filenameParams");
            bool contentIsPageSummary = Convert.ToBoolean(d.Get("contentIsPageSummary"));
            DateTime lastModified = DateTools.StringToDate(d.Get("LastModified"));
            string fragment = plainText;
            string sectionName = d.Get("SectionName");

            IndexableFileInfo newHit = new IndexableFileInfo(filename, filenameParams, doctitle, fragment, sectionName, lastModified, contentIsPageSummary);
            arrayList.Add(newHit);
        } // for
    }
    finally
    {
        searcher.Close();
    }
    return arrayList.ToArray();
}
public static void Main(System.String[] args)
{
    bool doFix = false;
    var onlySegments = new List<string>();
    System.String indexPath = null;
    int i = 0;

    while (i < args.Length)
    {
        if (args[i].Equals("-fix"))
        {
            doFix = true;
            i++;
        }
        else if (args[i].Equals("-segment"))
        {
            if (i == args.Length - 1)
            {
                System.Console.Out.WriteLine("ERROR: missing name for -segment option");
                System.Environment.Exit(1);
            }
            onlySegments.Add(args[i + 1]);
            i += 2;
        }
        else
        {
            if (indexPath != null)
            {
                System.Console.Out.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'");
                System.Environment.Exit(1);
            }
            indexPath = args[i];
            i++;
        }
    }

    if (indexPath == null)
    {
        System.Console.Out.WriteLine("\nERROR: index path not specified");
        System.Console.Out.WriteLine("\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
            "\n" +
            "  -fix: actually write a new segments_N file, removing any problematic segments\n" +
            "  -segment X: only check the specified segments.  This can be specified multiple\n" +
            "              times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
            "              You can't use this with the -fix option\n" +
            "\n" +
            "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
            "documents (perhaps many) to be permanently removed from the index.  Always make\n" +
            "a backup copy of your index before running this!  Do not run this tool on an index\n" +
            "that is actively being written to.  You have been warned!\n" +
            "\n" +
            "Run without -fix, this tool will open the index, report version information\n" +
            "and report any exceptions it hits and what action it would take if -fix were\n" +
            "specified.  With -fix, this tool will remove any segments that have issues and\n" +
            "write a new segments_N file.  This means all documents contained in the affected\n" +
            "segments will be removed.\n" +
            "\n" +
            "This tool exits with exit code 1 if the index cannot be opened or has any\n" +
            "corruption, else 0.\n");
        System.Environment.Exit(1);
    }

    if (!AssertsOn())
    {
        System.Console.Out.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:Lucene.Net...', so assertions are enabled");
    }

    if (onlySegments.Count == 0)
    {
        onlySegments = null;
    }
    else if (doFix)
    {
        System.Console.Out.WriteLine("ERROR: cannot specify both -fix and -segment");
        System.Environment.Exit(1);
    }

    System.Console.Out.WriteLine("\nOpening index @ " + indexPath + "\n");
    Directory dir = null;
    try
    {
        dir = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath));
    }
    catch (Exception t)
    {
        Console.Out.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting");
        Console.Out.WriteLine(t.StackTrace);
        Environment.Exit(1);
    }

    var checker = new CheckIndex(dir);
    var tempWriter = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding) { AutoFlush = true };
    checker.SetInfoStream(tempWriter);

    Status result = checker.CheckIndex_Renamed_Method(onlySegments);
    if (result.missingSegments)
    {
        System.Environment.Exit(1);
    }

    if (!result.clean)
    {
        if (!doFix)
        {
            System.Console.Out.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
        }
        else
        {
            Console.Out.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
            Console.Out.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
            for (var s = 0; s < 5; s++)
            {
                System.Threading.Thread.Sleep(System.TimeSpan.FromSeconds(1));
                System.Console.Out.WriteLine("  " + (5 - s) + "...");
            }
            Console.Out.WriteLine("Writing...");
            checker.FixIndex(result);
            Console.Out.WriteLine("OK");
            Console.Out.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\"");
        }
    }

    System.Console.Out.WriteLine("");

    int exitCode;
    if (result != null && result.clean == true)
    {
        exitCode = 0;
    }
    else
    {
        exitCode = 1;
    }
    System.Environment.Exit(exitCode);
}
/// <summary>
/// Searches the keyword index using the keywordQuery.
///
/// See http://www.dotlucene.net/documentation/QuerySyntax.html for the format of the keywordQuery.
///
/// This function will return a fully-filled array of IndexableFileInfo objects.
/// </summary>
/// <param name="keywordQuery"></param>
/// <param name="queryForHighlighter"></param>
/// <returns></returns>
public IndexableFileInfo[] doSearch(string keywordQuery, string queryForHighlighter)
{
    IndexSearcher searcher;
    IndexReader indexReader;
    try
    {
        FSDirectory indexDir = FSDirectory.GetDirectory(this.luceneIndexDir, false);
        indexReader = IndexReader.Open(indexDir);
        searcher = new IndexSearcher(indexReader);
    }
    catch
    {
        // if the luceneIndexDir does not contain index files (yet), IndexSearcher
        // throws a nice Exception.
        return new IndexableFileInfo[0];
    }

    List<IndexableFileInfo> arrayList = new List<IndexableFileInfo>();
    try
    {
        string Query = keywordQuery;
        if (Query == String.Empty)
        {
            return new IndexableFileInfo[0];
        }

        string HighlighterQuery = queryForHighlighter;
        // -- weirdly enough, when the query is empty, an exception is thrown during QueryParser.Parse;
        //    this hack gets around that.
        if (HighlighterQuery == String.Empty)
        {
            HighlighterQuery = Guid.NewGuid().ToString();
        }

        // parse the query, "text" is the default field to search
        // note: use the StandardAnalyzer! (the SimpleAnalyzer doesn't work correctly when searching by fields that are integers!)
        // MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new string[] { "title", "contents" }, new hatWebPortalAnalyzer());
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new string[] { "title", "contents" }, new SimpleAnalyzer());
        queryParser.SetDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = queryParser.Parse(Query);

        QueryParser highlightQueryParser = new QueryParser("contents", new hatWebPortalAnalyzer());
        Query highlighterQuery = highlightQueryParser.Parse(HighlighterQuery);

        query = searcher.Rewrite(query); // is this needed?? "Expert: called to re-write queries into primitive queries."

        // search
        Hits hits = searcher.Search(query, Sort.RELEVANCE);

        // create highlighter
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new QueryScorer(highlighterQuery));

        // -- go through hits and return results
        for (int i = 0; i < hits.Length(); i++)
        {
            Document d = hits.Doc(i);
            string filename = d.Get("filename");
            string plainText = d.Get("contents");
            string title = d.Get("title");
            string sectionName = d.Get("SectionName");
            string filenameParams = d.Get("filenameParams");
            bool contentIsPageSummary = Convert.ToBoolean(d.Get("contentIsPageSummary"));
            double score = Convert.ToDouble(hits.Score(i));
            DateTime lastModified = DateTools.StringToDate(d.Get("LastModified"));

            TokenStream tokenStream = new hatWebPortalAnalyzer().TokenStream("contents", new StringReader(plainText));
            string fragment = plainText;
            if (!contentIsPageSummary)
            {
                fragment = highlighter.GetBestFragments(tokenStream, plainText, 2, "...");
            }

            IndexableFileInfo newHit = new IndexableFileInfo(filename, filenameParams, title, fragment, sectionName, lastModified, contentIsPageSummary, score);
            arrayList.Add(newHit);
        } // for
    }
    finally
    {
        searcher.Close();
        indexReader.Close();
    }
    return arrayList.ToArray();
} // doSearch
/// <summary>Returns an IndexReader reading the index in an FSDirectory in the named
/// path.
/// </summary>
public static IndexReader Open(System.String path)
{
    return Open(FSDirectory.GetDirectory(path, false), true);
}
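// Usage sketch for the string-path overload above: open read-only, then
// release the reader when done (the path is a placeholder):
IndexReader reader = IndexReader.Open("indexes/docs");
try
{
    int numDocs = reader.NumDocs();  // e.g. inspect the index
}
finally
{
    reader.Close();
}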
public static void Main(System.String[] argv)
{
    try
    {
        var index = new DirectoryInfo("index");
        bool create = false;
        DirectoryInfo root = null;
        var usage = "IndexHTML [-create] [-index <index>] <root_directory>";

        if (argv.Length == 0)
        {
            Console.Error.WriteLine("Usage: " + usage);
            return;
        }

        for (int i = 0; i < argv.Length; i++)
        {
            if (argv[i].Equals("-index"))
            {
                // parse -index option
                index = new DirectoryInfo(argv[++i]);
            }
            else if (argv[i].Equals("-create"))
            {
                // parse -create option
                create = true;
            }
            else if (i != argv.Length - 1)
            {
                Console.Error.WriteLine("Usage: " + usage);
                return;
            }
            else
            {
                root = new DirectoryInfo(argv[i]);
            }
        }

        if (root == null)
        {
            Console.Error.WriteLine("Specify directory to index");
            Console.Error.WriteLine("Usage: " + usage);
            return;
        }

        var start = DateTime.Now;

        using (var writer = new IndexWriter(FSDirectory.Open(index), new StandardAnalyzer(Version.LUCENE_30), create, new IndexWriter.MaxFieldLength(1000000)))
        {
            if (!create)
            {
                // We're not creating a new index, iterate our index and remove
                // any stale documents.
                IndexDocs(writer, root, index, Operation.RemoveStale);
            }

            var operation = create ? Operation.CompleteReindex : Operation.IncrementalReindex;
            IndexDocs(writer, root, index, operation); // add new docs

            Console.Out.WriteLine("Optimizing index...");
            writer.Optimize();
            writer.Commit();
        }

        var end = DateTime.Now;
        Console.Out.Write((end - start).TotalMilliseconds);
        Console.Out.WriteLine(" total milliseconds");
    }
    catch (Exception e)
    {
        Console.Error.WriteLine(e.StackTrace);
    }
}
public virtual void TestIndexAndMerge()
{
    System.IO.MemoryStream sw = new System.IO.MemoryStream();
    System.IO.StreamWriter out_Renamed = new System.IO.StreamWriter(sw);

    Directory directory = FSDirectory.Open(indexDir);
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    SegmentInfo si1 = IndexDoc(writer, "test.txt");
    PrintSegment(out_Renamed, si1);

    SegmentInfo si2 = IndexDoc(writer, "test2.txt");
    PrintSegment(out_Renamed, si2);
    writer.Close();

    SegmentInfo siMerge = Merge(si1, si2, "merge", false);
    PrintSegment(out_Renamed, siMerge);

    SegmentInfo siMerge2 = Merge(si1, si2, "merge2", false);
    PrintSegment(out_Renamed, siMerge2);

    SegmentInfo siMerge3 = Merge(siMerge, siMerge2, "merge3", false);
    PrintSegment(out_Renamed, siMerge3);

    directory.Close();
    out_Renamed.Close();
    sw.Close();

    System.String multiFileOutput = System.Text.ASCIIEncoding.ASCII.GetString(sw.ToArray());
    //System.out.println(multiFileOutput);

    sw = new System.IO.MemoryStream();
    out_Renamed = new System.IO.StreamWriter(sw);

    directory = FSDirectory.Open(indexDir);
    writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    si1 = IndexDoc(writer, "test.txt");
    PrintSegment(out_Renamed, si1);

    si2 = IndexDoc(writer, "test2.txt");
    PrintSegment(out_Renamed, si2);
    writer.Close();

    siMerge = Merge(si1, si2, "merge", true);
    PrintSegment(out_Renamed, siMerge);

    siMerge2 = Merge(si1, si2, "merge2", true);
    PrintSegment(out_Renamed, siMerge2);

    siMerge3 = Merge(siMerge, siMerge2, "merge3", true);
    PrintSegment(out_Renamed, siMerge3);

    directory.Close();
    out_Renamed.Close();
    sw.Close();

    System.String singleFileOutput = System.Text.ASCIIEncoding.ASCII.GetString(sw.ToArray());

    Assert.AreEqual(multiFileOutput, singleFileOutput);
}
protected override void Dispose(bool disposing)
{
    if (isDisposed)
        return;

    if (disposing)
    {
        if (fsDir != null)
        {
            fsDir.Close();
        }
    }

    fsDir = null;
    isDisposed = true;
}
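// With the Dispose(bool) pattern above, the wrapper participates in C#'s
// using statement like any other Directory (a sketch; the path is a
// placeholder):
using (var faulty = new FaultyFSDirectory(new System.IO.DirectoryInfo("testFaultyIndex")))
{
    // exercise code under simulated I/O faults;
    // Dispose closes the inner FSDirectory when the block exits
}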
public static void Main(System.String[] argv)
{
    try
    {
        System.IO.FileInfo index = new System.IO.FileInfo("index");
        bool create = false;
        System.IO.FileInfo root = null;
        System.String usage = "IndexHTML [-create] [-index <index>] <root_directory>";

        if (argv.Length == 0)
        {
            System.Console.Error.WriteLine("Usage: " + usage);
            return;
        }

        for (int i = 0; i < argv.Length; i++)
        {
            if (argv[i].Equals("-index"))
            {
                // parse -index option
                index = new System.IO.FileInfo(argv[++i]);
            }
            else if (argv[i].Equals("-create"))
            {
                // parse -create option
                create = true;
            }
            else if (i != argv.Length - 1)
            {
                System.Console.Error.WriteLine("Usage: " + usage);
                return;
            }
            else
            {
                root = new System.IO.FileInfo(argv[i]);
            }
        }

        if (root == null)
        {
            System.Console.Error.WriteLine("Specify directory to index");
            System.Console.Error.WriteLine("Usage: " + usage);
            return;
        }

        System.DateTime start = System.DateTime.Now;

        if (!create)
        {
            // delete stale docs
            deleting = true;
            IndexDocs(root, index, create);
        }

        writer = new IndexWriter(FSDirectory.Open(index), new StandardAnalyzer(Version.LUCENE_CURRENT), create, new IndexWriter.MaxFieldLength(1000000));
        IndexDocs(root, index, create); // add new docs

        System.Console.Out.WriteLine("Optimizing index...");
        writer.Optimize();
        writer.Close();

        System.DateTime end = System.DateTime.Now;
        System.Console.Out.Write((end - start).TotalMilliseconds);
        System.Console.Out.WriteLine(" total milliseconds");
    }
    catch (System.Exception e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
    }
}