public override void SetUp()
{
    base.SetUp();
    System.String[] data = new System.String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", "X 4 5 6" };
    index = new RAMDirectory();
    IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < data.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); // Field.Keyword("id", String.valueOf(i))
        doc.Add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED)); // Field.Keyword("all", "all")
        if (null != data[i])
        {
            doc.Add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED)); // Field.Text("data", data[i])
        }
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();
    r = IndexReader.Open(index, true);
    s = new IndexSearcher(r);
    //System.out.println("Set up " + getName());
}
public static void AfterClass()
{
    Dir.Dispose();
    Dir = null;
    FieldInfos = null;
    TestDoc = null;
}
public override void SetUp()
{
    base.SetUp();
    Document doc;

    Rd1 = NewDirectory();
    IndexWriter iw1 = new IndexWriter(Rd1, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    doc = new Document();
    doc.Add(NewTextField("field1", "the quick brown fox jumps", Field.Store.YES));
    doc.Add(NewTextField("field2", "the quick brown fox jumps", Field.Store.YES));
    iw1.AddDocument(doc);
    iw1.Dispose();

    Rd2 = NewDirectory();
    IndexWriter iw2 = new IndexWriter(Rd2, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    doc = new Document();
    doc.Add(NewTextField("field1", "the fox jumps over the lazy dog", Field.Store.YES));
    doc.Add(NewTextField("field3", "the fox jumps over the lazy dog", Field.Store.YES));
    iw2.AddDocument(doc);
    iw2.Dispose();

    this.Ir1 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(Rd1));
    this.Ir2 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(Rd2));
}
public virtual void CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir)
{
    int[] freq = new int[nTerms];
    Terms = new Term[nTerms];
    for (int i = 0; i < nTerms; i++)
    {
        int f = (nTerms + 1) - i; // make first terms less frequent
        freq[i] = (int)Math.Ceiling(Math.Pow(f, power));
        Terms[i] = new Term("f", char.ToString((char)('A' + i)));
    }
    IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE));
    for (int i = 0; i < nDocs; i++)
    {
        Document d = new Document();
        for (int j = 0; j < nTerms; j++)
        {
            if (Random().Next(freq[j]) == 0)
            {
                d.Add(NewStringField("f", Terms[j].Text(), Field.Store.NO));
                //System.out.println(d);
            }
        }
        iw.AddDocument(d);
    }
    iw.ForceMerge(1);
    iw.Dispose();
}
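// A quick hedged sketch (standalone; plain System.Random stands in for the test
// framework's Random()): Random().Next(freq[j]) == 0 fires with probability
// 1/freq[j], so term j lands in roughly nDocs / freq[j] documents - the
// power-law skew that CreateRandomTerms above relies on.
int nDocs = 100000, nTerms = 5;
double power = 1.5;
var rnd = new System.Random(42);
for (int j = 0; j < nTerms; j++)
{
    int f = (nTerms + 1) - j;
    int freq = (int)System.Math.Ceiling(System.Math.Pow(f, power));
    int hits = 0;
    for (int i = 0; i < nDocs; i++)
    {
        if (rnd.Next(freq) == 0)
        {
            hits++; // P(hit) = 1/freq
        }
    }
    System.Console.WriteLine($"term {j}: expected ~{nDocs / freq}, observed {hits}");
}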
public void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, Similarity, TimeZone);
    writer.Dispose();
    Reader = DirectoryReader.Open(Directory);
}
public LuceneTesterBase(LuceneDirectory directory, LuceneAnalyzer analyzer, LuceneVersion version)
{
    Analyzer = analyzer;
    CurrentLuceneVersion = version;
    IndexDirectory = directory;
    Debug = false;
}
public static void BeforeClass()
{
    Dir = NewDirectory();
    Sdir1 = NewDirectory();
    Sdir2 = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, new MockAnalyzer(Random()));
    RandomIndexWriter swriter1 = new RandomIndexWriter(Random(), Sdir1, new MockAnalyzer(Random()));
    RandomIndexWriter swriter2 = new RandomIndexWriter(Random(), Sdir2, new MockAnalyzer(Random()));

    for (int i = 0; i < 10; i++)
    {
        Document doc = new Document();
        doc.Add(NewStringField("data", Convert.ToString(i), Field.Store.NO));
        writer.AddDocument(doc);
        ((i % 2 == 0) ? swriter1 : swriter2).AddDocument(doc);
    }
    writer.ForceMerge(1);
    swriter1.ForceMerge(1);
    swriter2.ForceMerge(1);
    writer.Dispose();
    swriter1.Dispose();
    swriter2.Dispose();

    Reader = DirectoryReader.Open(Dir);
    Searcher = NewSearcher(Reader);

    MultiReader = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Sdir2) }, true);
    MultiSearcher = NewSearcher(MultiReader);

    MultiReaderDupls = new MultiReader(new IndexReader[] { DirectoryReader.Open(Sdir1), DirectoryReader.Open(Dir) }, true);
    MultiSearcherDupls = NewSearcher(MultiReaderDupls);
}
public virtual void SetUp()
{
    // Create an index writer.
    directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    // oldest doc:
    // Add the first document.  text = "Document 1"  dateTime = Oct 10 03:25:22 EDT 2007
    writer.AddDocument(CreateDocument("Document 1", 633275835220000000L));
    // Add the second document.  text = "Document 2"  dateTime = Oct 10 03:25:26 EDT 2007
    writer.AddDocument(CreateDocument("Document 2", 633275835260000000L));
    // Add the third document.  text = "Document 3"  dateTime = Oct 11 07:12:13 EDT 2007
    writer.AddDocument(CreateDocument("Document 3", 633276835330000000L));
    // Add the fourth document.  text = "Document 4"  dateTime = Oct 11 08:02:09 EDT 2007
    writer.AddDocument(CreateDocument("Document 4", 633276865290000000L));
    // latest doc:
    // Add the fifth document.  text = "Document 5"  dateTime = Oct 12 13:25:43 EDT 2007
    writer.AddDocument(CreateDocument("Document 5", 633277923430000000L));

    //// oldest doc:
    //// Add the first document.  text = "Document 1"  dateTime = Oct 10 03:25:22 EDT 2007
    //writer.AddDocument(CreateDocument("Document 1", 1192001122000L));
    //// Add the second document.  text = "Document 2"  dateTime = Oct 10 03:25:26 EDT 2007
    //writer.AddDocument(CreateDocument("Document 2", 1192001126000L));
    //// Add the third document.  text = "Document 3"  dateTime = Oct 11 07:12:13 EDT 2007
    //writer.AddDocument(CreateDocument("Document 3", 1192101133000L));
    //// Add the fourth document.  text = "Document 4"  dateTime = Oct 11 08:02:09 EDT 2007
    //writer.AddDocument(CreateDocument("Document 4", 1192104129000L));
    //// latest doc:
    //// Add the fifth document.  text = "Document 5"  dateTime = Oct 12 13:25:43 EDT 2007
    //writer.AddDocument(CreateDocument("Document 5", 1192209943000L));

    writer.Optimize();
    writer.Close();
}
public static void AfterClass()
{
    Reader.Dispose();
    Directory.Dispose();
    Reader = null;
    Directory = null;
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory);
    Document doc = new Document();
    Field field = NewStringField(FIELD, "meaninglessnames", Field.Store.NO);
    doc.Add(field);

    for (int i = 0; i < 5137; ++i)
    {
        writer.AddDocument(doc);
    }
    field.StringValue = "tangfulin";
    writer.AddDocument(doc);

    field.StringValue = "meaninglessnames";
    for (int i = 5138; i < 11377; ++i)
    {
        writer.AddDocument(doc);
    }
    field.StringValue = "tangfulin";
    writer.AddDocument(doc);

    Reader = writer.Reader;
    Searcher = NewSearcher(Reader);
    writer.Dispose();
}
public LuceneIndexingService(
    IPackageSource packageSource,
    Lucene.Net.Store.Directory directory)
{
    _packageSource = packageSource;
    _directory = directory;
}
public static void AfterClass()
{
    Reader.Dispose();
    Small.Dispose();
    Reader = null;
    Small = null;
}
// TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs
public virtual void BuildIndex(Directory dir)
{
    Random random = Random();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    Similarity provider = new MySimProvider(this);
    config.SetSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        Document doc = docs.NextDoc();
        int boost = Random().Next(255);
        Field f = new TextField(ByteTestField, "" + boost, Field.Store.YES);
        f.Boost = boost;
        doc.Add(f);
        writer.AddDocument(doc);
        doc.RemoveField(ByteTestField);
        if (Rarely())
        {
            writer.Commit();
        }
    }
    writer.Commit();
    writer.Dispose();
    docs.Dispose();
}
private int[] starts; // 1st docno for each segment

#endregion Fields

#region Constructors

/// <summary>Construct reading the named set of readers. </summary>
internal MultiSegmentReader(Directory directory, SegmentInfos sis, bool closeDirectory, bool readOnly)
    : base(directory, sis, closeDirectory, readOnly)
{
    // To reduce the chance of hitting FileNotFound
    // (and having to retry), we open segments in
    // reverse because IndexWriter merges & deletes
    // the newest segments first.
    SegmentReader[] readers = new SegmentReader[sis.Count];
    for (int i = sis.Count - 1; i >= 0; i--)
    {
        try
        {
            readers[i] = SegmentReader.Get(readOnly, sis.Info(i));
        }
        catch (System.IO.IOException e)
        {
            // Close all readers we had opened:
            for (i++; i < sis.Count; i++)
            {
                try
                {
                    readers[i].Close();
                }
                catch (System.IO.IOException)
                {
                    // keep going - we want to clean up as much as possible
                }
            }
            throw e;
        }
    }
    Initialize(readers);
}
public virtual void CreateRandomTerms(int nDocs, int nTerms, double power, Directory dir)
{
    int[] freq = new int[nTerms];
    for (int i = 0; i < nTerms; i++)
    {
        int f = (nTerms + 1) - i; // make first terms less frequent
        freq[i] = (int) System.Math.Ceiling(System.Math.Pow(f, power));
        terms[i] = new Term("f", System.Convert.ToString((char) ('A' + i)));
    }
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    for (int i = 0; i < nDocs; i++)
    {
        Document d = new Document();
        for (int j = 0; j < nTerms; j++)
        {
            if (r.Next(freq[j]) == 0)
            {
                d.Add(new Field("f", terms[j].Text(), Field.Store.NO, Field.Index.UN_TOKENIZED));
                //System.out.println(d);
            }
        }
        iw.AddDocument(d);
    }
    iw.Optimize();
    iw.Close();
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory);
    writer.Dispose();
    Reader = DirectoryReader.Open(Directory);
}
public override void SetUp()
{
    base.SetUp();
    // we generate awful regexps: good for testing.
    // but for preflex codec, the test can be very slow, so use fewer iterations.
    NumIterations = Codec.Default.Name.Equals("Lucene3x") ? 10 * RANDOM_MULTIPLIER : AtLeast(50);
    Dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
    Document doc = new Document();
    Field field = NewStringField("field", "", Field.Store.YES);
    doc.Add(field);
    Terms = new SortedSet<BytesRef>();

    int num = AtLeast(200);
    for (int i = 0; i < num; i++)
    {
        string s = TestUtil.RandomUnicodeString(Random());
        field.StringValue = s;
        Terms.Add(new BytesRef(s));
        writer.AddDocument(doc);
    }

    TermsAutomaton = BasicAutomata.MakeStringUnion(Terms);

    Reader = writer.Reader;
    Searcher = NewSearcher(Reader);
    writer.Dispose();
}
internal SegmentMerger(IndexWriter writer, System.String name)
{
    InitBlock();
    directory = writer.GetDirectory();
    segment = name;
    termIndexInterval = writer.GetTermIndexInterval();
}
/// <summary> Current version number from segments file.</summary>
public static long ReadCurrentVersion(Directory directory)
{
    IndexInput input = directory.OpenInput(IndexFileNames.SEGMENTS);
    int format = 0;
    long version = 0;
    try
    {
        format = input.ReadInt();
        if (format < 0)
        {
            if (format < FORMAT)
            {
                throw new System.IO.IOException("Unknown format version: " + format);
            }
            version = input.ReadLong(); // read version
        }
    }
    finally
    {
        input.Close();
    }

    if (format < 0)
    {
        return version;
    }

    // We cannot be sure about the format of the file.
    // Therefore we have to read the whole file and cannot simply seek to the version entry.
    SegmentInfos sis = new SegmentInfos();
    sis.Read(directory);
    return sis.GetVersion();
}
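// A hedged usage sketch (not from the source): callers can poll
// SegmentInfos.ReadCurrentVersion to detect that another writer has committed,
// then reopen their IndexReader. "dir" and "reader" are assumed to already exist.
long versionAtOpen = SegmentInfos.ReadCurrentVersion(dir);
// ... later, after other writers may have committed ...
if (SegmentInfos.ReadCurrentVersion(dir) != versionAtOpen)
{
    // The segments file changed; reopen to see the new commit.
    reader.Close();
    reader = IndexReader.Open(dir, true);
}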
public PreFlexRWStoredFieldsWriter(Directory directory, string segment, IOContext context)
{
    Debug.Assert(directory != null);
    this.Directory = directory;
    this.Segment = segment;

    bool success = false;
    try
    {
        FieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context);
        IndexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context);

        FieldsStream.WriteInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
        IndexStream.WriteInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);

        success = true;
    }
    finally
    {
        if (!success)
        {
            Abort();
        }
    }
}
public override void SetUp()
{
    base.SetUp();
    INDEX_SIZE = AtLeast(2000);
    Index = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Index);
    RandomGen random = new RandomGen(this, Random());
    for (int i = 0; i < INDEX_SIZE; ++i) // don't decrease; if too low the problem doesn't show up
    {
        Document doc = new Document();
        if ((i % 5) != 0) // some documents must not have an entry in the first sort field
        {
            doc.Add(NewStringField("publicationDate_", random.LuceneDate, Field.Store.YES));
        }
        if ((i % 7) == 0) // some documents to match the query (see below)
        {
            doc.Add(NewTextField("content", "test", Field.Store.YES));
        }
        // every document has a defined 'mandant' field
        doc.Add(NewStringField("mandant", Convert.ToString(i % 3), Field.Store.YES));
        writer.AddDocument(doc);
    }
    Reader = writer.Reader;
    writer.Dispose();
    Query = new TermQuery(new Term("content", "test"));
}
/// <summary>
/// Save a single segment's info. </summary>
public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
{
    string fileName = IndexFileNames.SegmentFileName(si.Name, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
    si.AddFile(fileName);

    IndexOutput output = dir.CreateOutput(fileName, ioContext);
    bool success = false;
    try
    {
        CodecUtil.WriteHeader(output, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT);
        // Write the Lucene version that created this segment, since 3.1
        output.WriteString(si.Version);
        output.WriteInt(si.DocCount);

        output.WriteByte((byte)(sbyte)(si.UseCompoundFile ? SegmentInfo.YES : SegmentInfo.NO));
        output.WriteStringStringMap(si.Diagnostics);
        output.WriteStringSet(si.Files);
        CodecUtil.WriteFooter(output);
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(output);
            si.Dir.DeleteFile(fileName);
        }
        else
        {
            output.Dispose();
        }
    }
}
public override void TearDown()
{
    base.TearDown();
    dir = null;
    anlzr = null;
}
public static void BeforeClass()
{
    Directory = NewDirectory();
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, analyzer);

    Documents.Document doc = new Documents.Document();
    doc.Add(NewTextField("field", "one two three four five", Field.Store.YES));
    doc.Add(NewTextField("repeated", "this is a repeated field - first part", Field.Store.YES));
    IndexableField repeatedField = NewTextField("repeated", "second part of a repeated field", Field.Store.YES);
    doc.Add(repeatedField);
    doc.Add(NewTextField("palindrome", "one two three two one", Field.Store.YES));
    writer.AddDocument(doc);

    doc = new Documents.Document();
    doc.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES));
    writer.AddDocument(doc);

    doc = new Documents.Document();
    doc.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES));
    writer.AddDocument(doc);

    Reader = writer.Reader;
    writer.Dispose();
    Searcher = NewSearcher(Reader);
}
internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize)
{
    bool success = false;
    try
    {
        directory = dir;
        segment = seg;
        fieldInfos = fis;

        origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false);
        size = origEnum.size;
        totalIndexInterval = origEnum.indexInterval;

        indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true);

        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Close();
        }
    }
}
public override void SetUp()
{
    base.SetUp();
    System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testIndex"));
    Lucene.Net.Util._TestUtil.RmDir(file);
    dir = FSDirectory.GetDirectory(file);
}
/// <summary>
/// Create a CachedDirectory
/// </summary>
/// <param name="CloudProvider">the implementation for interfacing with the cloud</param>
/// <param name="Catalog">name of catalog (folder in blob storage)</param>
/// <param name="CacheDirectory">local Directory object to use for local cache</param>
public CachedDirectory(ICloudProvider CloudProvider, string Catalog = null, Directory CacheDirectory = null)
{
    if (CloudProvider == null)
    {
        throw new ArgumentNullException("CloudProvider");
    }
    this.cloudProvider = CloudProvider;

    string catalog = string.IsNullOrEmpty(Catalog) ? "lucene" : Catalog.ToLower();

    if (CacheDirectory != null)
    {
        // save it off
        this.CacheDirectory = CacheDirectory;
    }
    else
    {
        string cachePath = Path.Combine(Environment.ExpandEnvironmentVariables("%temp%"), "LuceneCache");
        DirectoryInfo cacheDir = new DirectoryInfo(cachePath);
        if (!cacheDir.Exists)
        {
            cacheDir.Create();
        }
        string catalogPath = Path.Combine(cachePath, catalog);
        DirectoryInfo catalogDir = new DirectoryInfo(catalogPath);
        if (!catalogDir.Exists)
        {
            catalogDir.Create();
        }
        this.CacheDirectory = FSDirectory.Open(catalogPath);
    }

    this.cloudProvider.InitializeStorage();
}
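// A hedged usage sketch for the constructor above. MyCloudProvider and
// "connectionString" are hypothetical; only the CachedDirectory signature
// comes from the source. With CacheDirectory omitted, the local cache
// defaults to %temp%\LuceneCache\<catalog>.
ICloudProvider provider = new MyCloudProvider(connectionString);
CachedDirectory cachedDir = new CachedDirectory(provider, Catalog: "products");
// cachedDir can now be handed to an IndexWriter or IndexSearcher like any
// other Lucene.Net Directory.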
protected override void Init(IDictionary<string, string> arguments, CancellationToken cancellationToken)
{
    _directory = CommandHelpers.GetLuceneDirectory(arguments);
    _source = arguments.GetOrThrow<string>(Arguments.Source);
    _verbose = arguments.GetOrDefault(Arguments.Verbose, false);

    _registration = arguments.GetOrDefault<string>(Arguments.Registration);
    if (_registration == null)
    {
        Logger.LogInformation("Lucene index will be created up to the end of the catalog (alternatively if you provide a registration it will not pass that)");
    }

    _catalogBaseAddress = arguments.GetOrDefault<string>(Arguments.CatalogBaseAddress);
    if (_catalogBaseAddress == null)
    {
        Logger.LogInformation("No catalogBaseAddress was specified so the Lucene index will NOT contain the storage paths");
    }

    _storageBaseAddress = arguments.GetOrDefault<string>(Arguments.StorageBaseAddress);

    Logger.LogInformation("CONFIG source: \"{ConfigSource}\" registration: \"{Registration}\"" +
        " catalogBaseAddress: \"{CatalogBaseAddress}\" storageBaseAddress: \"{StorageBaseAddress}\"",
        _source,
        _registration ?? "(null)",
        _catalogBaseAddress ?? "(null)",
        _storageBaseAddress ?? "(null)");

    _handlerFunc = CommandHelpers.GetHttpMessageHandlerFactory(_verbose, _catalogBaseAddress, _storageBaseAddress);
}
internal FieldsWriter(Directory d, System.String segment, FieldInfos fn) { fieldInfos = fn; fieldsStream = d.CreateOutput(segment + ".fdt"); indexStream = d.CreateOutput(segment + ".fdx"); doClose = true; }
public override void TearDown()
{
    base.TearDown();
    searcher.Close();
    mDirectory.Close();
    mDirectory = null;
}
internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize)
    : this(d, segment, fieldInfos, readBufferSize, -1, 0)
{
}
public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
    : this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE)
{
}
//.NET
public CheckAbort(MergePolicy.OneMerge merge, Directory dir, Action<double> work)
{
    this.merge = merge;
    this.dir = dir;
    this.Work = work;
}
private TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval, bool isIndex)
{
    Initialize(directory, segment, fis, interval, isIndex);
}
internal void FinishCommit(Directory dir)
{
    if (pendingSegnOutput == null)
    {
        throw new System.SystemException("prepareCommit was not called");
    }
    bool success = false;
    try
    {
        pendingSegnOutput.FinishCommit();
        pendingSegnOutput.Close();
        pendingSegnOutput = null;
        success = true;
    }
    finally
    {
        if (!success)
        {
            RollbackCommit(dir);
        }
    }

    // NOTE: if we crash here, we have left a segments_N
    // file in the directory in a possibly corrupt state (if
    // some bytes made it to stable storage and others
    // didn't).  But, the segments_N file includes checksum
    // at the end, which should catch this case.  So when a
    // reader tries to read it, it will throw a
    // CorruptIndexException, which should cause the retry
    // logic in SegmentInfos to kick in and load the last
    // good (previous) segments_N-1 file.
    System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
    success = false;
    try
    {
        dir.Sync(fileName);
        success = true;
    }
    finally
    {
        if (!success)
        {
            try
            {
                dir.DeleteFile(fileName);
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
        }
    }

    lastGeneration = generation;

    try
    {
        IndexOutput genOutput = dir.CreateOutput(IndexFileNames.SEGMENTS_GEN);
        try
        {
            genOutput.WriteInt(FORMAT_LOCKLESS);
            genOutput.WriteLong(generation);
            genOutput.WriteLong(generation);
        }
        finally
        {
            genOutput.Close();
        }
    }
    catch (System.Exception)
    {
        // It's OK if we fail to write this file since it's
        // used only as one of the retry fallbacks.
    }
}
private void Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
{
    segment = si.name;
    this.si = si;
    this.readBufferSize = readBufferSize;

    bool success = false;
    try
    {
        // Use compound file directory for some files, if it exists
        Directory cfsDir = Directory();
        if (si.GetUseCompoundFile())
        {
            cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
            cfsDir = cfsReader;
        }

        Directory storeDir;
        if (doOpenStores)
        {
            if (si.GetDocStoreOffset() != -1)
            {
                if (si.GetDocStoreIsCompoundFile())
                {
                    storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
                    storeDir = storeCFSReader;
                }
                else
                {
                    storeDir = Directory();
                }
            }
            else
            {
                storeDir = cfsDir;
            }
        }
        else
        {
            storeDir = null;
        }

        // No compound file exists - use the multi-file format
        fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

        System.String fieldsSegment;
        if (si.GetDocStoreOffset() != -1)
        {
            fieldsSegment = si.GetDocStoreSegment();
        }
        else
        {
            fieldsSegment = segment;
        }

        if (doOpenStores)
        {
            fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);

            // Verify two sources of "maxDoc" agree:
            if (si.GetDocStoreOffset() == -1 && fieldsReader.Size() != si.docCount)
            {
                throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
            }
        }

        tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

        LoadDeletedDocs();

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
        proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
        OpenNorms(cfsDir, readBufferSize);

        if (doOpenStores && fieldInfos.HasVectors())
        {
            // open term vector files only as needed
            System.String vectorsSegment;
            if (si.GetDocStoreOffset() != -1)
            {
                vectorsSegment = si.GetDocStoreSegment();
            }
            else
            {
                vectorsSegment = segment;
            }
            termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
        }
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above.  In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            DoClose();
        }
    }
}
public virtual void TestNorms_Renamed()
{
    // tmp dir
    System.String tempDir = System.IO.Path.GetTempPath();
    if (tempDir == null)
    {
        throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
    }

    // test with a single index: index1
    System.IO.FileInfo indexDir1 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex1"));
    Directory dir1 = FSDirectory.Open(indexDir1);

    norms = new System.Collections.ArrayList();
    modifiedNorms = new System.Collections.ArrayList();

    CreateIndex(dir1);
    DoTestNorms(dir1);

    // test with a single index: index2
    System.Collections.ArrayList norms1 = norms;
    System.Collections.ArrayList modifiedNorms1 = modifiedNorms;
    int numDocNorms1 = numDocNorms;

    norms = new System.Collections.ArrayList();
    modifiedNorms = new System.Collections.ArrayList();
    numDocNorms = 0;

    System.IO.FileInfo indexDir2 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex2"));
    Directory dir2 = FSDirectory.Open(indexDir2);

    CreateIndex(dir2);
    DoTestNorms(dir2);

    // add index1 and index2 to a third index: index3
    System.IO.FileInfo indexDir3 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex3"));
    Directory dir3 = FSDirectory.Open(indexDir3);

    CreateIndex(dir3);
    IndexWriter iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(5);
    iw.SetMergeFactor(3);
    iw.AddIndexes(new Directory[] { dir1, dir2 });
    iw.Close();

    norms1.AddRange(norms);
    norms = norms1;
    modifiedNorms1.AddRange(modifiedNorms);
    modifiedNorms = modifiedNorms1;
    numDocNorms += numDocNorms1;

    // test with index3
    VerifyIndex(dir3);
    DoTestNorms(dir3);

    // now with optimize
    iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
    iw.SetMaxBufferedDocs(5);
    iw.SetMergeFactor(3);
    iw.Optimize();
    iw.Close();
    VerifyIndex(dir3);

    dir1.Close();
    dir2.Close();
    dir3.Close();
}
public override void SetUp()
{
    base.SetUp();
    index = GetIndex();
    query = new TermQuery(new Term("content", "test"));
}
/// <param name="directory"> /// </param> /// <throws> IOException </throws> public CustomSearcher(TestCustomSearcherSort enclosingInstance, Directory directory, int switcher) : base(directory) { InitBlock(enclosingInstance); this.switcher = switcher; }
public override void Merge(IndexWriter writer)
{
    // TODO: enable this once we are on JRE 1.5
    // assert !Thread.holdsLock(writer);

    this.writer = writer;

    InitMergeThreadPriority();

    dir = writer.GetDirectory();

    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue.  If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    if (Verbose())
    {
        Message("now merge");
        Message("  index: " + writer.SegString());
    }

    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until it's empty:
    while (true)
    {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null)
        {
            if (Verbose())
            {
                Message("  no more merges pending; now return");
            }
            return;
        }

        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);

        bool success = false;
        try
        {
            lock (this)
            {
                MergeThread merger;
                while (MergeThreadCount(true) >= maxThreadCount)
                {
                    if (Verbose())
                    {
                        Message("    too many merge threads running; stalling...");
                    }
                    try
                    {
                        System.Threading.Monitor.Wait(this);
                    }
                    catch (System.Threading.ThreadInterruptedException ie)
                    {
                        // In 3.0 we will change this to throw
                        // InterruptedException instead
                        Support.ThreadClass.Current().Interrupt();
                        throw new System.SystemException(ie.Message, ie);
                    }
                }

                if (Verbose())
                {
                    Message("  consider merge " + merge.SegString(dir));
                }

                // OK to spawn a new merge thread to handle this
                // merge:
                merger = GetMergeThread(writer, merge);
                mergeThreads.Add(merger);
                if (Verbose())
                {
                    Message("    launch new thread [" + merger.Name + "]");
                }
                merger.Start();
                success = true;
            }
        }
        finally
        {
            if (!success)
            {
                writer.MergeFinish(merge);
            }
        }
    }
}
/// <summary> Returns userData from latest segments file</summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static System.Collections.Generic.IDictionary<string, string> ReadCurrentUserData(Directory directory)
{
    SegmentInfos sis = new SegmentInfos();
    sis.Read(directory);
    return sis.GetUserData();
}
public FindSegmentsFile(Directory directory)
{
    this.directory = directory;
}
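// A hedged sketch of the retry pattern this constructor supports. In this era
// of the codebase, FindSegmentsFile is subclassed with a DoBody override and
// driven via Run(), which retries earlier segments_N generations when a read
// races with a commit. The exact DoBody/Run signatures are an assumption
// based on the 2.x API, and the class name is illustrative.
private class SegmentCounter : SegmentInfos.FindSegmentsFile
{
    private readonly Directory dir;

    public SegmentCounter(Directory directory) : base(directory)
    {
        this.dir = directory;
    }

    protected internal override System.Object DoBody(System.String segmentFileName)
    {
        // Run() passes candidate segments_N file names; throwing here makes
        // the retry logic fall back to another generation.
        SegmentInfos infos = new SegmentInfos();
        infos.Read(dir, segmentFileName);
        return infos.Count;
    }
}
// Usage: int numSegments = (int) new SegmentCounter(dir).Run();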
/// <summary> This version of read uses the retry logic (for lock-less
/// commits) to find the right segments file to load.
/// </summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public void Read(Directory directory)
{
    generation = lastGeneration = -1;
    new AnonymousClassFindSegmentsFile(this, directory).Run();
}
private void Write(Directory directory)
{
    System.String segmentFileName = GetNextSegmentFileName();

    // Always advance the generation on write:
    if (generation == -1)
    {
        generation = 1;
    }
    else
    {
        generation++;
    }

    ChecksumIndexOutput segnOutput = new ChecksumIndexOutput(directory.CreateOutput(segmentFileName));

    bool success = false;
    try
    {
        segnOutput.WriteInt(CURRENT_FORMAT); // write FORMAT
        segnOutput.WriteLong(++version); // every write changes the index
        segnOutput.WriteInt(counter); // write counter
        segnOutput.WriteInt(Count); // write infos
        for (int i = 0; i < Count; i++)
        {
            Info(i).Write(segnOutput);
        }
        segnOutput.WriteStringStringMap(userData);
        segnOutput.PrepareCommit();
        success = true;
        pendingSegnOutput = segnOutput;
    }
    finally
    {
        if (!success)
        {
            // We hit an exception above; try to close the file
            // but suppress any exception:
            try
            {
                segnOutput.Close();
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
            try
            {
                // Try not to leave a truncated segments_N file in
                // the index:
                directory.DeleteFile(segmentFileName);
            }
            catch (System.Exception)
            {
                // Suppress so we keep throwing the original exception
            }
        }
    }
}
/// <summary> Get the filename of the current segments_N file
/// in the directory.
/// </summary>
/// <param name="directory">-- directory to search for the latest segments_N file
/// </param>
public static System.String GetCurrentSegmentFileName(Directory directory)
{
    return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", GetCurrentSegmentGeneration(directory));
}
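// A hedged sketch tying together the static SegmentInfos helpers shown in this
// section to inspect the current commit point; "dir" is assumed to be an open
// Directory.
System.String segmentsFile = SegmentInfos.GetCurrentSegmentFileName(dir); // e.g. "segments_3"
long version = SegmentInfos.ReadCurrentVersion(dir);
System.Collections.Generic.IDictionary<string, string> userData = SegmentInfos.ReadCurrentUserData(dir);

System.Console.WriteLine("commit file: " + segmentsFile + ", version: " + version);
foreach (System.Collections.Generic.KeyValuePair<string, string> kv in userData)
{
    System.Console.WriteLine("  " + kv.Key + " = " + kv.Value);
}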
/// <summary> Read a particular segmentFileName.  Note that this may
/// throw an IOException if a commit is in process.
/// </summary>
/// <param name="directory">-- directory containing the segments file
/// </param>
/// <param name="segmentFileName">-- segment file to load
/// </param>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public void Read(Directory directory, System.String segmentFileName)
{
    bool success = false;

    // Clear any previous segments:
    Clear();

    ChecksumIndexInput input = new ChecksumIndexInput(directory.OpenInput(segmentFileName));

    generation = GenerationFromSegmentsFileName(segmentFileName);
    lastGeneration = generation;

    try
    {
        int format = input.ReadInt();
        if (format < 0)
        {
            // file contains explicit format info
            // check that it is a format we can understand
            if (format < CURRENT_FORMAT)
            {
                throw new CorruptIndexException("Unknown format version: " + format);
            }
            version = input.ReadLong(); // read version
            counter = input.ReadInt(); // read counter
        }
        else
        {
            // file is in old format without explicit format info
            counter = format;
        }

        for (int i = input.ReadInt(); i > 0; i--)
        {
            // read segmentInfos
            Add(new SegmentInfo(directory, format, input));
        }

        if (format >= 0)
        {
            // in old format the version number may be at the end of the file
            if (input.GetFilePointer() >= input.Length())
            {
                version = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); // old file format without version number
            }
            else
            {
                version = input.ReadLong(); // read version
            }
        }

        if (format <= FORMAT_USER_DATA)
        {
            if (format <= FORMAT_DIAGNOSTICS)
            {
                userData = input.ReadStringStringMap();
            }
            else if (0 != input.ReadByte())
            {
                userData = new System.Collections.Generic.Dictionary<string, string>();
                userData.Add("userData", input.ReadString());
            }
            else
            {
                userData = new System.Collections.Generic.Dictionary<string, string>();
            }
        }
        else
        {
            userData = new System.Collections.Generic.Dictionary<string, string>();
        }

        if (format <= FORMAT_CHECKSUM)
        {
            long checksumNow = input.GetChecksum();
            long checksumThen = input.ReadLong();
            if (checksumNow != checksumThen)
            {
                throw new CorruptIndexException("checksum mismatch in segments file");
            }
        }

        success = true;
    }
    finally
    {
        input.Close();
        if (!success)
        {
            // Clear any segment infos we had loaded so we
            // have a clean slate on retry:
            Clear();
        }
    }
}
/// <summary>
/// Test using various international locales with accented characters (which
/// sort differently depending on locale).
/// </summary>
// Copied (and slightly modified) from
// Lucene.Net.Search.TestSort.TestInternationalSort()
//
// TODO: this test is really fragile. there are already 3 different cases,
// depending upon unicode version.
public virtual void TestCollationKeySort(Analyzer usAnalyzer, Analyzer franceAnalyzer, Analyzer swedenAnalyzer, Analyzer denmarkAnalyzer, string usResult, string frResult, string svResult, string dkResult)
{
    using Directory indexStore = NewDirectory();
    using (IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false))))
    {
        // document data:
        // the tracer field is used to determine which document was hit
        string[][] sortData = new string[][]
        {
            new string[] { "A", "x", "p\u00EAche", "p\u00EAche", "p\u00EAche", "p\u00EAche" },
            new string[] { "B", "y", "HAT", "HAT", "HAT", "HAT" },
            new string[] { "C", "x", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9" },
            new string[] { "D", "y", "HUT", "HUT", "HUT", "HUT" },
            new string[] { "E", "x", "peach", "peach", "peach", "peach" },
            new string[] { "F", "y", "H\u00C5T", "H\u00C5T", "H\u00C5T", "H\u00C5T" },
            new string[] { "G", "x", "sin", "sin", "sin", "sin" },
            new string[] { "H", "y", "H\u00D8T", "H\u00D8T", "H\u00D8T", "H\u00D8T" },
            new string[] { "I", "x", "s\u00EDn", "s\u00EDn", "s\u00EDn", "s\u00EDn" },
            new string[] { "J", "y", "HOT", "HOT", "HOT", "HOT" }
        };

        FieldType customType = new FieldType();
        customType.IsStored = true;

        for (int i = 0; i < sortData.Length; ++i)
        {
            Document doc = new Document();
            doc.Add(new Field("tracer", sortData[i][0], customType));
            doc.Add(new TextField("contents", sortData[i][1], Field.Store.NO));
            if (sortData[i][2] != null)
            {
                doc.Add(new TextField("US", usAnalyzer.GetTokenStream("US", new StringReader(sortData[i][2]))));
            }
            if (sortData[i][3] != null)
            {
                doc.Add(new TextField("France", franceAnalyzer.GetTokenStream("France", new StringReader(sortData[i][3]))));
            }
            if (sortData[i][4] != null)
            {
                doc.Add(new TextField("Sweden", swedenAnalyzer.GetTokenStream("Sweden", new StringReader(sortData[i][4]))));
            }
            if (sortData[i][5] != null)
            {
                doc.Add(new TextField("Denmark", denmarkAnalyzer.GetTokenStream("Denmark", new StringReader(sortData[i][5]))));
            }
            writer.AddDocument(doc);
        }
        writer.ForceMerge(1);
    } // writer.Dispose();

    using IndexReader reader = DirectoryReader.Open(indexStore);
    IndexSearcher searcher = new IndexSearcher(reader);

    Sort sort = new Sort();
    Search.Query queryX = new TermQuery(new Term("contents", "x"));
    Search.Query queryY = new TermQuery(new Term("contents", "y"));

    sort.SetSort(new SortField("US", SortFieldType.STRING));
    this.AssertMatches(searcher, queryY, sort, usResult);

    sort.SetSort(new SortField("France", SortFieldType.STRING));
    this.AssertMatches(searcher, queryX, sort, frResult);

    sort.SetSort(new SortField("Sweden", SortFieldType.STRING));
    this.AssertMatches(searcher, queryY, sort, svResult);

    sort.SetSort(new SortField("Denmark", SortFieldType.STRING));
    this.AssertMatches(searcher, queryY, sort, dkResult);
}
/// <summary>Writes & syncs to the Directory dir, taking care to
/// remove the segments file on exception
/// </summary>
public /*internal*/ void Commit(Directory dir)
{
    PrepareCommit(dir);
    FinishCommit(dir);
}
internal float minScore = 0.5f; //LUCENENET-359 Spellchecker accuracy gets overwritten

public SpellChecker(Directory gramIndex)
{
    this.SetSpellIndex(gramIndex);
}
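// A hedged usage sketch for SpellChecker (assuming the contrib API of this
// era: IndexDictionary and SuggestSimilar). "reader" is an assumed open
// IndexReader over the main index, and "content" an indexed field.
Directory spellDir = new RAMDirectory();
SpellChecker spell = new SpellChecker(spellDir);
spell.IndexDictionary(new LuceneDictionary(reader, "content"));
string[] suggestions = spell.SuggestSimilar("lucnee", 5);
foreach (string suggestion in suggestions)
{
    System.Console.WriteLine(suggestion);
}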
/// <summary>Create the compound stream in the specified file.  The file name is the
/// entire name (no extensions are added).
/// </summary>
/// <throws> NullPointerException if <c>dir</c> or <c>name</c> is null </throws>
public CompoundFileWriter(Directory dir, System.String name)
    : this(dir, name, null)
{
}
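// A hedged sketch of driving the writer above (AddFile/Close reflect the
// 2.x-era API; the segment and file names are illustrative). Note the target
// file name is used verbatim - no extension is appended.
CompoundFileWriter cfsWriter = new CompoundFileWriter(dir, "_1.cfs");
cfsWriter.AddFile("_1.fnm"); // field infos
cfsWriter.AddFile("_1.frq"); // term frequencies
cfsWriter.AddFile("_1.prx"); // term positions
cfsWriter.Close(); // copies the source files into the compound stream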
/// <summary>
/// Returns a <see cref="StoredFieldsWriter"/> to write stored
/// fields.
/// </summary>
public abstract StoredFieldsWriter FieldsWriter(Directory directory, SegmentInfo si, IOContext context);
public virtual void SetSpellIndex(Directory spellindex)
{
    this.spellindex = spellindex;
}
public CheckAbort(MergePolicy.OneMerge merge, Directory dir)
{
    this.merge = merge;
    this.dir = dir;
}
/// <summary>
/// Returns a <see cref="StoredFieldsReader"/> to load stored
/// fields.
/// </summary>
public abstract StoredFieldsReader FieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context);
internal virtual SegmentReader ReopenSegment(SegmentInfo si)
{
    lock (this)
    {
        bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
        bool normsUpToDate = true;

        bool[] fieldNormsChanged = new bool[fieldInfos.Size()];
        if (normsUpToDate)
        {
            for (int i = 0; i < fieldInfos.Size(); i++)
            {
                if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
                {
                    normsUpToDate = false;
                    fieldNormsChanged[i] = true;
                }
            }
        }

        if (normsUpToDate && deletionsUpToDate)
        {
            return this;
        }

        // clone reader
        SegmentReader clone = new SegmentReader();
        bool success = false;
        try
        {
            clone.directory = directory;
            clone.si = si;
            clone.segment = segment;
            clone.readBufferSize = readBufferSize;
            clone.cfsReader = cfsReader;
            clone.storeCFSReader = storeCFSReader;
            clone.fieldInfos = fieldInfos;
            clone.tis = tis;
            clone.freqStream = freqStream;
            clone.proxStream = proxStream;
            clone.termVectorsReaderOrig = termVectorsReaderOrig;

            // we have to open a new FieldsReader, because it is not thread-safe
            // and can thus not be shared among multiple SegmentReaders
            // TODO: Change this in case FieldsReader becomes thread-safe in the future
            System.String fieldsSegment;

            Directory storeDir = Directory();

            if (si.GetDocStoreOffset() != -1)
            {
                fieldsSegment = si.GetDocStoreSegment();
                if (storeCFSReader != null)
                {
                    storeDir = storeCFSReader;
                }
            }
            else
            {
                fieldsSegment = segment;
                if (cfsReader != null)
                {
                    storeDir = cfsReader;
                }
            }

            if (fieldsReader != null)
            {
                clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
            }

            if (!deletionsUpToDate)
            {
                // load deleted docs
                clone.deletedDocs = null;
                clone.LoadDeletedDocs();
            }
            else
            {
                clone.deletedDocs = this.deletedDocs;
            }

            clone.norms = new System.Collections.Hashtable();
            if (!normsUpToDate)
            {
                // load norms
                for (int i = 0; i < fieldNormsChanged.Length; i++)
                {
                    // copy unchanged norms to the cloned reader and incRef those norms
                    if (!fieldNormsChanged[i])
                    {
                        System.String curField = fieldInfos.FieldInfo(i).name;
                        Norm norm = (Norm) this.norms[curField];
                        norm.IncRef();
                        clone.norms[curField] = norm;
                    }
                }

                clone.OpenNorms(si.GetUseCompoundFile() ? cfsReader : Directory(), readBufferSize);
            }
            else
            {
                System.Collections.IEnumerator it = norms.Keys.GetEnumerator();
                while (it.MoveNext())
                {
                    System.String field = (System.String) it.Current;
                    Norm norm = (Norm) norms[field];
                    norm.IncRef();
                    clone.norms[field] = norm;
                }
            }

            if (clone.singleNormStream == null)
            {
                for (int i = 0; i < fieldInfos.Size(); i++)
                {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.isIndexed && !fi.omitNorms)
                    {
                        Directory d = si.GetUseCompoundFile() ? cfsReader : Directory();
                        System.String fileName = si.GetNormFileName(fi.number);
                        if (si.HasSeparateNorms(fi.number))
                        {
                            continue;
                        }
                        if (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
                        {
                            clone.singleNormStream = d.OpenInput(fileName, readBufferSize);
                            break;
                        }
                    }
                }
            }

            success = true;
        }
        finally
        {
            if (this.referencedSegmentReader != null)
            {
                // this reader shares resources with another SegmentReader,
                // so we increment the other reader's refCount. We don't
                // increment the refCount of the norms because we did
                // that already for the shared norms
                clone.referencedSegmentReader = this.referencedSegmentReader;
                referencedSegmentReader.IncRefReaderNotNorms();
            }
            else
            {
                // this reader wasn't reopened, so we increment this
                // reader's refCount
                clone.referencedSegmentReader = this;
                IncRefReaderNotNorms();
            }

            if (!success)
            {
                // An exception occurred during reopen; we have to decRef the norms
                // that we incRef'ed already and close singleNormsStream and FieldsReader
                clone.DecRef();
            }
        }

        return clone;
    }
}
internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
{
    fieldInfos = fn;
    fieldsStream = d.CreateOutput(segment + ".fdt");
    indexStream = d.CreateOutput(segment + ".fdx");
}
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
{
    using (_lock.Write())
    {
        _writer = writer;
        _directory = writer.Directory;

        if (Verbose)
        {
            Message("now merge");
            Message("  index: " + writer.SegString());
        }

        // First, quickly run through the newly proposed merges
        // and add any orthogonal merges (ie a merge not
        // involving segments already pending to be merged) to
        // the queue.  If we are way behind on merging, many of
        // these newly proposed merges will likely already be
        // registered.

        // Iterate, pulling from the IndexWriter's queue of
        // pending merges, until it's empty:
        while (true)
        {
            long startStallTime = 0;
            while (writer.HasPendingMerges() && MergeThreadCount >= MaxMergeCount)
            {
                // this means merging has fallen too far behind: we
                // have already created maxMergeCount threads, and
                // now there's at least one more merge pending.
                // Note that only maxThreadCount of
                // those created merge threads will actually be
                // running; the rest will be paused (see
                // updateMergeThreads).  We stall this producer
                // thread to prevent creation of new segments,
                // until merging has caught up:
                startStallTime = Environment.TickCount;
                if (Verbose)
                {
                    Message("    too many merges; stalling...");
                }

                _manualResetEvent.Reset();
                _manualResetEvent.Wait();
            }

            if (Verbose)
            {
                if (startStallTime != 0)
                {
                    Message("  stalled for " + (Environment.TickCount - startStallTime) + " msec");
                }
            }

            MergePolicy.OneMerge merge = writer.NextMerge();
            if (merge == null)
            {
                if (Verbose)
                {
                    Message("  no more merges pending; now return");
                }
                return;
            }

            bool success = false;
            try
            {
                if (Verbose)
                {
                    Message("  consider merge " + writer.SegString(merge.Segments));
                }

                // OK to spawn a new merge thread to handle this
                // merge:
                var merger = CreateTask(writer, merge);

                merger.MergeThreadCompleted += OnMergeThreadCompleted;

                _mergeThreads.Add(merger);

                if (Verbose)
                {
                    Message("    launch new thread [" + merger.Name + "]");
                }

                merger.Start(_taskScheduler);

                // Must call this after starting the thread else
                // the new thread is removed from mergeThreads
                // (since it's not alive yet):
                UpdateMergeThreads();

                success = true;
            }
            finally
            {
                if (!success)
                {
                    writer.MergeFinish(merge);
                }
            }
        }
    }
}
public Action<double> Work;

//.NET
public CheckAbort(MergePolicy.OneMerge merge, Directory dir)
{
    this.merge = merge;
    this.dir = dir;
    this.Work = Internal_Work;
}
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir, int readBufferSize)
{
    return Get(dir, si, sis, closeDir, ownDir, readBufferSize, true);
}
internal TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval)
{
    Initialize(directory, segment, fis, interval, false);
    other = new TermInfosWriter(directory, segment, fis, interval, true);
    other.other = this;
}