/// <summary>
/// Set up a new index in RAM with three test phrases and the supplied Analyzer.
/// </summary>
/// <exception cref="Exception"> if an error occurs with index writer or searcher </exception>
public override void SetUp()
{
    base.SetUp();
    analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), 2);
    directory = NewDirectory();

    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    // Index the three fixed phrases the shingle tests query against,
    // in the same order the original added them.
    foreach (string content in new[]
    {
        "please divide this sentence into shingles",
        "just another test sentence",
        "a sentence which contains no test"
    })
    {
        Document doc = new Document();
        doc.Add(new TextField("content", content, Field.Store.YES));
        writer.AddDocument(doc);
    }
    writer.Dispose();

    reader = DirectoryReader.Open(directory);
    searcher = NewSearcher(reader);
}
/// <summary>
/// Opens the segment's plain-text stored-fields file and loads the
/// per-document offset index (<c>ReadIndex</c>). If opening fails, any
/// partially acquired state is disposed while the original exception is
/// allowed to propagate.
/// </summary>
public SimpleTextStoredFieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context)
{
    _fieldInfos = fn;
    var success = false;
    try
    {
        _input = directory.OpenInput(
            IndexFileNames.SegmentFileName(si.Name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION), context);
        success = true;
    }
    finally
    {
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch
            {
                // ensure we throw our original exception
            }
        }
    }
    ReadIndex(si.DocCount);
}
protected SpatialContext ctx;//subclass must initialize

/// <summary>
/// Per-test setup: fresh random directory, writer, reader, and searcher.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    directory = NewDirectory();
    Random random = Random();
    // NOTE(review): lowercase 'newIndexWriterConfig' looks like a Java-style
    // local helper on this test base rather than LuceneTestCase.NewIndexWriterConfig —
    // confirm it exists before renaming.
    indexWriter = new RandomIndexWriter(random, directory, newIndexWriterConfig(random));
    indexReader = indexWriter.Reader;
    indexSearcher = NewSearcher(indexReader);
}
/// <summary>
/// Builds a single-document index (keyword "partnum" plus analyzed
/// "description") and opens a reader/searcher over it.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    directory = NewDirectory();

    IndexWriter writer = new IndexWriter(
        directory,
        new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT)));

    Document document = new Document();
    document.Add(new StringField("partnum", "Q36", Field.Store.YES));
    document.Add(new TextField("description", "Illidium Space Modulator", Field.Store.YES));
    writer.AddDocument(document);
    writer.Dispose();

    reader = DirectoryReader.Open(directory);
    searcher = NewSearcher(reader);
}
/// <summary>
/// One-time setup: indexes 110 documents with int and float association
/// facets. Every 11th document is left empty (regression guard for an
/// aggregator infinite loop), and every other non-empty document also
/// carries the 'b' category.
/// </summary>
public override void BeforeClass()
{
    base.BeforeClass();
    dir = NewDirectory();
    taxoDir = NewDirectory();

    // preparations - index, taxonomy, content
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Cannot mix ints & floats in the same indexed field:
    config = new FacetsConfig();
    config.SetIndexFieldName("int", "$facets.int");
    config.SetMultiValued("int", true);
    config.SetIndexFieldName("float", "$facets.float");
    config.SetMultiValued("float", true);

    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    // index documents, 50% have only 'b' and all have 'a'
    for (int i = 0; i < 110; i++)
    {
        Document doc = new Document();
        // every 11th document is added empty; this used to send the
        // association aggregators into an infinite loop
        bool addCategories = i % 11 != 0;
        if (addCategories)
        {
            doc.Add(new Int32AssociationFacetField(2, "int", "a"));
            doc.Add(new SingleAssociationFacetField(0.5f, "float", "a"));
            if (i % 2 == 0) // 50
            {
                doc.Add(new Int32AssociationFacetField(3, "int", "b"));
                doc.Add(new SingleAssociationFacetField(0.2f, "float", "b"));
            }
        }
        writer.AddDocument(config.Build(taxoWriter, doc));
    }

    taxoWriter.Dispose();
    reader = writer.GetReader();
    writer.Dispose();
    taxoReader = new DirectoryTaxonomyReader(taxoDir);
}
/// <summary>
/// Verifies that SearcherTaxonomyManager picks up a taxonomy replaced via
/// ReplaceTaxonomy after MaybeRefresh: count goes from 1 (root only) to 3
/// (root, "a", "a/b").
/// </summary>
public virtual void TestReplaceTaxonomyDirectory()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriter w = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
    var tw = new DirectoryTaxonomyWriter(taxoDir);
    w.Commit();
    tw.Commit();

    // Build the replacement taxonomy containing one extra category path.
    Store.Directory taxoDir2 = NewDirectory();
    var tw2 = new DirectoryTaxonomyWriter(taxoDir2);
    tw2.AddCategory(new FacetLabel("a", "b"));
    tw2.Dispose();

    var mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null);
    SearcherAndTaxonomy pair = mgr.Acquire();
    try
    {
        // before replacement: only the taxonomy root
        Assert.AreEqual(1, pair.TaxonomyReader.Count);
    }
    finally
    {
        mgr.Release(pair);
    }

    w.AddDocument(new Document());
    tw.ReplaceTaxonomy(taxoDir2);
    taxoDir2.Dispose();
    w.Commit();
    tw.Commit();

    mgr.MaybeRefresh();
    pair = mgr.Acquire();
    try
    {
        // after replacement: root + "a" + "a/b"
        Assert.AreEqual(3, pair.TaxonomyReader.Count);
    }
    finally
    {
        mgr.Release(pair);
    }

    IOUtils.Close(mgr, tw, w, taxoDir, indexDir);
}
/// <summary>
/// Duel test for a multi-valued field: indexes the same two values into a
/// core Directory index and a MemoryIndex, then compares their term vectors.
/// </summary>
public void TestDuelMemoryIndexCoreDirectoryWithArrayField()
{
    const string fieldName = "text";
    MockAnalyzer mockAnalyzer = new MockAnalyzer(Random);
    if (Random.nextBoolean())
    {
        mockAnalyzer.SetOffsetGap(Random.nextInt(100));
    }

    // index into a random directory: term vectors with offsets and
    // positions, but no payloads
    FieldType type = new FieldType(TextField.TYPE_STORED)
    {
        StoreTermVectorOffsets = true,
        StoreTermVectorPayloads = false,
        StoreTermVectorPositions = true,
        StoreTermVectors = true
    };
    type.Freeze();

    Document doc = new Document();
    doc.Add(new Field(fieldName, "la la", type));
    doc.Add(new Field(fieldName, "foo bar foo bar foo", type));

    Store.Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, mockAnalyzer));
    writer.UpdateDocument(new Term("id", "1"), doc);
    writer.Commit();
    writer.Dispose();
    DirectoryReader reader = DirectoryReader.Open(dir);

    // Index the same two values into a MemoryIndex
    MemoryIndex memIndex = new MemoryIndex(true);
    memIndex.AddField(fieldName, "la la", mockAnalyzer);
    memIndex.AddField(fieldName, "foo bar foo bar foo", mockAnalyzer);

    // compare term vectors
    Terms ramTv = reader.GetTermVector(0, fieldName);
    IndexReader memIndexReader = memIndex.CreateSearcher().IndexReader;
    Terms memTv = memIndexReader.GetTermVector(0, fieldName);

    CompareTermVectors(ramTv, memTv, fieldName);

    memIndexReader.Dispose();
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Reads the plain-text live-docs file for the segment's current deletion
/// generation: a SIZE header, one DOC line per live document, an END marker,
/// and a checksum footer. Returns the live documents as SimpleTextBits.
/// </summary>
public override Bits ReadLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
{
    Debug.Assert(info.HasDeletions());
    var scratch = new BytesRef();
    var scratchUtf16 = new CharsRef();

    var fileName = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.DelGen);
    ChecksumIndexInput input = null;
    var success = false;
    try
    {
        input = dir.OpenChecksumInput(fileName, context);

        // header: "SIZE <n>"
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SIZE));
        var size = ParseIntAt(scratch, SIZE.Length, scratchUtf16);

        var bits = new BitArray(size);

        // body: one "DOC <id>" line per live document, terminated by END
        SimpleTextUtil.ReadLine(input, scratch);
        while (!scratch.Equals(END))
        {
            Debug.Assert(StringHelper.StartsWith(scratch, DOC));
            var docid = ParseIntAt(scratch, DOC.Length, scratchUtf16);
            // NOTE(review): BitArray.Set throws when docid >= size; the newer
            // variant of this reader uses SafeSet here instead — confirm whether
            // out-of-range doc ids can occur in this codec's files.
            bits.Set(docid, true);
            SimpleTextUtil.ReadLine(input, scratch);
        }

        SimpleTextUtil.CheckFooter(input);
        success = true;
        return (new SimpleTextBits(bits, size));
    }
    finally
    {
        // success: close normally (propagate close errors);
        // failure: suppress close errors so the original exception wins
        if (success)
        {
            IOUtils.Close(input);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Command-line entry point: prints the top terms of the index at args[0],
/// ordered by document frequency, or by total term frequency when "-t" is
/// passed. A numeric argument overrides the term count; any other argument
/// is treated as the field name.
/// </summary>
public static void Main(string[] args)
{
    string field = null;
    int numTerms = DEFAULT_NUMTERMS;

    if (args.Length == 0 || args.Length > 4)
    {
        // LUCENENET specific - our wrapper console shows the correct usage
        throw new ArgumentException();
        //Usage();
        //Environment.Exit(1);
    }

    Store.Directory dir = FSDirectory.Open(new DirectoryInfo(args[0]));

    IComparer<TermStats> comparer = new DocFreqComparer();

    for (int i = 1; i < args.Length; i++)
    {
        string arg = args[i];
        if (arg.Equals("-t", StringComparison.Ordinal))
        {
            comparer = new TotalTermFreqComparer();
            continue;
        }
        try
        {
            numTerms = Convert.ToInt32(arg);
        }
        catch (FormatException)
        {
            // not a number: treat the argument as the field name
            field = arg;
        }
    }

    using (IndexReader reader = DirectoryReader.Open(dir))
    {
        TermStats[] terms = GetHighFreqTerms(reader, numTerms, field, comparer);
        foreach (TermStats stats in terms)
        {
            Console.WriteLine("{0}:{1} \t totalTF = {2:#,##0} \t doc freq = {3:#,##0} \n", stats.Field, stats.GetTermText(), stats.TotalTermFreq, stats.DocFreq);
        }
    }
}
/// <summary>
/// Reads the plain-text live-docs file for the segment's current deletion
/// generation (SIZE header, DOC lines, END marker, checksum footer) and
/// returns the live documents as SimpleTextBits.
/// </summary>
public override Bits ReadLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
{
    Debug.Assert(info.HasDeletions());
    var line = new BytesRef();
    var utf16Buffer = new CharsRef();

    var fileName = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.DelGen);
    ChecksumIndexInput input = null;
    var readCompleted = false;
    try
    {
        input = dir.OpenChecksumInput(fileName, context);

        // header: "SIZE <n>"
        SimpleTextUtil.ReadLine(input, line);
        Debug.Assert(StringHelper.StartsWith(line, SIZE));
        var size = ParseIntAt(line, SIZE.Length, utf16Buffer);

        var liveBits = new BitArray(size);

        // body: one "DOC <id>" line per live document, terminated by END
        for (SimpleTextUtil.ReadLine(input, line); !line.Equals(END); SimpleTextUtil.ReadLine(input, line))
        {
            Debug.Assert(StringHelper.StartsWith(line, DOC));
            liveBits.SafeSet(ParseIntAt(line, DOC.Length, utf16Buffer), true);
        }

        SimpleTextUtil.CheckFooter(input);
        readCompleted = true;
        return new SimpleTextBits(liveBits, size);
    }
    finally
    {
        // on failure, suppress close errors so the original exception propagates
        if (readCompleted)
        {
            IOUtils.Close(input);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Builds a spell-checker index from a three-word plain-text dictionary and
/// verifies the suggestions for a misspelling of "threeword".
/// </summary>
public void TestBuild()
{
    string LF = Environment.NewLine;
    string input = string.Join(LF, "oneword", "twoword", "threeword");

    PlainTextDictionary ptd = new PlainTextDictionary(new StringReader(input));
    Store.Directory ramDir = NewDirectory();
    SpellChecker spellChecker = new SpellChecker(ramDir);
    spellChecker.IndexDictionary(ptd, NewIndexWriterConfig(TEST_VERSION_CURRENT, null), false);

    string[] similar = spellChecker.SuggestSimilar("treeword", 2);
    assertEquals(2, similar.Length);
    assertEquals(similar[0], "threeword");
    assertEquals(similar[1], "oneword");

    spellChecker.Dispose();
    ramDir.Dispose();
}
/// <summary>
/// Regression test for LUCENE-5303: CachedOrdinalsReader must key its cache
/// on the reader's core cache key, not a per-thread DocValues instance, so
/// concurrent readers all share one cache entry (equal RamBytesUsed).
/// </summary>
public virtual void TestWithThreads()
{
    // LUCENE-5303: OrdinalsCache used the ThreadLocal BinaryDV instead of reader.getCoreCacheKey().
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    IndexWriter writer = new IndexWriter(indexDir, conf);
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig config = new FacetsConfig();

    Document doc = new Document();
    doc.Add(new FacetField("A", "1"));
    writer.AddDocument(config.Build(taxoWriter, doc));
    doc = new Document();
    doc.Add(new FacetField("A", "2"));
    writer.AddDocument(config.Build(taxoWriter, doc));

    var reader = DirectoryReader.Open(writer, true);
    CachedOrdinalsReader ordsReader = new CachedOrdinalsReader(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
    ThreadJob[] threads = new ThreadJob[3];
    for (int i = 0; i < threads.Length; i++)
    {
        threads[i] = new ThreadAnonymousClass(this, "CachedOrdsThread-" + i, reader, ordsReader);
    }

    // Start ALL threads before joining any of them so the readers actually run
    // concurrently. Joining inside the start loop would serialize them, and the
    // LUCENE-5303 regression (shared cache key across threads) would never be
    // exercised. This matches the upstream Java test's start-all-then-join shape.
    foreach (ThreadJob t in threads)
    {
        t.Start();
    }

    long ramBytesUsed = 0;
    foreach (ThreadJob t in threads)
    {
        t.Join();
        if (ramBytesUsed == 0)
        {
            ramBytesUsed = ordsReader.RamBytesUsed();
        }
        else
        {
            // every thread must have hit the same cached ordinals entry
            Assert.AreEqual(ramBytesUsed, ordsReader.RamBytesUsed());
        }
    }

    IOUtils.Dispose(writer, taxoWriter, reader, indexDir, taxoDir);
}
/// <summary>
/// Creates the plain-text term-vectors output for <paramref name="segment"/>;
/// aborts (cleaning up any partially written file) if the output cannot be created.
/// </summary>
public SimpleTextTermVectorsWriter(Directory directory, string segment, IOContext context)
{
    _directory = directory;
    _segment = segment;

    bool created = false;
    try
    {
        string fileName = IndexFileNames.SegmentFileName(segment, "", VECTORS_EXTENSION);
        _output = directory.CreateOutput(fileName, context);
        created = true;
    }
    finally
    {
        if (!created)
        {
            Abort();
        }
    }
}
/// <summary>
/// Opens a DirectoryReader over the run directory — at the commit whose user
/// data matches <c>commitUserData</c> when one is configured, otherwise at the
/// latest commit — and installs it on the run data.
/// </summary>
/// <returns>1 (one unit of work performed).</returns>
public override int DoLogic()
{
    Store.Directory dir = RunData.Directory;
    DirectoryReader r; // LUCENENET: IDE0059: Remove unnecessary value assignment (matches the sibling task)
    if (commitUserData != null)
    {
        r = DirectoryReader.Open(OpenReaderTask.FindIndexCommit(dir, commitUserData));
    }
    else
    {
        r = DirectoryReader.Open(dir);
    }
    RunData.SetIndexReader(r);
    // We transfer reference to the run data
    r.DecRef();
    return (1);
}
/// <summary>
/// Creates the plain-text stored-fields output for <paramref name="segment"/>;
/// aborts on failure so no partial file is left behind.
/// </summary>
public SimpleTextStoredFieldsWriter(Directory directory, string segment, IOContext context)
{
    _directory = directory;
    _segment = segment;

    var opened = false;
    try
    {
        _output = directory.CreateOutput(
            IndexFileNames.SegmentFileName(segment, "", FIELDS_EXTENSION), context);
        opened = true;
    }
    finally
    {
        if (!opened)
        {
            Abort();
        }
    }
}
/// <summary>
/// Indexes a single faceted document under a custom per-field similarity
/// wrapper, exercising the drill-down field's no-norms path.
/// </summary>
public virtual void TestReallyNoNormsForDrillDown()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    iwc.SetSimilarity(new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this));

    ITaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    FacetsConfig config = new FacetsConfig();

    // one document: an analyzed text field plus a single facet dimension
    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("a", "path"));
    writer.AddDocument(config.Build(taxoWriter, document));

    IOUtils.Close(writer, taxoWriter, dir, taxoDir);
}
/// <summary>
/// Opens a DirectoryReader over the benchmark's directory — at the commit
/// whose user data matches <c>commitUserData</c> when one is configured,
/// otherwise at the latest commit — and installs it on the run data.
/// </summary>
/// <returns>1 (one unit of work performed).</returns>
public override int DoLogic()
{
    Store.Directory dir = RunData.Directory;
    DirectoryReader r; // LUCENENET: IDE0059: Remove unnecessary value assignment
    if (commitUserData != null)
    {
        r = DirectoryReader.Open(OpenReaderTask.FindIndexCommit(dir, commitUserData));
    }
    else
    {
        r = DirectoryReader.Open(dir);
    }
    RunData.SetIndexReader(r);
    // We transfer reference to the run data
    // (SetIndexReader takes its own reference, so we drop ours here)
    r.DecRef();
    return (1);
}
/// <summary>
/// Verifies that a field duplicated via TeeSinkTokenFilter (tee + sink)
/// indexes the token stream twice into term vectors: one unique term with
/// total frequency 2, the second occurrence's offsets shifted past the first
/// field instance (0-4 then 8-12 for two copies of "abcd ").
/// </summary>
public virtual void TestEndOffsetPositionWithTeeSinkTokenFilter()
{
    Store.Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false);
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    TokenStream tokenStream = analyzer.GetTokenStream("field", "abcd ");
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream);
    TokenStream sink = tee.NewSinkTokenStream();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.StoreTermVectors = true;
    ft.StoreTermVectorOffsets = true;
    ft.StoreTermVectorPositions = true;
    // f1 consumes the tee (which feeds the sink); f2 replays the sink's copy
    Field f1 = new Field("field", tee, ft);
    Field f2 = new Field("field", sink, ft);
    doc.Add(f1);
    doc.Add(f2);
    w.AddDocument(doc);
    w.Dispose();

    IndexReader r = DirectoryReader.Open(dir);
    Terms vector = r.GetTermVectors(0).GetTerms("field");
    // one unique term ("abcd") across both field instances
    assertEquals(1, vector.Count);
    TermsEnum termsEnum = vector.GetIterator(null);
    termsEnum.Next();
    assertEquals(2, termsEnum.TotalTermFreq);
    DocsAndPositionsEnum positions = termsEnum.DocsAndPositions(null, null);
    assertTrue(positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(2, positions.Freq);
    positions.NextPosition();
    // first instance: offsets [0,4)
    assertEquals(0, positions.StartOffset);
    assertEquals(4, positions.EndOffset);
    positions.NextPosition();
    // second instance: offsets continue after the first field value
    assertEquals(8, positions.StartOffset);
    assertEquals(12, positions.EndOffset);
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, positions.NextDoc());
    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Command-line entry point: prints the top terms of the index at args[0],
/// ordered by document frequency, or by total term frequency when "-t" is
/// passed. A numeric argument overrides the term count; any other argument
/// is treated as the field name.
/// </summary>
public static void Main(string[] args)
{
    string field = null;
    int numTerms = DEFAULT_NUMTERMS;

    if (args.Length == 0 || args.Length > 4)
    {
        Usage();
        Environment.Exit(1);
    }

    Store.Directory dir = FSDirectory.Open(new DirectoryInfo(args[0]));

    IComparer<TermStats> comparator = new DocFreqComparator();

    for (int i = 1; i < args.Length; i++)
    {
        // Ordinal comparison: "-t" is a literal flag, not linguistic text;
        // the culture-sensitive default could mis-handle it (and the modern
        // variant of this tool already uses Ordinal here).
        if (args[i].Equals("-t", StringComparison.Ordinal))
        {
            comparator = new TotalTermFreqComparator();
        }
        else
        {
            try
            {
                numTerms = Convert.ToInt32(args[i]);
            }
            catch (FormatException)
            {
                // not a number: treat the argument as the field name
                field = args[i];
            }
        }
    }

    using (IndexReader reader = DirectoryReader.Open(dir))
    {
        TermStats[] terms = GetHighFreqTerms(reader, numTerms, field, comparator);
        for (int i = 0; i < terms.Length; i++)
        {
            Console.WriteLine("{0}:{1} \t totalTF = {2:#,##0} \t doc freq = {3:#,##0} \n", terms[i].Field, terms[i].TermText, terms[i].TotalTermFreq, terms[i].DocFreq);
        }
    }
}
/// <summary>
/// Writes the live-docs bit set as plain text to the file for the segment's
/// NEXT deletion generation: a SIZE header, one DOC line per set (live) bit,
/// an END marker, and a checksum footer.
/// </summary>
public override void WriteLiveDocs(MutableBits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context)
{
    var set = ((SimpleTextBits)bits).BITS;
    var size = bits.Length();
    var scratch = new BytesRef();

    var fileName = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.NextDelGen);
    IndexOutput output = null;
    var success = false;
    try
    {
        output = dir.CreateOutput(fileName, context);
        SimpleTextUtil.Write(output, SIZE);
        SimpleTextUtil.Write(output, Convert.ToString(size), scratch);
        SimpleTextUtil.WriteNewline(output);

        // emit one DOC entry per set (live) bit
        for (int i = set.NextSetBit(0); i >= 0; i = set.NextSetBit(i + 1))
        {
            SimpleTextUtil.Write(output, DOC);
            SimpleTextUtil.Write(output, Convert.ToString(i), scratch);
            SimpleTextUtil.WriteNewline(output);
        }

        SimpleTextUtil.Write(output, END);
        SimpleTextUtil.WriteNewline(output);
        SimpleTextUtil.WriteChecksum(output, scratch);
        success = true;
    }
    finally
    {
        // on failure, suppress close errors so the original exception propagates
        if (success)
        {
            IOUtils.Close(output);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(output);
        }
    }
}
/// <summary>
/// Facet labels containing the taxonomy's internal delimiter characters
/// (U+001F / U+001E) must round-trip through indexing, counting, and
/// FacetResult rendering.
/// </summary>
public virtual void TestLabelWithDelimiter()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.SetMultiValued("dim", true);

    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    document.Add(new FacetField("dim", "test\u001Fone"));
    document.Add(new FacetField("dim", "test\u001Etwo"));
    writer.AddDocument(config.Build(taxoWriter, document));

    // NRT open
    IndexSearcher searcher = NewSearcher(writer.GetReader());

    // NRT open
    var taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    FacetsCollector collector = new FacetsCollector();
    searcher.Search(new MatchAllDocsQuery(), collector);

    Facets facets = GetTaxonomyFacetCounts(taxoReader, config, collector);
    Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Fone"));
    Assert.AreEqual(1, facets.GetSpecificValue("dim", "test\u001Etwo"));

    FacetResult result = facets.GetTopChildren(10, "dim");
    Assert.AreEqual("dim=dim path=[] value=-1 childCount=2\n test\u001Fone (1)\n test\u001Etwo (1)\n", result.ToString());

    IOUtils.Dispose(writer, taxoWriter, searcher.IndexReader, taxoReader, dir, taxoDir);
}
/// <summary>
/// One-time setup (older test-framework variant): indexes 110 documents with
/// int and float association facets. Every 11th document is left empty, and
/// every other non-empty document also carries the 'b' category.
/// </summary>
// NOTE(review): unlike the override-based variant of this setup, no base
// BeforeClass is invoked here — presumably this is wired up via a framework
// attribute; confirm against the test base class.
public void BeforeClass()
{
    dir = NewDirectory();
    taxoDir = NewDirectory();
    // preparations - index, taxonomy, content
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Cannot mix ints & floats in the same indexed field:
    config = new FacetsConfig();
    config.SetIndexFieldName("int", "$facets.int");
    config.SetMultiValued("int", true);
    config.SetIndexFieldName("float", "$facets.float");
    config.SetMultiValued("float", true);

    var writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    // index documents, 50% have only 'b' and all have 'a'
    for (int i = 0; i < 110; i++)
    {
        Document doc = new Document();
        // every 11th document is added empty, this used to cause the association
        // aggregators to go into an infinite loop
        if (i % 11 != 0)
        {
            doc.Add(new IntAssociationFacetField(2, "int", "a"));
            doc.Add(new FloatAssociationFacetField(0.5f, "float", "a"));
            if (i % 2 == 0) // 50
            {
                doc.Add(new IntAssociationFacetField(3, "int", "b"));
                doc.Add(new FloatAssociationFacetField(0.2f, "float", "b"));
            }
        }
        writer.AddDocument(config.Build(taxoWriter, doc));
    }

    taxoWriter.Dispose();
    reader = writer.Reader;
    writer.Dispose();
    taxoReader = new DirectoryTaxonomyReader(taxoDir);
}
/// <summary>
/// Duel test: for each of several random line-file documents, indexes the
/// same indexed fields into both a core Directory index and a MemoryIndex,
/// then asserts the two resulting readers agree (DuellReaders).
/// </summary>
public void TestDuellMemIndex()
{
    LineFileDocs lineFileDocs = new LineFileDocs(Random);
    int numDocs = AtLeast(10);
    MemoryIndex memory = new MemoryIndex(Random.nextBoolean(), Random.nextInt(50) * 1024 * 1024);
    for (int i = 0; i < numDocs; i++)
    {
        Store.Directory dir = NewDirectory();
        MockAnalyzer mockAnalyzer = new MockAnalyzer(Random);
        mockAnalyzer.MaxTokenLength = (TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH));
        IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, mockAnalyzer));
        Document nextDoc = lineFileDocs.NextDoc();
        Document doc = new Document();
        // copy only the indexed fields from the line-file document
        foreach (IIndexableField field in nextDoc.Fields)
        {
            if (field.IndexableFieldType.IsIndexed)
            {
                doc.Add(field);
                if (Random.nextInt(3) == 0)
                {
                    doc.Add(field); // randomly add the same field twice
                }
            }
        }

        writer.AddDocument(doc);
        writer.Dispose();
        // mirror exactly the same field values into the MemoryIndex
        foreach (IIndexableField field in doc.Fields)
        {
            memory.AddField(field.Name, ((Field)field).GetStringValue(), mockAnalyzer);
        }
        DirectoryReader competitor = DirectoryReader.Open(dir);
        AtomicReader memIndexReader = (AtomicReader)memory.CreateSearcher().IndexReader;
        DuellReaders(competitor, memIndexReader);
        IOUtils.Dispose(competitor, memIndexReader);
        // Reset lets the single MemoryIndex instance be reused next iteration
        memory.Reset();
        dir.Dispose();
    }
    lineFileDocs.Dispose();
}
/// <summary>
/// Returns the <c>IndexCommit</c> in <paramref name="dir"/> whose user-data
/// entry under <c>USER_DATA</c> equals <paramref name="userData"/>.
/// </summary>
/// <exception cref="IOException">no commit carries the requested user data.</exception>
public static IndexCommit FindIndexCommit(Store.Directory dir, string userData)
{
    IList<IndexCommit> commits = DirectoryReader.ListCommits(dir);
    foreach (IndexCommit ic in commits)
    {
        IDictionary<string, string> map = ic.UserData;
        string ud = null;
        if (map != null)
        {
            // missing key simply leaves ud null, so the commit is skipped
            map.TryGetValue(USER_DATA, out ud);
        }
        if (ud != null && ud.Equals(userData, StringComparison.Ordinal))
        {
            return ic;
        }
    }
    throw new IOException("index does not contain commit with userData: " + userData);
}
/// <summary>
/// Run all queries against both the RAMDirectory and MemoryIndex, ensuring
/// they return the same hit counts.
/// </summary>
public void AssertAllQueries(MemoryIndex memory, Store.Directory ramdir, Analyzer analyzer)
{
    IndexReader reader = DirectoryReader.Open(ramdir);
    IndexSearcher ram = NewSearcher(reader);
    IndexSearcher mem = memory.CreateSearcher();
    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "foo", analyzer)
    {
        // LUCENENET specific - to avoid random failures, set the culture
        // of the QueryParser to invariant
        Locale = CultureInfo.InvariantCulture
    };
    foreach (string query in queries)
    {
        TopDocs fromDirectory = ram.Search(qp.Parse(query), 1);
        TopDocs fromMemory = mem.Search(qp.Parse(query), 1);
        assertEquals(query, fromDirectory.TotalHits, fromMemory.TotalHits);
    }
    reader.Dispose();
}
/// <summary>
/// Exercises TaxonomyFacetSumValueSource with a score value source: under a
/// boosted constant-score query, summing the scores of all hits for dim/a
/// must equal maxScore * totalHits.
/// </summary>
public virtual void TestSumScoreAggregator()
{
    Store.Directory indexDir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    IndexWriter iw = new IndexWriter(indexDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

    FacetsConfig config = new FacetsConfig();
    for (int i = AtLeast(30); i > 0; --i)
    {
        Document doc = new Document();
        if (Random().NextBoolean()) // don't match all documents
        {
            doc.Add(new StringField("f", "v", Field.Store.NO));
        }
        doc.Add(new FacetField("dim", "a"));
        iw.AddDocument(config.Build(taxoWriter, doc));
    }

    DirectoryReader r = DirectoryReader.Open(iw, true);
    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    FacetsCollector fc = new FacetsCollector(true);
    ConstantScoreQuery csq = new ConstantScoreQuery(new MatchAllDocsQuery());
    csq.Boost = 2.0f;

    TopDocs td = FacetsCollector.Search(NewSearcher(r), csq, 10, fc);

    Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, fc, new TaxonomyFacetSumValueSource.ScoreValueSource());

    // Constant score per hit, so the expected sum is maxScore * hitCount.
    // NOTE(review): both sides are truncated to int — assumes float rounding
    // cannot push them across an integer boundary; confirm if this ever flakes.
    int expected = (int)(td.MaxScore * td.TotalHits);
    Assert.AreEqual(expected, (int)facets.GetSpecificValue("dim", "a"));

    IOUtils.Close(iw, taxoWriter, taxoReader, taxoDir, r, indexDir);
}
/// <summary>
/// One-time setup: creates a temp working directory and a dummy 10-document
/// index under its "input" subdirectory for AddIndexes tasks to consume.
/// </summary>
public override void BeforeClass()
{
    base.BeforeClass();
    testDir = CreateTempDir("addIndexesTask");

    // create a dummy index under inputDir
    inputDir = new DirectoryInfo(Path.Combine(testDir.FullName, "input"));
    Store.Directory tmpDir = NewFSDirectory(inputDir);
    try
    {
        // 'using' guarantees the writer is disposed even if AddDocument throws
        // (the original leaked the writer on that path).
        using (IndexWriter writer = new IndexWriter(tmpDir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)))
        {
            for (int i = 0; i < 10; i++)
            {
                writer.AddDocument(new Document());
            }
        }
    }
    finally
    {
        tmpDir.Dispose();
    }
}
/// <summary>
/// LUCENE-4656 regression: adding a document whose TokenStream carries no
/// TermToBytesRefAttribute (an EmptyTokenStream) must not fail.
/// </summary>
public virtual void TestIndexWriter_LUCENE4656()
{
    Store.Directory directory = NewDirectory();
    IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));

    TokenStream emptyStream = new EmptyTokenStream();
    assertFalse(emptyStream.HasAttribute<ITermToBytesRefAttribute>());

    Document document = new Document();
    document.Add(new StringField("id", "0", Field.Store.YES));
    document.Add(new TextField("description", emptyStream));

    // this should not fail because we have no TermToBytesRefAttribute
    writer.AddDocument(document);
    assertEquals(1, writer.NumDocs);

    writer.Dispose();
    directory.Dispose();
}
/// <summary>
/// A multi-component FacetField path on a dimension never configured as
/// hierarchical must be rejected by FacetsConfig.Build.
/// </summary>
public virtual void TestDetectHierarchicalField()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode_e.CREATE);
    var writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    FacetsConfig config = new FacetsConfig();
    Document document = new Document();
    document.Add(NewTextField("field", "text", Field.Store.NO));
    // "a" was not declared hierarchical, so a two-component path is invalid
    document.Add(new FacetField("a", "path", "other"));
    try
    {
        config.Build(taxoWriter, document);
        Fail("did not hit expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    IOUtils.Close(writer, taxoWriter, dir, taxoDir);
}
/// <summary>
/// A multi-component FacetField path on a dimension never configured as
/// hierarchical must be rejected by FacetsConfig.Build with an
/// illegal-argument exception.
/// </summary>
public virtual void TestDetectHierarchicalField()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();
    var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
    var writer = new RandomIndexWriter(Random, dir);

    FacetsConfig config = new FacetsConfig();
    Document doc = new Document();
    doc.Add(NewTextField("field", "text", Field.Store.NO));
    // "a" was not declared hierarchical, so a two-component path is invalid
    doc.Add(new FacetField("a", "path", "other"));
    try
    {
        config.Build(taxoWriter, doc);
        fail("did not hit expected exception");
    }
    catch (Exception iae) when (iae.IsIllegalArgumentException())
    {
        // expected
    }

    IOUtils.Dispose(writer, taxoWriter, dir, taxoDir);
}
/// <summary>
/// Int and float associations may not share one indexed facet field;
/// FacetsConfig.Build must reject such a document.
/// </summary>
public virtual void TestMixedTypesInSameIndexField()
{
    Store.Directory dir = NewDirectory();
    Store.Directory taxoDir = NewDirectory();

    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    FacetsConfig config = new FacetsConfig();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    Document document = new Document();
    document.Add(new IntAssociationFacetField(14, "a", "x"));
    document.Add(new FloatAssociationFacetField(55.0f, "b", "y"));
    try
    {
        writer.AddDocument(config.Build(taxoWriter, document));
        Fail("did not hit expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    IOUtils.Close(writer, taxoWriter, dir, taxoDir);
}
/// <summary>
/// Verifies that LimitTokenCountAnalyzer truncates a field at <c>limit</c>
/// tokens, for both consume-all modes: the limit-th token ("x") is indexed,
/// everything after it ("z") is dropped.
/// </summary>
public virtual void TestLimitTokenCountIndexWriter()
{
    foreach (bool consumeAll in new bool[] { true, false })
    {
        Store.Directory dir = NewDirectory();
        int limit = TestUtil.NextInt32(Random, 50, 101000);
        MockAnalyzer mock = new MockAnalyzer(Random);

        // if we are consuming all tokens, we can use the checks,
        // otherwise we can't
        mock.EnableChecks = consumeAll;
        Analyzer a = new LimitTokenCountAnalyzer(mock, limit, consumeAll);

        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, a));

        Document doc = new Document();
        StringBuilder b = new StringBuilder();
        // (limit-1) "a" tokens, then "x" as the limit-th token and "z" just past it
        for (int i = 1; i < limit; i++)
        {
            b.Append(" a");
        }
        b.Append(" x");
        b.Append(" z");
        doc.Add(NewTextField("field", b.ToString(), Field.Store.NO));
        writer.AddDocument(doc);
        writer.Dispose();

        IndexReader reader = DirectoryReader.Open(dir);
        Term t = new Term("field", "x");
        // "x" made the cut...
        assertEquals(1, reader.DocFreq(t));
        t = new Term("field", "z");
        // ...but "z" (token limit+1) was truncated away
        assertEquals(0, reader.DocFreq(t));
        reader.Dispose();
        dir.Dispose();
    }
}
// clean old stuff, reopen
/// <summary>
/// Resets the benchmark's index and taxonomy state: disposes the current
/// writers/readers/directories, recreates both directories (optionally
/// erasing existing content), resets the inputs, and restarts the clock.
/// </summary>
public virtual void Reinit(bool eraseIndex)
{
    // cleanup index
    IOUtils.Dispose(indexWriter, indexReader, directory);
    indexWriter = null;
    indexReader = null;

    IOUtils.Dispose(taxonomyWriter, taxonomyReader, taxonomyDir);
    taxonomyWriter = null;
    taxonomyReader = null;

    // directory (default is ram-dir).
    directory = CreateDirectory(eraseIndex, "index", "directory");
    taxonomyDir = CreateDirectory(eraseIndex, "taxo", "taxonomy.directory");

    // inputs
    ResetInputs();

    // release unused stuff
    // NOTE: the explicit GC is deliberate in this benchmark harness — it
    // clears garbage from the previous round so it doesn't skew measurements.
    GC.Collect();

    // Re-init clock
    SetStartTimeMillis();
}
/// <summary>
/// Opens the segment's plain-text term-vectors file, then loads the
/// per-document offset index. If opening fails, partially acquired state is
/// disposed while the original exception is preserved.
/// </summary>
public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context)
{
    bool opened = false;
    try
    {
        string fileName = IndexFileNames.SegmentFileName(si.Name, "", VECTORS_EXTENSION);
        _input = directory.OpenInput(fileName, context);
        opened = true;
    }
    finally
    {
        if (!opened)
        {
            try
            {
                Dispose();
            }
            catch (Exception)
            {
                // ensure we throw our original exception
            }
        }
    }
    ReadIndex(si.DocCount);
}
private long[] _offsets; // docid -> offset in .vec file

#endregion Fields

#region Constructors

/// <summary>
/// Opens the segment's plain-text term-vectors (.vec) file, then loads the
/// per-document offset index. On failure, partially acquired state is
/// disposed while the original exception is preserved.
/// </summary>
public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context)
{
    bool success = false;
    try
    {
        _input = directory.OpenInput(IndexFileNames.SegmentFileName(si.Name, "", SimpleTextTermVectorsWriter.VECTORS_EXTENSION), context);
        success = true;
    }
    finally
    {
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch (Exception)
            {
                // ensure we throw our original exception
            }
        }
    }
    ReadIndex(si.DocCount);
}
/// <summary>
/// Reads the plain-text field-infos file for a segment and reconstructs the
/// FieldInfos. Each field is serialized as a fixed sequence of "KEY value"
/// lines; a Debug.Assert validates the expected key prefix on every line.
/// </summary>
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
{
    var fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, SimpleTextFieldInfosWriter.FIELD_INFOS_EXTENSION);
    var input = directory.OpenChecksumInput(fileName, iocontext);
    var scratch = new BytesRef();
    var success = false;
    try
    {
        // header: total number of fields
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUMFIELDS));
        var size = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMFIELDS.Length, scratch));
        var infos = new FieldInfo[size];

        for (var i = 0; i < size; i++)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NAME));
            string name = ReadString(SimpleTextFieldInfosWriter.NAME.Length, scratch);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUMBER));
            int fieldNumber = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMBER.Length, scratch));

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ISINDEXED));
            bool isIndexed = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.ISINDEXED.Length, scratch));

            // index options are only serialized for indexed fields
            FieldInfo.IndexOptions? indexOptions;
            if (isIndexed)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.INDEXOPTIONS));
                indexOptions = (FieldInfo.IndexOptions)Enum.Parse(typeof(FieldInfo.IndexOptions), ReadString(SimpleTextFieldInfosWriter.INDEXOPTIONS.Length, scratch));
            }
            else
            {
                indexOptions = null;
            }

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.STORETV));
            bool storeTermVector = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.STORETV.Length, scratch));

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.PAYLOADS));
            bool storePayloads = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.PAYLOADS.Length, scratch));

            // the file stores "has norms"; FieldInfo wants "omits norms", hence the negation
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NORMS));
            bool omitNorms = !Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.NORMS.Length, scratch));

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NORMS_TYPE));
            string nrmType = ReadString(SimpleTextFieldInfosWriter.NORMS_TYPE.Length, scratch);
            FieldInfo.DocValuesType_e? normsType = DocValuesType(nrmType);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.DOCVALUES));
            string dvType = ReadString(SimpleTextFieldInfosWriter.DOCVALUES.Length, scratch);
            FieldInfo.DocValuesType_e? docValuesType = DocValuesType(dvType);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.DOCVALUES_GEN));
            long dvGen = Convert.ToInt64(ReadString(SimpleTextFieldInfosWriter.DOCVALUES_GEN.Length, scratch));

            // arbitrary per-field key/value attributes
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUM_ATTS));
            int numAtts = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUM_ATTS.Length, scratch));
            IDictionary<string, string> atts = new Dictionary<string, string>();

            for (int j = 0; j < numAtts; j++)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ATT_KEY));
                string key = ReadString(SimpleTextFieldInfosWriter.ATT_KEY.Length, scratch);

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ATT_VALUE));
                string value = ReadString(SimpleTextFieldInfosWriter.ATT_VALUE.Length, scratch);
                atts[key] = value;
            }

            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normsType, new ReadOnlyDictionary<string,string>(atts))
            {
                DocValuesGen = dvGen
            };
        }

        SimpleTextUtil.CheckFooter(input);
        var fieldInfos = new FieldInfos(infos);
        success = true;
        return fieldInfos;
    }
    finally
    {
        // on failure, suppress close errors so the original exception propagates
        if (success)
        {
            input.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Creates a reader for this segment's plain-text term vectors file.
/// The <paramref name="fieldInfos"/> argument is accepted to satisfy the base
/// signature but is not needed by the SimpleText implementation.
/// </summary>
public override TermVectorsReader VectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context)
{
    var reader = new SimpleTextTermVectorsReader(directory, segmentInfo, context);
    return reader;
}
/// <summary>
/// Creates a writer for this segment's plain-text stored fields file.
/// Only the segment's name is needed to derive the output file name.
/// </summary>
public override StoredFieldsWriter FieldsWriter(Directory directory, SegmentInfo si, IOContext context)
{
    var segmentName = si.Name;
    return new SimpleTextStoredFieldsWriter(directory, segmentName, context);
}
/// <summary>
/// Reads the plain-text segment info file for <paramref name="segmentName"/>
/// and reconstructs the <see cref="SegmentInfo"/> it describes.
/// </summary>
/// <param name="directory"> directory containing the ".si" file </param>
/// <param name="segmentName"> name of the segment to read </param>
/// <param name="context"> IO context for opening the input </param>
public override SegmentInfo Read(Directory directory, string segmentName, IOContext context)
{
    var scratch = new BytesRef();
    string segFileName = IndexFileNames.SegmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    ChecksumIndexInput input = directory.OpenChecksumInput(segFileName, context);
    bool success = false;
    try
    {
        // Each field is stored as "<MARKER><value>" on its own line; the asserts
        // verify the expected marker before the value is sliced out of the line.
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_VERSION));
        string version = ReadString(SimpleTextSegmentInfoWriter.SI_VERSION.Length, scratch);

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DOCCOUNT));
        int docCount = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_DOCCOUNT.Length, scratch));

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_USECOMPOUND));
        bool isCompoundFile = Convert.ToBoolean(ReadString(SimpleTextSegmentInfoWriter.SI_USECOMPOUND.Length, scratch));

        // Diagnostics map: a count line followed by one key line and one value line per entry.
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_DIAG));
        int numDiag = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_DIAG.Length, scratch));
        IDictionary<string, string> diagnostics = new Dictionary<string, string>();
        for (int i = 0; i < numDiag; i++)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_KEY));
            string key = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_KEY.Length, scratch);
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_VALUE));
            string value = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_VALUE.Length, scratch);
            diagnostics[key] = value;
        }

        // Segment file set: a count line followed by one file name per line.
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_FILES));
        int numFiles = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_FILES.Length, scratch));
        var files = new HashSet<string>();
        for (int i = 0; i < numFiles; i++)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_FILE));
            string fileName = ReadString(SimpleTextSegmentInfoWriter.SI_FILE.Length, scratch);
            files.Add(fileName);
        }

        // Validate the trailing checksum before trusting any of the data read above.
        SimpleTextUtil.CheckFooter(input);
        var info = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null, diagnostics) {Files = files};
        success = true;
        return info;
    }
    finally
    {
        if (!success)
        {
            // Close quietly so the exception already in flight is not masked.
            IOUtils.CloseWhileHandlingException(input);
        }
        else
        {
            input.Dispose();
        }
    }
}
/// <summary>
/// Serializes <paramref name="si"/> into a plain-text segment info file
/// (segment name + ".si"). <paramref name="fis"/> is accepted to satisfy the
/// base signature but is not used by the SimpleText implementation.
/// </summary>
public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
{
    var segFileName = IndexFileNames.SegmentFileName(si.Name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    // The .si file itself becomes part of the segment's file set.
    si.AddFile(segFileName);
    var success = false;
    var output = dir.CreateOutput(segFileName, ioContext);
    try
    {
        var scratch = new BytesRef();
        // Each field is emitted as "<MARKER><value>" followed by a newline,
        // mirroring the layout expected by the SimpleText segment info reader.
        SimpleTextUtil.Write(output, SI_VERSION);
        SimpleTextUtil.Write(output, si.Version, scratch);
        SimpleTextUtil.WriteNewline(output);

        SimpleTextUtil.Write(output, SI_DOCCOUNT);
        // Invariant culture keeps the on-disk format locale-independent.
        SimpleTextUtil.Write(output, Convert.ToString(si.DocCount, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(output);

        SimpleTextUtil.Write(output, SI_USECOMPOUND);
        // Lower-cased boolean; Convert.ToBoolean on the read side parses it case-insensitively.
        SimpleTextUtil.Write(output, Convert.ToString(si.UseCompoundFile, CultureInfo.InvariantCulture).ToLowerInvariant(), scratch);
        SimpleTextUtil.WriteNewline(output);

        // Diagnostics map: count first, then one key line and one value line per entry.
        IDictionary<string, string> diagnostics = si.Diagnostics;
        int numDiagnostics = diagnostics == null ? 0 : diagnostics.Count;
        SimpleTextUtil.Write(output, SI_NUM_DIAG);
        SimpleTextUtil.Write(output, Convert.ToString(numDiagnostics, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(output);
        if (numDiagnostics > 0)
        {
            foreach (var diagEntry in diagnostics)
            {
                SimpleTextUtil.Write(output, SI_DIAG_KEY);
                SimpleTextUtil.Write(output, diagEntry.Key, scratch);
                SimpleTextUtil.WriteNewline(output);
                SimpleTextUtil.Write(output, SI_DIAG_VALUE);
                SimpleTextUtil.Write(output, diagEntry.Value, scratch);
                SimpleTextUtil.WriteNewline(output);
            }
        }

        // Segment file set: count first, then one file name per line.
        var files = si.Files;
        var numFiles = files == null ? 0 : files.Count;
        SimpleTextUtil.Write(output, SI_NUM_FILES);
        SimpleTextUtil.Write(output, Convert.ToString(numFiles, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(output);
        if (numFiles > 0)
        {
            foreach (var fileName in files)
            {
                SimpleTextUtil.Write(output, SI_FILE);
                SimpleTextUtil.Write(output, fileName, scratch);
                SimpleTextUtil.WriteNewline(output);
            }
        }

        SimpleTextUtil.WriteChecksum(output, scratch);
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(output);
            try
            {
                // Best effort: remove the partially written file.
                dir.DeleteFile(segFileName);
            }
            catch (Exception)
            {
                // Ensure we throw the original exception, not a secondary delete failure.
            }
        }
        else
        {
            output.Dispose();
        }
    }
}
/// <summary>
/// Serializes <paramref name="infos"/> into a plain-text field infos file.
/// Each field is emitted as a series of "<MARKER><value>" lines in the order
/// the corresponding SimpleText reader expects them.
/// </summary>
/// <param name="directory"> directory to create the output file in </param>
/// <param name="segmentName"> name of the segment being written </param>
/// <param name="segmentSuffix"> suffix appended to the file name (e.g. for doc-values updates) </param>
/// <param name="infos"> the field infos to serialize </param>
/// <param name="context"> IO context for creating the output </param>
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    var fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
    var output = directory.CreateOutput(fileName, context);
    var scratch = new BytesRef();
    var success = false;
    try
    {
        // Numeric values are written with the invariant culture so the on-disk
        // format is locale-independent, consistent with the segment info writer.
        SimpleTextUtil.Write(output, NUMFIELDS);
        SimpleTextUtil.Write(output, Convert.ToString(infos.Size(), CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(output);
        foreach (FieldInfo fi in infos)
        {
            SimpleTextUtil.Write(output, NAME);
            SimpleTextUtil.Write(output, fi.Name, scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, NUMBER);
            SimpleTextUtil.Write(output, Convert.ToString(fi.Number, CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, ISINDEXED);
            SimpleTextUtil.Write(output, Convert.ToString(fi.Indexed), scratch);
            SimpleTextUtil.WriteNewline(output);

            // The INDEXOPTIONS line is only present for indexed fields; the
            // reader mirrors this by treating the options as null otherwise.
            if (fi.Indexed)
            {
                // Payloads require positions to be indexed.
                Debug.Assert(fi.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads());
                SimpleTextUtil.Write(output, INDEXOPTIONS);
                SimpleTextUtil.Write(output, fi.FieldIndexOptions.ToString(), scratch);
                SimpleTextUtil.WriteNewline(output);
            }

            SimpleTextUtil.Write(output, STORETV);
            SimpleTextUtil.Write(output, Convert.ToString(fi.HasVectors()), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, PAYLOADS);
            SimpleTextUtil.Write(output, Convert.ToString(fi.HasPayloads()), scratch);
            SimpleTextUtil.WriteNewline(output);

            // Stored inverted: the reader negates this back into omitNorms.
            SimpleTextUtil.Write(output, NORMS);
            SimpleTextUtil.Write(output, Convert.ToString(!fi.OmitsNorms()), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, NORMS_TYPE);
            SimpleTextUtil.Write(output, GetDocValuesType(fi.NormType), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, DOCVALUES);
            SimpleTextUtil.Write(output, GetDocValuesType(fi.DocValuesType), scratch);
            SimpleTextUtil.WriteNewline(output);

            SimpleTextUtil.Write(output, DOCVALUES_GEN);
            SimpleTextUtil.Write(output, Convert.ToString(fi.DocValuesGen, CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(output);

            // Attributes map: count first, then one key line and one value line per entry.
            IDictionary<string, string> atts = fi.Attributes();
            int numAtts = atts == null ? 0 : atts.Count;
            SimpleTextUtil.Write(output, NUM_ATTS);
            SimpleTextUtil.Write(output, Convert.ToString(numAtts, CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(output);
            if (numAtts <= 0 || atts == null) continue;
            foreach (var entry in atts)
            {
                SimpleTextUtil.Write(output, ATT_KEY);
                SimpleTextUtil.Write(output, entry.Key, scratch);
                SimpleTextUtil.WriteNewline(output);
                SimpleTextUtil.Write(output, ATT_VALUE);
                SimpleTextUtil.Write(output, entry.Value, scratch);
                SimpleTextUtil.WriteNewline(output);
            }
        }
        SimpleTextUtil.WriteChecksum(output, scratch);
        success = true;
    }
    finally
    {
        if (success)
        {
            output.Dispose();
        }
        else
        {
            // Close quietly so the exception already in flight is not masked.
            IOUtils.CloseWhileHandlingException(output);
        }
    }
}
/// <summary>
/// Creates a writer for this segment's plain-text term vectors file.
/// </summary>
public override TermVectorsWriter VectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context)
{
    var writer = new SimpleTextTermVectorsWriter(directory, segmentInfo.Name, context);
    return writer;
}
/// <summary>
/// Writes the live-documents bit set for <paramref name="info"/> to a
/// generation-stamped plain-text file: a SIZE line, one DOC line per live
/// document ordinal, an END marker, and a trailing checksum.
/// </summary>
/// <param name="bits"> the bits to persist; must be a <c>SimpleTextBits</c> instance </param>
/// <param name="dir"> directory to create the live-docs file in </param>
/// <param name="info"> segment commit whose next deletion generation names the file </param>
/// <param name="newDelCount"> number of new deletions (unused by the SimpleText format) </param>
/// <param name="context"> IO context for creating the output </param>
public override void WriteLiveDocs(MutableBits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context)
{
    var set = ((SimpleTextBits) bits).BITS;
    var size = bits.Length();
    var scratch = new BytesRef();
    var fileName = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.NextDelGen);
    IndexOutput output = null;
    var success = false;
    try
    {
        output = dir.CreateOutput(fileName, context);
        SimpleTextUtil.Write(output, SIZE);
        // Invariant culture keeps the on-disk format locale-independent,
        // consistent with the SimpleText segment info writer.
        SimpleTextUtil.Write(output, Convert.ToString(size, CultureInfo.InvariantCulture), scratch);
        SimpleTextUtil.WriteNewline(output);
        // Emit only the set (live) document ordinals.
        for (int i = set.NextSetBit(0); i >= 0; i = set.NextSetBit(i + 1))
        {
            SimpleTextUtil.Write(output, DOC);
            SimpleTextUtil.Write(output, Convert.ToString(i, CultureInfo.InvariantCulture), scratch);
            SimpleTextUtil.WriteNewline(output);
        }
        SimpleTextUtil.Write(output, END);
        SimpleTextUtil.WriteNewline(output);
        SimpleTextUtil.WriteChecksum(output, scratch);
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(output);
        }
        else
        {
            // Close quietly so the exception already in flight is not masked.
            IOUtils.CloseWhileHandlingException(output);
        }
    }
}
/// <summary>
/// Creates a reader for this segment's plain-text stored fields file.
/// </summary>
public override StoredFieldsReader FieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context)
{
    var reader = new SimpleTextStoredFieldsReader(directory, si, fn, context);
    return reader;
}