/// <summary> /// Annotates the given sequence of <see cref="Document"/> objects by adding a <b>_highlight</b> field; /// the <b>_highlight</b> field will contain the best matching text fragment from the <see cref="Document"/> /// object's full-text field. /// </summary> /// <param name="hits">The sequence of <see cref="Document"/> objects.</param> /// <param name="criteria">The search criteria that produced the hits.</param> /// <returns> /// The original sequence of Document objects, with a <b>_highlight</b> field added to each Document. /// </returns> public static IEnumerable<Document> GenerateHighlights(this IEnumerable<Document> hits, SearchCriteria criteria) { if (hits == null) throw new ArgumentNullException(nameof(hits)); if (criteria == null) throw new ArgumentNullException(nameof(criteria)); if (String.IsNullOrWhiteSpace(criteria.Query)) throw new ArgumentException("SearchCriteria.Query cannot be empty"); var documents = hits.ToList(); try { var indexDirectory = new RAMDirectory(); var analyzer = new FullTextAnalyzer(); var config = new IndexWriterConfig(analyzer).SetRAMBufferSizeMB(_ramBufferSizeMB); var writer = new IndexWriter(indexDirectory, config); BuidIndex(documents, writer); GenerateHighlights(documents, writer, criteria); writer.DeleteAll(); writer.Commit(); writer.Close(); indexDirectory.Close(); } catch (Exception ex) { _log.Error(ex); } return documents; }
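A minimal usage sketch of the extension above, for illustration only: the searchIndex object, its Search method, the criteria values, and reading the added field back via Get are assumptions, not part of the original API.

// Hypothetical calling code for GenerateHighlights (the names below are illustrative assumptions).
var criteria = new SearchCriteria { Query = "ram directory" };
IEnumerable<Document> hits = searchIndex.Search(criteria);   // assumed search call returning Documents
foreach (Document hit in hits.GenerateHighlights(criteria))
{
    // The extension adds a "_highlight" field holding the best-matching fragment of the full-text field;
    // how that field is read back depends on the concrete Document type in use.
    Console.WriteLine(hit.Get("_highlight"));
}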
public virtual void TestByteSizeLimit() { // tests that the max merge size constraint is applied during forceMerge. Directory dir = new RAMDirectory(); // Prepare an index w/ several small segments and a large one. IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); const int numSegments = 15; for (int i = 0; i < numSegments; i++) { int numDocs = i == 7 ? 30 : 1; AddDocs(writer, numDocs); } writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); double min = sis.Info(0).SizeInBytes(); conf = NewWriterConfig(); LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(); lmp.MaxMergeMBForForcedMerge = (min + 1) / (1 << 20); conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); // Should only be 3 segments in the index, because one of them exceeds the size limit sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(3, sis.Size()); }
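This test and the similar size-bounded force-merge tests further down call two helpers, NewWriterConfig and AddDocs, that are not shown here. A plausible sketch of what they do (assumed; the originals may differ in detail):

// Plausible sketch of the helpers used by these force-merge tests (assumed, not copied from the original class).
private static IndexWriterConfig NewWriterConfig()
{
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    // prevent background merges so each AddDocs call produces exactly one segment
    conf.SetMergePolicy(NoMergePolicy.COMPOUND_FILES);
    return conf;
}

private void AddDocs(IndexWriter writer, int numDocs)
{
    AddDocs(writer, numDocs, false);
}

private void AddDocs(IndexWriter writer, int numDocs, bool withID)
{
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        if (withID)
        {
            doc.Add(new StringField("id", "" + i, Field.Store.NO));
        }
        writer.AddDocument(doc);
    }
    writer.Commit();
}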
private Directory CreateIndex() { Directory dir = new RAMDirectory(); Document[] data = CreateData(); TestDataDigester testDigester = new TestDataDigester(_facetHandlers, data); BoboIndexer indexer = new BoboIndexer(testDigester, dir); indexer.Index(); using (var r = IndexReader.Open(dir, false)) { /* open and immediately dispose a reader to verify the freshly built index is readable */ } return(dir); }
public LuceneSearch(string path) { _luceneDir = Path.Combine("C:\\Lucene\\", path); try //to guarantee that the index under C:\Lucene is created { _ramDirectory = new RAMDirectory(_diskDirectory); } catch { _ramDirectory = new RAMDirectory(); _indWriter = new IndexWriter(_diskDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); CloseWriter(); } sort = new Sort(new SortField[] { new SortField("Weight", SortField.INT) }); hits_limit = 10000; }
private static void Main(string[] args) { var directory = new RAMDirectory(); var writer = new IndexWriter(directory, new StandardAnalyzer()); AddDocument(writer, "group", "stuff", Field.Index.UN_TOKENIZED); AddDocument(writer, "group", "other stuff", Field.Index.UN_TOKENIZED); writer.Close(true); var searcher = new IndexSearcher(directory); Hits hits = searcher.Search(new TermQuery(new Term("group", "stuff"))); for (int i = 0; i < hits.Length(); i++) { Console.WriteLine(hits.Doc(i).GetField("group").StringValue()); } }
public void AncestorsOrSelf_With_Examine() { using (var luceneDir = new RAMDirectory()) { var indexer = IndexInitializer.GetUmbracoIndexer(luceneDir); indexer.RebuildIndex(); var ctx = GetUmbracoContext("/test", 1234); var searcher = IndexInitializer.GetUmbracoSearcher(luceneDir); var cache = new ContextualPublishedMediaCache(new PublishedMediaCache(ctx.Application, searcher, indexer), ctx); //we are using the media defined in media.xml to test the Examine results implementation; see the media.xml file in the ExamineHelpers namespace var publishedMedia = cache.GetById(3113); var ancestors = publishedMedia.AncestorsOrSelf(); Assert.IsTrue(ancestors.Select(x => x.Id).ContainsAll(new[] { 3113, 2112, 2222, 1111 })); } }
/// <summary> /// Clears all results from the current index. During the next search the index will be rebuilt. /// </summary> public void Clear() { lck.AcquireWriterLock(WriterTimeOut); try { if (rd != null) { rd.Close(); rd = null; } } finally { lck.ReleaseWriterLock(); } }
/// <summary> /// Loads a file-based index into RAM; if the folder does not yet contain an index, an empty one is created in the RAM copy. /// </summary> protected virtual RAMDirectory CreateDirectory(string folder) { FileUtil.EnsureFolder(folder); Lucene.Net.Store.FSDirectory tempDirectory = Lucene.Net.Store.FSDirectory.GetDirectory(folder, false); var directory = new RAMDirectory(tempDirectory); using (new IndexLocker(directory.MakeLock("write.lock"))) { if (!IndexReader.IndexExists(directory)) { new IndexWriter(directory, this._analyzer, true).Close(); } } return(directory); }
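In the same spirit as CreateDirectory above, a small self-contained sketch (Lucene.Net 3.x API; the path and field name are illustrative) of copying an existing on-disk index into a RAMDirectory and searching it there:

// Illustrative sketch (assumed path and field name): load an existing on-disk index into RAM and search it.
private static void SearchIndexFromRam()
{
    var fsDir = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\MyIndex"));
    var ramDir = new Lucene.Net.Store.RAMDirectory(fsDir); // copies the on-disk files into memory
    using (var searcher = new Lucene.Net.Search.IndexSearcher(ramDir, true))
    {
        var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        var parser = new Lucene.Net.QueryParsers.QueryParser(Lucene.Net.Util.Version.LUCENE_30, "content", analyzer);
        var hits = searcher.Search(parser.Parse("lucene"), 10);
        System.Console.WriteLine("{0} hit(s)", hits.TotalHits);
    }
}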
public void SetUp() { directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); //Add series of docs with misspelt names AddDoc(writer, "jonathon smythe", "1"); AddDoc(writer, "jonathan smith", "2"); AddDoc(writer, "johnathon smyth", "3"); AddDoc(writer, "johnny smith", "4"); AddDoc(writer, "jonny smith", "5"); AddDoc(writer, "johnathon smythe", "6"); writer.Close(); searcher = new IndexSearcher(directory, true); }
private Directory BuildIndex(IEnumerable <Person> people) { var directory = new RAMDirectory(); using (Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30)) using (var writer = new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(1000))) { // the writer and analyzer will populate the directory with documents foreach (Person person in people) { var document = new Document(); document.Add(new Field("Id", person.Id.ToString(), Field.Store.YES, Field.Index.ANALYZED)); document.Add(new Field("Name", person.Name, Field.Store.YES, Field.Index.ANALYZED)); document.Add(new Field("NumberOfConnections", person.NumberOfConnections.ToString(), Field.Store.YES, Field.Index.ANALYZED)); document.Add(new Field("WorkExperienceInMonths", person.WorkExperienceInMonths.ToString(), Field.Store.YES, Field.Index.ANALYZED)); string all = person.Name; foreach (var experience in person.Experiences) { document.Add(new Field("Organisation", experience.Organisation, Field.Store.YES, Field.Index.NOT_ANALYZED)); document.Add(new Field("Role", experience.Role, Field.Store.YES, Field.Index.NOT_ANALYZED)); document.Add(new Field("Duration", experience.Duration, Field.Store.YES, Field.Index.NOT_ANALYZED)); document.Add(new Field("DurationInMonths", experience.DurationInMonths.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); //all += " "+experience.Organisation +" "+ experience.Role+" "; all += " " + experience.Organisation + " "; } foreach (var education in person.Education) { document.Add(new Field("Institute", education.Institute, Field.Store.YES, Field.Index.NOT_ANALYZED)); document.Add(new Field("Degree", education.Degree, Field.Store.YES, Field.Index.NOT_ANALYZED)); all += " " + education.Institute + " " + education.Degree + " "; } foreach (var skill in person.Skills) { document.Add(new Field("Skill", skill.Name, Field.Store.YES, Field.Index.NOT_ANALYZED)); } document.Add(new Field("All", all, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(document); } writer.Optimize(); writer.Flush(true, true, true); } return(directory); }
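One possible way to query the directory built by BuildIndex; the field names come from the example above, while the query text and result formatting are illustrative:

// Illustrative query against the "All" field populated above (Lucene.Net 3.0 API).
private static void SearchPeople(Directory directory)
{
    using (var searcher = new IndexSearcher(directory, true))
    using (var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    {
        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "All", analyzer);
        TopDocs hits = searcher.Search(parser.Parse("developer"), 25);
        foreach (ScoreDoc match in hits.ScoreDocs)
        {
            Document doc = searcher.Doc(match.Doc);
            Console.WriteLine("{0} ({1} connections)", doc.Get("Name"), doc.Get("NumberOfConnections"));
        }
    }
}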
private static void Main(string[] args) { using (var directory = new RAMDirectory()) { var ctx = directory.GetProviderContext(false); using (var session = ctx.OpenSession <Person>()) { session.Add(new[] { new Person { FirstName = "Matt", LastName = "Babcock" }, new Person { FirstName = "Alicia", LastName = "Babcock" }, new Person { FirstName = "Ayden", LastName = "Babcock" }, new Person { FirstName = "Dakota", LastName = "Babcock" }, new Person { FirstName = "Ryenn", LastName = "Babcock" }, new Person { FirstName = "Robert", LastName = "Greenhagen" }, }); session.Commit(); } Console.WriteLine(ctx.AsQueryable <Person>().Where(p => p.FirstName.SimilarTo("Robbert")).ToList().Dump()); Console.ReadKey(true); } }
static void Main(string[] args) { var version = Lucene.Net.Util.Version.LUCENE_30; Directory dir = new RAMDirectory(); //Analyzer analyzer = new PerFieldAnalyzerWrapper( // new StandardAnalyzer(version), // new Dictionary<string, Analyzer>() { { "text", new KeywordAnalyzer() } }); Analyzer analyzer = new StandardAnalyzer(version); var indexWriter = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED); var document = new Document(); //document.Add(new Field("text", "CD-ROM anders ABC65/66.txt.zip ", Field.Store.YES, Field.Index.ANALYZED)); document.Add(new Field("text", "12345", Field.Store.YES, Field.Index.ANALYZED)); document.Add(new Field("text2", "Anders", Field.Store.YES, Field.Index.ANALYZED)); indexWriter.AddDocument(document); indexWriter.Commit(); indexWriter.Dispose(); var parser = new QueryParser(version, "text", analyzer); //var parser = new Lucene.Net.QueryParsers.MultiFieldQueryParser(version, new string[] { "text", "text2" }, analyzer); //var parser = new ExtendedMultiFieldQueryParser(version, new[] { "text", "text2" }, analyzer); //parser.SetAllowLeadingWildcard(true); //parser.ReverseFields = new[] { "text" }; Query q = parser.Parse("12345"); //Query q = parser.Parse("*21"); //q = new PrefixQuery(new Term("text", "54")); var searcher = new IndexSearcher(dir, true); TopDocs hits = searcher.Search(q, null, 5, Sort.RELEVANCE); Console.WriteLine("Found {0} document(s) that matched query '{1}':", hits.TotalHits, q); foreach (ScoreDoc match in hits.ScoreDocs) { Document doc = searcher.Doc(match.Doc); Console.WriteLine("Matched {0}", doc.Get("text")); } searcher.Dispose(); }
/// <summary> /// Generates a document for each given camera name and adds it to the search index. /// After this method is called, the searcher is initialized and ready to use. /// </summary> /// <param name="cameraNames">List of camera names to generate search docs for.</param> public CameraSearchIndex(IList <string> cameraNames) { index = new RAMDirectory(); var altNameGenerator = new AlternateNameGenerator(); using (IndexWriter writer = new IndexWriter(index, analyzer, IndexWriter.MaxFieldLength.LIMITED)) { foreach (string title in cameraNames) { var document = CreateDocument(title, altNameGenerator.GenerateAlternateCameraNames(title)); writer.AddDocument(document); } writer.Optimize(); } searcher = new IndexSearcher(index); }
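CreateDocument is not shown in this example; a hypothetical shape for it, with assumed field names, could look like this:

// Hypothetical sketch of CreateDocument; the real field names and analysis settings may differ.
private static Document CreateDocument(string title, IEnumerable<string> alternateNames)
{
    var document = new Document();
    document.Add(new Field("Title", title, Field.Store.YES, Field.Index.ANALYZED));
    foreach (string altName in alternateNames)
    {
        document.Add(new Field("AltName", altName, Field.Store.NO, Field.Index.ANALYZED));
    }
    return document;
}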
public void FullTextConfigSerializeIndexLuceneRAM() { RAMDirectory directory = new RAMDirectory(); ConfigurationSerializationContext context = new ConfigurationSerializationContext(); INode obj = context.Graph.CreateBlankNode(); context.NextSubject = obj; directory.SerializeConfiguration(context); TestTools.ShowGraph(context.Graph); ConfigurationLoader.AutoConfigureObjectFactories(context.Graph); Object temp = ConfigurationLoader.LoadObject(context.Graph, obj); Assert.IsTrue(temp is RAMDirectory, "Should have returned a RAMDirectory instance"); Assert.IsTrue(temp is Directory, "Should have returned a Directory instance"); }
public void Init() { facetHandlers = new List <IFacetHandler>(); directory = new RAMDirectory(); analyzer = new WhitespaceAnalyzer(); IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); AddMetaDataField(doc, PathHandlerName, new String[] { "/a/b/c", "/a/b/d" }); writer.AddDocument(doc); writer.Commit(); PathFacetHandler pathHandler = new PathFacetHandler("path", true); facetHandlers.Add(pathHandler); }
void GenerateIndex() { _ramDirectory = new RAMDirectory(); var standardAnalyzer = new StandardAnalyzer(Version.LUCENE_29); var indexWriter = new IndexWriter(_ramDirectory, standardAnalyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); foreach (var document in _documents) { indexWriter.AddDocument(document); } indexWriter.Optimize(); indexWriter.Close(); }
public void Isolated() { var ramDirectory = new RAMDirectory(); using (new IndexWriter(ramDirectory, new StandardAnalyzer(Version.LUCENE_29), IndexWriter.MaxFieldLength.UNLIMITED)){} var inMemoryRavenConfiguration = new InMemoryRavenConfiguration(); inMemoryRavenConfiguration.Initialize(); var simpleIndex = new SimpleIndex(ramDirectory, 0, new IndexDefinition { Map = @"from s in docs.Softs select new { s.f_platform, s.f_name, s.f_alias,s.f_License,s.f_totaldownload}", Analyzers = { { "f_name", typeof(KeywordAnalyzer).AssemblyQualifiedName }, { "f_alias", typeof(KeywordAnalyzer).AssemblyQualifiedName }, }, Indexes = { { "f_platform", FieldIndexing.NotAnalyzed }, { "f_License", FieldIndexing.NotAnalyzed }, { "f_totaldownload", FieldIndexing.NotAnalyzed }, { "f_name", FieldIndexing.Analyzed }, { "f_alias", FieldIndexing.Analyzed }, }, SortOptions = { { "f_totaldownload", SortOptions.Int }, { "f_License", SortOptions.Int }, } }, new MapOnlyView(), new WorkContext() { Configuration = inMemoryRavenConfiguration }); var perFieldAnalyzerWrapper = simpleIndex.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), new List <Action>()); var tokenStream = perFieldAnalyzerWrapper.TokenStream("f_name", new StringReader("hello Shrek")); while (tokenStream.IncrementToken()) { var attribute = (TermAttribute)tokenStream.GetAttribute <ITermAttribute>(); Assert.Equal("hello Shrek", attribute.Term); } }
static void Main(string[] args) { Directory dir = new RAMDirectory(); CreateDocuments(dir); IndexReader reader = IndexReader.Open(dir, true); var searcher = new IndexSearcher(reader); int numDocs = reader.MaxDoc; var mlt = new Lucene.Net.Search.Similar.MoreLikeThis(reader); mlt.SetFieldNames(new String[] { "name" }); mlt.MinTermFreq = 1; mlt.MinDocFreq = 1; for (int docId = 0; docId < numDocs; docId++) { Document doc = reader.Document(docId); Console.WriteLine(doc.Get("name")); Query query = mlt.Like(docId); Console.WriteLine(" query = {0}", query); TopDocs similarDocs = searcher.Search(query, 10); if (similarDocs.TotalHits == 0) { Console.WriteLine(" None like this"); } for (int i = 0; i < similarDocs.ScoreDocs.Length; i++) { if (similarDocs.ScoreDocs[i].Doc != docId) { doc = reader.Document(similarDocs.ScoreDocs[i].Doc); Console.WriteLine(" -> {0}", doc.Get("name")); } } Console.WriteLine(); } searcher.Dispose(); reader.Dispose(); dir.Dispose(); }
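CreateDocuments is not shown; a minimal stand-in that would let the MoreLikeThis demo run (the document names are made up) might be:

// Hypothetical stand-in for CreateDocuments; any documents with a stored, analyzed "name" field will do.
private static void CreateDocuments(Directory dir)
{
    using (var writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30),
                                        true, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        foreach (string name in new[] { "red apple", "green apple", "red grape", "banana" })
        {
            var doc = new Document();
            doc.Add(new Field("name", name, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }
        writer.Optimize();
    }
}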
public void AnIndexCanBeDeleted() { TestDataGenerator tdg = new TestDataGenerator(); Directory directory = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48); _indexer = new LuceneIndexer(directory, analyzer); _indexer.CreateIndex(tdg.AllData); Assert.Equal(2000, _indexer.Count()); _indexer.DeleteAll(); Assert.Equal(0, _indexer.Count()); directory.ClearLock("write.lock"); analyzer.Dispose(); directory.Dispose(); }
public void FullTextDatasetLucene1() { //Lucene Index Directory dir = new RAMDirectory(); LuceneSubjectsIndexer indexer = new LuceneSubjectsIndexer(dir, new StandardAnalyzer(LuceneTestHarness.LuceneVersion), new DefaultIndexSchema()); //Test Dataset InMemoryDataset memData = new InMemoryDataset(); FullTextIndexedDataset dataset = new FullTextIndexedDataset(memData, indexer, false); //Test Graph Graph g = new Graph(); g.LoadFromEmbeddedResource("VDS.RDF.Configuration.configuration.ttl"); dataset.AddGraph(g); Assert.True(dataset.HasGraph(g.BaseUri), "Graph should exist in dataset"); //Now do a search to check all the triples got indexed String searchTerm = "http"; IEnumerable <Triple> searchTriples = g.Triples.Where(t => t.Object.NodeType == NodeType.Literal && t.Object.ToString().Contains("http")); LuceneSearchProvider searcher = new LuceneSearchProvider(LuceneTestHarness.LuceneVersion, dir); foreach (Triple searchTriple in searchTriples) { INode targetNode = searchTriple.Subject; IEnumerable <IFullTextSearchResult> results = searcher.Match(searchTerm); Assert.True(results.Any(r => r.Node.Equals(targetNode)), "Did not find expected node " + targetNode.ToString(this._formatter) + " in search results using search term '" + searchTerm + "' (found " + results.Count() + " results)"); Console.WriteLine(); } //Now remove the Graph dataset.RemoveGraph(g.BaseUri); //Repeat the search to check all the triples got unindexed foreach (Triple searchTriple in searchTriples) { INode targetNode = searchTriple.Subject; IEnumerable <IFullTextSearchResult> results = searcher.Match(searchTerm); Assert.False(results.Any(r => r.Node.Equals(targetNode)), "Found unexpected node " + targetNode.ToString(this._formatter) + " in search results using search term '" + searchTerm + "' (found " + results.Count() + " results)"); Console.WriteLine(); } searcher.Dispose(); indexer.Dispose(); }
// Converted from Java; the original method was annotated @Test and declared 'throws Exception'. internal virtual void SamplingOfLargeNumericValues() { using (RAMDirectory dir = new RAMDirectory()) using (WritableIndexPartition indexPartition = new WritableIndexPartition(new File("testPartition"), dir, IndexWriterConfigs.standard())) { InsertDocument(indexPartition, 1, long.MaxValue); InsertDocument(indexPartition, 2, int.MaxValue); indexPartition.MaybeRefreshBlocking(); using (PartitionSearcher searcher = indexPartition.AcquireSearcher()) { NonUniqueLuceneIndexSampler sampler = new NonUniqueLuceneIndexSampler(searcher.IndexSearcher, _taskControl.newInstance(), new IndexSamplingConfig(Config.defaults())); assertEquals(new IndexSample(2, 2, 2), sampler.SampleIndex()); } } }
public void TestNullDocIdSetIterator() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.Close(); IndexReader reader = IndexReader.Open(dir, true); Filter filter = new AnonymousFilter2(); CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // the caching filter should return the empty set constant Assert.AreSame(DocIdSet.EMPTY_DOCIDSET, cacher.GetDocIdSet(reader)); reader.Close(); }
private void DoTestSearch(System.IO.StreamWriter out_Renamed, bool useCompoundFile) { Directory directory = new RAMDirectory(); Analyzer analyzer = new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null); writer.UseCompoundFile = useCompoundFile; System.String[] docs = new System.String[] { "a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c" }; for (int j = 0; j < docs.Length; j++) { Document d = new Document(); d.Add(new Field("contents", docs[j], Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(d, null); } writer.Close(); Searcher searcher = new IndexSearcher(directory, true, null); System.String[] queries = new System.String[] { "a b", "\"a b\"", "\"a b c\"", "a c", "\"a c\"", "\"a c e\"" }; ScoreDoc[] hits = null; QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "contents", analyzer); parser.PhraseSlop = 4; for (int j = 0; j < queries.Length; j++) { Query query = parser.Parse(queries[j]); out_Renamed.WriteLine("Query: " + query.ToString("contents")); //DateFilter filter = // new DateFilter("modified", Time(1997,0,1), Time(1998,0,1)); //DateFilter filter = DateFilter.Before("modified", Time(1997,00,01)); //System.out.println(filter); hits = searcher.Search(query, null, 1000, null).ScoreDocs; out_Renamed.WriteLine(hits.Length + " total results"); for (int i = 0; i < hits.Length && i < 10; i++) { Document d = searcher.Doc(hits[i].Doc, null); out_Renamed.WriteLine(i + " " + hits[i].Score + " " + d.Get("contents", null)); } } searcher.Close(); }
public void TestReadersWriters() { Directory dir; using (dir = new RAMDirectory()) { Document doc; IndexWriter writer; IndexReader reader; using (writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true)) { Field field = new Field("name", "value", Field.Store.YES, Field.Index.ANALYZED); doc = new Document(); doc.Add(field); writer.AddDocument(doc); writer.Commit(); using (reader = writer.GetReader()) { IndexReader r1 = reader.Reopen(); } try { IndexReader r2 = reader.Reopen(); Assert.Fail("IndexReader shouldn't be open here"); } catch (AlreadyClosedException) { } } try { writer.AddDocument(doc); Assert.Fail("IndexWriter shouldn't be open here"); } catch (AlreadyClosedException) { } Assert.IsTrue(dir.isOpen_ForNUnit, "RAMDirectory"); } Assert.IsFalse(dir.isOpen_ForNUnit, "RAMDirectory"); }
private void DoTestSearch(System.IO.StringWriter out_Renamed, bool useCompoundFile) { Directory directory = new RAMDirectory(); Analyzer analyzer = new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(directory, analyzer, true); writer.SetUseCompoundFile(useCompoundFile); System.String[] docs = new System.String[] { "a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c" }; for (int j = 0; j < docs.Length; j++) { Document d = new Document(); d.Add(Field.Text("contents", docs[j])); writer.AddDocument(d); } writer.Close(); Searcher searcher = new IndexSearcher(directory); System.String[] queries = new System.String[] { "a b", "\"a b\"", "\"a b c\"", "a c", "\"a c\"", "\"a c e\"" }; Hits hits = null; QueryParsers.QueryParser parser = new QueryParsers.QueryParser("contents", analyzer); parser.SetPhraseSlop(4); for (int j = 0; j < queries.Length; j++) { Query query = parser.Parse(queries[j]); out_Renamed.WriteLine("Query: " + query.ToString("contents")); //DateFilter filter = // new DateFilter("modified", Time(1997,0,1), Time(1998,0,1)); //DateFilter filter = DateFilter.Before("modified", Time(1997,00,01)); //System.out.println(filter); hits = searcher.Search(query); out_Renamed.WriteLine(hits.Length() + " total results"); for (int i = 0; i < hits.Length() && i < 10; i++) { Document d = hits.Doc(i); out_Renamed.WriteLine(i + " " + hits.Score(i) + " " + d.Get("contents")); } } searcher.Close(); }
protected Lucene.Net.Store.Directory OpenOrCreateLuceneDirectory( IndexDefinition indexDefinition, string indexName = null, bool createIfMissing = true) { Lucene.Net.Store.Directory directory; if (indexDefinition.IsTemp || configuration.RunInMemory) { directory = new RAMDirectory(); new IndexWriter(directory, dummyAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED).Close(); // creating index structure } else { var indexDirectory = indexName ?? IndexDefinitionStorage.FixupIndexName(indexDefinition.Name, path); var indexFullPath = Path.Combine(path, MonoHttpUtility.UrlEncode(indexDirectory)); directory = FSDirectory.Open(new DirectoryInfo(indexFullPath)); if (!IndexReader.IndexExists(directory)) { if (createIfMissing == false) { throw new InvalidOperationException("Index does not exist: " + indexDirectory); } //creating index structure if we need to new IndexWriter(directory, dummyAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED).Close(); } else { if (directory.FileExists("write.lock")) // we had an unclean shutdown { if (configuration.ResetIndexOnUncleanShutdown) { throw new InvalidOperationException("Rude shutdown detected on: " + indexDirectory); } CheckIndexAndRecover(directory, indexDirectory); IndexWriter.Unlock(directory); directory.DeleteFile("write.lock"); } } } return(directory); }
public static void MyClassInitialize() { RAMDirectory rd = new RAMDirectory(); IndexWriter writer = new IndexWriter(rd, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < 1000; i++) { Document doc = new Document(); doc.Add(new Field("string", i.ToString(), Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new NumericField("int", Field.Store.YES, true).SetIntValue(i)); doc.Add(new NumericField("long", Field.Store.YES, true).SetLongValue(i)); doc.Add(new NumericField("double", Field.Store.YES, true).SetDoubleValue(i)); doc.Add(new NumericField("float", Field.Store.YES, true).SetFloatValue(i)); writer.AddDocument(doc); } writer.Close(); reader = IndexReader.Open(rd, true); }
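Given that the fixture above stores each value both as a string and as trie-encoded numeric fields, a typical query in the accompanying tests could be a numeric range search; the bounds below are illustrative:

// Illustrative range search over the numeric "int" field created in the fixture above.
var searcher = new IndexSearcher(reader);
var query = NumericRangeQuery.NewIntRange("int", 42, 142, true, true);
TopDocs hits = searcher.Search(query, 1000);
// 101 documents (values 42..142, both bounds inclusive) fall inside the range
Console.WriteLine(hits.TotalHits);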
public IDictionary <string, long> Rank(string queryString, IEnumerable <IPackageSearchMetadata> entries) { using (var directory = new RAMDirectory()) { AddToIndex(directory, entries); var searcher = new IndexSearcher(directory); var query = NuGetQuery.MakeQuery(queryString); var topDocs = searcher.Search(query, entries.Count()); var ranking = topDocs.ScoreDocs .Select(d => searcher.Doc(d.Doc)) .Zip(Enumerable.Range(0, topDocs.ScoreDocs.Length).Reverse(), (doc, rank) => new { doc, rank }) .ToDictionary(x => x.doc.Get("Id"), x => (long)x.rank); return(ranking); } }
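A hypothetical call to Rank; the ranker instance and the entries collection are assumed to exist already (for example, package metadata fetched elsewhere):

// Hypothetical usage: rank already-fetched search entries for the query "json".
IDictionary<string, long> ranking = ranker.Rank("json", entries);
foreach (var pair in ranking.OrderByDescending(p => p.Value))
{
    Console.WriteLine("{0}: {1}", pair.Key, pair.Value);
}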
/// <summary> /// Clears the current index and then rebuilds it. /// </summary> public void Reset() { lck.AcquireWriterLock(WriterTimeOut); try { if (rd != null) { rd.Dispose(); rd = null; } BuildIndex(); } finally { lck.ReleaseWriterLock(); } }
protected Lucene.Net.Store.Directory OpenOrCreateLuceneDirectory(IndexDefinition indexDefinition, string indexName = null) { Lucene.Net.Store.Directory directory; if (indexDefinition.IsTemp || configuration.RunInMemory) { directory = new RAMDirectory(); } else { var indexDirectory = indexName ?? IndexDefinitionStorage.FixupIndexName(indexDefinition.Name, path); directory = FSDirectory.Open(new DirectoryInfo(Path.Combine(path, MonoHttpUtility.UrlEncode(indexDirectory)))); } //creating index structure if we need to new IndexWriter(directory, dummyAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED).Close(); return(directory); }
private static Directory CreateIndex() { Directory directory = new RAMDirectory(); IndexWriter writer = GetWriter(directory); for (int i = 0; i < 10; i++) { Document doc = new Document(); doc.Add(new StringField("foo", Convert.ToString(i), Store.YES)); writer.AddDocument(doc); } writer.Commit(); writer.Dispose(); return(directory); }
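A follow-up search over the index created above (Lucene.Net 4.x API; the term value is illustrative) would confirm that each StringField value is matchable as an exact term:

// Illustrative follow-up: search the index created above for foo == "5".
using (DirectoryReader reader = DirectoryReader.Open(directory))
{
    var searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.Search(new TermQuery(new Term("foo", "5")), 10);
    Console.WriteLine(hits.TotalHits); // expected: 1
}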
public void Init() { directory = new RAMDirectory(); analyzer = new WhitespaceAnalyzer(); writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); AddDoc("1", new String[] { "aa", "bb" }, new String[] { "aaa", "aaa" }, new int[] { 100, 200 }); AddDoc("2", new String[] { "aa", "bb" }, new String[] { "aaa", "bbb" }, new int[] { 200, 200 }); AddDoc("3", new String[] { "aa", "bb" }, new String[] { "bbb", "aaa" }, new int[] { 300, 300 }); AddDoc("3", new String[] { "bb", "aa" }, new String[] { "bbb", "bbb" }, new int[] { 300, 400 }); AddDoc("3", new String[] { "bb", "aa" }, new String[] { "aaa", "ccc" }, new int[] { 300, 500 }); writer.Commit(); IndexReader reader = IndexReader.Open(directory, true); searcher = new IndexSearcher(reader); IndexReader readerWithCache = new IndexReaderWithMetaDataCache(reader); searcherWithCache = new IndexSearcher(readerWithCache); }
public virtual void TestUniqueValuesCompression() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter iwriter = new IndexWriter(dir, iwc); int uniqueValueCount = TestUtil.NextInt(Random(), 1, 256); IList<long> values = new List<long>(); Document doc = new Document(); NumericDocValuesField dvf = new NumericDocValuesField("dv", 0); doc.Add(dvf); for (int i = 0; i < 300; ++i) { long value; if (values.Count < uniqueValueCount) { value = Random().NextLong(); values.Add(value); } else { value = RandomInts.RandomFrom(Random(), values); } dvf.LongValue = value; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size1 = DirSize(dir); for (int i = 0; i < 20; ++i) { dvf.LongValue = RandomInts.RandomFrom(Random(), values); iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size2 = DirSize(dir); // make sure the new longs did not cost 8 bytes each Assert.IsTrue(size2 < size1 + 8 * 20); }
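The DirSize helper used by this and the other compression tests below is not shown; presumably it just sums the length of every file in the directory, roughly:

// Plausible sketch of the DirSize helper used by the compression tests.
private static long DirSize(Directory d)
{
    long size = 0;
    foreach (string file in d.ListAll())
    {
        size += d.FileLength(file);
    }
    return size;
}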
public virtual void TestNonCFSLeftovers() { Directory[] dirs = new Directory[2]; for (int i = 0; i < dirs.Length; i++) { dirs[i] = new RAMDirectory(); IndexWriter w = new IndexWriter(dirs[i], new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); Document d = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.StoreTermVectors = true; d.Add(new Field("c", "v", customType)); w.AddDocument(d); w.Dispose(); } IndexReader[] readers = new IndexReader[] { DirectoryReader.Open(dirs[0]), DirectoryReader.Open(dirs[1]) }; Directory dir = new MockDirectoryWrapper(Random(), new RAMDirectory()); IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(NewLogMergePolicy(true)); MergePolicy lmp = conf.MergePolicy; // Force creation of CFS: lmp.NoCFSRatio = 1.0; lmp.MaxCFSSegmentSizeMB = double.PositiveInfinity; IndexWriter w3 = new IndexWriter(dir, conf); w3.AddIndexes(readers); w3.Dispose(); // we should now see segments_X, // segments.gen,_Y.cfs,_Y.cfe, _Z.si Assert.AreEqual(5, dir.ListAll().Length, "Only one compound segment should exist, but got: " + Arrays.ToString(dir.ListAll())); dir.Dispose(); }
public void TestCustomMergeScheduler() { // we don't really need to execute anything, just to make sure the custom MS // compiles. But ensure that it can be used as well, e.g., no other hidden // dependencies or something. Therefore, don't use any random API ! Directory dir = new RAMDirectory(); IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null); conf.SetMergeScheduler(new ReportingMergeScheduler()); IndexWriter writer = new IndexWriter(dir, conf); writer.AddDocument(new Document()); writer.Commit(); // trigger flush writer.AddDocument(new Document()); writer.Commit(); // trigger flush writer.ForceMerge(1); writer.Dispose(); dir.Dispose(); }
public virtual void TestSingleBigValueCompression() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter iwriter = new IndexWriter(dir, iwc); Document doc = new Document(); NumericDocValuesField dvf = new NumericDocValuesField("dv", 0); doc.Add(dvf); for (int i = 0; i < 20000; ++i) { dvf.LongValue = i & 1023; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size1 = DirSize(dir); dvf.LongValue = long.MaxValue; iwriter.AddDocument(doc); iwriter.ForceMerge(1); long size2 = DirSize(dir); // make sure the new value did not grow the bpv for every other value Assert.IsTrue(size2 < size1 + (20000 * (63 - 10)) / 8); }
public virtual void TestSingleMergeableTooLargeSegment() { Directory dir = new RAMDirectory(); IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 5, true); // delete the last document writer.DeleteDocuments(new Term("id", "4")); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 2; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); // Verify that the too-large segment was not merged: it is still the only segment and still carries its deletion. SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(1, sis.Size()); Assert.IsTrue(sis.Info(0).HasDeletions()); }
public virtual void TestNumDocsLimit() { // tests that the max merge docs constraint is applied during forceMerge. Directory dir = new RAMDirectory(); // Prepare an index w/ several small segments and a large one. IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 5); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 3); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 3; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); // Should only be 3 segments in the index, because one of them exceeds the max merge docs limit SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(3, sis.Size()); }
public virtual void TestZeroPosIncr() { Directory dir = new RAMDirectory(); Token[] tokens = new Token[3]; tokens[0] = new Token(); tokens[0].Append("a"); tokens[0].PositionIncrement = 1; tokens[1] = new Token(); tokens[1].Append("b"); tokens[1].PositionIncrement = 0; tokens[2] = new Token(); tokens[2].Append("c"); tokens[2].PositionIncrement = 0; RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); doc.Add(new TextField("field", new CannedTokenStream(tokens))); writer.AddDocument(doc); doc = new Document(); doc.Add(new TextField("field", new CannedTokenStream(tokens))); writer.AddDocument(doc); IndexReader r = writer.Reader; writer.Dispose(); IndexSearcher s = NewSearcher(r); MultiPhraseQuery mpq = new MultiPhraseQuery(); //mpq.setSlop(1); // NOTE: not great that if we do the else clause here we // get different scores! MultiPhraseQuery counts that // phrase as occurring twice per doc (it should be 1, I // think?). this is because MultipleTermPositions is able to // return the same position more than once (0, in this // case): if (true) { mpq.Add(new Term[] { new Term("field", "b"), new Term("field", "c") }, 0); mpq.Add(new Term[] { new Term("field", "a") }, 0); } else { mpq.Add(new Term[] { new Term("field", "a") }, 0); mpq.Add(new Term[] { new Term("field", "b"), new Term("field", "c") }, 0); } TopDocs hits = s.Search(mpq, 2); Assert.AreEqual(2, hits.TotalHits); Assert.AreEqual(hits.ScoreDocs[0].Score, hits.ScoreDocs[1].Score, 1e-5); /* for(int hit=0;hit<hits.TotalHits;hit++) { ScoreDoc sd = hits.ScoreDocs[hit]; System.out.println(" hit doc=" + sd.Doc + " score=" + sd.Score); } */ r.Dispose(); dir.Dispose(); }
public virtual void TestDateCompression() { Directory dir = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); IndexWriter iwriter = new IndexWriter(dir, iwc); const long @base = 13; // prime long day = 1000L * 60 * 60 * 24; Document doc = new Document(); NumericDocValuesField dvf = new NumericDocValuesField("dv", 0); doc.Add(dvf); for (int i = 0; i < 300; ++i) { dvf.LongValue = @base + Random().Next(1000) * day; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size1 = DirSize(dir); for (int i = 0; i < 50; ++i) { dvf.LongValue = @base + Random().Next(1000) * day; iwriter.AddDocument(doc); } iwriter.ForceMerge(1); long size2 = DirSize(dir); // make sure the new longs cost less than if they had simply been packed Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8); }
public virtual void TestOneLargeOneSmall() { Directory dir = new RAMDirectory(); IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 3); AddDocs(writer, 5); AddDocs(writer, 3); AddDocs(writer, 5); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 3; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(4, sis.Size()); }
public virtual void TestMergeFactor() { Directory dir = new RAMDirectory(); IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 3); AddDocs(writer, 5); AddDocs(writer, 3); AddDocs(writer, 3); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 3; lmp.MergeFactor = 2; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); // Should only be 4 segments in the index, because of the merge factor and // max merge docs settings. SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(4, sis.Size()); }
public virtual void TestWriterLock() { // native fslock impl gets angry if we use it, so use RAMDirectory explicitly. var indexDir = new RAMDirectory(); var tw = new DirectoryTaxonomyWriter(indexDir); tw.AddCategory(new FacetLabel("hi", "there")); tw.Commit(); // we deliberately do not close the writer now, and keep it open and locked. // Verify that the writer worked: var tr = new DirectoryTaxonomyReader(indexDir); Assert.AreEqual(2, tr.GetOrdinal(new FacetLabel("hi", "there"))); // Try to open a second writer, with the first one locking the directory. // We expect to get a LockObtainFailedException. try { Assert.Null(new DirectoryTaxonomyWriter(indexDir)); Fail("should have failed to write in locked directory"); } catch (LockObtainFailedException) { // this is what we expect to happen. } // Remove the lock, and now the open should succeed, and we can // write to the new writer. DirectoryTaxonomyWriter.Unlock(indexDir); var tw2 = new DirectoryTaxonomyWriter(indexDir); tw2.AddCategory(new FacetLabel("hey")); tw2.Dispose(); // See that the writer indeed wrote: var newtr = TaxonomyReader.OpenIfChanged(tr); Assert.NotNull(newtr); tr.Dispose(); tr = newtr; Assert.AreEqual(3, tr.GetOrdinal(new FacetLabel("hey"))); tr.Dispose(); tw.Dispose(); indexDir.Dispose(); }
public virtual void TestUpgradeOldSingleSegmentIndexWithAdditions() { foreach (string name in OldSingleSegmentNames) { if (VERBOSE) { Console.WriteLine("testUpgradeOldSingleSegmentIndexWithAdditions: index=" + name); } Directory dir = NewDirectory(OldIndexDirs[name]); Assert.AreEqual(1, GetNumberOfSegments(dir), "Original index must be single segment"); // create a bunch of dummy segments int id = 40; RAMDirectory ramDir = new RAMDirectory(); for (int i = 0; i < 3; i++) { // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge: MergePolicy mp = Random().NextBoolean() ? (MergePolicy)NewLogMergePolicy() : NewTieredMergePolicy(); IndexWriterConfig iwc = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))).SetMergePolicy(mp); IndexWriter w = new IndexWriter(ramDir, iwc); // add few more docs: for (int j = 0; j < RANDOM_MULTIPLIER * Random().Next(30); j++) { AddDoc(w, id++); } w.Dispose(false); } // add dummy segments (which are all in current // version) to single segment index MergePolicy mp_ = Random().NextBoolean() ? (MergePolicy)NewLogMergePolicy() : NewTieredMergePolicy(); IndexWriterConfig iwc_ = (new IndexWriterConfig(TEST_VERSION_CURRENT, null)).SetMergePolicy(mp_); IndexWriter iw = new IndexWriter(dir, iwc_); iw.AddIndexes(ramDir); iw.Dispose(false); // determine count of segments in modified index int origSegCount = GetNumberOfSegments(dir); NewIndexUpgrader(dir).Upgrade(); int segCount = CheckAllSegmentsUpgraded(dir); Assert.AreEqual(origSegCount, segCount, "Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged"); dir.Dispose(); } }
public virtual void TestSingleNonMergeableSegment() { Directory dir = new RAMDirectory(); IndexWriterConfig conf = NewWriterConfig(); IndexWriter writer = new IndexWriter(dir, conf); AddDocs(writer, 3, true); writer.Dispose(); conf = NewWriterConfig(); LogMergePolicy lmp = new LogDocMergePolicy(); lmp.MaxMergeDocs = 3; conf.SetMergePolicy(lmp); writer = new IndexWriter(dir, conf); writer.ForceMerge(1); writer.Dispose(); // Verify that the index still consists of a single segment. SegmentInfos sis = new SegmentInfos(); sis.Read(dir); Assert.AreEqual(1, sis.Size()); }