/// <summary>
/// Builds a small five-document index in <c>store</c> with a whitespace
/// analyzer: two docs with field "aaa", two with "contents", one with "zzz".
/// The index is optimized and the writer closed before returning.
/// </summary>
public void SetUp()
{
    var writer = new IndexWriter(store, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

    // (field, value) pairs for the five test documents, in indexing order.
    var rows = new[]
    {
        new[] { "aaa", "foo" },
        new[] { "aaa", "foo" },
        new[] { "contents", "Tom" },
        new[] { "contents", "Jerry" },
        new[] { "zzz", "bar" },
    };

    foreach (var row in rows)
    {
        var doc = new Document();
        doc.Add(new Field(row[0], row[1], Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }

    writer.Optimize();
    writer.Close();
}
/// <summary>
/// Regression test: a nested span query combining an unordered inner span
/// (of "chased"/"cat") with a single-term span ("ate") used to throw an
/// exception. Indexes two documents, sanity-checks plain term hit counts,
/// then runs the previously-failing span search expecting exactly one hit.
/// </summary>
public virtual void TestNPESpanQuery()
{
    Directory dir = new MockRAMDirectory();
    // StandardAnalyzer with an empty stop-word set so every term is indexed.
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet <string>()), IndexWriter.MaxFieldLength.LIMITED, null);

    // Add documents
    AddDoc(writer, "1", "the big dogs went running to the market");
    AddDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly");

    // Commit
    writer.Close();

    // Get searcher
    IndexReader reader = IndexReader.Open(dir, true, null);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Control (make sure docs indexed)
    Assert.AreEqual(2, HitCount(searcher, "the"));
    Assert.AreEqual(1, HitCount(searcher, "cat"));
    Assert.AreEqual(1, HitCount(searcher, "dogs"));
    Assert.AreEqual(0, HitCount(searcher, "rabbit"));

    // This throws exception (it shouldn't)
    Assert.AreEqual(1, searcher.Search(CreateSpan(0, true, new SpanQuery[] { CreateSpan(4, false, "chased", "cat"), CreateSpan("ate") }), 10, null).TotalHits);

    reader.Close();
    dir.Close();
}
/// <summary>
/// Builds a RAMDirectory index of NUM_DOCS documents, each with NUM_FIELDS
/// stored, tokenized fields "f1".."fN". Every value combines a deterministic
/// entry from <c>data</c> with a pseudo-randomly chosen one; the fixed seed
/// makes the index content reproducible across runs.
/// </summary>
/// <exception cref="System.SystemException">Wraps any indexing failure.</exception>
private static Directory MakeIndex()
{
    Directory dir = new RAMDirectory();
    try
    {
        // Fixed seed => deterministic field values for repeatable tests.
        System.Random r = new System.Random((System.Int32) (BASE_SEED + 42));
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(dir, analyzer, true);
        // Keep separate index files (no .cfs) - some tests inspect per-file state.
        writer.SetUseCompoundFile(false);

        for (int d = 1; d <= NUM_DOCS; d++)
        {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            for (int f = 1; f <= NUM_FIELDS; f++)
            {
                doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.TOKENIZED));
            }
            writer.AddDocument(doc);
        }
        writer.Close();
    }
    catch (System.Exception e)
    {
        // NOTE(review): deliberately broad - any failure surfaces as one
        // wrapped exception type with the original as inner exception.
        throw new System.SystemException("", e);
    }
    return dir;
}
/// <summary>
/// Indexes two disjoint document ranges into separate directories, verifies
/// each, merges them via AddIndexes + ForceMerge(1), and verifies the merged
/// result as well.
/// </summary>
public virtual void TestLucene()
{
    const int docsPerIndex = 100;

    Directory indexA = NewDirectory();
    Directory indexB = NewDirectory();

    // Index A holds docs [0, 100); index B holds docs [100, 200).
    FillIndex(Random(), indexA, 0, docsPerIndex);
    if (VerifyIndex(indexA, 0))
    {
        Assert.Fail("Index a is invalid");
    }

    FillIndex(Random(), indexB, docsPerIndex, docsPerIndex);
    if (VerifyIndex(indexB, docsPerIndex))
    {
        Assert.Fail("Index b is invalid");
    }

    Directory merged = NewDirectory();
    IndexWriter writer = new IndexWriter(
        merged,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy(2)));
    writer.AddIndexes(indexA, indexB);
    writer.ForceMerge(1);
    writer.Dispose();

    Assert.IsFalse(VerifyIndex(merged, 0), "The merged index is invalid");

    indexA.Dispose();
    indexB.Dispose();
    merged.Dispose();
}
/// <summary>
/// Set up a new index in RAM with three test phrases and the supplied Analyzer.
/// The analyzer wraps a whitespace MockAnalyzer in a 2-gram
/// ShingleAnalyzerWrapper so shingle-based queries can be exercised; a
/// reader and searcher over the index are created for the tests to use.
/// </summary>
/// <exception cref="Exception"> if an error occurs with index writer or searcher </exception>
public override void SetUp()
{
    base.SetUp();

    // Whitespace tokenization wrapped to also emit 2-word shingles.
    analyzer = new ShingleAnalyzerWrapper(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false), 2);
    directory = NewDirectory();
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    Document doc;
    doc = new Document();
    doc.Add(new TextField("content", "please divide this sentence into shingles", Field.Store.YES));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new TextField("content", "just another test sentence", Field.Store.YES));
    writer.AddDocument(doc);

    doc = new Document();
    doc.Add(new TextField("content", "a sentence which contains no test", Field.Store.YES));
    writer.AddDocument(doc);

    writer.Dispose();

    reader = DirectoryReader.Open(directory);
    searcher = NewSearcher(reader);
}
/// <summary>
/// Populates <c>store</c> with five single-field documents (two "aaa",
/// two "contents", one "zzz"), then optimizes and closes the writer.
/// </summary>
public void SetUp()
{
    IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true);

    // Field name / value table; one document is added per row, in order.
    string[,] rows =
    {
        { "aaa", "foo" },
        { "aaa", "foo" },
        { "contents", "Tom" },
        { "contents", "Jerry" },
        { "zzz", "bar" },
    };

    for (int i = 0; i < rows.GetLength(0); i++)
    {
        Document doc = new Document();
        doc.Add(new Field(rows[i, 0], rows[i, 1], Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }

    writer.Optimize();
    writer.Close();
}
// Rolls back index to a chosen ID
/// <summary>
/// Rolls the index back to the commit whose user-data "index" entry ends
/// with "-{id}", using RollbackDeletionPolicy to prune the later commits.
/// </summary>
/// <param name="id">Identifier of the commit point to roll back to.</param>
/// <exception cref="System.SystemException">No matching commit point exists.</exception>
private void RollBackLast(int id)
{
    // System.out.println("Attempting to rollback to "+id);
    string ids = "-" + id;
    IndexCommit last = null;

    // Scan all commits and remember the latest one tagged with the requested
    // id. (Fixed: replaced the raw non-generic IEnumerator loop and the
    // redundant string cast with an idiomatic foreach over the generic list.)
    IList<IndexCommit> commits = IndexReader.ListCommits(dir);
    foreach (IndexCommit commit in commits)
    {
        System.Collections.Generic.IDictionary<string, string> ud = commit.GetUserData();
        if (ud.Count > 0 && ud["index"].EndsWith(ids))
        {
            last = commit;
        }
    }

    if (last == null)
        throw new System.SystemException("Couldn't find commit point " + id);

    // Open a writer anchored at the chosen commit; the deletion policy
    // removes the commits that came after it. Record the rollback marker
    // in the new commit's user data.
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), new RollbackDeletionPolicy(this, id), MaxFieldLength.UNLIMITED, last);
    System.Collections.Generic.IDictionary<string, string> data = new System.Collections.Generic.Dictionary<string, string>();
    data["index"] = "Rolled back to 1-" + id;
    w.Commit(data);
    w.Close();
}
/// <summary>
/// Fills <paramref name="dir"/> with <paramref name="nDocs"/> documents, each
/// holding a random number (&lt; maxFields) of stored, analyzed fields
/// "f0".."fk". Each field value is random word soup bracketed by "^ " and
/// " $" markers, grown until it reaches a random target length.
/// </summary>
internal virtual void BuildDir(Directory dir, int nDocs, int maxFields, int maxFieldLen)
{
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    // Small buffer so multiple segments get flushed while building.
    iw.SetMaxBufferedDocs(10);
    for (int j = 0; j < nDocs; j++)
    {
        Document d = new Document();
        int nFields = r.Next(maxFields);
        for (int i = 0; i < nFields; i++)
        {
            int flen = r.Next(maxFieldLen);
            System.Text.StringBuilder sb = new System.Text.StringBuilder("^ ");
            while (sb.Length < flen)
            {
                sb.Append(' ').Append(words[r.Next(words.Length)]);
            }
            sb.Append(" $");
            Field.Store store = Field.Store.YES; // make random later
            Field.Index index = Field.Index.ANALYZED; // make random later
            d.Add(new Field("f" + i, sb.ToString(), store, index));
        }
        iw.AddDocument(d, null);
    }
    iw.Close();
}
/// <summary>
/// Builds a RAMDirectory of NUM_DOCS documents whose numeric fields count
/// down from each primitive type's MaxValue, then opens a read-only reader
/// over the result for the tests to parse those fields back.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

    // Start each counter at the type's maximum and decrement per document,
    // so every document holds a distinct value near the upper bound.
    long theLong = System.Int64.MaxValue;
    double theDouble = System.Double.MaxValue;
    sbyte theByte = (sbyte)System.SByte.MaxValue;
    short theShort = System.Int16.MaxValue;
    int theInt = System.Int32.MaxValue;
    float theFloat = System.Single.MaxValue;

    for (int i = 0; i < NUM_DOCS; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("theLong", System.Convert.ToString(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        // "E16"/"E8": scientific notation with a fixed digit count.
        doc.Add(new Field("theDouble", (theDouble--).ToString("E16"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theByte", System.Convert.ToString((sbyte)theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theShort", System.Convert.ToString(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theInt", System.Convert.ToString(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("theFloat", (theFloat--).ToString("E8"), Field.Store.NO, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }
    writer.Close();
    reader = IndexReader.Open((Directory)directory, true, null);
}
/// <summary>
/// Creates an IndexWriter over <c>IndexDir</c> backed by a StandardAnalyzer.
/// </summary>
/// <returns>The newly constructed writer; the caller owns and must close it.</returns>
private static IndexWriter GetWriter()
{
    return new Lucene.Net.Index.IndexWriter(
        IndexDir,
        new Lucene.Net.Analysis.Standard.StandardAnalyzer());
}
/// <summary>
/// Initializes the indexer with every Lucene component unset; the directory,
/// writer, analyzer, and parser are created later by their setup methods.
/// </summary>
public LuceneIndexer()
{
    // Nothing is constructed here - all collaborators start out null.
    luceneIndexDirectory = null;
    analyzer = null;
    writer = null;
    parser = null;
}
/// <summary>
/// Initializes the search engine. The Lucene components start null and are
/// created later (CreateIndex / CreateAnalyser / CreateWriter); only the
/// CSV document holder is constructed up front.
/// </summary>
public SearchEngine()
{
    luceneIndexDirectory = null; // Is set in Create Index
    analyzer = null;             // Is set in CreateAnalyser
    writer = null;               // Is set in CreateWriter

    CSVdoc = new CSVDocument();
}
/// <summary>
/// Indexes four small documents into <c>dir</c>, each with a single
/// unstored, analyzed "field" value, then closes the writer.
/// </summary>
void Index()
{
    var writer = new Lucene.Net.Index.IndexWriter(
        dir,
        new Lucene.Net.Analysis.WhitespaceAnalyzer(),
        Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

    // One document per entry, indexed in order.
    string[] contents = { "a b c d", "a b a d", "a b e f", "x y z" };
    foreach (string content in contents)
    {
        var document = new Lucene.Net.Documents.Document();
        document.Add(new Lucene.Net.Documents.Field(
            "field", content,
            Lucene.Net.Documents.Field.Store.NO,
            Lucene.Net.Documents.Field.Index.ANALYZED));
        writer.AddDocument(document);
    }

    writer.Close();
}
/// <summary>
/// Opens (and wipes) the index at <paramref name="indexPath"/> and creates a
/// writer over it with the already-configured analyzer and similarity.
/// </summary>
/// <param name="indexPath">Directory path where the index files live.</param>
protected override void CreateIndex(string indexPath)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);

    // create=true: any existing index at this path is overwritten.
    writer = new Lucene.Net.Index.IndexWriter(
        luceneIndexDirectory,
        analyzer,
        true,
        new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
    writer.SetSimilarity(similarity);
}
/// <summary>
/// Verifies block-join search still works when some segments contain no
/// parent ("docType": "resume") documents at all: after the first commit,
/// extra child-only blocks are written so at least one segment has an empty
/// parents bitset. Merging is disabled to preserve that segment layout.
/// </summary>
public void TestEmptyChildFilter()
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    config.SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
    // we don't want to merge - since we rely on certain segment setup
    IndexWriter w = new IndexWriter(dir, config);

    // Each AddDocuments call writes one block: child docs first, parent last.
    IList<Document> docs = new List<Document>();
    docs.Add(MakeJob("java", 2007));
    docs.Add(MakeJob("python", 2010));
    docs.Add(MakeResume("Lisa", "United Kingdom"));
    w.AddDocuments(docs);

    docs.Clear();
    docs.Add(MakeJob("ruby", 2005));
    docs.Add(MakeJob("java", 2006));
    docs.Add(MakeResume("Frank", "United States"));
    w.AddDocuments(docs);
    w.Commit();

    int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field
    for (int i = 0; i < num; i++)
    {
        docs.Clear();
        docs.Add(MakeJob("java", 2007));
        w.AddDocuments(docs);
    }

    IndexReader r = DirectoryReader.Open(w, Random().NextBoolean());
    w.Dispose();
    // The test needs multiple segments so at least one lacks parents.
    assertTrue(r.Leaves.size() > 1);

    IndexSearcher s = new IndexSearcher(r);
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Children must have skill=java and a year in [2006, 2011].
    BooleanQuery childQuery = new BooleanQuery();
    childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
    childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

    BooleanQuery fullQuery = new BooleanQuery();
    fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
    fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));

    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
    s.Search(fullQuery, c);
    TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);

    // Exactly one parent group (Lisa) should match, via its java/2007 child.
    assertFalse(float.IsNaN(results.MaxScore));
    assertEquals(1, results.TotalGroupedHitCount);
    assertEquals(1, results.Groups.Length);
    IGroupDocs<int> group = results.Groups[0];
    Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
    assertEquals("java", childDoc.Get("skill"));
    assertNotNull(group.GroupValue);
    Document parentDoc = s.Doc(group.GroupValue);
    assertEquals("Lisa", parentDoc.Get("name"));

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Initializes the application: Lucene components start null, every parser
/// field gets weight 1, and the WordNet database is loaded from the local
/// "wordnetdic" directory (blocking until the load completes).
/// </summary>
public LuceneApp()
{
    luceneIndexDirectory = null;
    analyzer = null;
    writer = null;
    newSimilarity = new NewSimilarity();

    // All four document sections are searchable, each with equal weight.
    parserFields = new string[] { DOC_TITLE, DOC_AUTHOR, DOC_BIB, DOC_BODY };
    fieldWeights = new Dictionary <string, float>();
    foreach (string field in parserFields)
    {
        fieldWeights.Add(field, 1);
    }

    // Init WordNet
    // Src: https://developer.syn.co.in/tutorial/wordnet/tutorial.html
    var directory = "../../../wordnetdic";
    wordNetEngine = new WordNetEngine();

    // data sources
    // NOTE(review): these StreamReaders are handed to WordNetEngine and never
    // disposed here - presumably the engine takes ownership; confirm.
    wordNetEngine.AddDataSource(new StreamReader(Path.Combine(directory, "data.adj")), PartOfSpeech.Adjective);
    wordNetEngine.AddDataSource(new StreamReader(Path.Combine(directory, "data.adv")), PartOfSpeech.Adverb);
    wordNetEngine.AddDataSource(new StreamReader(Path.Combine(directory, "data.noun")), PartOfSpeech.Noun);
    wordNetEngine.AddDataSource(new StreamReader(Path.Combine(directory, "data.verb")), PartOfSpeech.Verb);

    // indexes
    wordNetEngine.AddIndexSource(new StreamReader(Path.Combine(directory, "index.adj")), PartOfSpeech.Adjective);
    wordNetEngine.AddIndexSource(new StreamReader(Path.Combine(directory, "index.adv")), PartOfSpeech.Adverb);
    wordNetEngine.AddIndexSource(new StreamReader(Path.Combine(directory, "index.noun")), PartOfSpeech.Noun);
    wordNetEngine.AddIndexSource(new StreamReader(Path.Combine(directory, "index.verb")), PartOfSpeech.Verb);

    Console.WriteLine("Loading database...");
    wordNetEngine.Load();
    Console.WriteLine("Load completed.");
}
/// <summary>
/// Adds one document per indicator to the index: a stored, unanalyzed "id"
/// field plus an unstored, analyzed "IndicatorText" field built by
/// concatenating the requested descriptive text properties (space-separated,
/// lowercased).
/// </summary>
/// <param name="indicatorMetadata">Indicator whose id and descriptive text are indexed.</param>
/// <param name="properties">Which descriptive properties contribute to the searchable text.</param>
/// <param name="writer">Open writer to add the document to; not closed here.</param>
private static void IndexIndicator(IndicatorMetadata indicatorMetadata,
    IEnumerable<IndicatorMetadataTextProperty> properties, IndexWriter writer)
{
    Document doc = new Document();
    doc.Add(new Field("id", indicatorMetadata.IndicatorId.ToString(),
        Field.Store.YES, Field.Index.NOT_ANALYZED));

    var text = indicatorMetadata.Descriptive;
    StringBuilder sb = new StringBuilder();
    foreach (var indicatorMetadataTextProperty in properties)
    {
        var key = indicatorMetadataTextProperty.ColumnName;
        if (text.ContainsKey(key))
        {
            // Trailing space separates consecutive property values.
            sb.Append(text[key]).Append(" ");
        }
    }

    // Fixed: ToLowerInvariant instead of ToLower - culture-independent
    // lowercasing, so indexed text does not vary with the host culture
    // (e.g. the Turkish dotted/dotless 'i').
    doc.Add(new Field("IndicatorText", sb.ToString().ToLowerInvariant(),
        Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
}
/// <summary>
/// Initializes the application with no index open yet; wires up an English
/// Snowball stemming analyzer and a query parser over the text field.
/// </summary>
public LuceneAdvancedSearchApplication()
{
    // Directory and writer are created later, when an index is built.
    luceneIndexDirectory = null;
    writer = null;

    // The parser reuses the same analyzer, so queries are stemmed exactly
    // like the indexed text.
    analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(VERSION, "English");
    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
}
/// <summary>
/// Lazily creates the Hebrew morphological analyzer (prompting for the
/// HSpell data folder on first use), wipes/recreates the temp index, and
/// enables the indexing/search buttons on success.
/// </summary>
private void btnInitAnalyzer_Click(object sender, EventArgs e)
{
    using (new BusyObject(this))
    {
        if (analyzer == null)
        {
            string hspellPath = SelectProjectFolder("Select a path to HSpell data files", "hspell-data-files" + System.IO.Path.DirectorySeparatorChar);
            if (hspellPath == null)
                return; // user cancelled the folder dialog

            MorphAnalyzer a = new MorphAnalyzer(hspellPath);
            if (!a.IsInitialized)
            {
                MessageBox.Show("Error while trying to create a morphological analyzer object; please check the existance of the required data files and try again");
                return;
            }
            analyzer = a;
        }

        // Recreate the index
        // create=true empties any previous index at tempPath; the writer is
        // closed immediately - this call only resets the index files.
        IndexWriter writer = new IndexWriter(FSDirectory.Open(tempPath), new Lucene.Net.Analysis.SimpleAnalyzer(), true, new IndexWriter.MaxFieldLength(10));
        writer.Close();
    }

    btnIndexAddFolder.Enabled = true;
    btnRunAutoTests.Enabled = true;
    btnExecuteSearch.Enabled = true;
}
// Deletes the entire index.
/// <summary>
/// Removes every document from the index at <c>indexPath</c>: creates the
/// folder if missing, clears a stale write lock if present, then deletes
/// all documents and optimizes so they are purged from disk.
/// </summary>
public void delAllIndex()
{
    if (System.IO.Directory.Exists(indexPath) == false)
    {
        System.IO.Directory.CreateDirectory(indexPath);
    }
    FSDirectory fsDirectory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    if (!IndexReader.IndexExists(fsDirectory))
    {
        return; // nothing to delete
    }
    else
    {
        // Clear a leftover write.lock (e.g. after a crash) before writing.
        if (IndexReader.IsLocked(fsDirectory))
        {
            IndexReader.Unlock(fsDirectory);
        }
    }
    Lucene.Net.Index.IndexWriter iw = new Lucene.Net.Index.IndexWriter(indexPath, new PanGuAnalyzer(), false);
    // iw.DeleteDocuments(new Lucene.Net.Index.Term("Key", key));
    iw.DeleteAll();
    // Deletion only writes .del marker files rather than removing data from
    // disk; Optimize purges the deleted docs. Before the purge, UndeleteAll
    // could still restore them.
    iw.Optimize();
    iw.Close();
}
/// <summary>
/// Adds one product to the index. Each attribute (Name, Origin, Price)
/// becomes a stored, analyzed field with term vectors enabled.
/// </summary>
/// <param name="p">Product whose attributes are indexed.</param>
/// <param name="writer">Open writer to add the document to; not closed here.</param>
private static void AddDocumentToIndex(Product p, IndexWriter writer)
{
    var doc = new Document();

    // Field name / value table for the three product attributes.
    var fields = new[]
    {
        new[] { "Name", p.Name },
        new[] { "Origin", p.Origin.ToString() },
        new[] { "Price", p.Price.ToString() },
    };

    foreach (var pair in fields)
    {
        doc.Add(new Field(
            pair[0], pair[1],
            Field.Store.YES,
            Field.Index.ANALYZED,
            Lucene.Net.Documents.Field.TermVector.YES));
    }

    writer.AddDocument(doc);
}
/// <summary>
/// Indexes four small documents into <c>dir</c> (Lucene 4.x config API),
/// each with a single unstored, analyzed "field" value, then disposes the
/// writer.
/// </summary>
void Index()
{
    var conf = new IndexWriterConfig(
        LuceneVersion.LUCENE_CURRENT,
        new Lucene.Net.Analysis.Core.WhitespaceAnalyzer(LuceneVersion.LUCENE_CURRENT));
    var writer = new Lucene.Net.Index.IndexWriter(dir, conf);

    // One document per entry, indexed in order.
    string[] bodies = { "a b c d", "a b a d", "a b e f", "x y z" };
    foreach (string body in bodies)
    {
        var document = new Lucene.Net.Documents.Document();
        document.Add(new Lucene.Net.Documents.Field(
            "field", body,
            Lucene.Net.Documents.Field.Store.NO,
            Lucene.Net.Documents.Field.Index.ANALYZED));
        writer.AddDocument(document);
    }

    writer.Dispose();
}
/// <summary>
/// Simple crawler loop: seeds the queue with a random wiki page, then keeps
/// dequeuing pages, marking them visited and indexing them, until the queue
/// drains or the user presses the spacebar.
/// </summary>
static void Proc()
{
    var uri = GetRandomWikiPage();
    queue.Enqueue(uri);

    using (var dir = new Lucene.Net.Store.SimpleFSDirectory(new DirectoryInfo("..\\..\\idx")))
    {
        using (var indexWriter = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH)))
        {
            while (true)
            {
                string page;
                if (queue.TryDequeue(out page))
                {
                    visited.AddOrUpdate(page, true, (p, b) => true);
                    try
                    {
                        ProcessPage(page, indexWriter);
                    }
                    catch (Exception)
                    {
                        // Best-effort crawl: a failing page is reported and skipped.
                        Console.WriteLine("ERROR");
                    }
                    // Spacebar stops the crawl; other keys are consumed and ignored.
                    if (Console.KeyAvailable)
                    {
                        var x = Console.ReadKey();
                        if (x.Key == ConsoleKey.Spacebar)
                        {
                            break;
                        }
                    }
                }
                else
                {
                    break; // queue drained
                }
            }
        }
    }
}
/// <summary>
/// Verifies that a collating range query honors Danish collation: under the
/// da-DK collator, "H\u00C5T" falls inside the range ["H\u00D8T", "MAND"]
/// even though plain Unicode code-point order would exclude it.
/// </summary>
public virtual void TestDanish()
{
    /* build an index */
    RAMDirectory danishIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);

    // Danish collation orders the words below in the given order
    // (example taken from TestSort.testInternationalSort() ).
    System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
    for (int docnum = 0; docnum < words.Length; ++docnum)
    {
        Document doc = new Document();
        doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }
    writer.Optimize(null);
    writer.Close();

    IndexReader reader = IndexReader.Open((Directory)danishIndex, true, null);
    IndexSearcher search = new IndexSearcher(reader);

    System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("da" + "-" + "dk").CompareInfo;

    // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
    // but Danish collation does.
    ScoreDoc[] result = search.Search(Csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
    AssertEquals("The index Term should be included.", 1, result.Length);

    result = search.Search(Csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
    AssertEquals("The index Term should not be included.", 0, result.Length);
    search.Close();
}
/// <summary>
/// Indexes six documents whose titles repeat a term one to three times
/// (so term frequency differs per doc), then runs the sorting test in both
/// modes over a near-real-time reader.
/// </summary>
public virtual void TestSorting()
{
    Directory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    // Small buffer + huge merge factor: flush often, never merge.
    writer.SetMaxBufferedDocs(2);
    writer.SetMergeFactor(1000);

    // Field-name/value sequences consumed by Adoc, one row per document.
    string[][] rows =
    {
        new[] { "id", "a", "title", "ipod", "str_s", "a" },
        new[] { "id", "b", "title", "ipod ipod", "str_s", "b" },
        new[] { "id", "c", "title", "ipod ipod ipod", "str_s", "c" },
        new[] { "id", "x", "title", "boosted", "str_s", "x" },
        new[] { "id", "y", "title", "boosted boosted", "str_s", "y" },
        new[] { "id", "z", "title", "boosted boosted boosted", "str_s", "z" },
    };
    foreach (string[] row in rows)
    {
        writer.AddDocument(Adoc(row));
    }

    IndexReader r = writer.GetReader();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(r);
    RunTest(searcher, true);
    RunTest(searcher, false);

    searcher.Close();
    r.Close();
    directory.Close();
}
/// <summary>
/// Verifies that a collating TermRangeFilter respects Farsi ordering (via
/// the Arabic collator): the indexed term U+0633 must be excluded from the
/// range [U+062F, U+0698] but included in [U+0633, U+0638].
/// </summary>
public virtual void TestFarsi()
{
    /* build an index */
    RAMDirectory farsiIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);

    Document doc = new Document();
    doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.AddDocument(doc, null);
    writer.Optimize(null);
    writer.Close();

    IndexReader reader = IndexReader.Open((Directory)farsiIndex, true, null);
    IndexSearcher search = new IndexSearcher(reader);
    Query q = new TermQuery(new Term("body", "body"));

    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
    // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
    // characters properly.
    System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo;

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
    // Collator (or an Arabic one for the case when Farsi is not supported).
    int numHits = search.Search(q, new TermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000, null).TotalHits;
    Assert.AreEqual(0, numHits, "The index Term should not be included.");

    numHits = search.Search(q, new TermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000, null).TotalHits;
    Assert.AreEqual(1, numHits, "The index Term should be included.");
    search.Close();
}
/// <summary>
/// Builds a RAMDirectory whose only contents are <paramref name="numDeletedDocs"/>
/// empty documents that have all been deleted again, asserting that the
/// writer and a fresh reader agree on the deletion counts. Produces an index
/// with deletions but zero live documents.
/// </summary>
private static RAMDirectory MakeEmptyIndex(int numDeletedDocs)
{
    RAMDirectory d = new RAMDirectory();
    IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true, MaxFieldLength.LIMITED, null);
    for (int i = 0; i < numDeletedDocs; i++)
    {
        w.AddDocument(new Document(), null);
    }
    w.Commit(null);
    // Delete everything just added; commit so readers observe the deletions.
    w.DeleteDocuments(null, new MatchAllDocsQuery());
    w.Commit(null);

    if (0 < numDeletedDocs)
    {
        Assert.IsTrue(w.HasDeletions(null), "writer has no deletions");
    }
    // MaxDoc still counts deleted docs; NumDocs counts only live docs.
    Assert.AreEqual(numDeletedDocs, w.MaxDoc(), "writer is missing some deleted docs");
    Assert.AreEqual(0, w.NumDocs(null), "writer has non-deleted docs");
    w.Close();

    IndexReader r = IndexReader.Open((Directory)d, true, null);
    Assert.AreEqual(numDeletedDocs, r.NumDeletedDocs, "reader has wrong number of deleted docs");
    r.Close();
    return (d);
}
/// <summary>
/// Creates a fresh index at <paramref name="indexPath"/> backed by a
/// SimpleAnalyzer; any existing index at that path is overwritten.
/// </summary>
/// <param name="indexPath">Directory path to create the index</param>
public void CreateIndex(string indexPath)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();

    // create=true wipes whatever index currently exists at the path.
    writer = new Lucene.Net.Index.IndexWriter(
        luceneIndexDirectory,
        analyzer,
        true,
        new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
}
/// <summary>
/// Initializes the searcher with the analyzer selected by name ("Simple
/// Analyzer", "Standard Analyzer", or "Snowball Analyzer"; anything else
/// silently falls back to SimpleAnalyzer), then builds the query parser and
/// similarity. Directory and writer are created later.
/// </summary>
/// <param name="analyzer_str">Display name of the analyzer to use.</param>
public LuceneIndexSearch(string analyzer_str)
{
    luceneIndexDirectory = null;
    writer = null;
    switch (analyzer_str)
    {
        case "Simple Analyzer":
            analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
            Console.WriteLine("Simple Analyzer");
            break;
        case "Standard Analyzer":
            // Fixed: was a redundant double assignment (analyzer = analyzer = ...).
            analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            Console.WriteLine("Standard Analyzer");
            break;
        case "Snowball Analyzer":
            // SnowballAnalyzer's second argument selects the stemmer language.
            analyzer = new Lucene.Net.Analysis.Snowball.SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_30, "English");
            Console.WriteLine("Snowball Analyzer");
            break;
        default:
            analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
            break;
    }
    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN_PASS_TEXT, analyzer);
    mySimilarity = new NewSimilarity();
}
/// <summary>
/// Round-trips CompressionTools through the index: a binary field compressed
/// with Compress and a string field compressed with CompressString are
/// stored, read back, decompressed, and compared to the original value.
/// </summary>
public virtual void TestCompressionTools()
{
    IFieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.Compress(System.Text.UTF8Encoding.UTF8.GetBytes(binaryValCompressed)), Field.Store.YES);
    IFieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.CompressString(binaryValCompressed), Field.Store.YES);

    Document doc = new Document();
    doc.Add(binaryFldCompressed);
    doc.Add(stringFldCompressed);

    /* add the doc to a ram index */
    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.AddDocument(doc, null);
    writer.Close();

    /* open a reader and fetch the document */
    IndexReader reader = IndexReader.Open((Directory)dir, false, null);
    Document docFromReader = reader.Document(0, null);
    Assert.IsTrue(docFromReader != null);

    /* fetch the binary compressed field and compare it's content with the original one */
    System.String binaryFldCompressedTest = new System.String(System.Text.UTF8Encoding.UTF8.GetChars(CompressionTools.Decompress(docFromReader.GetBinaryValue("binaryCompressed", null))));
    Assert.IsTrue(binaryFldCompressedTest.Equals(binaryValCompressed));
    Assert.IsTrue(CompressionTools.DecompressString(docFromReader.GetBinaryValue("stringCompressed", null)).Equals(binaryValCompressed));

    reader.Close();
    dir.Close();
}
/// <summary>
/// Seeds the spatial test index with a fixed set of named locations
/// (mostly around the Washington DC area, plus a few far-flung outliers),
/// then commits and closes the writer.
/// </summary>
private void AddData(IndexWriter writer)
{
    // (name, latitude, longitude) for every test location, in original order.
    object[][] points =
    {
        new object[] { "McCormick & Schmick's Seafood Restaurant", 38.9579000, -77.3572000 },
        new object[] { "Jimmy's Old Town Tavern", 38.9690000, -77.3862000 },
        new object[] { "Ned Devine's", 38.9510000, -77.4107000 },
        new object[] { "Old Brogue Irish Pub", 38.9955000, -77.2884000 },
        new object[] { "Alf Laylah Wa Laylah", 38.8956000, -77.4258000 },
        new object[] { "Sully's Restaurant & Supper", 38.9003000, -77.4467000 },
        new object[] { "TGI Friday", 38.8725000, -77.3829000 },
        new object[] { "Potomac Swing Dance Club", 38.9027000, -77.2639000 },
        new object[] { "White Tiger Restaurant", 38.9027000, -77.2638000 },
        new object[] { "Jammin' Java", 38.9039000, -77.2622000 },
        new object[] { "Potomac Swing Dance Club", 38.9027000, -77.2639000 },
        new object[] { "WiseAcres Comedy Club", 38.9248000, -77.2344000 },
        new object[] { "Glen Echo Spanish Ballroom", 38.9691000, -77.1400000 },
        new object[] { "Whitlow's on Wilson", 38.8889000, -77.0926000 },
        new object[] { "Iota Club and Cafe", 38.8890000, -77.0923000 },
        new object[] { "Hilton Washington Embassy Row", 38.9103000, -77.0451000 },
        new object[] { "HorseFeathers, Bar & Grill", 39.01220000000001, -77.3942 },
        new object[] { "Marshall Island Airfield", 7.06, 171.2 },
        new object[] { "Midway Island", 25.7, -171.7 },
        new object[] { "North Pole Way", 55.0, 4.0 },
    };

    foreach (object[] point in points)
    {
        AddPoint(writer, (string)point[0], (double)point[1], (double)point[2]);
    }

    writer.Commit();
    writer.Close();
}
/// <summary>
/// Checks MockRAMDirectory size accounting under concurrency: after
/// optimizing with several writer threads having run, the tracked
/// SizeInBytes must equal the value recomputed from the actual files.
/// </summary>
public virtual void TestRAMDirectorySize()
{
    // Copy the on-disk index into a size-tracking RAM directory.
    Directory dir = FSDirectory.Open(indexDir);
    MockRAMDirectory ramDir = new MockRAMDirectory(dir);
    dir.Close();

    IndexWriter writer = new IndexWriter(ramDir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED, null);
    writer.Optimize(null);

    Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

    ThreadClass[] threads = new ThreadClass[numThreads];
    for (int i = 0; i < numThreads; i++)
    {
        int num = i; // capture a stable per-thread copy of the loop index
        threads[i] = new AnonymousClassThread(num, writer, ramDir, this);
    }
    for (int i = 0; i < numThreads; i++)
    {
        threads[i].Start();
    }
    for (int i = 0; i < numThreads; i++)
    {
        threads[i].Join();
    }

    writer.Optimize(null);
    Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());
    writer.Close();
}
/// <summary>
/// Creates and indexes one document from the given key/value pairs.
/// Keys listed in NotAnalyzeFields are indexed without tokenization; every
/// value is stored so it can be read back via doc.Get(key).
/// </summary>
/// <param name="dic">Field-name to field-value map for one document.</param>
public void AddLuceneIndex(Dictionary<string, string> dic)
{
    //var analyzer = new StandardAnalyzer(Version.LUCENE_30);
    var analyzer = GetAnalyzer();
    using (var directory = GetLuceneDirectory())
    using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        var doc = new Document();
        foreach (KeyValuePair<string, string> pair in dic)
        {
            // add new index entry
            // Field.Store.YES: stores the original value; only stored values
            // can later be retrieved with doc.Get("number").
            // Field.Index.NOT_ANALYZED: indexed as-is, without tokenization.
            //todo:boost
            if (NotAnalyzeFields.Exists(one => one == pair.Key))
            {
                doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
            else
            {
                doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.ANALYZED));
            }
        }
        //doc.Boost
        writer.AddDocument(doc);
        writer.Commit();
        writer.Optimize();
        analyzer.Close();
    }
}
/// <summary>
/// Creates an on-disk index under the system temp folder ("RAMDirIndex")
/// with docsToAdd documents whose stored "content" field is the document
/// number spelled out in English. Later tests load this index from disk.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    System.String tempDir = System.IO.Path.GetTempPath();
    if (tempDir == null)
    {
        throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
    }
    indexDir = new System.IO.DirectoryInfo(Path.Combine(tempDir, "RAMDirIndex"));

    Directory dir = FSDirectory.Open(indexDir);
    // create=true: start from a clean index on every test run.
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

    // add some documents
    Document doc = null;
    for (int i = 0; i < docsToAdd; i++)
    {
        doc = new Document();
        doc.Add(new Field("content", English.IntToEnglish(i).Trim(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }
    Assert.AreEqual(docsToAdd, writer.MaxDoc());
    writer.Close();
    dir.Close();
}
/// <summary>
/// Indexes one document: a stored, untokenized "id" field holding the
/// numeric id, and a stored, tokenized "postBody" field holding the text.
/// </summary>
/// <param name="txts">Numeric identifier stored in the "id" field.</param>
/// <param name="text">Body text stored and tokenized in "postBody".</param>
/// <param name="writer">Open writer to add the document to; not closed here.</param>
private static void AddTextToIndex(int txts, string text, IndexWriter writer)
{
    var document = new Document();
    var idField = new Field("id", txts.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED);
    var bodyField = new Field("postBody", text, Field.Store.YES, Field.Index.TOKENIZED);
    document.Add(idField);
    document.Add(bodyField);
    writer.AddDocument(document);
}
// Creates index based on selection of analyzer
/// <summary>
/// Creates a fresh index at <paramref name="indexPath"/> using the analyzer
/// selected by <paramref name="name"/>; unknown names leave the current
/// analyzer in place. The writer is always created and given the custom
/// similarity.
/// </summary>
/// <param name="indexPath">Directory path to create the index.</param>
/// <param name="name">Analyzer selector, e.g. "StandardAnalyzer".</param>
public void CreateIndex(string indexPath, string name)
{
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath);
    IndexWriter.MaxFieldLength mfl = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);

    // Select the analyzer by name (mutually exclusive options).
    if (name == "WhitespaceAnalyzer")
    {
        analyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
    }
    else if (name == "SimpleAnalyzer")
    {
        analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    }
    else if (name == "StandardAnalyzer")
    {
        analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION);
    }
    else if (name == "StopAnalyzer")
    {
        analyzer = new Lucene.Net.Analysis.StopAnalyzer(VERSION);
    }

    // BUG FIX: previously the writer creation sat in an `else` attached to
    // the StopAnalyzer `if`, so choosing "StopAnalyzer" never created the
    // writer and SetSimilarity dereferenced a null/stale writer. The writer
    // must be created for every analyzer selection.
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, mfl);
    writer.SetSimilarity(customSimilarity);
}
/// <summary>
/// Builds a RAMDirectory index of NUM_DOCS documents, each with NUM_FIELDS
/// stored, analyzed fields "f1".."fN" whose values combine a deterministic
/// entry from <c>data</c> with a randomly chosen one (random from the test
/// framework's NewRandom()).
/// </summary>
/// <exception cref="System.SystemException">Wraps any indexing failure.</exception>
private Directory MakeIndex()
{
    Directory dir = new RAMDirectory();
    try
    {
        System.Random r = NewRandom();
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        // Keep separate index files (no .cfs) - tests may inspect per-file state.
        writer.SetUseCompoundFile(false);

        for (int d = 1; d <= NUM_DOCS; d++)
        {
            Document doc = new Document();
            for (int f = 1; f <= NUM_FIELDS; f++)
            {
                doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.ANALYZED));
            }
            writer.AddDocument(doc);
        }
        writer.Close();
    }
    catch (System.Exception e)
    {
        // NOTE(review): deliberately broad - any failure surfaces as one
        // wrapped exception type with the original as inner exception.
        throw new System.SystemException("", e);
    }
    return dir;
}
/// <summary>Indexes one document whose analyzed, stored "body" field holds <paramref name="s"/>.</summary>
private void Add(System.String s, IndexWriter writer)
{
    var document = new Document();
    document.Add(new Field("body", s, Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(document, null);
}
/// <summary>
/// Remove text from the existing index.
/// </summary>
/// <param name="directoryIndexInfo">The directory information where the index files are located.</param>
/// <param name="names">An array of unique names for the text.</param>
public void RemoveText(DirectoryInfo directoryIndexInfo, string[] names)
{
    Lucene.Net.Index.IndexWriter writer = null;
    Lucene.Net.Store.Directory directory = null;

    // FIX: removed a no-op `catch (Exception) { throw; }` — it added nothing
    // (same rethrow semantics) and obscured the cleanup path. The finally
    // block below still guarantees writer/directory disposal.
    try
    {
        // Nothing to delete when no names were supplied.
        if (names != null && names.Length > 0)
        {
            // Create the analyzer.
            SimpleAnalyzer simpleAnalyzer = new Analyzer.SimpleAnalyzer();
            StandardAnalyzer standardAnalyzer = new Analyzer.StandardAnalyzer(simpleAnalyzer);

            // Create the index writer, appending to the existing index.
            directory = FSDirectory.Open(directoryIndexInfo);
            IndexWriterConfig indexConfig = new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, standardAnalyzer);
            indexConfig.SetOpenMode(IndexWriterConfig.OpenMode_e.APPEND);
            writer = new IndexWriter(directory, indexConfig);

            // Build one query per name; "textname" terms are indexed lower-case.
            List<Query> queries = new List<Query>();
            foreach (string name in names)
            {
                BooleanQuery query = new BooleanQuery();
                query.Add(new TermQuery(new Term("textname", name.ToLower())), BooleanClause.Occur.MUST);
                queries.Add(query);
            }

            // Delete the matching documents and commit the index.
            writer.DeleteDocuments(queries.ToArray());
            writer.Commit();
        }
    }
    finally
    {
        if (writer != null)
        {
            writer.Dispose();
        }
        if (directory != null)
        {
            directory.Dispose();
        }
    }
}
/// <summary>
/// Regression test for bug 33161 (now fixed): a BooleanQuery with all terms
/// required, where one clause is a MultiPhraseQuery holding exactly one
/// term array, used to throw during search.
/// </summary>
public virtual void TestBooleanQueryContainingSingleTermPrefixQuery()
{
    // this tests against bug 33161 (now fixed)
    // In order to cause the bug, the outer query must have more than one term
    // and all terms required.
    // The contained PhraseMultiQuery must contain exactly one term array.
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    Add("blueberry pie", writer);
    Add("blueberry chewing gum", writer);
    Add("blue raspberry pie", writer);
    writer.Optimize(null);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore, true, null);
    // This query will be equivalent to +body:pie +body:"blue*"
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("body", "pie")), Occur.MUST);

    // Single position with two alternative terms — the shape that triggered the bug.
    MultiPhraseQuery trouble = new MultiPhraseQuery();
    trouble.Add(new Term[] { new Term("body", "blueberry"), new Term("body", "blue") });
    q.Add(trouble, Occur.MUST);

    // exception will be thrown here without fix
    ScoreDoc[] hits = searcher.Search(q, null, 1000, null).ScoreDocs;
    // "blueberry pie" and "blue raspberry pie" match; "blueberry chewing gum" lacks "pie".
    Assert.AreEqual(2, hits.Length, "Wrong number of hits");
    searcher.Close();
}
//END
// this method creates a document from an ObjectToIndex
/// <summary>
/// Builds (or rebuilds) the index entry for the given file: any existing
/// document with the same Id is deleted first, then a fresh document with
/// all searchable fields is added, committed, and the index optimized.
/// </summary>
public void BuildIndex(FileToIndex file)
{
    using (var analyzer = new Lucene.Net.Analysis.Ru.RussianAnalyzer(Version.LUCENE_30))
    {
        using (IndexWriter idxw = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            //check if document exists, if true deletes existing
            var searchQuery = new TermQuery(new Term("Id", file.Id.ToString()));
            idxw.DeleteDocuments(searchQuery);
            //creation
            Document doc = new Document();
            doc.Add(new Field("Id", file.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); // the analyzer splits strings into words; Id is kept whole (NOT_ANALYZED)
            doc.Add(new Field("Title", file.Title, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Description", file.Description, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Authors", file.Authors, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Text", file.Text, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Hashtags", file.Hashtags, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("Discipline", file.Discipline, Field.Store.YES, Field.Index.ANALYZED));
            // NOTE(review): assumes none of the file.* string properties are null —
            // Field's constructor would throw otherwise; confirm with callers.
            //write the document to the index
            idxw.AddDocument(doc);
            //commit and optimize; the using block disposes (closes) the writer
            idxw.Commit();
            idxw.Optimize();
        }
    }
}
/// <summary>
/// Regression test for #35626: a required MultiPhraseQuery combined with a
/// required TermQuery used to throw during search; the query should simply
/// match nothing here.
/// </summary>
public virtual void TestPhrasePrefixWithBooleanQuery()
{
    RAMDirectory indexStore = new RAMDirectory();
    IndexWriter writer = new IndexWriter(indexStore, new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>()), true, IndexWriter.MaxFieldLength.LIMITED, null);
    Add("This is a test", "object", writer);
    Add("a note", "note", writer);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(indexStore, true, null);

    // This query will be equivalent to +type:note +body:"a t*"
    BooleanQuery q = new BooleanQuery();
    q.Add(new TermQuery(new Term("type", "note")), Occur.MUST);

    // Phrase "a" followed by either "test" or "this".
    MultiPhraseQuery trouble = new MultiPhraseQuery();
    trouble.Add(new Term("body", "a"));
    trouble.Add(new Term[] { new Term("body", "test"), new Term("body", "this") });
    q.Add(trouble, Occur.MUST);

    // exception will be thrown here without fix for #35626:
    ScoreDoc[] hits = searcher.Search(q, null, 1000, null).ScoreDocs;
    // The only "note" document has body "a note" — no phrase match expected.
    Assert.AreEqual(0, hits.Length, "Wrong number of hits");
    searcher.Close();
}
/// <summary>
/// Indexes 1000 documents whose "field" stores the English spelling of the
/// doc number, cycling through the four term-vector flavors: i divisible by
/// 6 → positions+offsets, by 2 → positions, by 3 → offsets, otherwise plain.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    //writer.setUseCompoundFile(true);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();

        bool divisibleBy2 = (i % 2) == 0;
        bool divisibleBy3 = (i % 3) == 0;
        Field.TermVector termVector;
        if (divisibleBy2 && divisibleBy3)
        {
            termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        }
        else if (divisibleBy2)
        {
            termVector = Field.TermVector.WITH_POSITIONS;
        }
        else if (divisibleBy3)
        {
            termVector = Field.TermVector.WITH_OFFSETS;
        }
        else
        {
            termVector = Field.TermVector.YES;
        }

        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, termVector));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory, true);
}
/// <summary>
/// Creates the index writer over <c>luceneIndexDirectory</c> using the
/// current analyzer and the default maximum field length.
/// </summary>
public void CreateWriter()
{
    var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    // TODO: Enter code to create the Lucene Writer
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, maxFieldLength);
}
/// <summary>
/// Verifies document frequencies of terms "aaa" (200 docs) and "bbb"
/// (100 docs) both before and after the index is optimized.
/// </summary>
public virtual void TestTermEnum()
{
    // add 100 documents with term : aaa
    // add 100 documents with terms: aaa bbb
    // Therefore, term 'aaa' has document frequency of 200 and term 'bbb' 100
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    for (int docPair = 0; docPair < 100; docPair++)
    {
        AddDoc(writer, "aaa");
        AddDoc(writer, "aaa bbb");
    }
    writer.Close();

    // verify document frequency of terms in an unoptimized index
    VerifyDocFreq();

    // merge segments by optimizing the index
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
    writer.Optimize();
    writer.Close();

    // verify document frequency of terms in an optimized index
    VerifyDocFreq();
}
/// <summary>
/// Initializes the interactive session: no directory or writer yet, a
/// SimpleAnalyzer, and a query parser over the default text field.
/// </summary>
public LuceneInteractive()
{
    writer = null;
    luceneIndexDirectory = null;
    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer);
}
/// <summary>
/// Indexes 5000 identical single-term docs and checks that skipping in the
/// term positions reads the expected number of bytes from the freq stream,
/// exercising multiple skip levels (a single level would read more bytes).
/// </summary>
public virtual void TestSimpleSkip()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++)
    {
        Document d1 = new Document();
        d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(d1);
    }
    writer.Flush();
    writer.Optimize(); // single segment, so skip lists span all 5000 docs
    writer.Close();

    IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
    SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
    // Wrap the freq stream so every read is counted (into `counter`).
    tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);

    // Two passes to make sure Seek resets skipping state correctly.
    for (int i = 0; i < 2; i++)
    {
        counter = 0;
        tp.Seek(term);

        CheckSkipTo(tp, 14, 185); // no skips
        CheckSkipTo(tp, 17, 190); // one skip on level 0
        CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

        // this test would fail if we had only one skip level,
        // because than more bytes would be read from the freqStream
        CheckSkipTo(tp, 4800, 250); // one skip on level 2
    }
}
/// <summary>
/// Indexes every advert found in the data file at <paramref name="filePath"/>,
/// formatting each field via the property descriptors loaded from
/// PropertyDescriptors.xml, then optimizes the index.
/// </summary>
/// <param name="filePath">Path of the advert data file to load.</param>
public void IndexFile(string filePath)
{
    PropertyDescriptors descriptors = new PropertyDescriptors();
    descriptors.LoadData(System.Windows.Forms.Application.StartupPath + "\\PropertyDescriptors.xml");
    Analyzer a = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
    // Create a fresh index only when none exists at the target directory.
    bool create = !(System.IO.Directory.Exists(_idxDir) && IndexReader.IndexExists(_idxDir));
    IndexWriter iw = new IndexWriter(_idxDir, a, create);
    // FIX: ensure the writer is always closed — previously an exception while
    // loading or indexing leaked the writer and left the index write-locked.
    try
    {
        iw.SetUseCompoundFile(true);
        AdDataStream adStream = new AdDataStream(filePath);
        adStream.LoadData();
        foreach (Advert ad in adStream.FetchAd())
        {
            Document doc = new Document();
            foreach (string s in ad.GetDictionary().Keys)
            {
                string temp = descriptors.GetIndexableFormat(descriptors[s], ad[s]);
                doc.Add(Field.Text(s, temp));
            }
            iw.AddDocument(doc);
            // Progress notification is optional.
            if (_updateCallback != null)
            {
                _updateCallback("Added Document: " + ad["Title"]);
            }
        }
        iw.Optimize();
    }
    finally
    {
        iw.Close();
    }
}
/// <summary>
/// Builds an in-memory search index over all PoIs: one document per item
/// with its id, each textual meta-info label as a field, and a catch-all
/// "All" field; also fills <c>LookupTable</c> (id → content) for result
/// resolution.
/// </summary>
public void CreateSearchIndex()
{
    directory = new RAMDirectory();
    analyzer = new StandardAnalyzer(Version.LUCENE_30);
    LookupTable = new Dictionary<string, BaseContent>();

    // FIX: the writer was never disposed, leaking resources and holding the
    // index write lock; `using` guarantees it is closed after the commit.
    using (var ixw = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        foreach (BaseContent p in Service.PoIs.ToList())
        {
            var document = new Document();
            document.Add(new Field("id", p.Id.ToString(), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
            string all = p.Name + " ";
            foreach (MetaInfo mi in p.EffectiveMetaInfo)
            {
                string value;
                // Only textual meta entries with an actual label value are indexed.
                if (mi.Type != MetaTypes.text || !p.Labels.TryGetValue(mi.Label, out value))
                    continue;
                document.Add(new Field(mi.Label, value, Field.Store.YES, Field.Index.ANALYZED));
                all += value + " ";
            }
            // Aggregate field so a single query can span name + all text labels.
            document.Add(new Field("All", all, Field.Store.YES, Field.Index.ANALYZED));
            LookupTable[p.Id.ToString()] = p;
            ixw.AddDocument(document);
        }
        ixw.Commit();
    }
}
/// <summary>
/// Verifies that MultiValueSource returns the same per-document values as
/// the underlying IntFieldSource across a multi-segment reader (one commit
/// per document forces multiple segments).
/// </summary>
public virtual void TestMultiValueSource()
{
    Directory dir = new MockRAMDirectory();
    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    Field f = new Field("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
    doc.Add(f);
    // Reuse the same Document/Field, committing each time to create segments.
    for (int i = 0; i < 17; i++)
    {
        f.SetValue("" + i);
        w.AddDocument(doc);
        w.Commit();
    }

    IndexReader r = w.GetReader();
    w.Close();
    // The point of the test: the reader must be composite (multi-segment).
    Assert.IsTrue(r.GetSequentialSubReaders().Length > 1);

    ValueSource s1 = new IntFieldSource("field");
    DocValues v1 = s1.GetValues(r);
    DocValues v2 = new MultiValueSource(s1).GetValues(r);

    // Doc i was indexed with value i, so both sources must return i.
    for (int i = 0; i < r.MaxDoc(); i++)
    {
        Assert.AreEqual(v1.IntVal(i), i);
        Assert.AreEqual(v2.IntVal(i), i);
    }

    Lucene.Net.Search.FieldCache_Fields.DEFAULT.PurgeAllCaches();

    r.Close();
    dir.Close();
}
/// <summary>
/// Builds a small RAM index of 8 documents; every doc has "id" and a
/// constant "all" field, and all but the null entries of <c>data</c> also
/// get an analyzed "data" field. Initializes reader <c>r</c> and searcher
/// <c>s</c> for the tests.
/// </summary>
public override void SetUp()
{
    base.SetUp();
    // null entries deliberately produce documents WITHOUT a "data" field.
    System.String[] data = new System.String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", "X 4 5 6" };
    index = new RAMDirectory();
    IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

    for (int i = 0; i < data.Length; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id",String.valueOf(i)));
        // Constant field so a query can match every document.
        doc.Add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("all","all"));
        if (null != data[i])
        {
            doc.Add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("data",data[i]));
        }
        writer.AddDocument(doc, null);
    }

    writer.Optimize(null);
    writer.Close();

    r = IndexReader.Open(index, true, null);
    s = new IndexSearcher(r);
    //System.out.println("Set up " + getName());
}
/// <summary>
/// index 1 document and commit.
/// prepare for crashing.
/// index 1 more document, and upon commit, creation of segments_2 will crash.
/// The writer must recover: segments_2 never appears on disk and the final
/// Dispose writes segments_3 instead.
/// </summary>
private void IndexAndCrashOnCreateOutputSegments2()
{
    Directory realDirectory = FSDirectory.Open(Path);
    CrashAfterCreateOutput crashAfterCreateOutput = new CrashAfterCreateOutput(realDirectory);

    // NOTE: cannot use RandomIndexWriter because it
    // sometimes commits:
    IndexWriter indexWriter = new IndexWriter(crashAfterCreateOutput, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

    indexWriter.AddDocument(Document);
    // writes segments_1:
    indexWriter.Commit();

    // Arm the directory so the NEXT segments file creation throws.
    crashAfterCreateOutput.GetCrashAfterCreateOutput = "segments_2";
    indexWriter.AddDocument(Document);
    try
    {
        // tries to write segments_2 but hits fake exc:
        indexWriter.Commit();
        Assert.Fail("should have hit CrashingException");
    }
    catch (CrashingException) // FIX: dropped the unused exception variable (CS0168 warning)
    {
        // expected
    }
    // writes segments_3
    indexWriter.Dispose();
    Assert.IsFalse(SlowFileExists(realDirectory, "segments_2"));
    crashAfterCreateOutput.Dispose();
}
/// <summary>
/// Verifies lifetime semantics: a reader obtained from a writer cannot be
/// reopened after disposal, a disposed writer rejects AddDocument, and the
/// RAMDirectory reports open/closed state correctly around its using block.
/// </summary>
public void TestReadersWriters()
{
    Directory dir;

    using (dir = new RAMDirectory())
    {
        Document doc;
        IndexWriter writer;
        IndexReader reader;

        using (writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            Field field = new Field("name", "value", Field.Store.YES, Field.Index.ANALYZED);
            doc = new Document();
            doc.Add(field);
            writer.AddDocument(doc);
            writer.Commit();

            // Near-real-time reader from the writer; Reopen is legal while open.
            using (reader = writer.GetReader())
            {
                IndexReader r1 = reader.Reopen();
            }

            // After its using block the reader is disposed — Reopen must throw.
            Assert.Throws<AlreadyClosedException>(() => reader.Reopen(), "IndexReader shouldn't be open here");
        }

        // After its using block the writer is disposed — AddDocument must throw.
        Assert.Throws<AlreadyClosedException>(() => writer.AddDocument(doc), "IndexWriter shouldn't be open here");
        Assert.IsTrue(dir.isOpen_ForNUnit, "RAMDirectory");
    }

    // The directory's using block has ended — it must now report closed.
    Assert.IsFalse(dir.isOpen_ForNUnit, "RAMDirectory");
}
/// <summary>
/// Indexes "MRS. SHABA" with a lower-casing keyword analyzer, dumps the
/// resulting terms, and asserts that the phrase-prefix query
/// Name:"MRS. S*" produced by RangeQueryParser finds the document.
/// </summary>
public void MrsJones()
{
    using (var dir = new RAMDirectory())
    using (var analyzer = new LowerCaseKeywordAnalyzer())
    {
        using (var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            var document = new Lucene.Net.Documents.Document();
            document.Add(new Field("Name", "MRS. SHABA", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
            writer.AddDocument(document);
        }

        // NOTE(review): searcher is never disposed before dir's using block
        // ends — harmless for a RAMDirectory test but worth confirming.
        var searcher = new IndexSearcher(dir, true);

        // Diagnostic dump of every indexed term.
        var termEnum = searcher.IndexReader.Terms();
        while (termEnum.Next())
        {
            var buffer = termEnum.Term.Text;
            Console.WriteLine(buffer);
        }

        var queryParser = new RangeQueryParser(Version.LUCENE_29, "", analyzer);
        var query = queryParser.Parse("Name:\"MRS. S*\"");
        Console.WriteLine(query);

        var result = searcher.Search(query, 10);
        Assert.NotEqual(0, result.TotalHits);
    }
}
/// <summary>
/// Adding two BinaryDocValuesFields with the same name to one document must
/// be rejected by IndexWriter with an ArgumentException.
/// </summary>
public virtual void TestAddBinaryTwice()
{
    Analyzer analyzer = new MockAnalyzer(Random());

    Directory directory = NewDirectory();
    // we don't use RandomIndexWriter because it might add more docvalues than we expect
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetMergePolicy(NewLogMergePolicy());
    IndexWriter iwriter = new IndexWriter(directory, iwc);

    // Duplicate "dv" field — illegal for doc values.
    Document doc = new Document();
    doc.Add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
    doc.Add(new BinaryDocValuesField("dv", new BytesRef("bar!")));
    try
    {
        iwriter.AddDocument(doc);
        Assert.Fail("didn't hit expected exception");
    }
    catch (System.ArgumentException) // FIX: dropped the unused `expected` variable (CS0168 warning)
    {
        // expected
    }

    iwriter.Dispose();
    directory.Dispose();
}
/// <summary>
/// 4.x version of the skip-list byte-count test: indexes 5000 identical
/// single-term docs into one force-merged segment and checks that skipping
/// reads the expected number of bytes, which requires multiple skip levels.
/// </summary>
public virtual void TestSimpleSkip()
{
    // CountingRAMDirectory counts bytes read (into `Counter`).
    Directory dir = new CountingRAMDirectory(this, new RAMDirectory());
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat())).SetMergePolicy(NewLogMergePolicy()));
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++)
    {
        Document d1 = new Document();
        d1.Add(NewTextField(term.Field(), term.Text(), Field.Store.NO));
        writer.AddDocument(d1);
    }
    writer.Commit();
    writer.ForceMerge(1); // single segment so skip lists span all 5000 docs
    writer.Dispose();

    AtomicReader reader = GetOnlySegmentReader(DirectoryReader.Open(dir));

    // Two passes to make sure a fresh enum resets skipping state correctly.
    for (int i = 0; i < 2; i++)
    {
        Counter = 0;
        DocsAndPositionsEnum tp = reader.TermPositionsEnum(term);
        CheckSkipTo(tp, 14, 185); // no skips
        CheckSkipTo(tp, 17, 190); // one skip on level 0
        CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0

        // this test would fail if we had only one skip level,
        // because than more bytes would be read from the freqStream
        CheckSkipTo(tp, 4800, 250); // one skip on level 2
    }
}
/// <summary>
/// TermsFilter behavior with missing terms: documents hold multiples of 10
/// ("0".."990"), so filters on absent terms ("19", "00") must not change
/// the match count while present terms ("20", "10") each add one match.
/// </summary>
public void testMissingTerms()
{
    String fieldName = "field1";
    Directory rd = new RAMDirectory();
    var w = new IndexWriter(rd, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 100; i++)
    {
        var doc = new Document();
        int term = i * 10; //terms are units of 10;
        doc.Add(new Field(fieldName, "" + term, Field.Store.YES, Field.Index.ANALYZED));
        w.AddDocument(doc);
    }
    IndexReader reader = w.GetReader();
    w.Close();

    TermsFilter tf = new TermsFilter();
    // "19" is not a multiple of 10 → no document can match.
    tf.AddTerm(new Term(fieldName, "19"));
    FixedBitSet bits = (FixedBitSet) tf.GetDocIdSet(reader);
    Assert.AreEqual(0, bits.Cardinality(), "Must match nothing");

    tf.AddTerm(new Term(fieldName, "20"));
    bits = (FixedBitSet) tf.GetDocIdSet(reader);
    Assert.AreEqual(1, bits.Cardinality(), "Must match 1");

    tf.AddTerm(new Term(fieldName, "10"));
    bits = (FixedBitSet) tf.GetDocIdSet(reader);
    Assert.AreEqual(2, bits.Cardinality(), "Must match 2");

    // "00" is not how 0 was indexed ("0"), so the count stays at 2.
    tf.AddTerm(new Term(fieldName, "00"));
    bits = (FixedBitSet) tf.GetDocIdSet(reader);
    Assert.AreEqual(2, bits.Cardinality(), "Must match 2");

    reader.Close();
    rd.Close();
}
/// <summary>
/// Creates a fresh index over every file in the text-files folder matching
/// the configured search pattern, indexing path (stored, not analyzed),
/// name (stored, analyzed) and content (analyzed only), then optimizes.
/// </summary>
/// <param name="analayer">Analyzer used for the analyzed fields.</param>
public void CreateIndex(Analyzer analayer)
{
    FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder));
    IndexWriter indexWriter = new IndexWriter(fsDir, analayer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

    string[] files = System.IO.Directory.GetFiles(_textFilesFolder, Config.FileSearchPattern, SearchOption.AllDirectories);
    // FIX: guarantee the writer is disposed even if reading or indexing a
    // file throws — previously that leaked the writer and left the index
    // write-locked.
    try
    {
        foreach (string file in files)
        {
            string name = new FileInfo(file).Name;
            string content = File.ReadAllText(file);

            Document doc = new Document();
            doc.Add(new Field(Config.Field_Path, file, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field(Config.Field_Name, name, Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field(Config.Field_Content, content, Field.Store.NO, Field.Index.ANALYZED));
            indexWriter.AddDocument(doc);

            Console.WriteLine("{0} - {1}", file, name);
        }
        indexWriter.Optimize();
    }
    finally
    {
        indexWriter.Dispose();
    }

    Console.WriteLine("File count: {0}", files.Length);
}
/// <summary>
/// Creates the index at a given path
/// </summary>
/// <param name="indexPath">The pathname to create the index</param>
public void CreateIndex(string indexPath)
{
    // The index lives in an "IndexStoredPosition" subfolder of the given path.
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(indexPath + "/IndexStoredPosition");
    var maxFieldLength = new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH);
    writer = new Lucene.Net.Index.IndexWriter(luceneIndexDirectory, analyzer, true, maxFieldLength);
    writer.SetSimilarity(mySimilarity);
}
/// <summary>
/// Prepares for searching: opens the index directory at <paramref name="path"/>,
/// clears the writer, and builds a SimpleAnalyzer-backed query parser.
/// </summary>
public void Searcher(string path)
{
    writer = null;
    luceneIndexDirectory = Lucene.Net.Store.FSDirectory.Open(path);
    analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
    parser = new QueryParser(VERSION, TEXT, analyzer);
}