public virtual void TestDemo_Renamed() { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); // Store the index in memory: Directory directory = new RAMDirectory(); // To store an index on disk, use this instead: //Directory directory = FSDirectory.open("/tmp/testindex"); IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000)); Document doc = new Document(); System.String text = "This is the text to be indexed."; doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.AddDocument(doc); iwriter.Close(); // Now search the index: IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true // Parse a simple query that searches for "text": QueryParser parser = new QueryParser("fieldname", analyzer); Query query = parser.Parse("text"); ScoreDoc[] hits = isearcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(1, hits.Length); // Iterate through the results: for (int i = 0; i < hits.Length; i++) { Document hitDoc = isearcher.Doc(hits[i].doc); Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed."); } isearcher.Close(); directory.Close(); }
public virtual void TestDemo_Renamed() { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); // Store the index in memory: Directory directory = new RAMDirectory(); // To store an index on disk, use this instead: //Directory directory = FSDirectory.open("/tmp/testindex"); IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000)); Document doc = new Document(); System.String text = "This is the text to be indexed."; doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.AddDocument(doc); iwriter.Close(); // Now search the index: IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true // Parse a simple query that searches for "text": QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fieldname", analyzer); Query query = parser.Parse("text"); ScoreDoc[] hits = isearcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); // Iterate through the results: for (int i = 0; i < hits.Length; i++) { Document hitDoc = isearcher.Doc(hits[i].Doc); Assert.AreEqual(hitDoc.Get("fieldname"), "This is the text to be indexed."); } isearcher.Close(); directory.Close(); }
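Both demo variants above only close the searcher and directory when every assertion passes; a failed assertion leaks them. The following is a hedged sketch, not taken from the original tests, of the same flow with cleanup guaranteed by try/finally. It assumes the same Lucene.Net API already used in the second variant (Version-aware StandardAnalyzer and QueryParser, ScoreDocs, Close()).
// Hedged sketch (assumption, not from the original tests): the demo search with guaranteed cleanup.
Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
Directory directory = new RAMDirectory();
IndexWriter iwriter = new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
Document doc = new Document();
doc.Add(new Field("fieldname", "This is the text to be indexed.", Field.Store.YES, Field.Index.ANALYZED));
iwriter.AddDocument(doc);
iwriter.Close();
IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only
try
{
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fieldname", analyzer);
    ScoreDoc[] hits = isearcher.Search(parser.Parse("text"), null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
}
finally
{
    // closed even if the assertion above throws
    isearcher.Close();
    directory.Close();
}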
public virtual void TestRAMDirectoryString() { MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.FullName); // Check size Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes()); // open reader to test document count IndexReader reader = IndexReader.Open(ramDir); Assert.AreEqual(docsToAdd, reader.NumDocs()); // open searcher to check if all docs are there IndexSearcher searcher = new IndexSearcher(reader); // search for all documents for (int i = 0; i < docsToAdd; i++) { Document doc = searcher.Doc(i); Assert.IsTrue(doc.GetField("content") != null); } // cleanup reader.Close(); searcher.Close(); }
public virtual void TestMmapIndex() { Assert.Ignore("Need to port tests, but we don't really support MMapDirectories anyway"); FSDirectory storeDirectory; storeDirectory = new MMapDirectory(new System.IO.DirectoryInfo(storePathname), null); // plan to add a set of useful stopwords, consider changing some of the // interior filters. StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet <string>()); // TODO: something about lock timeouts and leftover locks. IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null); IndexSearcher searcher = new IndexSearcher(storeDirectory, true, null); for (int dx = 0; dx < 1000; dx++) { System.String f = RandomField(); Document doc = new Document(); doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc, null); } searcher.Close(); writer.Close(); RmDir(new System.IO.FileInfo(storePathname)); }
public virtual void TestGetValuesForIndexedDocument() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true); writer.AddDocument(MakeDocumentWithFields()); writer.Close(); Searcher searcher = new IndexSearcher(dir); // search for something that does exist Query query = new TermQuery(new Term("keyword", "test1")); // ensure that queries return expected results without DateFilter first Hits hits = searcher.Search(query); Assert.AreEqual(1, hits.Length()); try { DoAssert(hits.Doc(0), true); } catch (System.Exception e) { System.Console.Error.WriteLine(e.StackTrace); System.Console.Error.Write("\n"); } finally { searcher.Close(); } }
public virtual void TestMmapIndex() { FSDirectory storeDirectory; storeDirectory = new MMapDirectory(new System.IO.FileInfo(storePathname), null); // plan to add a set of useful stopwords, consider changing some of the // interior filters. StandardAnalyzer analyzer = new StandardAnalyzer(new System.Collections.Hashtable()); // TODO: something about lock timeouts and leftover locks. IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); IndexSearcher searcher = new IndexSearcher(storePathname); for (int dx = 0; dx < 1000; dx++) { System.String f = RandomField(); Document doc = new Document(); doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } searcher.Close(); writer.Close(); RmDir(new System.IO.FileInfo(storePathname)); }
public virtual void TestDemo_Renamed_Method() { Analyzer analyzer = new StandardAnalyzer(); // Store the index in memory: Directory directory = new RAMDirectory(); // To store an index on disk, use this instead (note that the // parameter true will overwrite the index in that directory // if one exists): //Directory directory = FSDirectory.getDirectory("/tmp/testindex", true); IndexWriter iwriter = new IndexWriter(directory, analyzer, true); iwriter.SetMaxFieldLength(25000); Document doc = new Document(); System.String text = "This is the text to be indexed."; doc.Add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED)); iwriter.AddDocument(doc); iwriter.Close(); // Now search the index: IndexSearcher isearcher = new IndexSearcher(directory); // Parse a simple query that searches for "text": Lucene.Net.QueryParsers.QueryParser parser = new Lucene.Net.QueryParsers.QueryParser("fieldname", analyzer); Query query = parser.Parse("text"); Hits hits = isearcher.Search(query); Assert.AreEqual(1, hits.Length()); // Iterate through the results: for (int i = 0; i < hits.Length(); i++) { Document hitDoc = hits.Doc(i); Assert.AreEqual("This is the text to be indexed.", hitDoc.Get("fieldname")); } isearcher.Close(); directory.Close(); }
public void TestBooleanQuerySerialization() { Lucene.Net.Search.BooleanQuery lucQuery = new Lucene.Net.Search.BooleanQuery(); lucQuery.Add(new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("field", "x")), Lucene.Net.Search.BooleanClause.Occur.MUST); System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(); System.IO.MemoryStream ms = new System.IO.MemoryStream(); bf.Serialize(ms, lucQuery); ms.Seek(0, System.IO.SeekOrigin.Begin); Lucene.Net.Search.BooleanQuery lucQuery2 = (Lucene.Net.Search.BooleanQuery)bf.Deserialize(ms); ms.Close(); Assert.AreEqual(lucQuery, lucQuery2, "Error in serialization"); Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir, true); int hitCount = searcher.Search(lucQuery, 20).totalHits; searcher.Close(); searcher = new Lucene.Net.Search.IndexSearcher(dir, true); int hitCount2 = searcher.Search(lucQuery2, 20).totalHits; Assert.AreEqual(hitCount, hitCount2, "Error in serialization - different hit counts"); }
private int GetHitCount(Directory dir, Term term) { IndexSearcher searcher = new IndexSearcher(dir, true, null); int hitCount = searcher.Search(new TermQuery(term), null, 1000, null).TotalHits; searcher.Close(); return(hitCount); }
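A hypothetical call site for the GetHitCount helper above; the field and term values mirror those used elsewhere in these tests, and the existing Directory variable dir as well as the intermediate delete step are illustrative assumptions.
// Hypothetical usage of GetHitCount (assumptions: dir is an open Directory, "content"/"aaa" as in other tests).
Term term = new Term("content", "aaa");
int hitsBefore = GetHitCount(dir, term);
// ... delete or add documents matching the term, then close/commit the writer ...
int hitsAfter = GetHitCount(dir, term);
Assert.AreNotEqual(hitsBefore, hitsAfter);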
public virtual void TestFieldSetValue() { Field field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED); Document doc = new Document(); doc.Add(field); doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED)); RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(doc); field.SetValue("id2"); writer.AddDocument(doc); field.SetValue("id3"); writer.AddDocument(doc); writer.Close(); Searcher searcher = new IndexSearcher(dir); Query query = new TermQuery(new Term("keyword", "test")); // ensure that queries return expected results without DateFilter first ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); int result = 0; for (int i = 0; i < 3; i++) { Document doc2 = searcher.Doc(hits[i].doc); Field f = doc2.GetField("id"); if (f.StringValue().Equals("id1")) { result |= 1; } else if (f.StringValue().Equals("id2")) { result |= 2; } else if (f.StringValue().Equals("id3")) { result |= 4; } else { Assert.Fail("unexpected id field"); } } searcher.Close(); dir.Close(); Assert.AreEqual(7, result, "did not see all IDs"); }
/* Open pre-lockless index, add docs, do a delete & * setNorm, and search */ public virtual void ChangeIndexNoAdds(System.String dirName) { dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName)); // make sure searching sees right # hits IndexSearcher searcher = new IndexSearcher(dir, true); ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length, "wrong number of hits"); Document d = searcher.Doc(hits[0].Doc); Assert.AreEqual("21", d.Get("id"), "wrong first document"); searcher.Close(); // make sure we can do a delete & setNorm against this // pre-lockless segment: IndexReader reader = IndexReader.Open(dir, false); Term searchTerm = new Term("id", "6"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "wrong delete count"); reader.SetNorm(22, "content", (float)2.0); reader.Close(); // make sure they "took": searcher = new IndexSearcher(dir, true); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(33, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].Doc); Assert.AreEqual("22", d.Get("id"), "wrong first document"); TestHits(hits, 33, searcher.IndexReader); searcher.Close(); // optimize IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); writer.Optimize(); writer.Close(); searcher = new IndexSearcher(dir, true); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(33, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].Doc); Assert.AreEqual("22", d.Get("id"), "wrong first document"); TestHits(hits, 33, searcher.IndexReader); searcher.Close(); dir.Close(); }
public virtual void TestSetBufferSize() { System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize")); MockFSDirectory dir = new MockFSDirectory(indexDir, NewRandom()); try { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(false); for (int i = 0; i < 37; i++) { Document doc = new Document(); doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } writer.Close(); dir.allIndexInputs.Clear(); IndexReader reader = IndexReader.Open(dir); Term aaa = new Term("content", "aaa"); Term bbb = new Term("content", "bbb"); Term ccc = new Term("content", "ccc"); Assert.AreEqual(37, reader.DocFreq(ccc)); reader.DeleteDocument(0); Assert.AreEqual(37, reader.DocFreq(aaa)); dir.tweakBufferSizes(); reader.DeleteDocument(4); Assert.AreEqual(reader.DocFreq(bbb), 37); dir.tweakBufferSizes(); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(bbb), null, 1000).scoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(35, hits.Length); dir.tweakBufferSizes(); hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).scoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(1, hits.Length); hits = searcher.Search(new TermQuery(aaa), null, 1000).scoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(35, hits.Length); searcher.Close(); reader.Close(); } finally { _TestUtil.RmDir(indexDir); } }
public int GetDocumentCount() { int num; IndexSearcher searcher = this.CreateSearcher(true); try { num = searcher.Reader.NumDocs(); } finally { searcher.Close(); } return(num); }
override public void Run() { IndexSearcher searcher = null; Query query = new TermQuery(new Term("content", "aaa")); for (int i = 0; i < this.numIteration; i++) { try { searcher = new IndexSearcher(dir); } catch (System.Exception e) { hitException = true; System.Console.Out.WriteLine("Stress Test Index Searcher: create hit unexpected exception: " + e.ToString()); System.Console.Out.WriteLine(e.StackTrace); break; } if (searcher != null) { ScoreDoc[] hits = null; try { hits = searcher.Search(query, null, 1000).scoreDocs; } catch (System.IO.IOException e) { hitException = true; System.Console.Out.WriteLine("Stress Test Index Searcher: search hit unexpected exception: " + e.ToString()); System.Console.Out.WriteLine(e.StackTrace); break; } // System.out.println(hits.length() + " total results"); try { searcher.Close(); } catch (System.IO.IOException e) { hitException = true; System.Console.Out.WriteLine("Stress Test Index Searcher: close hit unexpected exception: " + e.ToString()); System.Console.Out.WriteLine(e.StackTrace); break; } searcher = null; } } }
public static void Main(System.String[] args) { System.String index = @"c:\EmailTest\LuceneDB"; IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.FileInfo(index)), true); // only searching, so read-only=true Searcher searcher = new IndexSearcher(reader); if (Stopwatch.IsHighResolution) { System.Console.WriteLine("We have a high resolution timer with an frequency of {0} ticks/ms", Stopwatch.Frequency/1000); } searchFor(searcher, "jeske AND neotonic"); searchFor(searcher, "noticed AND problems"); searchFor(searcher, "data AND returned"); searchFor(searcher, "scott AND hassan"); searcher.Close(); reader.Close(); System.Console.WriteLine("done"); }
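The searchFor helper called above is not shown in this snippet. Below is a hypothetical sketch of what such a helper might look like, not the original implementation; the "body" field name, the analyzer choice, and the 25-hit limit are assumptions.
// Hypothetical searchFor sketch (not the original implementation).
static void searchFor(Searcher searcher, string queryString)
{
    Stopwatch timer = Stopwatch.StartNew();
    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "body", new StandardAnalyzer(Util.Version.LUCENE_CURRENT)); // field name assumed
    Query query = parser.Parse(queryString);
    TopDocs hits = searcher.Search(query, null, 25);
    timer.Stop();
    System.Console.WriteLine("'" + queryString + "': " + hits.TotalHits + " hits in " + timer.ElapsedMilliseconds + " ms");
}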
public static bool PreviouslyIndexed(string url) { string indexFileLocation = indexDir; Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false); Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir); Lucene.Net.Search.Hits hits = null; try { Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", url)); hits = searcher.Search(query); } catch { } finally { searcher.Close(); } /* guard: a swallowed exception leaves hits null */ return hits != null && hits.Length() > 0; }
public virtual void TestGetValuesForIndexedDocument() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(MakeDocumentWithFields()); writer.Close(); Searcher searcher = new IndexSearcher(dir); // search for something that does exist Query query = new TermQuery(new Term("keyword", "test1")); // ensure that queries return expected results without DateFilter first ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); DoAssert(searcher.Doc(hits[0].doc), true); searcher.Close(); }
public virtual void TestStopWordSearching() { Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT); Directory ramDir = new RAMDirectory(); var iw = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); var doc = new Document(); doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED)); iw.AddDocument(doc); iw.Close(); var mfqp = new MultiFieldQueryParser(Util.Version.LUCENE_CURRENT, new[] { "body" }, analyzer); mfqp.DefaultOperator = QueryParser.Operator.AND; var q = mfqp.Parse("the footest"); var is_Renamed = new IndexSearcher(ramDir, true); var hits = is_Renamed.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); is_Renamed.Close(); }
public virtual void TestStopWordSearching() { Analyzer analyzer = new StandardAnalyzer(); Directory ramDir = new RAMDirectory(); IndexWriter iw = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.Add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED)); iw.AddDocument(doc); iw.Close(); MultiFieldQueryParser mfqp = new MultiFieldQueryParser(new System.String[] { "body" }, analyzer); mfqp.SetDefaultOperator(QueryParser.Operator.AND); Query q = mfqp.Parse("the footest"); IndexSearcher is_Renamed = new IndexSearcher(ramDir); ScoreDoc[] hits = is_Renamed.Search(q, null, 1000).ScoreDocs; Assert.AreEqual(1, hits.Length); is_Renamed.Close(); }
public static List<IndexedItem> SearchProjects(string s) { List<IndexedItem> retVal = new List<IndexedItem>(); string indexFileLocation = indexDir; Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false); Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir); try { Lucene.Net.Search.Query query = new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("content", s)); query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("url", fromUrl)) }); query = query.Combine(new Lucene.Net.Search.Query[] { query, new Lucene.Net.Search.TermQuery(new Lucene.Net.Index.Term("title", s)) }); //execute the query Lucene.Net.Search.Hits hits = searcher.Search(query); //iterate over the results. for (int i = 0; i < hits.Length(); i++) { Lucene.Net.Documents.Document doc = hits.Doc(i); string article = doc.Get("content"); string title = doc.Get("title"); string url = doc.Get("url"); retVal.Add(new IndexedItem { Article = article, Href = url, Title = title }); } foreach (IndexedItem ind in retVal) { Console.WriteLine(ind.Href); } retVal = retVal.Distinct().ToList(); } catch { } finally { searcher.Close(); } return retVal; }
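A hypothetical caller for the SearchProjects method above; the query string is illustrative only, and the IndexedItem properties used are the ones populated in the method itself.
// Hypothetical usage of SearchProjects; the query string is illustrative.
List<IndexedItem> results = SearchProjects("lucene");
foreach (IndexedItem item in results)
{
    Console.WriteLine(item.Title + " -> " + item.Href);
}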
public void Test_Store_RAMDirectory() { Lucene.Net.Store.RAMDirectory ramDIR = new Lucene.Net.Store.RAMDirectory(); //Index 1 Doc Lucene.Net.Index.IndexWriter wr = new Lucene.Net.Index.IndexWriter(ramDIR, new Lucene.Net.Analysis.WhitespaceAnalyzer(), true); Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED)); wr.AddDocument(doc); wr.Close(); //now serialize it System.Runtime.Serialization.Formatters.Binary.BinaryFormatter serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(); System.IO.MemoryStream memoryStream = new System.IO.MemoryStream(); serializer.Serialize(memoryStream, ramDIR); //Close DIR ramDIR.Close(); ramDIR = null; //now deserialize memoryStream.Seek(0, System.IO.SeekOrigin.Begin); Lucene.Net.Store.RAMDirectory ramDIR2 = (Lucene.Net.Store.RAMDirectory)serializer.Deserialize(memoryStream); //Add 1 more doc wr = new Lucene.Net.Index.IndexWriter(ramDIR2, new Lucene.Net.Analysis.WhitespaceAnalyzer(), false); doc = new Lucene.Net.Documents.Document(); doc.Add(new Lucene.Net.Documents.Field("field1", "value1 value11", Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.TOKENIZED)); wr.AddDocument(doc); wr.Close(); //Search Lucene.Net.Search.IndexSearcher s = new Lucene.Net.Search.IndexSearcher(ramDIR2); Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("field1", new Lucene.Net.Analysis.Standard.StandardAnalyzer()); Lucene.Net.Search.Query q = qp.Parse("value1"); Lucene.Net.Search.TopDocs topDocs = s.Search(q, 100); s.Close(); Assert.AreEqual(topDocs.totalHits, 2, "See the issue: LUCENENET-174"); }
public virtual void TestGetValuesForIndexedDocument() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(MakeDocumentWithFields()); writer.Close(); Searcher searcher = new IndexSearcher(dir); // search for something that does exist Query query = new TermQuery(new Term("keyword", "test1")); // ensure that queries return expected results without DateFilter first ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(1, hits.Length); DoAssert(searcher.Doc(hits[0].doc), true); searcher.Close(); }
private static void terminology(string languageModelFile, string inputFile, bool keepIntermedyaryFiles, string lang, bool alreadyProcessed) { List <string> terms = new List <string>(); //HashSet<string> generalTerms = new HashSet<string>(); //if (lang == "ro") //{ // generalTerms = DataStructReader.readHashSet("gtRo.txt", Encoding.UTF8, 0, '\t', true, null); //} //else if (lang == "en") //{ // generalTerms = DataStructReader.readHashSet("gtEn.txt", Encoding.UTF8, 0, '\t', true, null); //} Dictionary <string, double> ncounts = DataStructReader.readDictionaryD(languageModelFile, Encoding.UTF8, 0, 1, '\t', false, null, null); if (ncounts.Count == 0) { Console.WriteLine("Language Model Missing... Press key for aborting!"); Console.ReadLine(); } else { Dictionary <string, double> userCounts = new Dictionary <string, double>(); double total = 0; if (!File.Exists(inputFile)) { Console.WriteLine("Input File doesn't exist... Press key for aborting!"); Console.ReadLine(); } else { Dictionary <string, string> fileCorrespondences = new Dictionary <string, string>(); string line = ""; StreamReader rdr = new StreamReader(inputFile, Encoding.UTF8); while ((line = rdr.ReadLine()) != null) { string[] parts = line.Trim().Split('\t'); if (!fileCorrespondences.ContainsKey(parts[0])) { fileCorrespondences.Add(parts[0], parts[1]); } } string[] files = fileCorrespondences.Keys.ToArray(); Dictionary <string, Dictionary <string, int> > singleOccurencesFirst = new Dictionary <string, Dictionary <string, int> >(); StreamWriter wrtProcessed = new StreamWriter("_preprocessed", false, Encoding.UTF8); wrtProcessed.AutoFlush = true; foreach (string file in files) { if (alreadyProcessed) { Console.Write("\nReading file: {0}", file); } else { Console.WriteLine("\nProcessing file: {0}", file); } getUserCounts(ref userCounts, ref singleOccurencesFirst, file, wrtProcessed, ref total, lang, alreadyProcessed); //Console.WriteLine(" ... done!"); } wrtProcessed.Close(); Console.Write("Extracting single word terms"); foreach (string key in userCounts.Keys.ToArray()) { if (userCounts[key] < 2 /*|| generalTerms.Contains(key)*/) { userCounts.Remove(key); } else { userCounts[key] = userCounts[key] / total; } } Dictionary <string, List <string> > singleOccurences = getSingle(singleOccurencesFirst); Dictionary <string, double> results = new Dictionary <string, double>(); foreach (string word in userCounts.Keys) { double newVal = 0; if (ncounts.ContainsKey(word)) { newVal = userCounts[word] / ncounts[word]; } else { newVal = userCounts[word] / ncounts["_dummy_"]; } results.Add(word, newVal); } string[] keys = results.Keys.ToArray(); double[] values = results.Values.ToArray(); Array.Sort(values, keys); StreamWriter wrt = new StreamWriter("_monoTerms", false, Encoding.UTF8); wrt.AutoFlush = true; for (int i = keys.Length - 1; i >= 0; i--) { wrt.WriteLine("{0}\t{1}", keys[i], values[i]); } wrt.Close(); Console.WriteLine(" ... done!"); Console.Write("Extracting multi word terms"); ColocationExtractor ce = new ColocationExtractor(); Dictionary <string, List <string> > multiOccurences = new Dictionary <string, List <string> >(); if (ce.extractCollocations("_preprocessed", "_multiTerms")) { Console.WriteLine(" ... done!"); Console.Write("Create index for extracting exact occurences"); if (!Directory.Exists("_index")) { Directory.CreateDirectory("_index"); } ce.indexText("_preprocessed", "_index"); Console.WriteLine(" ... 
done!"); Console.Write("Search for exact occurences"); Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher("_index"); multiOccurences = retriveOccurences(ce, searcher, "_multiTerms"); Console.WriteLine(" ... done!"); searcher.Close(); string[] filesToDel = Directory.GetFiles("_index"); foreach (string f in filesToDel) { File.Delete(f); } Directory.Delete("_index"); } else { Console.WriteLine(" ... done! - no multi word terms found!"); } Console.Write("Retrieving terminology"); terms = extractTerminology("_monoTerms", "_multiTerms", singleOccurences, multiOccurences); if (keepIntermedyaryFiles) { StreamWriter wrtT = new StreamWriter("_terminology", false, Encoding.UTF8); wrtT.AutoFlush = true; foreach (string term in terms) { wrtT.WriteLine(term); } wrtT.Close(); } Console.WriteLine(" ... done!"); HashSet <string> mono = new HashSet <string>(); Dictionary <string, HashSet <string> > multi = new Dictionary <string, HashSet <string> >(); HashSet <string> multiOrg = new HashSet <string>(); getTerms(terms, ref mono, ref multi, ref multiOrg); markTerms(lang, fileCorrespondences, mono, multi, multiOrg, alreadyProcessed); if (!keepIntermedyaryFiles) { File.Delete("_preprocessed"); File.Delete("_monoTerms"); File.Delete("_multiTerms"); } } } }
public static void Main(System.String[] args) { try { Searcher searcher = new IndexSearcher(@"index"); Analyzer analyzer = new StandardAnalyzer(); System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding); while (true) { System.Console.Out.Write("Query: "); System.String line = in_Renamed.ReadLine(); if (line.Length == -1) { break; } Query query = QueryParser.Parse(line, "contents", analyzer); System.Console.Out.WriteLine("Searching for: " + query.ToString("contents")); Hits hits = searcher.Search(query); System.Console.Out.WriteLine(hits.Length() + " total matching documents"); int HITS_PER_PAGE = 10; for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE) { int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE); for (int i = start; i < end; i++) { Document doc = hits.Doc(i); System.String path = doc.Get("path"); if (path != null) { System.Console.Out.WriteLine(i + ". " + path); } else { System.String url = doc.Get("url"); if (url != null) { System.Console.Out.WriteLine(i + ". " + url); System.Console.Out.WriteLine(" - " + doc.Get("title")); } else { System.Console.Out.WriteLine(i + ". " + "No path nor URL for this document"); } } } if (hits.Length() > end) { System.Console.Out.Write("more (y/n) ? "); line = in_Renamed.ReadLine(); if (line.Length == 0 || line[0] == 'n') { break; } } } } searcher.Close(); } catch (System.Exception e) { System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message); } }
public virtual void TestMmapIndex() { Assert.Ignore("Need to port tests, but we don't really support MMapDirectories anyway"); FSDirectory storeDirectory; storeDirectory = new MMapDirectory(new System.IO.DirectoryInfo(storePathname), null); // plan to add a set of useful stopwords, consider changing some of the // interior filters. StandardAnalyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT, Support.Compatibility.SetFactory.CreateHashSet<string>()); // TODO: something about lock timeouts and leftover locks. IndexWriter writer = new IndexWriter(storeDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); IndexSearcher searcher = new IndexSearcher(storeDirectory, true); for (int dx = 0; dx < 1000; dx++) { System.String f = RandomField(); Document doc = new Document(); doc.Add(new Field("data", f, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } searcher.Close(); writer.Close(); RmDir(new System.IO.FileInfo(storePathname)); }
public virtual void TestCommitOnClose() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); for (int i = 0; i < 14; i++) { AddDoc(writer); } writer.Close(); Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "first number of hits"); searcher.Close(); IndexReader reader = IndexReader.Open(dir); writer = new IndexWriter(dir, false, new WhitespaceAnalyzer()); for (int i = 0; i < 3; i++) { for (int j = 0; j < 11; j++) { AddDoc(writer); } searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled"); searcher.Close(); Assert.IsTrue(reader.IsCurrent(), "reader should have still been current"); } // Now, close the writer: writer.Close(); Assert.IsFalse(reader.IsCurrent(), "reader should not be current now"); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(47, hits.Length(), "reader did not see changes after writer was closed"); searcher.Close(); }
public virtual void searchIndex(System.String dirName, System.String oldName) { //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer()); //Query query = parser.parse("handle:1"); dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName)); IndexSearcher searcher = new IndexSearcher(dir, true); IndexReader reader = searcher.IndexReader; _TestUtil.CheckIndex(dir); for (int i = 0; i < 35; i++) { if (!reader.IsDeleted(i)) { Document d = reader.Document(i); var fields = d.GetFields(); if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22.")) { if (d.GetField("content3") == null) { int numFields = oldName.StartsWith("29.") ? 7 : 5; Assert.AreEqual(numFields, fields.Count); Field f = d.GetField("id"); Assert.AreEqual("" + i, f.StringValue); f = (Field)d.GetField("utf8"); Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue); f = (Field)d.GetField("autf8"); Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue); f = (Field)d.GetField("content2"); Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue); f = (Field)d.GetField("fie\u2C77ld"); Assert.AreEqual("field with non-ascii name", f.StringValue); } } } // Only ID 7 is deleted else { Assert.AreEqual(7, i); } } ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; // First document should be #21 since it's norm was // increased: Document d2 = searcher.Doc(hits[0].Doc); Assert.AreEqual("21", d2.Get("id"), "didn't get the right document first"); TestHits(hits, 34, searcher.IndexReader); if (!oldName.StartsWith("19.") && !oldName.StartsWith("20.") && !oldName.StartsWith("21.") && !oldName.StartsWith("22.")) { // Test on indices >= 2.3 hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs; Assert.AreEqual(34, hits.Length); } searcher.Close(); dir.Close(); }
public void TestNegativePositions() { SinkTokenizer tokens = new SinkTokenizer(); Token t = new Token(); t.SetTermText("a"); t.SetPositionIncrement(0); tokens.Add(t); t.SetTermText("b"); t.SetPositionIncrement(1); tokens.Add(t); t.SetTermText("c"); tokens.Add(t); MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true); Document doc = new Document(); doc.Add(new Field("field", tokens)); w.AddDocument(doc); w.Close(); IndexSearcher s = new IndexSearcher(dir); PhraseQuery pq = new PhraseQuery(); pq.Add(new Term("field", "a")); pq.Add(new Term("field", "b")); pq.Add(new Term("field", "c")); Hits hits = s.Search(pq); Assert.AreEqual(1, hits.Length()); Query q = new SpanTermQuery(new Term("field", "a")); hits = s.Search(q); Assert.AreEqual(1, hits.Length()); TermPositions tps = s.GetIndexReader().TermPositions(new Term("field", "a")); Assert.IsTrue(tps.Next()); Assert.AreEqual(1, tps.Freq()); Assert.AreEqual(-1, tps.NextPosition()); Assert.IsTrue(_TestUtil.CheckIndex(dir)); s.Close(); dir.Close(); }
public virtual void TestAddIndexOnDiskFull() { int START_COUNT = 57; int NUM_DIR = 50; int END_COUNT = START_COUNT + NUM_DIR * 25; bool debug = false; // Build up a bunch of dirs that have indexes which we // will then merge together by calling addIndexes(*): Directory[] dirs = new Directory[NUM_DIR]; long inputDiskUsage = 0; for (int i = 0; i < NUM_DIR; i++) { dirs[i] = new RAMDirectory(); IndexWriter writer = new IndexWriter(dirs[i], new WhitespaceAnalyzer(), true); for (int j = 0; j < 25; j++) { AddDocWithIndex(writer, 25 * i + j); } writer.Close(); System.String[] files = dirs[i].List(); for (int j = 0; j < files.Length; j++) { inputDiskUsage += dirs[i].FileLength(files[j]); } } // Now, build a starting index that has START_COUNT docs. We // will then try to addIndexes into a copy of this: RAMDirectory startDir = new RAMDirectory(); IndexWriter writer2 = new IndexWriter(startDir, new WhitespaceAnalyzer(), true); for (int j = 0; j < START_COUNT; j++) { AddDocWithIndex(writer2, j); } writer2.Close(); // Make sure starting index seems to be working properly: Term searchTerm = new Term("content", "aaa"); IndexReader reader = IndexReader.Open(startDir); Assert.AreEqual(57, reader.DocFreq(searchTerm), "first docFreq"); IndexSearcher searcher = new IndexSearcher(reader); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(57, hits.Length(), "first number of hits"); searcher.Close(); reader.Close(); // Iterate with larger and larger amounts of free // disk space. With little free disk space, // addIndexes will certainly run out of space & // fail. Verify that when this happens, index is // not corrupt and index in fact has added no // documents. Then, we increase disk space by 2000 // bytes each iteration. At some point there is // enough free disk space and addIndexes should // succeed and index should show all documents were // added. // String[] files = startDir.list(); long diskUsage = startDir.SizeInBytes(); long startDiskUsage = 0; System.String[] files2 = startDir.List(); for (int i = 0; i < files2.Length; i++) { startDiskUsage += startDir.FileLength(files2[i]); } for (int iter = 0; iter < 6; iter++) { if (debug) System.Console.Out.WriteLine("TEST: iter=" + iter); // Start with 100 bytes more than we are currently using: long diskFree = diskUsage + 100; bool autoCommit = iter % 2 == 0; int method = iter / 2; bool success = false; bool done = false; System.String methodName; if (0 == method) { methodName = "addIndexes(Directory[])"; } else if (1 == method) { methodName = "addIndexes(IndexReader[])"; } else { methodName = "addIndexesNoOptimize(Directory[])"; } while (!done) { // Make a new dir that will enforce disk usage: MockRAMDirectory dir = new MockRAMDirectory(startDir); writer2 = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false); System.IO.IOException err = null; MergeScheduler ms = writer2.GetMergeScheduler(); for (int x = 0; x < 2; x++) { if (ms is ConcurrentMergeScheduler) // This test intentionally produces exceptions // in the threads that CMS launches; we don't // want to pollute test output with these. 
if (0 == x) ((ConcurrentMergeScheduler)ms).SetSuppressExceptions_ForNUnitTest(); else ((ConcurrentMergeScheduler) ms).ClearSuppressExceptions_ForNUnitTest(); // Two loops: first time, limit disk space & // throw random IOExceptions; second time, no // disk space limit: double rate = 0.05; double diskRatio = ((double) diskFree) / diskUsage; long thisDiskFree; System.String testName = null; if (0 == x) { thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (debug) testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes autoCommit=" + autoCommit; } else { thisDiskFree = 0; rate = 0.0; if (debug) testName = "disk full test " + methodName + " with unlimited disk space autoCommit=" + autoCommit; } if (debug) System.Console.Out.WriteLine("\ncycle: " + testName); dir.SetMaxSizeInBytes(thisDiskFree); dir.SetRandomIOExceptionRate(rate, diskFree); try { if (0 == method) { writer2.AddIndexes(dirs); } else if (1 == method) { IndexReader[] readers = new IndexReader[dirs.Length]; for (int i = 0; i < dirs.Length; i++) { readers[i] = IndexReader.Open(dirs[i]); } try { writer2.AddIndexes(readers); } finally { for (int i = 0; i < dirs.Length; i++) { readers[i].Close(); } } } else { writer2.AddIndexesNoOptimize(dirs); } success = true; if (debug) { System.Console.Out.WriteLine(" success!"); } if (0 == x) { done = true; } } catch (System.IO.IOException e) { success = false; err = e; if (debug) { System.Console.Out.WriteLine(" hit IOException: " + e); System.Console.Out.WriteLine(e.StackTrace); } if (1 == x) { System.Console.Out.WriteLine(e.StackTrace); Assert.Fail(methodName + " hit IOException after disk space was freed up"); } } // Make sure all threads from // ConcurrentMergeScheduler are done _TestUtil.SyncConcurrentMerges(writer2); if (autoCommit) { // Whether we succeeded or failed, check that // all un-referenced files were in fact // deleted (ie, we did not create garbage). // Only check this when autoCommit is true: // when it's false, it's expected that there // are unreferenced files (ie they won't be // referenced until the "commit on close"). 
// Just create a new IndexFileDeleter, have it // delete unreferenced files, then verify that // in fact no files were deleted: System.String successStr; if (success) { successStr = "success"; } else { successStr = "IOException"; } System.String message = methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes)"; AssertNoUnreferencedFiles(dir, message); } if (debug) { System.Console.Out.WriteLine(" now test readers"); } // Finally, verify index is not corrupt, and, if // we succeeded, we see all docs added, and if we // failed, we see either all docs or no docs added // (transactional semantics): try { reader = IndexReader.Open(dir); } catch (System.IO.IOException e) { System.Console.Out.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when creating IndexReader: " + e); } int result = reader.DocFreq(searchTerm); if (success) { if (autoCommit && result != END_COUNT) { Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT); } else if (!autoCommit && result != START_COUNT) { Assert.Fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " [autoCommit = false]"); } } else { // On hitting exception we still may have added // all docs: if (result != START_COUNT && result != END_COUNT) { System.Console.Out.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher = new IndexSearcher(reader); try { hits = searcher.Search(new TermQuery(searchTerm)); } catch (System.IO.IOException e) { System.Console.Out.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when searching: " + e); } int result2 = hits.Length(); if (success) { if (result2 != result) { Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } else { // On hitting exception we still may have added // all docs: if (result2 != result) { System.Console.Out.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result); } } searcher.Close(); reader.Close(); if (debug) { System.Console.Out.WriteLine(" count is " + result); } if (done || result == END_COUNT) { break; } } if (debug) { System.Console.Out.WriteLine(" start disk = " + startDiskUsage + "; input disk = " + inputDiskUsage + "; max used = " + dir.GetMaxUsedSizeInBytes()); } if (done) { // Javadocs state that temp free Directory space // required is at most 2X total input size of // indices so let's make sure: Assert.IsTrue( (dir.GetMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage), "max free Directory space required exceeded 1X the total input index sizes during " + methodName + ": max temp usage = " + (dir.GetMaxUsedSizeInBytes() - startDiskUsage) + " bytes; " + "starting disk usage = " + startDiskUsage + " bytes; " + "input index disk usage = " + inputDiskUsage + " bytes" ); } writer2.Close(); // Wait for all BG threads to finish else // dir.close() will throw IOException because // there are still open files _TestUtil.SyncConcurrentMerges(ms); dir.Close(); // Try again with 2000 more bytes of free space: diskFree += 2000; } } startDir.Close(); }
public virtual void TestEnablingNorms() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); // Enable norms for only 1 doc, pre flush for (int j = 0; j < 10; j++) { Document doc = new Document(); Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED); if (j != 8) { f.SetOmitNorms(true); } doc.Add(f); writer.AddDocument(doc); } writer.Close(); Term searchTerm = new Term("field", "aaa"); IndexSearcher searcher = new IndexSearcher(dir); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(10, hits.Length()); searcher.Close(); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); // Enable norms for only 1 doc, post flush for (int j = 0; j < 27; j++) { Document doc = new Document(); Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED); if (j != 26) { f.SetOmitNorms(true); } doc.Add(f); writer.AddDocument(doc); } writer.Close(); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(27, hits.Length()); searcher.Close(); IndexReader reader = IndexReader.Open(dir); reader.Close(); dir.Close(); }
public virtual void TestDiverseDocs() { RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetRAMBufferSizeMB(0.5); System.Random rand = new System.Random((System.Int32) 31415); for (int i = 0; i < 3; i++) { // First, docs where every term is unique (heavy on // Posting instances) for (int j = 0; j < 100; j++) { Document doc = new Document(); for (int k = 0; k < 100; k++) { doc.Add(new Field("field", System.Convert.ToString(rand.Next()), Field.Store.YES, Field.Index.TOKENIZED)); } writer.AddDocument(doc); } // Next, many single term docs where only one term // occurs (heavy on byte blocks) for (int j = 0; j < 100; j++) { Document doc = new Document(); doc.Add(new Field("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); } // Next, many single term docs where only one term // occurs but the terms are very long (heavy on // char[] arrays) for (int j = 0; j < 100; j++) { System.Text.StringBuilder b = new System.Text.StringBuilder(); System.String x = System.Convert.ToString(j) + "."; for (int k = 0; k < 1000; k++) b.Append(x); System.String longTerm = b.ToString(); Document doc = new Document(); doc.Add(new Field("field", longTerm, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); } } writer.Close(); IndexSearcher searcher = new IndexSearcher(dir); Hits hits = searcher.Search(new TermQuery(new Term("field", "aaa"))); Assert.AreEqual(300, hits.Length()); searcher.Close(); dir.Close(); }
public virtual void TestCommitOnCloseAbort() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); for (int i = 0; i < 14; i++) { AddDoc(writer); } writer.Close(); Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir); Hits hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "first number of hits"); searcher.Close(); writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false); writer.SetMaxBufferedDocs(10); for (int j = 0; j < 17; j++) { AddDoc(writer); } // Delete all docs: writer.DeleteDocuments(searchTerm); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled"); searcher.Close(); // Now, close the writer: writer.Abort(); AssertNoUnreferencedFiles(dir, "unreferenced files remain after abort()"); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "saw changes after writer.abort"); searcher.Close(); // Now make sure we can re-open the index, add docs, // and all is good: writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false); writer.SetMaxBufferedDocs(10); for (int i = 0; i < 12; i++) { for (int j = 0; j < 17; j++) { AddDoc(writer); } searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(14, hits.Length(), "reader incorrectly sees changes from writer with autoCommit disabled"); searcher.Close(); } writer.Close(); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(searchTerm)); Assert.AreEqual(218, hits.Length(), "didn't see changes after close"); searcher.Close(); dir.Close(); }
public static void Main(System.String[] args) { try { Searcher searcher = new IndexSearcher(@"index"); Analyzer analyzer = new StandardAnalyzer(); System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding); while (true) { System.Console.Out.Write("Query: "); System.String line = in_Renamed.ReadLine(); if (line.Length == - 1) break; Query query = QueryParser.Parse(line, "contents", analyzer); System.Console.Out.WriteLine("Searching for: " + query.ToString("contents")); Hits hits = searcher.Search(query); System.Console.Out.WriteLine(hits.Length() + " total matching documents"); int HITS_PER_PAGE = 10; for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE) { int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE); for (int i = start; i < end; i++) { Document doc = hits.Doc(i); System.String path = doc.Get("path"); if (path != null) { System.Console.Out.WriteLine(i + ". " + path); } else { System.String url = doc.Get("url"); if (url != null) { System.Console.Out.WriteLine(i + ". " + url); System.Console.Out.WriteLine(" - " + doc.Get("title")); } else { System.Console.Out.WriteLine(i + ". " + "No path nor URL for this document"); } } } if (hits.Length() > end) { System.Console.Out.Write("more (y/n) ? "); line = in_Renamed.ReadLine(); if (line.Length == 0 || line[0] == 'n') break; } } } searcher.Close(); } catch (System.Exception e) { System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message); } }
/// <summary> Make sure if modifier tries to commit but hits disk full that modifier /// remains consistent and usable. Similar to TestIndexReader.testDiskFull(). /// </summary> private void TestOperationsOnDiskFull(bool updates) { bool debug = false; Term searchTerm = new Term("content", "aaa"); int START_COUNT = 157; int END_COUNT = 144; for (int pass = 0; pass < 2; pass++) { bool autoCommit = (0 == pass); // First build up a starting index: MockRAMDirectory startDir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(startDir, autoCommit, new WhitespaceAnalyzer(), true); for (int i = 0; i < 157; i++) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d); } writer.Close(); long diskUsage = startDir.SizeInBytes(); long diskFree = diskUsage + 10; System.IO.IOException err = null; bool done = false; // Iterate w/ ever increasing free disk space: while (!done) { MockRAMDirectory dir = new MockRAMDirectory(startDir); dir.SetPreventDoubleWrite(false); IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer()); modifier.SetMaxBufferedDocs(1000); // use flush or close modifier.SetMaxBufferedDeleteTerms(1000); // use flush or close // For each disk size, first try to commit against // dir that will hit random IOExceptions & disk // full; after, give it infinite disk space & turn // off random IOExceptions & retry w/ same reader: bool success = false; for (int x = 0; x < 2; x++) { double rate = 0.1; double diskRatio = ((double) diskFree) / diskUsage; long thisDiskFree; System.String testName; if (0 == x) { thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (debug) { System.Console.Out.WriteLine("\ncycle: " + diskFree + " bytes"); } testName = "disk full during reader.close() @ " + thisDiskFree + " bytes"; } else { thisDiskFree = 0; rate = 0.0; if (debug) { System.Console.Out.WriteLine("\ncycle: same writer: unlimited disk space"); } testName = "reader re-use after disk full"; } dir.SetMaxSizeInBytes(thisDiskFree); dir.SetRandomIOExceptionRate(rate, diskFree); try { if (0 == x) { int docId = 12; for (int i = 0; i < 13; i++) { if (updates) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED)); modifier.UpdateDocument(new Term("id", System.Convert.ToString(docId)), d); } else { // deletes modifier.DeleteDocuments(new Term("id", System.Convert.ToString(docId))); // modifier.setNorm(docId, "contents", (float)2.0); } docId += 12; } } modifier.Close(); success = true; if (0 == x) { done = true; } } catch (System.IO.IOException e) { if (debug) { System.Console.Out.WriteLine(" hit IOException: " + e); System.Console.Out.WriteLine(e.StackTrace); } err = e; if (1 == x) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + " hit IOException after disk space was freed up"); } } // If the close() succeeded, make sure there are // no unreferenced files. 
if (success) { Lucene.Net.Util._TestUtil.CheckIndex(dir); TestIndexWriter.AssertNoUnreferencedFiles(dir, "after writer.close"); } // Finally, verify index is not corrupt, and, if // we succeeded, we see all docs changed, and if // we failed, we see either all docs or no docs // changed (transactional semantics): IndexReader newReader = null; try { newReader = IndexReader.Open(dir); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + ":exception when creating IndexReader after disk full during close: " + e); } IndexSearcher searcher = new IndexSearcher(newReader); ScoreDoc[] hits = null; try { hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when searching: " + e); } int result2 = hits.Length; if (success) { if (x == 0 && result2 != END_COUNT) { Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT); } else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT) { // It's possible that the first exception was // "recoverable" wrt pending deletes, in which // case the pending deletes are retained and // then re-flushing (with plenty of disk // space) will succeed in flushing the // deletes: Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result2 != START_COUNT && result2 != END_COUNT) { System.Console.Error.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher.Close(); newReader.Close(); if (result2 == END_COUNT) { break; } } dir.Close(); // Try again with 10 more bytes of free space: diskFree += 10; } } }
/// <summary> Make sure if modifier tries to commit but hits disk full that modifier /// remains consistent and usable. Similar to TestIndexReader.testDiskFull(). /// </summary> private void TestOperationsOnDiskFull(bool updates) { bool debug = false; Term searchTerm = new Term("content", "aaa"); int START_COUNT = 157; int END_COUNT = 144; // First build up a starting index: MockRAMDirectory startDir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null); for (int i = 0; i < 157; i++) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d, null); } writer.Close(); long diskUsage = startDir.SizeInBytes(); long diskFree = diskUsage + 10; System.IO.IOException err = null; bool done = false; // Iterate w/ ever increasing free disk space: while (!done) { MockRAMDirectory dir = new MockRAMDirectory(startDir); dir.SetPreventDoubleWrite(false); IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null); modifier.SetMaxBufferedDocs(1000); // use flush or close modifier.SetMaxBufferedDeleteTerms(1000); // use flush or close // For each disk size, first try to commit against // dir that will hit random IOExceptions & disk // full; after, give it infinite disk space & turn // off random IOExceptions & retry w/ same reader: bool success = false; for (int x = 0; x < 2; x++) { double rate = 0.1; double diskRatio = ((double)diskFree) / diskUsage; long thisDiskFree; System.String testName; if (0 == x) { thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (debug) { System.Console.Out.WriteLine("\ncycle: " + diskFree + " bytes"); } testName = "disk full during reader.close() @ " + thisDiskFree + " bytes"; } else { thisDiskFree = 0; rate = 0.0; if (debug) { System.Console.Out.WriteLine("\ncycle: same writer: unlimited disk space"); } testName = "reader re-use after disk full"; } dir.SetMaxSizeInBytes(thisDiskFree); dir.SetRandomIOExceptionRate(rate, diskFree); try { if (0 == x) { int docId = 12; for (int i = 0; i < 13; i++) { if (updates) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED)); modifier.UpdateDocument(new Term("id", System.Convert.ToString(docId)), d, null); } else { // deletes modifier.DeleteDocuments(null, new Term("id", System.Convert.ToString(docId))); // modifier.setNorm(docId, "contents", (float)2.0); } docId += 12; } } modifier.Close(); success = true; if (0 == x) { done = true; } } catch (System.IO.IOException e) { if (debug) { System.Console.Out.WriteLine(" hit IOException: " + e); System.Console.Out.WriteLine(e.StackTrace); } err = e; if (1 == x) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + " hit IOException after disk space was freed up"); } } // If the close() succeeded, make sure there are // no unreferenced files. 
if (success) { Lucene.Net.Util._TestUtil.CheckIndex(dir); TestIndexWriter.AssertNoUnreferencedFiles(dir, "after writer.close"); } // Finally, verify index is not corrupt, and, if // we succeeded, we see all docs changed, and if // we failed, we see either all docs or no docs // changed (transactional semantics): IndexReader newReader = null; try { newReader = IndexReader.Open((Directory)dir, true, null); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + ":exception when creating IndexReader after disk full during close: " + e); } IndexSearcher searcher = new IndexSearcher(newReader); ScoreDoc[] hits = null; try { hits = searcher.Search(new TermQuery(searchTerm), null, 1000, null).ScoreDocs; } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when searching: " + e); } int result2 = hits.Length; if (success) { if (x == 0 && result2 != END_COUNT) { Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT); } else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT) { // It's possible that the first exception was // "recoverable" wrt pending deletes, in which // case the pending deletes are retained and // then re-flushing (with plenty of disk // space) will succeed in flushing the // deletes: Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result2 != START_COUNT && result2 != END_COUNT) { System.Console.Error.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT); } } searcher.Close(); newReader.Close(); if (result2 == END_COUNT) { break; } } dir.Close(); // Try again with 10 more bytes of free space: diskFree += 10; } }
public virtual void TestDiskFull() { bool debug = false; Term searchTerm = new Term("content", "aaa"); int START_COUNT = 157; int END_COUNT = 144; // First build up a starting index: RAMDirectory startDir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (int i = 0; i < 157; i++) { Document d = new Document(); d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); d.Add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED)); writer.AddDocument(d); } writer.Close(); long diskUsage = startDir.SizeInBytes(); long diskFree = diskUsage + 100; System.IO.IOException err = null; bool done = false; // Iterate w/ ever increasing free disk space: while (!done) { MockRAMDirectory dir = new MockRAMDirectory(startDir); // If IndexReader hits disk full, it can write to // the same files again. dir.SetPreventDoubleWrite(false); IndexReader reader = IndexReader.Open(dir, false); // For each disk size, first try to commit against // dir that will hit random IOExceptions & disk // full; after, give it infinite disk space & turn // off random IOExceptions & retry w/ same reader: bool success = false; for (int x = 0; x < 2; x++) { double rate = 0.05; double diskRatio = ((double) diskFree) / diskUsage; long thisDiskFree; System.String testName; if (0 == x) { thisDiskFree = diskFree; if (diskRatio >= 2.0) { rate /= 2; } if (diskRatio >= 4.0) { rate /= 2; } if (diskRatio >= 6.0) { rate = 0.0; } if (debug) { System.Console.Out.WriteLine("\ncycle: " + diskFree + " bytes"); } testName = "disk full during reader.close() @ " + thisDiskFree + " bytes"; } else { thisDiskFree = 0; rate = 0.0; if (debug) { System.Console.Out.WriteLine("\ncycle: same writer: unlimited disk space"); } testName = "reader re-use after disk full"; } dir.SetMaxSizeInBytes(thisDiskFree); dir.SetRandomIOExceptionRate(rate, diskFree); try { if (0 == x) { int docId = 12; for (int i = 0; i < 13; i++) { reader.DeleteDocument(docId); reader.SetNorm(docId, "contents", (float) 2.0); docId += 12; } } reader.Close(); success = true; if (0 == x) { done = true; } } catch (System.IO.IOException e) { if (debug) { System.Console.Out.WriteLine(" hit IOException: " + e); System.Console.Out.WriteLine(e.StackTrace); } err = e; if (1 == x) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + " hit IOException after disk space was freed up"); } } // Whether we succeeded or failed, check that all // un-referenced files were in fact deleted (ie, // we did not create garbage). 
Just create a // new IndexFileDeleter, have it delete // unreferenced files, then verify that in fact // no files were deleted: System.String[] startFiles = dir.ListAll(); SegmentInfos infos = new SegmentInfos(); infos.Read(dir); new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null,null); System.String[] endFiles = dir.ListAll(); System.Array.Sort(startFiles); System.Array.Sort(endFiles); //for(int i=0;i<startFiles.length;i++) { // System.out.println(" startFiles: " + i + ": " + startFiles[i]); //} if (!CollectionsHelper.Equals(startFiles, endFiles)) { System.String successStr; if (success) { successStr = "success"; } else { successStr = "IOException"; System.Console.Error.WriteLine(err.StackTrace); } Assert.Fail("reader.close() failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes): before delete:\n " + ArrayToString(startFiles) + "\n after delete:\n " + ArrayToString(endFiles)); } // Finally, verify index is not corrupt, and, if // we succeeded, we see all docs changed, and if // we failed, we see either all docs or no docs // changed (transactional semantics): IndexReader newReader = null; try { newReader = IndexReader.Open(dir, false); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + ":exception when creating IndexReader after disk full during close: " + e); } /* int result = newReader.docFreq(searchTerm); if (success) { if (result != END_COUNT) { fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result != START_COUNT && result != END_COUNT) { err.printStackTrace(); fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); } } */ IndexSearcher searcher = new IndexSearcher(newReader); ScoreDoc[] hits = null; try { hits = searcher.Search(new TermQuery(searchTerm), null, 1000).ScoreDocs; } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.Fail(testName + ": exception when searching: " + e); } int result2 = hits.Length; if (success) { if (result2 != END_COUNT) { Assert.Fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT); } } else { // On hitting exception we still may have added // all docs: if (result2 != START_COUNT && result2 != END_COUNT) { System.Console.Error.WriteLine(err.StackTrace); Assert.Fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT); } } searcher.Close(); newReader.Close(); if (result2 == END_COUNT) { break; } } dir.Close(); // Try again with 10 more bytes of free space: diskFree += 10; } startDir.Close(); }
/* Open pre-lockless index, add docs, do a delete & * setNorm, and search */ public virtual void ChangeIndexWithAdds(System.String dirName) { System.String origDirName = dirName; dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName)); // open writer IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); // add 10 docs for (int i = 0; i < 10; i++) { AddDoc(writer, 35 + i); } // make sure writer sees right total -- writer seems not to know about deletes in .del? int expected; if (Compare(origDirName, "24") < 0) { expected = 45; } else { expected = 46; } Assert.AreEqual(expected, writer.MaxDoc(), "wrong doc count"); writer.Close(); // make sure searching sees right # hits IndexSearcher searcher = new IndexSearcher(dir, true); ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Document d = searcher.Doc(hits[0].Doc); Assert.AreEqual("21", d.Get("id"), "wrong first document"); TestHits(hits, 44, searcher.IndexReader); searcher.Close(); // make sure we can do delete & setNorm against this // pre-lockless segment: IndexReader reader = IndexReader.Open(dir, false); Term searchTerm = new Term("id", "6"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "wrong delete count"); reader.SetNorm(22, "content", (float)2.0); reader.Close(); // make sure they "took": searcher = new IndexSearcher(dir, true); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(43, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].Doc); Assert.AreEqual("22", d.Get("id"), "wrong first document"); TestHits(hits, 43, searcher.IndexReader); searcher.Close(); // optimize writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED); writer.Optimize(); writer.Close(); searcher = new IndexSearcher(dir, true); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs; Assert.AreEqual(43, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].Doc); TestHits(hits, 43, searcher.IndexReader); Assert.AreEqual("22", d.Get("id"), "wrong first document"); searcher.Close(); dir.Close(); }
public virtual void TestSetBufferSize() { System.IO.FileInfo indexDir = new System.IO.FileInfo(System.IO.Path.Combine(SupportClass.AppSettings.Get("tempDir", ""), "testSetBufferSize")); MockFSDirectory dir = new MockFSDirectory(indexDir); try { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetUseCompoundFile(false); for (int i = 0; i < 37; i++) { Document doc = new Document(); doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.TOKENIZED)); writer.AddDocument(doc); } writer.Close(); dir.allIndexInputs.Clear(); IndexReader reader = IndexReader.Open(dir); Term aaa = new Term("content", "aaa"); Term bbb = new Term("content", "bbb"); Term ccc = new Term("content", "ccc"); Assert.AreEqual(reader.DocFreq(ccc), 37); reader.DeleteDocument(0); Assert.AreEqual(reader.DocFreq(aaa), 37); dir.TweakBufferSizes(); reader.DeleteDocument(4); Assert.AreEqual(reader.DocFreq(bbb), 37); dir.TweakBufferSizes(); IndexSearcher searcher = new IndexSearcher(reader); Hits hits = searcher.Search(new TermQuery(bbb)); dir.TweakBufferSizes(); Assert.AreEqual(35, hits.Length()); dir.TweakBufferSizes(); hits = searcher.Search(new TermQuery(new Term("id", "33"))); dir.TweakBufferSizes(); Assert.AreEqual(1, hits.Length()); hits = searcher.Search(new TermQuery(aaa)); dir.TweakBufferSizes(); Assert.AreEqual(35, hits.Length()); searcher.Close(); reader.Close(); } finally { _TestUtil.RmDir(indexDir); } }
private int GetHitCount(Directory dir, Term term) { IndexSearcher searcher = new IndexSearcher(dir); int hitCount = searcher.Search(new TermQuery(term), null, 1000).TotalHits; searcher.Close(); return hitCount; }
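GetHitCount above leaks the searcher if Search throws. A slightly more defensive variant, shown here only as a sketch, wraps the call in try/finally so Close() always runs:

// Sketch: same hit-count helper, but Close() is guaranteed even if Search throws.
private int GetHitCountSafe(Directory dir, Term term)
{
    IndexSearcher searcher = new IndexSearcher(dir);
    try
    {
        return searcher.Search(new TermQuery(term), null, 1000).TotalHits;
    }
    finally
    {
        searcher.Close();
    }
}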
public virtual void TestKeepLastNDeletionPolicyWithReader() { int N = 10; for (int pass = 0; pass < 2; pass++) { bool useCompoundFile = (pass % 2) != 0; KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, policy, IndexWriter.MaxFieldLength.UNLIMITED); writer.UseCompoundFile = useCompoundFile; writer.Close(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED); writer.UseCompoundFile = useCompoundFile; for (int j = 0; j < 17; j++) { AddDoc(writer); } // this is a commit writer.Close(); IndexReader reader = IndexReader.Open(dir, policy, false); reader.DeleteDocument(3 * i + 1); reader.SetNorm(4 * i + 1, "content", 2.0F); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(16 * (1 + i), hits.Length); // this is a commit reader.Close(); searcher.Close(); } writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED); writer.UseCompoundFile = useCompoundFile; writer.Optimize(); // this is a commit writer.Close(); Assert.AreEqual(2 * (N + 2), policy.numOnInit); Assert.AreEqual(2 * (N + 2) - 1, policy.numOnCommit); IndexSearcher searcher2 = new IndexSearcher(dir, false); ScoreDoc[] hits2 = searcher2.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(176, hits2.Length); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); int expectedCount = 176; for (int i = 0; i < N + 1; i++) { try { IndexReader reader = IndexReader.Open(dir, true); // Work backwards in commits on what the expected // count should be. searcher2 = new IndexSearcher(reader); hits2 = searcher2.Search(query, null, 1000).ScoreDocs; if (i > 1) { if (i % 2 == 0) { expectedCount += 1; } else { expectedCount -= 17; } } Assert.AreEqual(expectedCount, hits2.Length); searcher2.Close(); reader.Close(); if (i == N) { Assert.Fail("should have failed on commits before last 5"); } } catch (System.IO.IOException e) { if (i != N) { throw e; } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Close(); } }
// Apply buffered delete terms, queries and docIDs to the // provided reader private bool ApplyDeletes(IndexReader reader, int docIDStart) { lock (this) { int docEnd = docIDStart + reader.MaxDoc(); bool any = false; System.Diagnostics.Debug.Assert(CheckDeleteTerm(null)); // Delete by term //System.Collections.IEnumerator iter = new System.Collections.Hashtable(deletesFlushed.terms).GetEnumerator(); System.Collections.IEnumerator iter = deletesFlushed.terms.GetEnumerator(); TermDocs docs = reader.TermDocs(); try { while (iter.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current; Term term = (Term) entry.Key; // LUCENE-2086: we should be iterating a TreeMap, // here, so terms better be in order: System.Diagnostics.Debug.Assert(CheckDeleteTerm(term)); docs.Seek(term); int limit = ((BufferedDeletes.Num) entry.Value).GetNum(); while (docs.Next()) { int docID = docs.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } } finally { docs.Close(); } // Delete by docID iter = deletesFlushed.docIDs.GetEnumerator(); while (iter.MoveNext()) { int docID = ((System.Int32) iter.Current); if (docID >= docIDStart && docID < docEnd) { reader.DeleteDocument(docID - docIDStart); any = true; } } // Delete by query IndexSearcher searcher = new IndexSearcher(reader); iter = new System.Collections.Hashtable(deletesFlushed.queries).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current; Query query = (Query) entry.Key; int limit = ((System.Int32) entry.Value); Weight weight = query.Weight(searcher); Scorer scorer = weight.Scorer(reader, true, false); if (scorer != null) { while (true) { int doc = scorer.NextDoc(); if (((long) docIDStart) + doc >= limit) break; reader.DeleteDocument(doc); any = true; } } } searcher.Close(); return any; } }
// Apply buffered delete terms, queries and docIDs to the // provided reader private bool ApplyDeletes(IndexReader reader, int docIDStart) { lock (this) { int docEnd = docIDStart + reader.MaxDoc(); bool any = false; // Delete by term IEnumerator<KeyValuePair<object, object>> iter = deletesFlushed.terms.GetEnumerator(); while (iter.MoveNext()) { KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current; Term term = (Term)entry.Key; TermDocs docs = reader.TermDocs(term); if (docs != null) { int limit = ((BufferedDeletes.Num)entry.Value).GetNum(); try { while (docs.Next()) { int docID = docs.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } finally { docs.Close(); } } } // Delete by docID IEnumerator<object> iter2 = deletesFlushed.docIDs.GetEnumerator(); while (iter2.MoveNext()) { int docID = (int)iter2.Current; if (docID >= docIDStart && docID < docEnd) { reader.DeleteDocument(docID - docIDStart); any = true; } } // Delete by query IndexSearcher searcher = new IndexSearcher(reader); iter = deletesFlushed.queries.GetEnumerator(); while (iter.MoveNext()) { KeyValuePair<object, object> entry = (KeyValuePair<object, object>)iter.Current; Query query = (Query)entry.Key; int limit = (int)entry.Value; Weight weight = query.Weight(searcher); Scorer scorer = weight.Scorer(reader); while (scorer.Next()) { int docID = scorer.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } searcher.Close(); return any; } }
public virtual void TestKeepLastNDeletionPolicyWithCreates() { int N = 10; for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); writer.Close(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int j = 0; j < 17; j++) { AddDoc(writer); } // this is a commit when autoCommit=false: writer.Close(); IndexReader reader = IndexReader.Open(dir, policy); reader.DeleteDocument(3); reader.SetNorm(5, "content", 2.0F); IndexSearcher searcher = new IndexSearcher(reader); Hits hits = searcher.Search(query); Assert.AreEqual(16, hits.Length()); // this is a commit when autoCommit=false: reader.Close(); searcher.Close(); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); // This will not commit: there are no changes // pending because we opened for "create": writer.Close(); } Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit); if (autoCommit) { Assert.IsTrue(policy.numOnCommit > 3 * (N + 1) - 1); } else { Assert.AreEqual(2 * (N + 1), policy.numOnCommit); } IndexSearcher searcher2 = new IndexSearcher(dir); Hits hits2 = searcher2.Search(query); Assert.AreEqual(0, hits2.Length()); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); int expectedCount = 0; for (int i = 0; i < N + 1; i++) { try { IndexReader reader = IndexReader.Open(dir); // Work backwards in commits on what the expected // count should be. Only check this in the // autoCommit false case: if (!autoCommit) { searcher2 = new IndexSearcher(reader); hits2 = searcher2.Search(query); Assert.AreEqual(expectedCount, hits2.Length()); searcher2.Close(); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } } reader.Close(); if (i == N) { Assert.Fail("should have failed on commits before last " + N); } } catch (System.IO.IOException e) { if (i != N) { throw e; } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Close(); } }
/* Open pre-lockless index, add docs, do a delete & * setNorm, and search */ public virtual void ChangeIndexNoAdds(System.String dirName, bool autoCommit) { dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.FileInfo(dirName)); // make sure searching sees right # hits IndexSearcher searcher = new IndexSearcher(dir); ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; Assert.AreEqual(34, hits.Length, "wrong number of hits"); Document d = searcher.Doc(hits[0].doc); Assert.AreEqual("21", d.Get("id"), "wrong first document"); searcher.Close(); // make sure we can do a delete & setNorm against this // pre-lockless segment: IndexReader reader = IndexReader.Open(dir); Term searchTerm = new Term("id", "6"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "wrong delete count"); reader.SetNorm(22, "content", (float) 2.0); reader.Close(); // make sure they "took": searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; Assert.AreEqual(33, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].doc); Assert.AreEqual("22", d.Get("id"), "wrong first document"); TestHits(hits, 33, searcher.GetIndexReader()); searcher.Close(); // optimize IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false); writer.Optimize(); writer.Close(); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; Assert.AreEqual(33, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].doc); Assert.AreEqual("22", d.Get("id"), "wrong first document"); TestHits(hits, 33, searcher.GetIndexReader()); searcher.Close(); dir.Close(); }
public virtual void TestKeepLastNDeletionPolicyWithCreates() { int N = 10; for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); writer.Close(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int j = 0; j < 17; j++) { AddDoc(writer); } // this is a commit when autoCommit=false: writer.Close(); IndexReader reader = IndexReader.Open(dir, policy); reader.DeleteDocument(3); reader.SetNorm(5, "content", 2.0F); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(16, hits.Length); // this is a commit when autoCommit=false: reader.Close(); searcher.Close(); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); // This will not commit: there are no changes // pending because we opened for "create": writer.Close(); } Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit); if (!autoCommit) { Assert.AreEqual(3 * (N + 1), policy.numOnCommit); } IndexSearcher searcher2 = new IndexSearcher(dir); ScoreDoc[] hits2 = searcher2.Search(query, null, 1000).scoreDocs; Assert.AreEqual(0, hits2.Length); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); int expectedCount = 0; for (int i = 0; i < N + 1; i++) { try { IndexReader reader = IndexReader.Open(dir); // Work backwards in commits on what the expected // count should be. Only check this in the // autoCommit false case: if (!autoCommit) { searcher2 = new IndexSearcher(reader); hits2 = searcher2.Search(query, null, 1000).scoreDocs; Assert.AreEqual(expectedCount, hits2.Length); searcher2.Close(); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } } reader.Close(); if (i == N) { Assert.Fail("should have failed on commits before last " + N); } } catch (System.IO.IOException e) { if (i != N) { throw e; } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Close(); } }
/* Open pre-lockless index, add docs, do a delete & * setNorm, and search */ public virtual void ChangeIndexWithAdds(System.String dirName, bool autoCommit) { System.String origDirName = dirName; dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.FileInfo(dirName)); // open writer IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false); // add 10 docs for (int i = 0; i < 10; i++) { AddDoc(writer, 35 + i); } // make sure writer sees right total -- writer seems not to know about deletes in .del? int expected; if (Compare(origDirName, "24") < 0) { expected = 45; } else { expected = 46; } Assert.AreEqual(expected, writer.DocCount(), "wrong doc count"); writer.Close(); // make sure searching sees right # hits IndexSearcher searcher = new IndexSearcher(dir); ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; Document d = searcher.Doc(hits[0].doc); Assert.AreEqual("21", d.Get("id"), "wrong first document"); TestHits(hits, 44, searcher.GetIndexReader()); searcher.Close(); // make sure we can do delete & setNorm against this // pre-lockless segment: IndexReader reader = IndexReader.Open(dir); Term searchTerm = new Term("id", "6"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "wrong delete count"); reader.SetNorm(22, "content", (float) 2.0); reader.Close(); // make sure they "took": searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; Assert.AreEqual(43, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].doc); Assert.AreEqual("22", d.Get("id"), "wrong first document"); TestHits(hits, 43, searcher.GetIndexReader()); searcher.Close(); // optimize writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false); writer.Optimize(); writer.Close(); searcher = new IndexSearcher(dir); hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; Assert.AreEqual(43, hits.Length, "wrong number of hits"); d = searcher.Doc(hits[0].doc); TestHits(hits, 43, searcher.GetIndexReader()); Assert.AreEqual("22", d.Get("id"), "wrong first document"); searcher.Close(); dir.Close(); }
/// <summary> Suggest similar words (restricted or not to a field of a user index)</summary> /// <param name="word">String the word you want a spell check done on /// </param> /// <param name="num_sug">int the number of suggested words /// </param> /// <param name="ir">the indexReader of the user index (can be null; see field param) /// </param> /// <param name="field">String the field of the user index: if field is not null, the suggested /// words are restricted to the words present in this field. /// </param> /// <param name="morePopular">boolean return only suggested words that are more frequent than the searched word /// (only in restricted mode: indexReader!=null and field!=null) /// </param> /// <throws> IOException </throws> /// <returns> String[] the sorted list of suggested words according to these 2 criteria: /// first criterion: the edit distance; second criterion (only in restricted mode): the popularity /// of the suggested words in the field of the user index /// </returns> public virtual System.String[] SuggestSimilar(System.String word, int num_sug, IndexReader ir, System.String field, bool morePopular) { float min = this.minScore; TRStringDistance sd = new TRStringDistance(word); int lengthWord = word.Length; int goalFreq = (morePopular && ir != null) ? ir.DocFreq(new Term(field, word)) : 0; if (!morePopular && goalFreq > 0) { return(new System.String[] { word }); // return the word if it exists in the index and we don't want a more popular word } BooleanQuery query = new BooleanQuery(); System.String[] grams; System.String key; for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++) { key = "gram" + ng; // form key grams = FormGrams(word, ng); // form word into ngrams (allow dups too) if (grams.Length == 0) { continue; // hmm } if (bStart > 0) { // should we boost prefixes? Add(query, "start" + ng, grams[0], bStart); // matches start of word } if (bEnd > 0) { // should we boost suffixes? Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word } for (int i = 0; i < grams.Length; i++) { Add(query, key, grams[i]); } } IndexSearcher searcher = new IndexSearcher(this.spellindex); Hits hits = searcher.Search(query); SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug); int stop = Math.Min(hits.Length(), 10 * num_sug); // go thru more than 'maxr' matches in case the distance filter triggers SuggestWord sugword = new SuggestWord(); for (int i = 0; i < stop; i++) { sugword.string_Renamed = hits.Doc(i).Get(F_WORD); // get orig word if (sugword.string_Renamed.Equals(word)) { continue; // don't suggest a word for itself, that would be silly } // edit distance, normalized with the min word length sugword.score = 1.0f - ((float)sd.GetDistance(sugword.string_Renamed) / System.Math.Min(sugword.string_Renamed.Length, lengthWord)); if (sugword.score < min) { continue; } if (ir != null) { // use the user index sugword.freq = ir.DocFreq(new Term(field, sugword.string_Renamed)); // freq in the index if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1) { // don't suggest a word that is not present in the field continue; } } sugqueue.Insert(sugword); if (sugqueue.Size() == num_sug) { // if queue is full, maintain the min score min = ((SuggestWord)sugqueue.Top()).score; } sugword = new SuggestWord(); } // convert to string array System.String[] list = new System.String[sugqueue.Size()]; for (int i = sugqueue.Size() - 1; i >= 0; i--) { list[i] = ((SuggestWord)sugqueue.Pop()).string_Renamed; } searcher.Close(); return(list); }
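A usage sketch for SuggestSimilar follows, relying only on the signature shown above; the spellChecker instance (assumed to be an instance of the enclosing spell-checker class, already built over an n-gram spell index), the user index path, and the "content" field are illustrative assumptions:

// Sketch: ask for up to 5 suggestions for a possibly misspelled word, restricted
// to terms that actually occur in the "content" field of the user index.
IndexReader userReader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/userindex")), true);
try
{
    System.String[] suggestions = spellChecker.SuggestSimilar("recieve", 5, userReader, "content", true);
    foreach (System.String suggestion in suggestions)
    {
        System.Console.Out.WriteLine(suggestion);
    }
}
finally
{
    userReader.Close();
}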
public virtual void TestSetBufferSize() { System.IO.DirectoryInfo indexDir = new System.IO.DirectoryInfo(System.IO.Path.Combine(AppSettings.Get("tempDir", ""), "testSetBufferSize")); MockFSDirectory dir = new MockFSDirectory(indexDir, NewRandom()); try { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.UseCompoundFile = false; for (int i = 0; i < 37; i++) { Document doc = new Document(); doc.Add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED)); doc.Add(new Field("id", "" + i, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } writer.Close(); dir.allIndexInputs.Clear(); IndexReader reader = IndexReader.Open(dir, false); Term aaa = new Term("content", "aaa"); Term bbb = new Term("content", "bbb"); Term ccc = new Term("content", "ccc"); Assert.AreEqual(37, reader.DocFreq(ccc)); reader.DeleteDocument(0); Assert.AreEqual(37, reader.DocFreq(aaa)); dir.tweakBufferSizes(); reader.DeleteDocument(4); Assert.AreEqual(reader.DocFreq(bbb), 37); dir.tweakBufferSizes(); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.Search(new TermQuery(bbb), null, 1000).ScoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(35, hits.Length); dir.tweakBufferSizes(); hits = searcher.Search(new TermQuery(new Term("id", "33")), null, 1000).ScoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(1, hits.Length); hits = searcher.Search(new TermQuery(aaa), null, 1000).ScoreDocs; dir.tweakBufferSizes(); Assert.AreEqual(35, hits.Length); searcher.Close(); reader.Close(); } finally { _TestUtil.RmDir(indexDir); } }
public string GetStudentsByYearIdAndTimesIdAndSchoolIdAndStudentName(string schoolYear, string times, string schoolId, string StudentName, string pIndex) { string result = string.Empty; int pageIndex = Int32.Parse(pIndex); ArrayList students = new ArrayList(); string pathOfIndexFile = Server.MapPath(System.Configuration.ConfigurationManager.AppSettings["StudentIndexing"].ToString()); if (Int32.Parse(schoolYear) >= 2000) { pathOfIndexFile += "\\" + schoolYear + "\\Index"; } string studentName = StudentName.Replace("\"", ""); studentName = "\"" + studentName + "\""; Lucene.Net.Search.IndexSearcher iSearcher = new Lucene.Net.Search.IndexSearcher(pathOfIndexFile); Lucene.Net.QueryParsers.QueryParser qYearParser = new Lucene.Net.QueryParsers.QueryParser("YearId", new Lucene.Net.Analysis.Standard.StandardAnalyzer()); Lucene.Net.Search.Query iYearQuery = qYearParser.Parse(schoolYear); Lucene.Net.QueryParsers.QueryParser qTestDayParser = new Lucene.Net.QueryParsers.QueryParser("TestDayId", new Lucene.Net.Analysis.Standard.StandardAnalyzer()); Lucene.Net.Search.Query iTestDayQuery = qTestDayParser.Parse(times); Lucene.Net.QueryParsers.QueryParser qStudentIdParser = new Lucene.Net.QueryParsers.QueryParser("StudentID", new Lucene.Net.Analysis.Standard.StandardAnalyzer()); Lucene.Net.Search.Query iStudentIdQuery = qStudentIdParser.Parse("1"); ////////////////////////////////////////////////////////////////////// Lucene.Net.Search.BooleanQuery bQuery = new Lucene.Net.Search.BooleanQuery(); bQuery.Add(iYearQuery, Lucene.Net.Search.BooleanClause.Occur.MUST); bQuery.Add(iTestDayQuery, Lucene.Net.Search.BooleanClause.Occur.MUST); if (StudentName != " " && StudentName != "") { Lucene.Net.QueryParsers.QueryParser qStudentParser = new Lucene.Net.QueryParsers.QueryParser("StudentName", new Lucene.Net.Analysis.Standard.StandardAnalyzer()); Lucene.Net.Search.Query iStudentQuery = qStudentParser.Parse(studentName); bQuery.Add(iStudentQuery, Lucene.Net.Search.BooleanClause.Occur.MUST); } Lucene.Net.Search.Hits iHits = iSearcher.Search(bQuery); using (System.Data.SqlClient.SqlConnection con = new System.Data.SqlClient.SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["PSCPortalConnectionString"].ConnectionString)) { con.Open(); //paging for (int i = pageIndex * 20 - 20; i < pageIndex * 20 && i < iHits.Length(); i++) { string yId = iHits.Doc(i).Get("YearId"); string stuId = iHits.Doc(i).Get("StudentID"); string testDayId = iHits.Doc(i).Get("TestDayId"); System.Data.SqlClient.SqlCommand com = new System.Data.SqlClient.SqlCommand(); com.Connection = con; com.CommandType = CommandType.Text; com.CommandText = @" select StudentTHPT.TotalMark,[RoundTotalMark],StudentTHPT.YearId,StudentTHPT.TestDayId,StudentId,FirstName+' '+MiddleName+' '+LastName as FullName,Sex,Birthday,MarkEncourage,Section.Name from StudentTHPT inner join Section on StudentTHPT.SectionId = Section.SectionId where StudentTHPT.YearId=@yearId and StudentTHPT.TestDayId=@timeId and StudentId = @studentId Order by LastName " ; com.Parameters.Add("@yearId", SqlDbType.NChar); com.Parameters["@yearId"].Value = yId; com.Parameters.Add("@timeId", SqlDbType.NVarChar); com.Parameters["@timeId"].Value = testDayId; com.Parameters.Add("@studentId", SqlDbType.NVarChar); com.Parameters["@studentId"].Value = stuId; using (System.Data.SqlClient.SqlDataReader reader = com.ExecuteReader()) { while (reader.Read()) { string fullName = reader["FullName"].ToString(); string birthday = reader["Birthday"].ToString().Trim(); string studentId = 
reader["StudentId"].ToString(); string total = iHits.Length().ToString(); // string markEncourage = reader["MarkEncourage"].ToString(); string totalMark = reader["TotalMark"].ToString(); string section = reader["Name"].ToString(); string roundTotalMark = reader["RoundTotalMark"].ToString(); Student s = new Student { StudentId = studentId, FullName = fullName, Birthday = birthday, Total = total, Section = section, TotalMark = totalMark, RoundTotalMark = roundTotalMark }; students.Add(s); } } } } iSearcher.Close(); System.Web.Script.Serialization.JavaScriptSerializer serialize = new System.Web.Script.Serialization.JavaScriptSerializer(); result = serialize.Serialize(students); return(result); }
public virtual void TestBasic() { Directory dir = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMergeFactor(2); writer.SetMaxBufferedDocs(2); writer.SetSimilarity(new SimpleSimilarity()); System.Text.StringBuilder sb = new System.Text.StringBuilder(265); System.String term = "term"; for (int i = 0; i < 30; i++) { Document d = new Document(); sb.Append(term).Append(" "); System.String content = sb.ToString(); Field noTf = new Field("noTf", content + (i % 2 == 0?"":" notf"), Field.Store.NO, Field.Index.ANALYZED); noTf.SetOmitTermFreqAndPositions(true); d.Add(noTf); Field tf = new Field("tf", content + (i % 2 == 0?" tf":""), Field.Store.NO, Field.Index.ANALYZED); d.Add(tf); writer.AddDocument(d); //System.out.println(d); } writer.Optimize(); // flush writer.Close(); _TestUtil.CheckIndex(dir); /* * Verify the index */ Searcher searcher = new IndexSearcher(dir); searcher.SetSimilarity(new SimpleSimilarity()); Term a = new Term("noTf", term); Term b = new Term("tf", term); Term c = new Term("noTf", "notf"); Term d2 = new Term("tf", "tf"); TermQuery q1 = new TermQuery(a); TermQuery q2 = new TermQuery(b); TermQuery q3 = new TermQuery(c); TermQuery q4 = new TermQuery(d2); searcher.Search(q1, new AnonymousClassCountingHitCollector(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q2, new AnonymousClassCountingHitCollector1(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q3, new AnonymousClassCountingHitCollector2(this)); //System.out.println(CountingHitCollector.getCount()); searcher.Search(q4, new AnonymousClassCountingHitCollector3(this)); //System.out.println(CountingHitCollector.getCount()); BooleanQuery bq = new BooleanQuery(); bq.Add(q1, Occur.MUST); bq.Add(q4, Occur.MUST); searcher.Search(bq, new AnonymousClassCountingHitCollector4(this)); Assert.IsTrue(15 == CountingHitCollector.GetCount()); searcher.Close(); dir.Close(); }
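The test above exercises Field.SetOmitTermFreqAndPositions: a field indexed this way records only which documents contain each term, so term frequency is effectively ignored in scoring and positional (phrase) queries cannot run against it. A minimal sketch of creating such a field, with an illustrative value:

// Sketch: index a field without term frequencies or positions, trading
// phrase/positional query support for a smaller index.
Field noTfField = new Field("noTf", "some indexed text", Field.Store.NO, Field.Index.ANALYZED);
noTfField.SetOmitTermFreqAndPositions(true);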
public virtual void TestFieldSetValue() { Field field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED); Document doc = new Document(); doc.Add(field); doc.Add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED)); RAMDirectory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.AddDocument(doc); field.SetValue("id2"); writer.AddDocument(doc); field.SetValue("id3"); writer.AddDocument(doc); writer.Close(); Searcher searcher = new IndexSearcher(dir); Query query = new TermQuery(new Term("keyword", "test")); // ensure that queries return expected results without DateFilter first ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(3, hits.Length); int result = 0; for (int i = 0; i < 3; i++) { Document doc2 = searcher.Doc(hits[i].doc); Field f = doc2.GetField("id"); if (f.StringValue().Equals("id1")) result |= 1; else if (f.StringValue().Equals("id2")) result |= 2; else if (f.StringValue().Equals("id3")) result |= 4; else Assert.Fail("unexpected id field"); } searcher.Close(); dir.Close(); Assert.AreEqual(7, result, "did not see all IDs"); }
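The test above also shows that a single Document/Field pair can be reused across AddDocument calls by changing the field value in between. A sketch of the same pattern outside the test, assuming an already-open IndexWriter named writer:

// Sketch: reuse one Document and Field instance for many adds; only the value changes.
Field idField = new Field("id", "id0", Field.Store.YES, Field.Index.NOT_ANALYZED);
Document reusableDoc = new Document();
reusableDoc.Add(idField);
for (int i = 0; i < 1000; i++)
{
    idField.SetValue("id" + i);
    writer.AddDocument(reusableDoc);
}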
private void Search() { try { SearchProgressBar.Maximum = 11; ProgressLabel.Text = "Progress: Initialize Search ..."; Searcher searcher = new IndexSearcher(@"Canon\index"); Analyzer analyzer = new StandardAnalyzer(); ArrayList resultList = new ArrayList(); String line = QueryInputBox.Text; if (string.IsNullOrEmpty(line)) return; ProgressLabel.Text = "Progress: Parsing Query ..."; Query query = QueryParser.Parse(line, "contents", analyzer); //int[] ix = qtm.GetTermFrequencies(); Hits hits = searcher.Search(query); SearchProgressBar.Increment(1); ProgressLabel.Text = "Progress: Searched. Analyzing results ..."; //QueryHighlightExtractor highlighter = new QueryHighlightExtractor(query, new WhitespaceAnalyzer(), "<B>", "</B>"); Highlighter highlighter = new Highlighter(new QueryScorer(query)); highlighter.SetTextFragmenter(new SimpleFragmenter(80)); int maxNumFragmentsRequired = 1; //int HITS_PER_PAGE = 10; int numHits = System.Math.Min(10, hits.Length()); for (int i = 0; i < numHits; i++) { SearchProgressBar.Increment(1); ProgressLabel.Text = "Progress: Analyzing hit " + (i+1).ToString(); // get the document from index Document doc = hits.Doc(i); //SegmentReader ir = new SegmentReader(); //Lucene.Net.Index.TermFreqVector tfv = //tfv.GetTermFrequencies string score = hits.Score(i).ToString(); //Box += "Hit no. " + i + " scored: " + score + " occ: " + /*highlighter.tokenFrequency */ " best fragment: \n"; ResultSet a = new ResultSet(); a.BookName = doc.Get("path").Replace(@"c:\cscd\temp\",""); a.Score = hits.Score(i); a.numberOfHits = hits.Length(); // get the document filename // we can't get the text from the index // because we didn't store it there // so get it from the archive string path = doc.Get("path"); string name = GetInternalName(path); PaliReaderUtils.AalekhDecoder.UnzipFromZipLibrary(name); path = System.IO.Directory.GetCurrentDirectory() + @"\Work\" + name + ".htm"; string plainText = ""; // load text from the zip archive temporarily using (StreamReader sr = new StreamReader(path, System.Text.Encoding.Default)) { plainText = parseHtml(sr.ReadToEnd()); } //-------------------------------Highlighter Code 1.4 TokenStream tokenStream = analyzer.TokenStream(new StringReader(plainText)); a.textFragment = highlighter.GetBestFragments(tokenStream, plainText, maxNumFragmentsRequired, "..."); if(File.Exists(path)) File.Delete(path); //------------------------------- resultList.Add(a); } SearchProgressBar.Value = 0; searcher.Close(); ssr = new ShowSearchResults(/*Box*/resultList); //this.Hide(); ssr.OpenBookEvent += new ShowSearchResults.OpenBook(this.TriggerOpenBook); ssr.Closing += new System.ComponentModel.CancelEventHandler(this.Closing_ResultWindow); this.Hide(); ssr.ShowDialog(); } catch (System.Exception e) { MessageBox.Show(" caught a " + e.GetType() + "\n with message: " + e.Message); } }
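The highlighting portion of the search above can be isolated into a small helper. This sketch uses the same contrib Highlighter calls that appear in the snippet (QueryScorer, SimpleFragmenter, GetBestFragments); the two-argument Analyzer.TokenStream overload and the "contents" field name are assumptions carried over from the query-parsing step:

// Sketch: return one highlighted fragment (about 80 chars) of 'plainText' for 'query'.
private static string HighlightFragment(Query query, Analyzer analyzer, string plainText)
{
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    highlighter.SetTextFragmenter(new SimpleFragmenter(80));
    TokenStream tokenStream = analyzer.TokenStream("contents", new System.IO.StringReader(plainText));
    return highlighter.GetBestFragments(tokenStream, plainText, 1, "...");
}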
public override void TearDown() { base.TearDown(); searcher.Close(); }
// Apply buffered delete terms, queries and docIDs to the // provided reader private bool ApplyDeletes(IndexReader reader, int docIDStart) { lock (this) { int docEnd = docIDStart + reader.MaxDoc(); bool any = false; System.Diagnostics.Debug.Assert(CheckDeleteTerm(null)); // Delete by term TermDocs docs = reader.TermDocs(); try { foreach(KeyValuePair<Term,BufferedDeletes.Num> entry in deletesFlushed.terms) { Term term = entry.Key; // LUCENE-2086: we should be iterating a TreeMap, // here, so terms better be in order: System.Diagnostics.Debug.Assert(CheckDeleteTerm(term)); docs.Seek(term); int limit = entry.Value.GetNum(); while (docs.Next()) { int docID = docs.Doc(); if (docIDStart + docID >= limit) break; reader.DeleteDocument(docID); any = true; } } } finally { docs.Close(); } // Delete by docID foreach(int docID in deletesFlushed.docIDs) { if (docID >= docIDStart && docID < docEnd) { reader.DeleteDocument(docID - docIDStart); any = true; } } // Delete by query IndexSearcher searcher = new IndexSearcher(reader); foreach(KeyValuePair<Query,int> entry in new Support.Dictionary<Query,int>(deletesFlushed.queries)) { Query query = entry.Key; int limit = entry.Value; Weight weight = query.Weight(searcher); Scorer scorer = weight.Scorer(reader, true, false); if (scorer != null) { while (true) { int doc = scorer.NextDoc(); if (((long) docIDStart) + doc >= limit) break; reader.DeleteDocument(doc); any = true; } } } searcher.Close(); return any; } }
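The delete-by-query branch above lives inside the indexing chain; application code normally gets the same effect through IndexWriter, which buffers the deletes and applies them on flush or commit. A sketch, assuming the IndexWriter.DeleteDocuments(Query) overload available in this Lucene.Net generation and an existing Directory named dir:

// Sketch: delete every document matching a query at the application level.
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
try
{
    writer.DeleteDocuments(new TermQuery(new Term("id", "12")));
    writer.Commit();
}
finally
{
    writer.Close();
}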